## ---------------------------------------------------------------------
## 1. COMPUTE TRANSCRIPTOME COVERAGE OF A SET OF ALIGNED READS
## ---------------------------------------------------------------------
## Load the aligned reads:
library(pasillaBamSubset)
library(GenomicAlignments)
reads <- readGAlignments(untreated1_chr4())
## Load the transcripts:
library(TxDb.Dmelanogaster.UCSC.dm3.ensGene)
txdb <- TxDb.Dmelanogaster.UCSC.dm3.ensGene
transcripts <- exonsBy(txdb, by="tx", use.names=TRUE)
## Compute the transcript coverage with coverageByTranscript():
tx_cvg <- coverageByTranscript(reads, transcripts, ignore.strand=TRUE)
tx_cvg
## A sanity check:
stopifnot(identical(elementNROWS(tx_cvg), sum(width(transcripts))))
## We can also use pcoverageByTranscript() to compute 'tx_cvg'.
## For this we first create a GAlignmentsList object "parallel" to
## 'transcripts' where the i-th list element contains the aligned reads
## that overlap with the i-th transcript:
hits <- findOverlaps(reads, transcripts, ignore.strand=TRUE)
tx2reads <- setNames(as(t(hits), "List"), names(transcripts))
reads_by_tx <- extractList(reads, tx2reads) # GAlignmentsList object
reads_by_tx
## Call pcoverageByTranscript():
tx_cvg2 <- pcoverageByTranscript(reads_by_tx, transcripts,
ignore.strand=TRUE)
stopifnot(identical(tx_cvg, tx_cvg2))
## A more meaningful coverage is obtained by counting for each
## transcript only the reads that are *compatible* with its splicing:
compat_hits <- findCompatibleOverlaps(reads, transcripts)
tx2reads <- setNames(as(t(compat_hits), "List"), names(transcripts))
compat_reads_by_tx <- extractList(reads, tx2reads)
tx_compat_cvg <- pcoverageByTranscript(compat_reads_by_tx,
transcripts,
ignore.strand=TRUE)
## A sanity check:
stopifnot(all(all(tx_compat_cvg <= tx_cvg)))
## ---------------------------------------------------------------------
## 2. COMPUTE CDS COVERAGE OF A SET OF ALIGNED READS
## ---------------------------------------------------------------------
## coverageByTranscript() can also be used to compute CDS coverage:
cds <- cdsBy(txdb, by="tx", use.names=TRUE)
cds_cvg <- coverageByTranscript(reads, cds, ignore.strand=TRUE)
cds_cvg
## A sanity check:
stopifnot(identical(elementNROWS(cds_cvg), sum(width(cds))))
## ---------------------------------------------------------------------
## 3. ALTERNATIVELY, THE CDS COVERAGE CAN BE OBTAINED FROM THE
## TRANSCRIPT COVERAGE BY TRIMMING THE 5' AND 3' UTRS
## ---------------------------------------------------------------------
tx_lens <- transcriptLengths(txdb, with.utr5_len=TRUE, with.utr3_len=TRUE)
stopifnot(identical(tx_lens$tx_name, names(tx_cvg))) # sanity
## Keep the rows in 'tx_lens' that correspond to a list element in
## 'cds_cvg' and put them in the same order as in 'cds_cvg':
m <- match(names(cds_cvg), names(tx_cvg))
tx_lens <- tx_lens[m, ]
utr5_width <- tx_lens$utr5_len
utr3_width <- tx_lens$utr3_len
trimListElements <- function(x, ltrim=0, rtrim=0)
{
x_eltNROWS <- elementNROWS(x)
n1 <- pmax(x_eltNROWS - rtrim, 0)
n2 <- pmax(n1 - ltrim, 0)
ptail(phead(x, n=n1), n=n2)
}
cds_cvg2 <- trimListElements(tx_cvg[m], utr5_width, utr3_width)
## A sanity check:
stopifnot(identical(cds_cvg2, cds_cvg))
Run the code above in your browser using DataLab