## Input
## - A BAM file:
bamfile <- BamFile(system.file("extdata", "ex1.bam", package="Rsamtools"))
seqinfo(bamfile) # to see the seqlevels and seqlengths
stackStringsFromBam(bamfile, param="seq1:1-21") # a quick look at
# the reads
## - A GPos object containing Genomic Positions Of Interest:
my_GPOI <- GPos(c("seq1:1-5", "seq1:21-21", "seq1:1575-1575",
"seq2:1513-1514"))
## Some preliminary massage on 'my_GPOI'
seqinfo(my_GPOI) <- merge(seqinfo(my_GPOI), seqinfo(bamfile))
seqlevels(my_GPOI) <- seqlevelsInUse(my_GPOI)
## Load the BAM file in a GAlignments object. Note that we load only
## the reads aligned to the sequences in 'seqlevels(my_GPOI)'. Also,
## in order to be consistent with applyPileups() and SAMtools (m)pileup,
## we filter out the following BAM records:
## - secondary alignments (flag bit 0x100);
## - reads not passing quality controls (flag bit 0x200);
## - PCR or optical duplicates (flag bit 0x400).
## See ?ScanBamParam and the SAM Spec for more information.
which <- as(seqinfo(my_GPOI), "GRanges")
flag <- scanBamFlag(isSecondaryAlignment=FALSE,
isNotPassingQualityControls=FALSE,
isDuplicate=FALSE)
what <- c("seq", "qual")
param <- ScanBamParam(flag=flag, what=c("seq", "qual"), which=which)
gal <- readGAlignments(bamfile, param=param)
seqlevels(gal) <- seqlevels(my_GPOI)
## Extract the read sequences (a.k.a. query sequences) and quality
## strings. Both are reported with respect to the + strand.
qseq <- mcols(gal)$seq
qual <- mcols(gal)$qual
nucl_piles <- pileLettersAt(qseq, seqnames(gal), start(gal), cigar(gal),
my_GPOI)
qual_piles <- pileLettersAt(qual, seqnames(gal), start(gal), cigar(gal),
my_GPOI)
mcols(my_GPOI)$nucl_piles <- nucl_piles
mcols(my_GPOI)$qual_piles <- qual_piles
my_GPOI
## Finally, to summarize A/C/G/T frequencies at each position:
alphabetFrequency(nucl_piles, baseOnly=TRUE)
## Note that the pileup() function defined in the Rsamtools package
## can be used to obtain a similar result:
scanbam_param <- ScanBamParam(flag=flag, which=my_GPOI)
pileup_param <- PileupParam(max_depth=5000,
min_base_quality=0,
distinguish_strands=FALSE)
pileup(bamfile, scanBamParam=scanbam_param, pileupParam=pileup_param)
Run the code above in your browser using DataLab