gal1 <- GAlignments(
seqnames=Rle(factor(c("chr1", "chr2", "chr1", "chr3")),
c(1, 3, 2, 4)),
pos=1:10, cigar=paste0(10:1, "M"),
strand=Rle(strand(c("-", "+", "*", "+", "-")), c(1, 2, 2, 3, 2)),
names=head(letters, 10), score=1:10)
gal2 <- GAlignments(
seqnames=Rle(factor(c("chr2", "chr4")), c(3, 4)), pos=1:7,
cigar=c("5M", "3M2N3M2N3M", "5M", "10M", "5M1N4M", "8M2N1M", "5M"),
strand=Rle(strand(c("-", "+")), c(4, 3)),
names=tail(letters, 7), score=1:7)
galist <- GAlignmentsList(noGaps=gal1, Gaps=gal2)
## ---------------------------------------------------------------------
## A. BASIC MANIPULATION
## ---------------------------------------------------------------------
length(galist)
names(galist)
seqnames(galist)
strand(galist)
head(cigar(galist))
head(qwidth(galist))
head(start(galist))
head(end(galist))
head(width(galist))
head(njunc(galist))
seqlevels(galist)
## Rename the reference sequences:
seqlevels(galist) <- sub("chr", "seq", seqlevels(galist))
seqlevels(galist)
grglist(galist) # a GRangesList object
rglist(galist) # an IRangesList object
## ---------------------------------------------------------------------
## B. SUBSETTING
## ---------------------------------------------------------------------
galist[strand(galist) == "-"]
has_junctions <- sapply(galist,
function(x) any(grepl("N", cigar(x), fixed=TRUE)))
galist[has_junctions]
## Different ways to subset:
galist[2] # a GAlignments object of length 1
galist[[2]] # a GAlignments object of length 1
grglist(galist[2]) # a GRangesList object of length 1
rglist(galist[2]) # a NormalIRangesList object of length 1
## ---------------------------------------------------------------------
## C. mcols()/elementMetadata()
## ---------------------------------------------------------------------
## Metadata can be defined on the individual GAlignment elements
## and the overall GAlignmentsList object. By default, 'level=between'
## extracts the GALignmentsList metadata. Using 'level=within'
## will extract the metadata on the individual GAlignments objects.
mcols(galist) ## no metadata on the GAlignmentsList object
mcols(galist, level="within")
## ---------------------------------------------------------------------
## D. readGAlignmentsList()
## ---------------------------------------------------------------------
library(pasillaBamSubset)
## 'file' as character.
fl <- untreated3_chr4()
galist1 <- readGAlignmentsList(fl)
galist1[1:3]
length(galist1)
table(elementNROWS(galist1))
## When 'file' is a BamFile, 'asMates' must be TRUE. If FALSE,
## the data are treated as single-end and each list element of the
## GAlignmentsList will be of length 1. For single-end data
## use readGAlignments() instead of readGAlignmentsList().
bf <- BamFile(fl, yieldSize=3, asMates=TRUE)
readGAlignmentsList(bf)
## Use a 'param' to fine tune the results.
param <- ScanBamParam(flag=scanBamFlag(isProperPair=TRUE))
galist2 <- readGAlignmentsList(fl, param=param)
length(galist2)
## ---------------------------------------------------------------------
## E. COERCION
## ---------------------------------------------------------------------
## The granges() and grlist() coercions support 'ignore.strand' to
## allow ranges from different strands to be combined. In this example
## paired-end reads aligned to opposite strands were read into a
## GAlignmentsList. If the desired operation is to combine these ranges,
## regardless of junctions or the space between pairs, 'ignore.strand'
## must be TRUE.
granges(galist[1])
granges(galist[1], ignore.strand=TRUE)
## grglist()
galist <- GAlignmentsList(noGaps=gal1, Gaps=gal2)
grglist(galist)
grglist(galist, ignore.strand=TRUE)
Run the code above in your browser using DataLab