## Not run:
# if (require(Rsamtools) && require(RNAseqData.HNRNPC.bam.chr14) &&
# require(GenomicAlignments) && require(ShortRead)) {
#
# ## ----------------------------------------------------------------------
# ## Iterate through a BAM file
# ## ----------------------------------------------------------------------
#
# ## Select a single file and set 'yieldSize' in the BamFile object.
# fl <- RNAseqData.HNRNPC.bam.chr14_BAMFILES[[1]]
# bf <- BamFile(fl, yieldSize = 300000)
#
# ## bamIterator() is initialized with a BAM file and returns a function.
# ## The returned function takes no arguments and iterates through the
# ## file, returning one chunk of up to 'yieldSize' records per call.
# bamIterator <- function(bf) {
# done <- FALSE
# if (!isOpen( bf))
# open(bf)
#
# function() {
# if (done)
# return(NULL)
# yld <- readGAlignments(bf)
# if (length(yld) == 0L) {
# close(bf)
# done <<- TRUE
# NULL
# } else yld
# }
# }
#
# ## counter() is the function applied to each chunk; it counts the reads
# ## overlapping each region of interest in 'roi'.
# roi <- GRanges("chr14", IRanges(seq(19e6, 107e6, by = 10e6), width = 10e6))
# counter <- function(reads, roi, ...) {
# countOverlaps(query = roi, subject = reads)
# }
#
# ## Initialize the iterator.
# ITER <- bamIterator(bf)
#
# ## The number of chunks returned by ITER() determines the result length.
# bpparam <- MulticoreParam(workers = 3)
# bpiterate(ITER, counter, roi = roi, BPPARAM = bpparam)
#
# ## Re-initialize the iterator and combine on the fly with REDUCE:
# ITER <- bamIterator(bf)
# bpparam <- MulticoreParam(workers = 3)
# bpiterate(ITER, counter, REDUCE = sum, roi = roi, BPPARAM = bpparam)
#
# ## ----------------------------------------------------------------------
# ## Iterate through a FASTQ file
# ## ----------------------------------------------------------------------
#
# ## Set data chunk size with 'n' in the FastqStreamer object.
# sp <- SolexaPath(system.file('extdata', package = 'ShortRead'))
# fl <- file.path(analysisPath(sp), "s_1_sequence.txt")
#
# ## Create an iterator that returns data chunks the size of 'n'.
# fastqIterator <- function(fqs) {
# done <- FALSE
# if (!isOpen(fqs))
# open(fqs)
#
# function() {
# if (done)
# return(NULL)
# yld <- yield(fqs)
# if (length(yld) == 0L) {
# close(fqs)
# done <<- TRUE
# NULL
# } else yld
# }
# }
#
# ## The process function summarizes the number of times each sequence occurs.
# summary <- function(reads, ...) {
# ShortRead::tables(reads, n = 0)$distribution
# }
#
# ## Create a param.
# bpparam <- SnowParam(workers = 2)
#
# ## Initialize the streamer and iterator.
# fqs <- FastqStreamer(fl, n = 100)
# ITER <- fastqIterator(fqs)
# bpiterate(ITER, summary, BPPARAM = bpparam)
#
# ## Results from the workers are combined on the fly when REDUCE is used.
# ## Collapsing the data in this way can substantially reduce memory
# ## requirements.
# fqs <- FastqStreamer(fl, n = 100)
# ITER <- fastqIterator(fqs)
# bpiterate(ITER, summary, REDUCE = merge, all = TRUE, BPPARAM = bpparam)
#
# }
# ## End(Not run)
## Run the code above in your browser using DataLab