library(GenomicRanges)
##
## Simulate 2 loci at which CNVs are common
##
set.seed(100)
starts <- rpois(1000, 100) + 10e6L
ends <- rpois(1000, 100) + 10.1e6L
cnv1 <- GRanges("chr1", IRanges(starts, ends))
cnv1$id <- paste0("sample", seq_along(cnv1))
starts <- rpois(500, 1000) + 101e6L
ends <- rpois(500, 1000) + 101.4e6L
cnv2 <- GRanges("chr5", IRanges(starts, ends))
cnv2$id <- paste0("sample", seq_along(cnv2))
##
## Simulate a few other CNVs that are less common because they are
## very large, or because they occur in regions that in which copy
## number alerations are not common
##
cnv3 <- GRanges("chr1", IRanges(9e6L, 15e6L), id="sample1400")
starts <- seq(5e6L, 200e6L, 10e6L)
ends <- starts + rpois(length(starts), 25e3L)
cnv4 <- GRanges("chr1", IRanges(starts, ends),
id=paste0("sample", sample(1000:1500, length(starts))))
all_cnvs <- suppressWarnings(c(cnv1, cnv2, cnv3, cnv4))
grl <- split(all_cnvs, all_cnvs$id)
cnps <- consensusCNP(grl)
##
## 2nd CNP is filtered because of its size
##
truth <- GRanges("chr1", IRanges(10000100L, 10100100L))
seqinfo(truth) <- seqinfo(grl)
identical(cnps, truth)
##
## Both CNVs identified
##
cnps <- consensusCNP(grl, max.width=500e3)
truth <- GRanges(c("chr1", "chr5"),
IRanges(c(10000100L, 101000999L),
c(10100100L, 101400999L)))
seqlevels(truth, force=TRUE) <- seqlevels(grl)
seqinfo(truth) <- seqinfo(grl)
identical(cnps, truth)
Run the code above in your browser using DataLab