library(BSgenome.Hsapiens.UCSC.hg19)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
## ----------------------------
## VCF object as query
## ----------------------------
## Read variants from a VCF file
fl <- system.file("extdata", "chr22.vcf.gz", package="VariantAnnotation")
vcf <- readVcf(fl, "hg19")
## Rename seqlevels in the VCF object to match those in the TxDb.
vcf <- renameSeqlevels(vcf, "chr22")
## Confirm common seqlevels
intersect(seqlevels(vcf), seqlevels(txdb))
## When 'query' is a VCF object the varAllele argument is missing.
coding1 <- predictCoding(vcf, txdb, Hsapiens)
head(coding1, 3)
## Exon-centric or cDNA locations:
exonsbytx <- exonsBy(txdb, "tx")
cDNA <- mapToTranscripts(coding1, exonsbytx)
mcols(cDNA)$TXID <- names(exonsbytx)[mcols(cDNA)$transcriptsHits]
cDNA <- cDNA[mcols(cDNA)$TXID == mcols(coding1)$TXID[mcols(cDNA)$xHits]]
## Make sure cDNA is parallel to coding1
stopifnot(identical(mcols(cDNA)$xHits, seq_along(coding1)))
coding1$cDNA <- ranges(cDNA)
## ----------------------------
## GRanges object as query
## ----------------------------
## A GRanges can also be used as the 'query'. The seqlevels in the VCF
## were adjusted in previous example so the GRanges extracted with
## has the correct seqlevels.
rd <- rowRanges(vcf)
## The GRanges must be expanded to have one row per alternate allele.
## Variants 1, 2 and 10 have two alternate alleles.
altallele <- alt(vcf)
eltROWS <- elementNROWS(altallele)
rd_exp <- rep(rd, eltROWS)
## Call predictCoding() with the expanded GRanges and the unlisted
## alternate allele as the 'varAllele'.
coding2 <- predictCoding(rd_exp, txdb, Hsapiens, unlist(altallele))
Run the code above in your browser using DataLab