## See vignette.
if (require(charmData) & require(BSgenome.Hsapiens.UCSC.hg18)) {
dataDir <- system.file("data", package="charmData")
phenodataDir <- system.file("extdata", package="charmData")
pd <- read.delim(file.path(phenodataDir, "phenodata.txt"))
res <- validatePd(pd)
## Read in raw data:
rawData <- readCharm(files=pd$filename, path=dataDir, sampleKey=pd, sampleNames=pd$sampleID)
## Check quality of arrays:
#qual <- qcReport(rawData, file="qcReport.pdf")
## Assess individual probe qualities:
pmq = pmQuality(rawData)
rmpmq = rowMeans(pmq)
okqc = which(rmpmq>75)
## Identify control probes as the probes at positions surrounded by a CpG-free 600bp window:
ctrlIdx <- getControlIndex(rawData, subject=Hsapiens, noCpGWindow=600)
## Check that these control probes do indeed have lower intensities than the non-control probes (after spatial and background corrections, but no normalization, since normalization uses the control probes):
#controlQC(rawData=rawData, controlIndex=ctrlIdx, IDcol="sampleID", expcol="tissue", ylimits=c(-6,8), outfile="boxplots_check.pdf", height=7, width=9)
chr = pmChr(rawData)
pns = probeNames(rawData)
pos = pmPosition(rawData)
seq = pmSequence(rawData)
pd = pData(rawData)
## Estimate percent methylation:
p <- methp(rawData, controlIndex=ctrlIdx, plotDensityGroups=pd$tissue)
## unsupervised clustering of samples:
#cmdsplot(labcols=c("red","black","blue"), expcol="tissue", rawData=rawData, p=p, okqc=okqc, noXorY=TRUE, outfile="cmds_topN.pdf", topN=c(100000,1000))
## Do not look for DMRs among control probes or probes with average probe quality score less than or equal to 75 for example:
Index=setdiff(which(rmpmq>75),ctrlIdx)
Index = Index[order(chr[Index], pos[Index])]
p = p[Index,]
seq = seq[Index]
chr = chr[Index]
pos = pos[Index]
pns = pns[Index]
pns=clusterMaker(chr,pos)
## Identify DMR candidates between colon and liver (in this example not adjusting for any other covariates (besides batch)):
mod0 = matrix(1,nrow=nrow(pd),ncol=1)
mod = model.matrix(~1 +factor(pd$tissue,levels=c("liver","colon","spleen")))
thedmrs = dmrFind(p=p, mod=mod, mod0=mod0, coeff=2, pns=pns, chr=chr, pos=pos)
## Obtain FDR q-values for each DMR candidate. In practice, numiter should be set much higher.
withq = qval(p=p, dmr=thedmrs, numiter=2, verbose=FALSE, mc=1)
#### Plotting not run:
## Plot DMR candidates 1,2, and 4, for example. First have to load a table of CpG islands.
#cpg.cur = read.delim("http://rafalab.jhsph.edu/CGI/model-based-cpg-islands-hg18.txt", as.is=TRUE)
#plotDMRs(dmrs=thedmrs, Genome=Hsapiens, cpg.islands=cpg.cur, exposure=pd$tissue, outfile="./colon-liver.pdf", which_plot=c(1,2,4), which_points=c("colon","liver"), smoo="loess", ADD=3000, cols=c("black","red","blue"))
## Plot DMR candidates, and in the 3rd panel plot the difference in average green channel:
dat0 = spatialAdjust(rawData, copy=FALSE)
dat0 = bgAdjust(dat0, copy=FALSE)
G = pm(dat0)[,,1] #from oligo
G = G[Index,]
#plotDMRs(dmrs=thedmrs, Genome=Hsapiens, cpg.islands=cpg.cur, exposure=pd$tissue, outfile="./colon-liver2.pdf", which_plot=c(1), which_points=c("colon","liver"), smoo="loess", ADD=3000, cols=c("black","red","blue"), panel3="G", G=G, seq=seq)
## Example if covariate of interest is continuous:
pd$x = c(1,2,3,4,5,6)
mod0 = matrix(1,nrow=nrow(pd),ncol=1)
mod = model.matrix(~1 +pd$x)
coeff = 2
thedmrs2 = dmrFind(p=p, mod=mod, mod0=mod0, coeff=coeff, pns=pns, chr=chr, pos=pos)
## If covariate of interest is continuous, you can still plot it like it is categorical by categorizing it for plotting purposes:
groups = as.numeric(cut(mod[,coeff],c(-Inf,2,4,Inf))) #You can change these cutpoints.
pd$groups = c("low","medium","high")[groups]
#plotDMRs(dmrs=thedmrs2, Genome=Hsapiens, cpg.islands=cpg.cur, exposure=pd$groups, outfile="./test.pdf", which_plot=c(1), smoo="loess", ADD=3000, cols=c("black","red","blue"))
## Otherwise, if covariate of interest is continuous, plot will show correlation with covariate:
#plotDMRs(dmrs=thedmrs2, Genome=Hsapiens, cpg.islands=cpg.cur, exposure=pd$x, outfile="./x.pdf", which_plot=c(1), smoo="loess", ADD=3000, cols=c("black","red","blue"))
## Plot arbitrary regions:
mytable = thedmrs$dmrs[,c("chr","start","end")]
mytable[2,] = c("chr1",1,1000) #not on array
mytable$start = as.numeric(mytable$start)
mytable$end = as.numeric(mytable$end)
#plotRegions(thetable=mytable[1:5,], cleanp=thedmrs$cleanp, chr=chr, pos=pos, Genome=Hsapiens, cpg.islands=cpg.cur, outfile="myregions.pdf", exposure=pd$tissue, exposure.continuous=FALSE)
}
Run the code above in your browser using DataLab