# Fix the random number generator state so the simulated data are reproducible.
set.seed(1)
# Number of complex (mixed) tissue samples, e.g. arrays.
m <- 10
# True count data (e.g. pure cells in each mixed sample): one Poisson-distributed
# column per pure population, with a different rate for each population.
datTrueCounts <- cbind(
  TrueCount1 = rpois(m, lambda = 16),
  TrueCount2 = rpois(m, lambda = 8),
  TrueCount3 = rpois(m, lambda = 4),
  TrueCount4 = rpois(m, lambda = 2)
)
no.pure <- ncol(datTrueCounts)
# Transform the counts of every sample (row) into proportions that sum to 1.
divideBySum <- function(x) {
  t(x) / sum(x)
}
datProportions <- t(apply(datTrueCounts, 1, divideBySum))
colnames(datProportions) <- paste("TrueProp", seq_len(no.pure), sep = ".")
# Number of genes that are highly expressed in each pure population.
no.genesPerPure <- rep(5, no.pure)
no.genes <- sum(no.genesPerPure)
# For every gene, the index of the pure population in which it is highly expressed.
GeneIndicator <- rep(seq_len(no.pure), no.genesPerPure)
# True mean values of the genes in the pure populations (genes in rows,
# populations in columns). In the end we hope to estimate them from the
# mixed samples.
datTrueMeans0 <- matrix(
  rnorm(no.genes * no.pure, sd = 0.3),
  nrow = no.genes,
  ncol = no.pure
)
# Raise the mean of each gene by 1 in the population it is assigned to.
for (i in seq_len(no.pure)) {
  datTrueMeans0[GeneIndicator == i, i] <- 1 + datTrueMeans0[GeneIndicator == i, i]
}
dimnames(datTrueMeans0) <- list(
  paste("Gene", seq_len(nrow(datTrueMeans0)), sep = "."),
  paste("MeanPureCellType", seq_len(ncol(datTrueMeans0)), sep = ".")
)
# plot.mat(datTrueMeans0)
# Simulate the (expression) values of the admixed population samples:
# proportions times true means, plus Gaussian noise (samples in rows,
# genes in columns).
noise <- matrix(rnorm(m * no.genes, sd = 0.1), nrow = m, ncol = no.genes)
datE.Admixture <- datProportions %*% t(datTrueMeans0) + noise
rownames(datE.Admixture) <- paste("MixedTissue", seq_len(m), sep = ".")
# Scenario 1: estimate the pure-population means using the proportions of all
# simulated populations.
# NOTE(review): populationMeansInAdmixture() and verboseScatterplot() are not
# defined in this script; presumably the WGCNA package must be attached first.
datPredictedMeans <- populationMeansInAdmixture(datProportions, datE.Admixture)
# 2 x 2 panel layout: one scatterplot per population.
par(mfrow = c(2, 2))
# Loop over the simulated populations (no.pure) rather than a hard-coded 1:4,
# so the plots stay in step with the simulation settings.
for (i in seq_len(no.pure)) {
  verboseScatterplot(datPredictedMeans[, i], datTrueMeans0[, i],
    xlab = "predicted mean", ylab = "true mean", main = "all populations"
  )
  # Identity line: points on it are perfectly recovered means.
  abline(0, 1)
}
# Scenario 2: assume we only study 2 populations (i.e. we ignore the others).
selectPopulations <- c(1, 2)
# drop = FALSE keeps the proportions as a matrix even when a single population
# is selected.
datPredictedMeansTooFew <- populationMeansInAdmixture(
  datProportions[, selectPopulations, drop = FALSE],
  datE.Admixture
)
par(mfrow = c(2, 2))
for (i in seq_along(selectPopulations)) {
  # Compare estimate column i with the true means of the population it was
  # fitted for, selectPopulations[i], instead of true-mean column i; the two
  # only coincide when the selection is c(1, 2, ...).
  # NOTE(review): assumes the estimate's columns follow the order of the
  # supplied proportion columns -- confirm against the function's documentation.
  verboseScatterplot(datPredictedMeansTooFew[, i],
    datTrueMeans0[, selectPopulations[i]],
    xlab = "predicted mean", ylab = "true mean", main = "too few populations"
  )
  # Identity line: points on it are perfectly recovered means.
  abline(0, 1)
}
# Scenario 3: assume we erroneously add a population. The extra column
# (WrongProp) is a random permutation of the first population's proportions.
datProportionsTooMany <- data.frame(datProportions,
  WrongProp = sample(datProportions[, 1])
)
datPredictedMeansTooMany <- populationMeansInAdmixture(
  datProportionsTooMany,
  datE.Admixture
)
par(mfrow = c(2, 2))
# Only the simulated populations have true means to compare against, so loop
# over no.pure (instead of a hard-coded 1:4); the estimate for the spurious
# extra population is not plotted.
for (i in seq_len(no.pure)) {
  verboseScatterplot(datPredictedMeansTooMany[, i], datTrueMeans0[, i],
    xlab = "predicted mean", ylab = "true mean", main = "too many populations"
  )
  # Identity line: points on it are perfectly recovered means.
  abline(0, 1)
}
# Run the code above in your browser using DataLab