library(MASS)
p=200 ## number of genomic variables
n=50 ## number of observations
f=20 ## number of gene sets
## Create annotation matrix with disjoint gene sets
gene.sets = matrix(0, nrow=f, ncol=p)
for (i in 1:f) {
gene.sets[i, ((i-1)*p/f + 1):(i*p/f)] = 1
}
## Simulate MVN data with two population PCs whose loadings are
## associated with the first and second gene sets, respectively.
var1=2 ## variance of first population PC
var2=1 ## variance of second population PC
default.var=.1 ## background variance of population PCs
load = sqrt(.1) ## value of population loading vector for gene set 1 on PC 1 and set 2 on PC 2
## Creates a first PC with loadings for just the first 20 genes and a
## second PC with loadings for just the second 20 genes
loadings1 = c(rep(load,p/f), rep(0,p-p/f))
loadings2 = c(rep(0,p/f), rep(load, p/f), rep(0, p-2*p/f))
## Create the population covariance matrix
sigma = var1 * loadings1 %*% t(loadings1) + var2 * loadings2 %*% t(loadings2) +
diag(rep(default.var, p))
## Simulate MVN data
data = mvrnorm(n=n, mu=rep(0, p), Sigma=sigma)
## Perform PCA on the standardized data
prcomp.output = prcomp(data, center=TRUE, scale=TRUE)
## Execute PCGSE using Fisher-transformed correlation coefficients as the gene-level statistics,
## the standardized mean difference as the gene set statistic and an unadjusted two-sided,
## two-sample t-test for the determination of statistical significance.
pcgse.results = pcgse(data=data,
prcomp.output=prcomp.output,
pc.indexes=1:2,
gene.sets=gene.sets,
gene.statistic="z",
transformation="none",
gene.set.statistic="mean.diff",
gene.set.test="parametric")
## Apply Bonferroni correction to p-values
for (i in 1:2) {
pcgse.results$p.values[,i] = p.adjust(pcgse.results$p.values[,i], method="bonferroni")
}
## Display the p-values for the first 5 gene sets for PCs 1 and 2
pcgse.results$p.values[1:5,]
## Execute PCGSE again but using a correlation-adjusted t-test
pcgse.results = pcgse(data=data,
prcomp.output=prcomp.output,
pc.indexes=1:2,
gene.sets=gene.sets,
gene.statistic="z",
transformation="none",
gene.set.statistic="mean.diff",
gene.set.test="cor.adj.parametric")
## Apply Bonferroni correction to p-values
for (i in 1:2) {
pcgse.results$p.values[,i] = p.adjust(pcgse.results$p.values[,i], method="bonferroni")
}
## Display the p-values for the first 5 gene sets for PCs 1 and 2
pcgse.results$p.values[1:5,]
Run the code above in your browser using DataLab