library(MASS)
p=200 ## number of genomic variables
n=50 ## number of observations
f=20 ## number of gene sets
## Create annotation matrix with disjoint gene sets
gene.sets = matrix(0, nrow=f, ncol=p)
for (i in 1:f) {
gene.sets[i, ((i-1)*p/f + 1):(i*p/f)] = 1
}
## Simulate MVN data where the
## first population PC loadings are
## associated with the first gene set.
var=2 ## variance of first population PC
default.var=.1 ## background variance of population PCs
load = sqrt(.1) ## value of population loading vector for gene set 1 on PC 1
## Creates a first PC with loadings for just the first 20 genes and a
loadings = c(rep(load,p/f), rep(0,p-p/f))
## Create the population covariance matrix
sigma = var * loadings %*% t(loadings) + diag(rep(default.var, p))
## Simulate MVN data
data = mvrnorm(n=n, mu=rep(0, p), Sigma=sigma)
## Perform PCA on the standardized data
prcomp.output = prcomp(data, center=TRUE, scale=TRUE)
## Execute SGSE using Fisher-transformed correlation coefficients as
## the gene-level statistics, the standardized mean difference as the
## gene set statistic and a correlation adjusted two-sided,
## two-sample t-test for the determination of statistical significance,
## all PCs with non-zero eigenvalues for spectral enrichment and
## variance weights
sgse.results = sgse(data=data,
prcomp.output=prcomp.output,
gene.sets=gene.sets,
gene.statistic="z",
transformation="none",
gene.set.statistic="mean.diff",
gene.set.test="cor.adj.parametric",
pc.selection.method="all",
pcgse.weight="variance")
## Display the PCGSE p-values for the first 5 gene sets for PC 1
sgse.results$pcgse$p.values[1:5,1]
## Display the SGSE weights for the first 5 PCs
sgse.results$weights[1:5]
## Display the SGSE p-values for the first 5 gene sets
sgse.results$sgse[1:5]
## Execute SGSE again but using RMT scaled variance weights
sgse.results = sgse(data=data,
prcomp.output=prcomp.output,
gene.sets=gene.sets,
gene.statistic="z",
transformation="none",
gene.set.statistic="mean.diff",
gene.set.test="cor.adj.parametric",
pc.selection.method="all",
pcgse.weight="rmt.scaled.var")
## Display the SGSE weights for the first 5 PCs
sgse.results$weights[1:5]
## Display the SGSE p-values for the first 5 gene sets
sgse.results$sgse[1:5]
## Execute SGSE again using RMT scaled variance weights and
## all RMT-significant PCs at alpha=.05
sgse.results = sgse(data=data,
prcomp.output=prcomp.output,
gene.sets=gene.sets,
gene.statistic="z",
transformation="none",
gene.set.statistic="mean.diff",
gene.set.test="cor.adj.parametric",
pc.selection.method="rmt",
rmt.alpha=.05,
pcgse.weight="rmt.scaled.var")
## Display the indexes of the RMT-significant PCs
sgse.results$pc.indexes
## Display the SGSE p-values for the first 5 gene sets
sgse.results$sgse[1:5]
Run the code above in your browser using DataLab