if (FALSE) {
# The data set has been generated as follows:
# get the data set from GEO
library(GEOquery)
gambia <- getGEO("GSE28623")[[1]]
# Convert to limma and normalize
library(limma)
e <- new("EListRaw", list(E= exprs(gambia), genes=fData(gambia), targets= pData(gambia)))
e.bg <- backgroundCorrect(e, method= "normexp")
en <- normalizeBetweenArrays(e.bg, method= "q")
en <- avereps(en, ID=en$genes$NAME)
en <- en[ en$genes$CONTROL_TYPE == "FALSE", ]
en$targets$group <- factor(gsub(" whole blood RNA *", "", en$targets$description))
# Fill in Entrez Gene IDs
library(org.Hs.eg.db)
en$genes$EG <- ""
sel <- en$genes$REFSEQ %in% ls(org.Hs.egREFSEQ2EG)
en$genes$EG[sel] <- mget(as.character(en$genes$REFSEQ[sel]), org.Hs.egREFSEQ2EG)
# Filter by IQR and missing EG's
iqrs <- apply(en$E, 1, IQR)
en2 <- en[ iqrs > quantile(iqrs, 0.75) & en$genes$EG != "", ]
# Select 10 random samples from NID and TB groups
en2 <- en2[ , c(sample(which(en2$targets$group == "NID"), 10),
sample(which(en2$targets$group == "TB"), 10)) ]
colnames(en2$E) <- en2$targets$group
Egambia <- cbind(en2$genes[ , c("GENE_SYMBOL", "GENE_NAME", "EG") ], en2$E)
}
Run the code above in your browser using DataLab