# Load the `cancer_pathways` dataset; it is used further down as the collection
# of pathways (gene sets given as Entrez IDs) that get translated to gene symbols.
data("cancer_pathways")
if(interactive()){
##get the data from Vijver publication
#clinical data
#' Download a zip archive and read one Excel workbook out of it
#'
#' The archive is saved in the working directory and extracted into
#' `./temp_unzip`; both are removed again when the function exits, whether it
#' succeeds or fails.
#'
#' @param urlPath Base URL; the archive is fetched from
#'   `paste0(urlPath, zipname, ".zip")`.
#' @param filename Name (without the `.xls` extension) of the workbook, looked
#'   up directly inside the extraction directory.
#' @param zipname Name (without the `.zip` extension) of the archive.
#' @param skip Passed on as the `skip` argument of `readxl::read_xls()`.
#' @return The result of `readxl::read_xls()` for the extracted workbook.
import_xls_from_zip <- function(urlPath, filename, zipname, skip = 0) {
  zipFile <- paste0(zipname, ".zip")
  unzip_dir <- "./temp_unzip"

  # Register each cleanup right before the step that creates the resource, so
  # nothing is left behind when a later step (unzip, read) raises an error.
  on.exit(unlink(zipFile), add = TRUE)
  download.file(paste0(urlPath, zipFile), zipFile)

  on.exit(unlink(unzip_dir, recursive = TRUE), add = TRUE)
  unzip(zipFile, exdir = unzip_dir)

  xlsFile <- paste0(unzip_dir, "/", filename, ".xls")
  readxl::read_xls(xlsFile, skip = skip)
}
# Download the clinical table with the `import_xls_from_zip` helper defined
# above (the only import helper this script defines).
BC_dat_clin <- import_xls_from_zip(
  urlPath = "http://ccb.nki.nl/data/",
  filename = "Table1_ClinicalData_Table",
  zipname = "nejm_table1",
  skip = 2
)
BC_dat_clin <- BC_dat_clin[order(BC_dat_clin$SampleID), ]
# Remember the positions of the columns as imported: they are dropped further
# down so that only the derived columns added below remain.
col2rmv <- seq_len(ncol(BC_dat_clin))
BC_dat_clin$ID <- paste0("S", BC_dat_clin$SampleID)
# NOTE(review): readxl returns a tibble, for which setting row names is
# deprecated and emits a warning -- confirm whether row names are still needed.
rownames(BC_dat_clin) <- BC_dat_clin$ID
BC_dat_clin$evdeath <- BC_dat_clin$EVENTdeath
BC_dat_clin$tsurv <- BC_dat_clin$TIMEsurvival
BC_dat_clin$evmeta <- BC_dat_clin$EVENTmeta
# Time to metastasis is capped by the survival time; a missing value in one of
# the two columns falls back to the other (na.rm = TRUE).
BC_dat_clin$tmeta <- pmin(BC_dat_clin$TIMEsurvival, BC_dat_clin$TIMEmeta, na.rm = TRUE)
samples2rmv <- c(
  "S28", "S122", "S123", "S124", "S133", "S138", "S139", "S141", "S221", "S222",
  "S224", "S226", "S227", "S228", "S229", "S230", "S231", "S237", "S238", "S240",
  "S241", "S248", "S250", "S251", "S252", "S254", "S292", "S317", "S342", "S371",
  "S379", "S380", "S397", "S398", "S401"
)
# Logical indexing rather than `-which(...)`: a negative index built from an
# empty `which()` result would select zero rows instead of keeping them all.
BC_dat_clin <- BC_dat_clin[!(BC_dat_clin$ID %in% samples2rmv), -col2rmv]
# Values of intermediate expressions inside the enclosing `if` block are not
# auto-printed, so print the preview explicitly.
print(head(BC_dat_clin))
# Import the genomics data: download the archive of tab-delimited text files,
# read each file and bind their sample columns side by side.
urlPath <- "http://ccb.nki.nl/data/"
zipFile <- paste0("ZipFiles295Samples", ".zip")
download.file(paste0(urlPath, zipFile), zipFile)
unzip(zipFile, exdir = "./temp_unzip")
unlink(zipFile)
# Remove the readme so that only the data files are listed below.
unlink("./temp_unzip/Readme.txt", recursive = FALSE)
txtfiles <- list.files("./temp_unzip/")
if (length(txtfiles) == 0L) {
  unlink("./temp_unzip", recursive = TRUE)
  stop("no expression files were extracted from ", zipFile, call. = FALSE)
}

# Collect one data frame per file and bind them once at the end, instead of
# growing the result with cbind() on every iteration.
exp_blocks <- vector("list", length(txtfiles))
for (i in seq_along(txtfiles)) {
  f <- txtfiles[i]
  temp_exp <- read.delim(paste0("./temp_unzip/", f))
  # The first data row is dropped everywhere below
  # (presumably a second header line -- TODO confirm against the files).
  if (i == 1L) {
    # Gene annotations are taken from the first file only.
    gene_id <- as.character(temp_exp[-1, 1])
    gene_symbol <- as.character(temp_exp[-1, 2])
  }
  # Keep only the per-sample columns; drop = FALSE keeps a data frame even if
  # a file happens to contain a single sample column.
  temp_exp <- temp_exp[-1, grep("Sample.", colnames(temp_exp)), drop = FALSE]
  colnames(temp_exp) <- gsub("Sample.", "S", colnames(temp_exp))
  exp_blocks[[i]] <- temp_exp
}
BC_dat_exp <- do.call(cbind, exp_blocks)

# Restrict the expression columns to the samples kept in the clinical data and
# put the gene symbols in front.
BC_dat_exp_all <- cbind.data.frame("SYMBOL" = gene_symbol, BC_dat_exp[, BC_dat_clin$ID])
unlink("./temp_unzip", recursive = TRUE)
# Translate the pathways from Entrez ID to gene symbol. This step only runs
# when the optional annotation package org.Hs.eg.db is installed.
if (requireNamespace("org.Hs.eg.db", quietly = TRUE)) {
  library(org.Hs.eg.db)
  x <- org.Hs.egSYMBOL
  mapped_genes <- mappedkeys(x)
  xx <- as.list(x[mapped_genes])
  # Look up every ID of each pathway in the ID -> symbol list; IDs without a
  # mapping yield NULL and disappear in unlist().
  cancer_pathways_Symbol <- lapply(cancer_pathways, function(v) {
    unlist(xx[v])
  })
  # Fraction of each pathway's members found among the row names of the
  # expression data. vapply() guarantees a numeric vector whatever the input.
  # NOTE(review): this compares the original `cancer_pathways` entries (not
  # `cancer_pathways_Symbol`) with `rownames(BC_dat_exp)` -- confirm that this
  # is the intended comparison.
  vapply(
    cancer_pathways,
    function(pathway) {
      length(intersect(pathway, rownames(BC_dat_exp))) / length(pathway)
    },
    numeric(1)
  )
}
}
# Run the code above in your browser using DataLab