# NOT RUN {
## ------------------------------------------------------------
## Minimal depth variable selection
## survival analysis
## use larger node size which is better for minimal depth
## ------------------------------------------------------------
# Load the PBC data shipped with randomForestSRC and grow a survival
# forest with a node size of 20 and variable importance switched on.
data(pbc, package = "randomForestSRC")
pbc.obj <- rfsrc(
  formula = Surv(days, status) ~ .,
  data = pbc,
  nodesize = 20,
  importance = TRUE
)

# Calling var.select() with only the fitted forest uses its defaults,
# which correspond to minimal depth selection.
vs.pbc <- var.select(object = pbc.obj)
topvars <- vs.pbc[["topvars"]]

# The same set of top variables, taken directly from max.subtree().
max.subtree(pbc.obj)[["topvars"]]

# Repeat the selection at each level of conservativeness.
var.select(object = pbc.obj, conservative = "low")
var.select(object = pbc.obj, conservative = "medium")
var.select(object = pbc.obj, conservative = "high")
## ------------------------------------------------------------
## Minimal depth variable selection
## competing risk analysis
## use larger node size which is better for minimal depth
## ------------------------------------------------------------
## competing risk data set involving AIDS in women
# Competing risk data set (wihs) shipped with randomForestSRC.
data(wihs, package = "randomForestSRC")
vs.wihs <- var.select(
  formula = Surv(time, status) ~ .,
  data = wihs,
  nsplit = 3,
  nodesize = 20,
  ntree = 100,
  importance = TRUE
)

## competing risk analysis of pbc data from survival package
## implement cause-specific variable selection
## (only attempted when the survival package can be attached)
if (library("survival", logical.return = TRUE)) {
  # this replaces the randomForestSRC copy of pbc loaded earlier
  data(pbc, package = "survival")
  # drop the id column so it is not used as a predictor
  pbc[["id"]] <- NULL
  var.select(formula = Surv(time, status) ~ ., data = pbc, cause = 1)
  var.select(formula = Surv(time, status) ~ ., data = pbc, cause = 2)
}
## ------------------------------------------------------------
## Minimal depth variable selection
## classification analysis
## ------------------------------------------------------------
# Variable selection with the factor Species as the outcome.
vs.iris <- var.select(formula = Species ~ ., data = iris)
## ------------------------------------------------------------
## Variable hunting high-dimensional example
## van de Vijver microarray breast cancer survival data
## nrep is small for illustration; typical values are nrep = 100
## ------------------------------------------------------------
# Variable hunting (method = "vh") on the vdv data from randomForestSRC.
data(vdv, package = "randomForestSRC")
vh.breast <- var.select(Surv(Time, Censoring) ~ ., vdv,
                        method = "vh", nrep = 10, nstep = 5)
# Plot the top 10 variables. head() is used instead of topvars[1:10] so
# that, if fewer than 10 variables were selected, no NA names are passed
# on to plot.variable().
plot.variable(vh.breast$rfsrc.refit.obj,
              xvar.names = head(vh.breast$topvars, 10))
plot.variable(vh.breast$rfsrc.refit.obj,
              xvar.names = head(vh.breast$topvars, 10), partial = TRUE)
## similar analysis, but using weights from univariate Cox p-values
if (library("survival", logical.return = TRUE))
{
# Compute one weight per predictor from univariate Cox regressions.
#
# Arguments:
#   rfsrc.f     formula whose first two variables (as returned by
#               all.vars()) are the event columns, e.g.
#               Surv(Time, Censoring) ~ .
#   rfsrc.data  data frame containing the two event columns; every other
#               column is treated as a predictor.
#
# Returns an unnamed numeric vector with one entry per predictor, in
# column order: 1 / (p-value + 1e-100), or 1.0 when no p-value is
# available. A data frame without predictors yields numeric(0).
#
# Requires coxph() to be in scope (the survival package attached).
cox.weights <- function(rfsrc.f, rfsrc.data) {
  event.names <- all.vars(rfsrc.f)[1:2]
  event.pt <- match(event.names, names(rfsrc.data))
  if (anyNA(event.pt)) {
    stop("event variables not found in data: ",
         paste(event.names[is.na(event.pt)], collapse = ", "),
         call. = FALSE)
  }
  # seq_len() keeps the predictor set empty (rather than c(1, 0)) when the
  # data holds nothing but the event columns
  xvar.pt <- setdiff(seq_len(ncol(rfsrc.data)), event.pt)
  # vapply() guarantees a numeric vector even for zero predictors
  vapply(xvar.pt, function(j) {
    cox.out <- coxph(rfsrc.f, rfsrc.data[, c(event.pt, j)])
    # select the p-value column by name: positional element 5 is only the
    # p-value when the coefficient table has exactly one row
    coef.tbl <- summary(cox.out)$coefficients
    pvalue <- coef.tbl[, "Pr(>|z|)"]
    pvalue <- pvalue[!is.na(pvalue)]
    # a predictor expanding to several coefficients (e.g. a factor) is
    # scored by its smallest p-value; 1e-100 guards against division by 0
    if (length(pvalue) == 0L) 1.0 else 1 / (min(pvalue) + 1e-100)
  }, numeric(1))
}
# Reload vdv, derive per-variable weights with cox.weights(), and pass
# them to variable hunting through xvar.wt.
data(vdv, package = "randomForestSRC")
rfsrc.f <- as.formula(Surv(Time, Censoring) ~ .)
cox.wts <- cox.weights(rfsrc.f = rfsrc.f, rfsrc.data = vdv)
vh.breast.cox <- var.select(
  formula = rfsrc.f,
  data = vdv,
  method = "vh",
  nstep = 5,
  nrep = 10,
  xvar.wt = cox.wts
)
}
# }
# Run the code above in your browser using DataLab