# \donttest{
## ------------------------------------------------------------
## regression analysis
## ------------------------------------------------------------
## new York air quality measurements
airq.obj <- holdout.vimp(Ozone ~ ., data = airquality, na.action = "na.impute")
print(airq.obj$importance)
## ------------------------------------------------------------
## classification analysis
## ------------------------------------------------------------
## iris data
iris.obj <- holdout.vimp(Species ~., data = iris)
print(iris.obj$importance)
## iris data using brier prediction error
iris.obj <- holdout.vimp(Species ~., data = iris, perf.type = "brier")
print(iris.obj$importance)
## ------------------------------------------------------------
## illustration of targeted holdout vimp analysis
## ------------------------------------------------------------
## iris data - only interested in variables 3 and 4
vtry <- list(xvar = c(3, 4), joint = FALSE)
print(holdout.vimp(Species ~., data = iris, vtry = vtry)$impor)
## iris data - joint importance of variables 3 and 4
vtry <- list(xvar = c(3, 4), joint = TRUE)
print(holdout.vimp(Species ~., data = iris, vtry = vtry)$impor)
## iris data - joint importance of variables 1 and 2
vtry <- list(xvar = c(1, 2), joint = TRUE)
print(holdout.vimp(Species ~., data = iris, vtry = vtry)$impor)
## ------------------------------------------------------------
## imbalanced classification (using RFQ)
## ------------------------------------------------------------
if (library("caret", logical.return = TRUE)) {
## experimental settings
n <- 400
q <- 20
ir <- 6
f <- as.formula(Class ~ .)
## simulate the data, create minority class data
d <- twoClassSim(n, linearVars = 15, noiseVars = q)
d$Class <- factor(as.numeric(d$Class) - 1)
idx.0 <- which(d$Class == 0)
idx.1 <- sample(which(d$Class == 1), sum(d$Class == 1) / ir , replace = FALSE)
d <- d[c(idx.0,idx.1),, drop = FALSE]
## VIMP for RFQ with and without blocking
vmp1 <- imbalanced(f, d, importance = TRUE, block.size = 1)$importance[, 1]
vmp10 <- imbalanced(f, d, importance = TRUE, block.size = 10)$importance[, 1]
## holdout VIMP for RFQ with and without blocking
hvmp1 <- holdout.vimp(f, d, rfq = TRUE,
perf.type = "g.mean", block.size = 1)$importance[, 1]
hvmp10 <- holdout.vimp(f, d, rfq = TRUE,
perf.type = "g.mean", block.size = 10)$importance[, 1]
## compare VIMP values
imp <- 100 * cbind(vmp1, vmp10, hvmp1, hvmp10)
legn <- c("vimp-1", "vimp-10","hvimp-1", "hvimp-10")
colr <- rep(4,20+q)
colr[1:20] <- 2
ylim <- range(c(imp))
nms <- 1:(20+q)
par(mfrow=c(2,2))
barplot(imp[,1],col=colr,las=2,main=legn[1],ylim=ylim,names.arg=nms)
barplot(imp[,2],col=colr,las=2,main=legn[2],ylim=ylim,names.arg=nms)
barplot(imp[,3],col=colr,las=2,main=legn[3],ylim=ylim,names.arg=nms)
barplot(imp[,4],col=colr,las=2,main=legn[4],ylim=ylim,names.arg=nms)
}
## ------------------------------------------------------------
## multivariate regression analysis
## ------------------------------------------------------------
mtcars.mreg <- holdout.vimp(Multivar(mpg, cyl) ~., data = mtcars,
vtry = 3,
block.size = 1,
samptype = "swr",
sampsize = dim(mtcars)[1])
print(mtcars.mreg$importance)
## ------------------------------------------------------------
## mixed outcomes analysis
## ------------------------------------------------------------
mtcars.new <- mtcars
mtcars.new$cyl <- factor(mtcars.new$cyl)
mtcars.new$carb <- factor(mtcars.new$carb, ordered = TRUE)
mtcars.mix <- holdout.vimp(cbind(carb, mpg, cyl) ~., data = mtcars.new,
ntree = 100,
block.size = 2,
vtry = 1)
print(mtcars.mix$importance)
##------------------------------------------------------------
## survival analysis
##------------------------------------------------------------
## Primary biliary cirrhosis (PBC) of the liver
data(pbc, package = "randomForestSRC")
pbc.obj <- holdout.vimp(Surv(days, status) ~ ., pbc,
nsplit = 10,
ntree = 1000,
na.action = "na.impute")
print(pbc.obj$importance)
##------------------------------------------------------------
## competing risks
##------------------------------------------------------------
## WIHS analysis
## cumulative incidence function (CIF) for HAART and AIDS stratified by IDU
data(wihs, package = "randomForestSRC")
wihs.obj <- holdout.vimp(Surv(time, status) ~ ., wihs,
nsplit = 3,
ntree = 100)
print(wihs.obj$importance)
# }
Run the code above in your browser using DataLab