# \donttest{
## ------------------------------------------------------------
## regression
## ------------------------------------------------------------
## grow a regression forest for Ozone on the airquality data
reg.o <- rfsrc(Ozone ~ ., airquality)
## subsample the forest, leaving every option at its default
reg.smp.o <- subsample(reg.o)
## draw the confidence regions
plot.subsample(reg.smp.o)
## print a summary of the subsampling results
print(reg.smp.o)
## joint vimp for Day and Month, together with a confidence region
## for the generalization error
reg.smp.o2 <- subsample(
  reg.o,
  performance = TRUE,
  joint = TRUE,
  xvar.names = c("Day", "Month")
)
plot.subsample(reg.smp.o2)
## repeat using the double bootstrap with B = 25 (slower)
reg.dbs.o <- subsample(reg.o, B = 25, bootstrap = TRUE)
print(reg.dbs.o)
plot.subsample(reg.dbs.o)
## standard error and confidence region for the generalization
## error only
gerror <- subsample(reg.o, performance.only = TRUE)
plot.subsample(gerror)
## ------------------------------------------------------------
## classification
## ------------------------------------------------------------
## caret is an optional dependency used only for its simulator, so test
## for it with requireNamespace() and call it by namespace rather than
## attaching the whole package with library()
if (requireNamespace("caret", quietly = TRUE)) {
  ## 3 non-linear, 15 linear, and 5 noise variables
  d <- caret::twoClassSim(1000, linearVars = 15, noiseVars = 5)
  ## VIMP based on (default) misclassification error
  cls.o <- rfsrc(Class ~ ., d)
  cls.smp.o <- subsample(cls.o, B = 100)
  plot.subsample(cls.smp.o, cex.axis = .7)
  ## same as above, but with VIMP defined using normalized Brier score
  cls.o2 <- rfsrc(Class ~ ., d, perf.type = "brier")
  cls.smp.o2 <- subsample(cls.o2, B = 100)
  plot.subsample(cls.smp.o2, cex.axis = .7)
}
## ------------------------------------------------------------
## class-imbalanced data using RFQ classifier with G-mean VIMP
## ------------------------------------------------------------
## caret is an optional dependency used only for its simulator, so test
## for it with requireNamespace() and call it by namespace rather than
## attaching the whole package with library()
if (requireNamespace("caret", quietly = TRUE)) {
  ## experimental settings
  n <- 1000                    # simulated sample size
  q <- 20                      # number of noise variables
  ir <- 6                      # class 1 is cut down to 1/ir of its size
  f <- as.formula(Class ~ .)   # model formula passed to imbalanced() below
  ## simulate the data and recode the class labels as 0/1
  d <- caret::twoClassSim(n, linearVars = 15, noiseVars = q)
  d$Class <- factor(as.numeric(d$Class) - 1)
  ## create the minority class: keep every class 0 row and a random
  ## 1/ir fraction of the class 1 rows
  idx.0 <- which(d$Class == 0)
  pool.1 <- which(d$Class == 1)
  ## index through sample.int() so a length-one pool is never read as
  ## 1:pool.1, and floor the size so it is an explicit whole number
  idx.1 <- pool.1[sample.int(length(pool.1), floor(length(pool.1) / ir))]
  d <- d[c(idx.0, idx.1), , drop = FALSE]
  ## RFQ classifier
  oq <- imbalanced(f, d, importance = TRUE, block.size = 10)
  ## subsample the RFQ-classifier
  smp.oq <- subsample(oq, B = 100)
  plot.subsample(smp.oq, cex.axis = .7)
}
## ------------------------------------------------------------
## survival
## ------------------------------------------------------------
## pbc data as shipped with randomForestSRC
data(pbc, package = "randomForestSRC")
srv.o <- rfsrc(Surv(days, status) ~ ., pbc)
srv.smp.o <- subsample(srv.o, B = 100)
## call plot.subsample() explicitly, matching the other examples,
## instead of relying on S3 dispatch from plot()
plot.subsample(srv.smp.o)
## ------------------------------------------------------------
## competing risks
## target event is death (event = 2)
## ------------------------------------------------------------
## survival is needed only for its copy of the pbc data, so test for
## the package with requireNamespace() rather than attaching it
if (requireNamespace("survival", quietly = TRUE)) {
  ## overwrites the pbc data frame loaded in the survival example
  data(pbc, package = "survival")
  ## drop the id column so it is not picked up as a predictor by `~ .`
  pbc$id <- NULL
  cr.o <- rfsrc(Surv(time, status) ~ ., pbc, splitrule = "logrankCR", cause = 2)
  cr.smp.o <- subsample(cr.o, B = 100)
  plot.subsample(cr.smp.o, target = 2)
}
## ------------------------------------------------------------
## multivariate
## ------------------------------------------------------------
## mlbench is needed only for the BostonHousing data, so test for the
## package with requireNamespace() rather than attaching it
if (requireNamespace("mlbench", quietly = TRUE)) {
  ## load the data; package is named explicitly because mlbench is
  ## not attached
  data(BostonHousing, package = "mlbench")
  bh <- BostonHousing
  ## round rm and turn it into a factor so the two responses are of
  ## mixed type: medv (numeric) and rm (factor)
  bh$rm <- factor(round(bh$rm))
  o <- rfsrc(cbind(medv, rm) ~ ., bh)
  so <- subsample(o)
  plot.subsample(so)
  plot.subsample(so, m.target = "rm")
  ## generalization error
  gerror <- subsample(o, performance.only = TRUE)
  plot.subsample(gerror, m.target = "medv")
  plot.subsample(gerror, m.target = "rm")
}
## ------------------------------------------------------------
## largish data example - use rfsrc.fast for fast forests
## ------------------------------------------------------------
## caret is an optional dependency used only for its simulator, so test
## for it with requireNamespace() and call it by namespace rather than
## attaching the whole package with library()
if (requireNamespace("caret", quietly = TRUE)) {
  ## largish data set
  d <- caret::twoClassSim(1000, linearVars = 15, noiseVars = 5)
  ## use a subsampled forest with Brier score performance
  ## remember to set forest=TRUE for rfsrc.fast
  o <- rfsrc.fast(Class ~ ., d, ntree = 100,
                  forest = TRUE, perf.type = "brier")
  so <- subsample(o, B = 100)
  plot.subsample(so, cex.axis = .7)
}
# }
## Run the code above in your browser using DataLab