# NOT RUN {
## ------------------------------------------------------------
## use the breast data for illustration
## ------------------------------------------------------------
data(breast, package = "randomForestSRC")
breast <- na.omit(breast)
f <- as.formula(status ~ .)
##----------------------------------------------------------------
## example 1: default RFQ call
##----------------------------------------------------------------
o.rfq <- imbalanced(f, breast)
print(o.rfq)
## equivalent to:
## rfsrc(f, breast, rfq = TRUE, perf.type = "g.mean")
##----------------------------------------------------------------
## example 2: RFQ call with fast rfsrc
##----------------------------------------------------------------
o.rfq <- imbalanced(f, breast, fast = TRUE)
print(o.rfq)
## equivalent to:
## rfsrcFast(f, breast, rfq = TRUE, perf.type = "g.mean")
##-----------------------------------------------------------------
## example 3: standard RF (uses misclassification)
## ------------------------------------------------------------
o.std <- imbalanced(f, breast, method = "stand")
##-----------------------------------------------------------------
## example 4: standard RF using G-mean performance
## ------------------------------------------------------------
o.std <- imbalanced(f, breast, method = "stand", perf.type = "g.mean")
## equivalent to:
## rfsrc(f, breast, perf.type = "g.mean")
##----------------------------------------------------------------
## example 5: default BRF call
##----------------------------------------------------------------
o.brf <- imbalanced(f, breast, method = "brf")
## equivalent to:
## imbalanced(f, breast, method = "brf", perf.type = "g.mean")
##----------------------------------------------------------------
## example 6: BRF call with misclassification performance
##----------------------------------------------------------------
o.brf <- imbalanced(f, breast, method = "brf", perf.type = "misclass")
##----------------------------------------------------------------
## example 7: RFQ with optimized threshold
##----------------------------------------------------------------
o.rfq.opt <- imbalanced(f, breast, optimize = TRUE)
plot(o.rfq.opt$gmean, type = "l")
##----------------------------------------------------------------
## example 8: train/test example
##----------------------------------------------------------------
trn <- sample(1:nrow(breast), size = nrow(breast) / 2)
o.trn <- imbalanced(f, breast[trn,], importance = TRUE)
o.tst <- predict(o.trn, breast[-trn,], importance = TRUE)
print(o.trn)
print(o.tst)
print(100 * cbind(o.trn$impo[, 1], o.tst$impo[, 1]))
##----------------------------------------------------------------
## example 9: simulation example using the caret R-package
## simulate classification data with strong and noisy predictors
## create imbalanced data by randomly sampling the class 1 data
##
## illustrates the effectiveness of blocked VIMP
## note that rfsrc uses blocked VIMP by default
##
##----------------------------------------------------------------
if (library("caret", logical.return = TRUE)) {
## experimental settings
n <- 1000
q <- 20
ir <- 6
f <- as.formula(Class ~ .)
## simulate the data, create minority class data
d <- twoClassSim(n, linearVars = 15, noiseVars = q)
d$Class <- factor(as.numeric(d$Class) - 1)
idx.0 <- which(d$Class == 0)
idx.1 <- sample(which(d$Class == 1), sum(d$Class == 1) / ir , replace = FALSE)
d <- d[c(idx.0,idx.1),, drop = FALSE]
## VIMP for BRF with and without blocking
## blocked VIMP is a hybrid of Breiman-Cutler/Ishwaran-Kogalur VIMP
brf <- imbalanced(f, d, method = "brf", importance = TRUE, block.size = 1)
brfB <- imbalanced(f, d, method = "brf", importance = TRUE, block.size = 10)
## VIMP for RFQ with and without blocking
rfq <- imbalanced(f, d, importance = TRUE, block.size = 1)
rfqB <- imbalanced(f, d, importance = TRUE, block.size = 10)
## compare VIMP values
imp <- 100 * cbind(brf$importance[, 1], brfB$importance[, 1],
rfq$importance[, 1], rfqB$importance[, 1])
legn <- c("BRF", "BRF-block", "RFQ", "RFQ-block")
colr <- rep(4,20+q)
colr[1:20] <- 2
ylim <- range(c(imp))
nms <- 1:(20+q)
par(mfrow=c(2,2))
barplot(imp[,1],col=colr,las=2,main=legn[1],ylim=ylim,names.arg=nms)
barplot(imp[,2],col=colr,las=2,main=legn[2],ylim=ylim,names.arg=nms)
barplot(imp[,3],col=colr,las=2,main=legn[3],ylim=ylim,names.arg=nms)
barplot(imp[,4],col=colr,las=2,main=legn[4],ylim=ylim,names.arg=nms)
}
# }
Run the code above in your browser using DataLab