# \donttest{
## ------------------------------------------------------------
## compare synthetic forests to regular forest (classification)
## ------------------------------------------------------------
## rfsrc and synthetic calls
if (library("mlbench", logical.return = TRUE)) {

  ## training data simulated with mlbench.ringnorm
  ring <- data.frame(mlbench.ringnorm(250, 20))

  ## baseline: a regular classification forest
  ringRF <- rfsrc(classes ~ ., ring)

  ## synthetic forests
  ##   ringSyn1: nodesize varied
  ##   ringSyn2: nodesize and mtry varied (mtry taken from mtrySeq)
  ringSyn1 <- synthetic(classes ~ ., ring)
  ringSyn2 <- synthetic(classes ~ ., ring, mtrySeq = c(1, 10, 20))

  ## separately simulated test set
  ring.test <- data.frame(mlbench.ringnorm(500, 20))

  ## test-set predictions; for the synthetic forests the fitted object is
  ## passed back to synthetic() and the rfSynPred component is kept
  pred.ringRF <- predict(ringRF, newdata = ring.test)
  pred.ringSyn1 <- synthetic(object = ringSyn1,
                             newdata = ring.test)[["rfSynPred"]]
  pred.ringSyn2 <- synthetic(object = ringSyn2,
                             newdata = ring.test)[["rfSynPred"]]

  ## print the three prediction objects in turn
  invisible(lapply(list(pred.ringRF, pred.ringSyn1, pred.ringSyn2), print))
}
## ------------------------------------------------------------
## compare synthetic forest to regular forest (regression)
## ------------------------------------------------------------
## simulate the data: the first n rows are used for training and the
## remaining ntest rows for testing
n <- 250
ntest <- 1000
N <- n + ntest
d <- 50
std <- 0.1
x <- matrix(runif(N * d, -1, 1), ncol = d)
## y is a numeric 0/1 indicator of the noisy signal exceeding 0.38.
## The noise is drawn once per row (N values): rnorm(n) would be silently
## recycled over the N rows, so test rows would reuse the training noise.
y <- 1 * (x[, 1] + x[, 4]^3 + x[, 9] + sin(x[, 12] * x[, 18]) +
            rnorm(N, sd = std) > .38)
dat <- data.frame(x = x, y = y)
## row indices of the test set
test <- (n + 1):N
## regression forest fit on the training rows, then predicted on the test rows
regF <- rfsrc(y ~ ., dat[-test, ])
pred.regF <- predict(regF, dat[test, ])
## synthetic forests fit on the training rows, with the test rows supplied
## through newdata; synF2 additionally passes a sequence of mtry values
synF1 <- synthetic(y ~ ., dat[-test, ], newdata = dat[test, ])
synF2 <- synthetic(y ~ ., dat[-test, ],
                   newdata = dat[test, ],
                   mtrySeq = c(1, 10, 20, 30, 40, 50))
## standardized MSE performance: last err.rate entry of each prediction
## object, divided by the variance of the training response
mse <- c(tail(pred.regF$err.rate, 1),
         tail(synF1$rfSynPred$err.rate, 1),
         tail(synF2$rfSynPred$err.rate, 1)) / var(y[-test])
names(mse) <- c("forest", "synthetic1", "synthetic2")
print(mse)
## ------------------------------------------------------------
## multivariate synthetic forests
## ------------------------------------------------------------
## work on a copy of mtcars so the built-in data set is left untouched
mtcars.new <- mtcars
## recode two responses: cyl as an unordered factor, carb as an ordered factor
mtcars.new$cyl <- factor(mtcars.new$cyl)
mtcars.new$carb <- factor(mtcars.new$carb, ordered = TRUE)
## randomly pick half of the rows for training; seq_len() stays safe for
## zero-row input and %/% keeps the sample size an integer
n.obs <- nrow(mtcars.new)
trn <- sample(seq_len(n.obs), n.obs %/% 2L)
## multivariate synthetic forest with carb, mpg and cyl as joint responses
mvSyn <- synthetic(cbind(carb, mpg, cyl) ~ ., mtcars.new[trn, ])
## pass the fitted object back to synthetic() with the held-out rows as newdata
mvSyn.pred <- synthetic(object = mvSyn, newdata = mtcars.new[-trn, ])
# }
# Run the code above in your browser using DataLab