if (FALSE) {
# Usage examples for rfArb() and its predict() method.  The enclosing
# if (FALSE) guard keeps these calls from executing when the file is
# sourced; copy individual calls into a session to run them.

# Regression example:  six numeric predictors, X1 through X6.
nRow <- 5000
x <- data.frame(replicate(6, rnorm(nRow)))
y <- with(x, X1^2 + sin(X2) + X3 * X4) # courtesy of S. Welling.
# Classification example:
data(iris)
# Generic invocation:
rb <- rfArb(x, y)
# Causes 300 trees to be trained:
rb <- rfArb(x, y, nTree = 300)
# Causes rows to be sampled without replacement:
rb <- rfArb(x, y, withRepl = FALSE)
# Causes validation census to report class probabilities:
rb <- rfArb(iris[-5], iris[5], ctgCensus = "prob")
# Applies table-weighting to classification categories:
rb <- rfArb(iris[-5], iris[5], classWeight = "balance")
# Weights first category twice as heavily as remaining two:
rb <- rfArb(iris[-5], iris[5], classWeight = c(2.0, 1.0, 1.0))
# Does not split nodes when doing so yields less than a 2% gain in
# information over the parent node:
rb <- rfArb(x, y, minInfo = 0.02)
# Does not split nodes representing fewer than 10 unique samples:
rb <- rfArb(x, y, minNode = 10)
# Trains a maximum of 20 levels:
rb <- rfArb(x, y, nLevel = 20)
# Trains, but does not perform subsequent validation:
rb <- rfArb(x, y, noValidate = TRUE)
# Chooses 500 rows (with replacement) to root each tree.
rb <- rfArb(x, y, nSamp = 500)
# Chooses 2 predictors as splitting candidates at each node (or
# fewer, when choices exhausted):
rb <- rfArb(x, y, predFixed = 2)
# Causes each predictor to be selected as a splitting candidate with
# distribution Bernoulli(0.3):
rb <- rfArb(x, y, predProb = 0.3)
# Causes first three predictors to be selected as splitting candidates
# twice as often as the other three.  One weight is supplied per
# column of x (six in all):
rb <- rfArb(x, y, predWeight = c(2.0, 2.0, 2.0, 1.0, 1.0, 1.0))
# Causes (default) quantiles to be computed at validation:
rb <- rfArb(x, y, quantiles = TRUE)
qPred <- rb$validation$qPred
# Causes specified quantiles (deciles) to be computed at validation:
rb <- rfArb(x, y, quantVec = seq(0.1, 1.0, by = 0.10))
qPred <- rb$validation$qPred
# Constrains modelled response to be increasing with respect to X1
# and decreasing with respect to X5.
rb <- rfArb(x, y, regMono = c(1.0, 0, 0, 0, -1.0, 0))
# Causes rows to be sampled with random weighting:
rb <- rfArb(x, y, samplingWeight = runif(nRow))
# Suppresses creation of detailed leaf information needed for
# quantile prediction and external tools.
rb <- rfArb(x, y, thinLeaves = TRUE)
# Directs prediction to take a random branch on encountering
# values not observed during training, such as NA or an
# unrecognized category.
predict(rb, trapUnobserved = FALSE)
# Directs prediction to silently trap unobserved values, reporting a
# score associated with the current nonterminal tree node.
predict(rb, trapUnobserved = TRUE)
# Sets splitting position for the first predictor to far left and the
# second predictor to far right, others to default (median) position.
# R vectors are 1-indexed:  assigning to index 0 is a silent no-op, so
# the positions below must be 1 and 2.
spq <- rep(0.5, ncol(x))
spq[1] <- 0.0
spq[2] <- 1.0
rb <- rfArb(x, y, splitQuant = spq)
}
# Run the code above in your browser using DataLab