# two-dimensional data
# load the hacide data (provides hacide.train and hacide.test)
data(hacide)
# in the following examples only a small subset of the observations is used
# (see the 'subset' argument)
dat <- hacide.train
table(dat$cls)
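# a quick look at the degree of imbalance (base R, optional check)
prop.table(table(dat$cls))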
##Example 1
# classification with a logit model
# arguments to glm are passed through control.learner
# leave-K-out cross-validation (LKOCV, here with K=5) estimate of the AUC
# of a classifier trained on balanced data
ROSE.eval(cls~., data=dat, glm, subset=c(1:50, 981:1000),
method.assess="LKOCV", K=5,
control.learner=list(family=binomial), seed=1)
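# for comparison, a minimal sketch of a holdout estimate of the AUC with the
# same logit learner; all arguments mirror calls used elsewhere in these examples
ROSE.eval(cls~., data=dat, glm, subset=c(1:50, 981:1000),
          method.assess="holdout",
          control.learner=list(family=binomial), seed=1)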
##Example 2
# classification with decision tree
# requires package rpart
library(rpart)
# a function is needed to extract the predicted probability of class 1
f.pred.rpart <- function(x) x[,2]
# holdout estimate of the AUC of two classifiers
# the first classifier is trained on a ROSE sample that keeps
# the original (unbalanced) class proportion
# proportion of rare events in the original data
p <- (table(dat$cls)/sum(table(dat$cls)))[2]
ROSE.eval(cls~., data=dat, rpart, subset=c(1:50, 981:1000),
control.rose=list(p = p), extr.pred=f.pred.rpart, seed=1)
# second classifier trained on ROSE balanced sample
# optional arguments to plot the roc.curve are passed through
# control.accuracy
ROSE.eval(cls~., data=dat, rpart, subset=c(1:50, 981:1000),
control.rose=list(p = 0.5), control.accuracy = list(add.roc = TRUE,
col = 2), extr.pred=f.pred.rpart, seed=1)
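# optional: label the two ROC curves drawn above with a base R legend
# (this assumes the first curve uses the default colour and line type,
# while the second was drawn with col = 2)
legend("bottomright", legend=c("ROSE sample, original proportion",
       "ROSE sample, balanced (p = 0.5)"), col=c(1, 2), lty=1)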
##Example 3
# classification with linear discriminant analysis
library(MASS)
# a function is needed to extract the posterior probability of class 1 from predict.lda
f.pred.lda <- function(z) z$posterior[,2]
# bootstrap estimate of precision of learner trained on balanced data
prec.distr <- ROSE.eval(cls~., data=dat, lda, subset=c(1:50, 981:1000),
extr.pred=f.pred.lda, acc.measure="precision",
method.assess="BOOT", B=100, trace=TRUE)
summary(prec.distr)
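# the bootstrap replicates can also be inspected graphically; a minimal sketch,
# assuming the B estimates are stored in the 'acc' component of the returned object
hist(prec.distr$acc, main="Bootstrap distribution of precision", xlab="precision")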
##Example 4
# compare the AUC of a neural network classifier
# with the AUC of a classification tree
# requires packages nnet and tree
library(nnet)
library(tree)
# optional arguments to nnet are passed through control.learner
ROSE.eval(cls~., data=dat, nnet, subset=c(1:50, 981:1000),
method.assess="holdout", control.learn=list(size=1), seed=1)
# optional arguments to plot the roc.curve are passed through
# control.accuracy
# a function is needed to extract the predicted probability of class 1
f.pred.tree <- function(x) x[,2]
ROSE.eval(cls~., data=dat, tree, subset=c(1:50, 981:1000),
method.assess="holdout", extr.pred=f.pred.tree,
control.accuracy=list(add.roc=TRUE, col=2), seed=1)
##Example 5
# a user-defined learner with standard behaviour
# a dummy example, for illustrative purposes only
# note that the function name and the name of the returned class match
DummyStump <- function(formula, ...)
{
  # build the model frame from the call, in the usual way for
  # formula-based learners (handles data, subset, na.action)
  mc <- match.call()
  m <- match(c("formula", "data", "na.action", "subset"), names(mc), 0L)
  mf <- mc[c(1L, m)]
  mf[[1L]] <- as.name("model.frame")
  mf <- eval(mf, parent.frame())
  data.st <- data.frame(mf)
  # the 'model' is just the name of the first predictor and a fixed threshold
  out <- list(colname=colnames(data.st)[2], threshold=1)
  class(out) <- "DummyStump"
  out
}
# associate a predict method with DummyStump
# usual S3 naming convention: predict.classname
predict.DummyStump <- function(object, newdata)
{
  # classify by comparing the selected predictor with the stored threshold
  out <- newdata[,object$colname]>object$threshold
  out
}
ROSE.eval(formula=cls~., data=dat, learner=DummyStump,
subset=c(1:50, 981:1000), method.assess="holdout", seed=3)
##Example 6
# use of a wrapper for a function with non-standard behaviour
# consider knn in package class
# requires package class
library(class)
# the wrapper requires two mandatory arguments: data and newdata;
# optional arguments can be passed on through '...'
# note that we specify data=dat in ROSE.eval, so inside the wrapper
# 'data' receives a data set structured like dat, while 'newdata' receives
# the same structure but with the class label variable dropped;
# the wrapper then passes the appropriate pieces to knn
knn.wrap <- function(data, newdata, ...)
{
  # the class label is in column 1: pass it as 'cl' and drop it from the predictors
  knn(train=data[,-1], test=newdata, cl=data[,1], ...)
}
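# a direct call to the wrapper, outside ROSE.eval, to illustrate the expected
# calling convention: 'data' includes the class label, 'newdata' does not
# (the rows selected below are arbitrary and only for illustration)
knn.wrap(data=dat[c(1:50, 981:1000), ], newdata=dat[1:5, -1], k=2)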
# optional arguments to knn.wrap may be specified in control.learner
ROSE.eval(formula=cls~., data=dat, learner=knn.wrap,
subset=c(1:50, 981:1000), method.assess="holdout",
control.learner=list(k=2, prob=TRUE), seed=1)
# if we swap the columns of dat we have to change the wrapper accordingly
dat <- dat[,c("x1","x2","cls")]
# the class label variable is now the last column
knn.wrap <- function(data, newdata, ...)
{
  # the class label is now in column 3
  knn(train=data[,-3], test=newdata, cl=data[,3], ...)
}
ROSE.eval(formula=cls~., data=dat, learner=knn.wrap,
subset=c(1:50, 981:1000), method.assess="holdout",
control.learner=list(k=2, prob=TRUE), seed=1)