### dontrun is used when the execution of an example requires some computational effort.
### simple regression (with a formula) example.
x1=rnorm(200,100,20); x2=rnorm(200,100,20)
y=0.7*sin(x1/(25*pi))+0.3*sin(x2/(25*pi))
M=fit(y~x1+x2,model="mlpe")
new1=rnorm(100,100,20); new2=rnorm(100,100,20)
ynew=0.7*sin(new1/(25*pi))+0.3*sin(new2/(25*pi))
P=predict(M,data.frame(x1=new1,x2=new2,y=rep(NA,100)))
print(mmetric(ynew,P,"MAE"))
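### extra sketch (not part of the original example): a quick visual check of
### the predictions using base R graphics:
plot(ynew,P,xlab="observed",ylab="predicted"); abline(0,1,lty=2)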
### simple classification example.
## Not run:
# data(iris)
# M=fit(Species~.,iris,model="rpart")
# plot(M@object); text(M@object) # show model
# P=predict(M,iris)
# print(mmetric(iris$Species,P,"CONF"))
# print(mmetric(iris$Species,P,"ALL"))
# mgraph(iris$Species,P,graph="ROC",TC=2,main="versicolor ROC",
# baseline=TRUE,leg="Versicolor",Grid=10)
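# # an analogous cumulative LIFT curve (an extra sketch, not in the original
# # help; assumes graph="LIFT" accepts the same arguments as "ROC"):
# mgraph(iris$Species,P,graph="LIFT",TC=2,main="versicolor LIFT",
# baseline=TRUE,leg="Versicolor",Grid=10)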
#
# M2=fit(Species~.,iris,model="ctree")
# plot(M2@object) # show model
# P2=predict(M2,iris)
# print(mmetric(iris$Species,P2,"CONF"))
#
# # ctree with different setup:
# # (ctree_control is from the party package)
# M3=fit(Species~.,iris,model="ctree",controls = party::ctree_control(testtype="MonteCarlo"))
# plot(M3@object) # show model
# ## End(Not run)
### classification example with discrete classes, probabilities and holdout
## Not run:
# data(iris)
# H=holdout(iris$Species,ratio=2/3)
# M=fit(Species~.,iris[H$tr,],model="ksvm",task="class")
# M2=fit(Species~.,iris[H$tr,],model="ksvm",task="prob")
# P=predict(M,iris[H$ts,])
# P2=predict(M2,iris[H$ts,])
# print(mmetric(iris$Species[H$ts],P,"CONF"))
# print(mmetric(iris$Species[H$ts],P2,"CONF"))
# print(mmetric(iris$Species[H$ts],P,"CONF",TC=1))
# print(mmetric(iris$Species[H$ts],P2,"CONF",TC=1))
# print(mmetric(iris$Species[H$ts],P2,"AUC"))
#
# ### exploration of some rminer classification models:
# models=c("lda","naiveBayes","kknn","randomForest")
# for(m in models)
# { cat("model:",m,"\n")
# M=fit(Species~.,iris[H$tr,],model=m)
# P=predict(M,iris[H$ts,])
# print(mmetric(iris$Species[H$ts],P,"AUC")[[1]])
# }
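# # small extension (not in the original help): collect the AUC values in a
# # named vector for easier comparison of the models:
# auc=numeric(length(models)); names(auc)=models
# for(m in models)
# { M=fit(Species~.,iris[H$tr,],model=m)
# P=predict(M,iris[H$ts,])
# auc[m]=mmetric(iris$Species[H$ts],P,"AUC")[[1]]
# }
# print(round(auc,3))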
# ## End(Not run)
### classification example with hyperparameter selection
### note: for regression, similar code can be used
### SVM
## Not run:
# data(iris)
# # large list of SVM configurations:
# # SVM with kpar="automatic" sigma estimation for the rbfdot kernel and default C=1:
# # note: each execution can lead to a different M@mpar due to the stochastic nature of sigest:
# M=fit(Species~.,iris,model="ksvm")
# print(M@mpar) # model hyperparameters/arguments
# # same thing, with explicit use of mparheuristic:
# M=fit(Species~.,iris,model="ksvm",search=list(search=mparheuristic("ksvm")))
# print(M@mpar) # model hyperparameters
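# # extra sketch (not in the original help): fixing the random seed makes the
# # stochastic sigest estimation reproducible:
# set.seed(123)
# M=fit(Species~.,iris,model="ksvm")
# print(M@mpar) # same hyperparameters on every run with the same seed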
#
# # SVM with C=3, sigma=2^-7
# M=fit(Species~.,iris,model="ksvm",C=3,kpar=list(sigma=2^-7))
# print(M@mpar)
# # SVM with different kernels:
# M=fit(Species~.,iris,model="ksvm",kernel="polydot",kpar="automatic")
# print(M@mpar)
# # fit already has a scale argument, thus the only way to fix the scale of "tanhdot"
# # is to use the special search argument with the "none" method:
# s=list(smethod="none",search=list(scale=2,offset=2))
# M=fit(Species~.,iris,model="ksvm",kernel="tanhdot",search=s)
# print(M@mpar)
# # heuristic: 10 grid search values for sigma, rbfdot kernel (fdebug is used only to show more verbose output):
# s=list(search=mparheuristic("ksvm",10)) # advised "heuristic10" usage
# M=fit(Species~.,iris,model="ksvm",search=s,fdebug=TRUE)
# print(M@mpar)
# # same thing, using the older search="heuristic10", which works for fewer rminer models:
# M=fit(Species~.,iris,model="ksvm",search="heuristic10",fdebug=TRUE)
# print(M@mpar)
# # identical search written with different, more explicit code:
# s=list(search=2^seq(-15,3,2))
# M=fit(Species~.,iris,model="ksvm",search=s,fdebug=TRUE)
# print(M@mpar)
#
# # uniform design "UD" for sigma and C, rbfdot kernel, two level of grid searches,
# # under exponential (2^x) search scale:
# M=fit(Species~.,iris,model="ksvm",search="UD",fdebug=TRUE)
# print(M@mpar)
# M=fit(Species~.,iris,model="ksvm",search="UD1",fdebug=TRUE)
# print(M@mpar)
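# # for comparison, the simpler sigma-only grid under the same exponential scale: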
# M=fit(Species~.,iris,model="ksvm",search=2^seq(-15,3,2),fdebug=TRUE)
# print(M@mpar)
# # now the more powerful search argument is used for modeling SVM:
# # grid 3 x 3 search:
# s=list(smethod="grid",search=list(sigma=2^c(-15,-5,3),C=2^c(-5,0,15)),convex=0,
# metric="AUC",method=c("kfold",3,12345))
# print(s)
# M=fit(Species~.,iris,model="ksvm",search=s,fdebug=TRUE)
# print(M@mpar)
# # identical search with different argument smethod="matrix"
# s$smethod="matrix"
# s$search=list(sigma=rep(2^c(-15,-5,3),times=3),C=rep(2^c(-5,0,15),each=3))
# print(s)
# M=fit(Species~.,iris,model="ksvm",search=s,fdebug=TRUE)
# print(M@mpar)
# # search for best kernel (only works for kpar="automatic"):
# s=list(smethod="grid",search=list(kernel=c("rbfdot","laplacedot","polydot","vanilladot")),
# convex=0,metric="AUC",method=c("kfold",3,12345))
# print(s)
# M=fit(Species~.,iris,model="ksvm",search=s,fdebug=TRUE)
# print(M@mpar)
# # search for best parameters of "rbfdot" or "laplacedot" (which use same kpar):
# s$search=list(kernel=c("rbfdot","laplacedot"),sigma=2^seq(-15,3,5))
# print(s)
# M=fit(Species~.,iris,model="ksvm",search=s,fdebug=TRUE)
# print(M@mpar)
#
# ### randomForest
# # search for mtry and ntree
# s=list(smethod="grid",search=list(mtry=c(1,2,3),ntree=c(100,200,500)),
# convex=0,metric="AUC",method=c("kfold",3,12345))
# print(s)
# M=fit(Species~.,iris,model="randomForest",search=s,fdebug=TRUE)
# print(M@mpar)
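# # extra sketch (not in the original help): assuming M@object stores the
# # final randomForest model (as with the tree models above), standard
# # randomForest functions can inspect it:
# print(randomForest::importance(M@object))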
#
# ### rpart
# # simpler way to tune cp in 0.01 to 0.9 (10 searches):
# s=list(search=mparheuristic("rpart",n=10,lower=0.01,upper=0.9),method=c("kfold",3,12345))
# M=fit(Species~.,iris,model="rpart",search=s,fdebug=TRUE)
# print(M@mpar)
#
# # same thing but with more lines of code
# # note: this code can be adapted to tune other rpart parameters,
# # while mparheuristic only tunes cp (see the extra sketch after this example)
# # a vector list needs to be used for the search$search parameter
# lcp=vector("list",10) # 10 grid values for the complexity cp
# names(lcp)=rep("cp",10) # same cp name
# scp=seq(0.01,0.9,length.out=10) # 10 values from 0.01 to 0.9
# for(i in 1:10) lcp[[i]]=scp[i] # loop needed due to the [[]] notation
# s=list(smethod="grid",search=list(control=lcp),
# convex=0,metric="AUC",method=c("kfold",3,12345))
# M=fit(Species~.,iris,model="rpart",search=s,fdebug=TRUE)
# print(M@mpar)
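# # extra sketch (not in the original help): the same vector list idea can
# # tune several rpart.control parameters at once, e.g. cp and maxdepth
# # (argument names from rpart::rpart.control), mirroring the ctree example below:
# lctrl=vector("list",4)
# cps=c(0.01,0.1); mds=c(2,30)
# k=0
# for(i in 1:2) for(j in 1:2)
# { k=k+1; lctrl[[k]]=rpart::rpart.control(cp=cps[i],maxdepth=mds[j]) }
# s=list(smethod="grid",search=list(control=lctrl),
# convex=0,metric="AUC",method=c("kfold",3,12345))
# M=fit(Species~.,iris,model="rpart",search=s,fdebug=TRUE)
# print(M@mpar)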
#
# ### ctree
# # simpler way to tune mincriterion from 0.1 to 0.99 (9 searches):
# mint=c("kfold",3,123) # internal validation method
# s=list(search=mparheuristic("ctree",n=9,lower=0.1,upper=0.99),method=mint)
# M=fit(Species~.,iris,model="ctree",search=s,fdebug=TRUE)
# print(M@mpar)
# # same thing but with more lines of code
# # note: this code can be adapted to tune other ctree parameters,
# # while mparheuristic only tunes mincriterion
# # a vector list needs to be used for the search$search parameter
# lmc=vector("list",9) # 9 grid values for the mincriterion
# smc=seq(0.1,0.99,length.out=9)
# for(i in 1:9) lmc[[i]]=party::ctree_control(mincriterion=smc[i])
# s=list(smethod="grid",search=list(controls=lmc),method=mint,convex=0)
# M=fit(Species~.,iris,model="ctree",search=s,fdebug=TRUE)
# print(M@mpar)
#
# ### some MLP fitting examples:
# # simplest use:
# M=fit(Species~.,iris,model="mlpe")
# print(M@mpar)
# # same thing, with explicit use of mparheuristic:
# M=fit(Species~.,iris,model="mlpe",search=list(search=mparheuristic("mlpe")))
# print(M@mpar) # hidden nodes and number of ensemble mlps
#
# # setting some nnet parameters:
# M=fit(Species~.,iris,model="mlpe",size=3,decay=0.1,maxit=100,rang=0.9)
# print(M@mpar) # mlpe hyperparameters
# # MLP with a 5-point grid search (fdebug is only used to show some verbose output in the console):
# s=list(search=mparheuristic("mlpe",n=5)) # 5 searches for size
# print(s) # show search
# M=fit(Species~.,iris,model="mlpe",search=s,fdebug=TRUE)
# print(M@mpar)
# # previous searches used a random holdout (seed=NULL), now a fixed seed (123) is used:
# s=list(smethod="grid",search=mparheuristic("mlpe",n=5),convex=0,metric="AUC",
# method=c("holdout",2/3,123))
# print(s)
# M=fit(Species~.,iris,model="mlpe",search=s,fdebug=TRUE)
# print(M@mpar)
# # faster and greedy grid search:
# s$convex=1;s$search=list(size=0:9)
# print(s)
# M=fit(Species~.,iris,model="mlpe",search=s,fdebug=TRUE)
# print(M@mpar)
# # 2-level grid with a total of 5 searches
# # note of caution: some "2L" ranges may lead to non-integer (e.g., 1.3) values at
# # the 2nd level search, and some R functions crash if non-integer values are used
# # for integer parameters.
# s$smethod="2L";s$convex=0;s$search=list(size=c(4,8,12))
# print(s)
# M=fit(Species~.,iris,model="mlpe",search=s,fdebug=TRUE)
# print(M@mpar)
# ## End(Not run)
### example of an error (warning) generated using fit:
## Not run:
# data(iris)
# # size needs to be a positive integer, thus 0.1 leads to an error:
# M=fit(Species~.,iris,model="mlp",size=0.1)
# print(M@object)
# ## End(Not run)
### exploration of some rminer regression models:
## Not run:
# data(sa_ssin)
# H=holdout(sa_ssin$y,ratio=2/3,seed=12345)
# models=c("mr","ctree","mars","cubist","rvm")
# for(m in models)
# { cat("model:",m,"\n")
# M=fit(y~.,sa_ssin[H$tr,],model=m)
# P=predict(M,sa_ssin[H$ts,])
# print(mmetric(sa_ssin$y[H$ts],P,"MAE"))
# }
# ## End(Not run)
### regression example with hyperparameter selection:
## Not run:
# data(sa_ssin)
# # some SVM experiments:
# # default SVM:
# M=fit(y~.,data=sa_ssin,model="svm")
# print(M@mpar)
# # SVM with (Cherkassky and Ma, 2004) heuristics to set C and epsilon:
# M=fit(y~.,data=sa_ssin,model="svm",C=NA,epsilon=NA)
# print(M@mpar)
# # SVM with Uniform Design to set sigma, C and epsilon:
# M=fit(y~.,data=sa_ssin,model="ksvm",search="UD",fdebug=TRUE)
# print(M@mpar)
#
# # feature selection based on sensitivity analysis:
# M=fit(y~.,data=sa_ssin,model="ksvm",search=list(search=mparheuristic("ksvm",n=5)),feature="sabs")
# print(M@mpar)
# print(M@attributes) # selected attributes (1, 2 and 3 are the relevant inputs)
#
# # example that shows how transform works:
# M=fit(y~.,data=sa_ssin,model="mr") # linear regression
# P=predict(M,data.frame(x1=-1000,x2=0,x3=0,x4=0,y=NA)) # P should be negative
# print(P)
# M=fit(y~.,data=sa_ssin,model="mr",transform="positive")
# P=predict(M,data.frame(x1=-1000,x2=0,x3=0,x4=0,y=NA)) # P is not negative
# print(P)
# ## End(Not run)
### pure classification example with a generic R model ###
## Not run:
# ### nnet is adopted here but virtually ANY fitting function/package could be used:
#
# # since the default nnet prediction provides probabilities, a "wrapping"
# # function is needed to return the predicted classes:
# predictclass=function(object,newdata)
# { predict(object,newdata,type="class") }
# # list with a fit and a predict function:
# # nnet::nnet (package::function)
# model=list(fit=nnet::nnet,predict=predictclass,name="nnet")
# data(iris)
# # note that size is not a fit parameter, so it is sent directly to nnet:
# M=fit(Species~.,iris,model=model,size=3,task="class")
# P=predict(M,iris)
# print(P)
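# # a similar wrapper can be written for regression (an extra hedged sketch,
# # not in the original help; assumes the same list interface applies):
# predictlm=function(object,newdata) predict(object,newdata)
# model2=list(fit=lm,predict=predictlm,name="lm")
# data(sa_ssin)
# M2=fit(y~.,sa_ssin,model=model2,task="reg")
# print(mmetric(sa_ssin$y,predict(M2,sa_ssin),"MAE"))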
# ## End(Not run)