train(x, ...)
## S3 method for class 'default':
train(x, y,
method = "rf",
preProcess = NULL,
...,
weights = NULL,
metric = ifelse(is.factor(y), "Accuracy", "RMSE"),
maximize = ifelse(metric == "RMSE", FALSE, TRUE),
trControl = trainControl(),
tuneGrid = NULL,
tuneLength = 3)
## S3 method for class 'formula':
train(form, data, ..., weights, subset, na.action, contrasts = NULL)
y ~ x1 + x2 + ...
formula
are preferentially to be taken.ada
, avNNet
, bag
, bagEarth
, bagFDA
, bayesglm
, bdk
, blackboost
randomForest
). Errors will occur if values
for tuning parameters are passed here.trainControl
. (NOTE: If given, this argument must be named.)tuneGrid
with arguments called len<
createGrid
. (NOTE: If given, this argument must be named.)train
containing:NULL
or an object of class preProcess
NULL
. The returnResamp
argument of trainControl
controls how much of the resampled results are saved.everything
is for the entire call to train
, final
for the final model fit and, optionally, prediction
for the time to predict new samples (see trainControl
)train
can be used to tune models by picking the complexity parameters that are associated with the optimal resampling statistics. For a particular model, a grid of parameters (if any) is created and the model is trained on slightly different data for each candidate combination of tuning parameters. Across each data set, the performance of held-out samples is calculated and the mean and standard deviation are summarized for each combination. The combination with the optimal resampling statistic is chosen as the final model and the entire training set is used to fit a final model. A variety of models are currently available. The lists below enumerate the models and the values of the method
argument, as well as the complexity parameters used by train
.
Bagging
bag
from packagevars
(dual use)bagEarth
from packagenprune
,degree
(dual use)bagFDA
from packagedegree
,nprune
(classification only)logicBag
from packagentrees
,nleaves
(dual use)treebag
from packageBayesian Methods
nb
from packagefL
,usekernel
(classification only)vbmpRadial
from packageestimateTheta
(classification only)Boosted Trees
ada
from packageiter
,maxdepth
,nu
(classification only)blackboost
from packagemaxdepth
,mstop
(dual use)bstTree
from packagenu
,maxdepth
,mstop
(dual use)C5.0
from packagewinnow
,model
,trials
(classification only)gbm
from packageinteraction.depth
,n.trees
,shrinkage
(dual use)Boosting (Non-Tree)
bstLs
from packagemstop
,nu
(dual use)bstSm
from packagenu
,mstop
(dual use)gamboost
from packageprune
,mstop
(dual use)glmboost
from packageprune
,mstop
(dual use)logitBoost
from packagenIter
(classification only)Elastic Net
glmnet
from packagealpha
,lambda
(dual use)Flexible Discriminant Analysis (MARS basis)
fda
from packagenprune
,degree
(classification only)Gaussian Processes
gaussprLinear
from packagegaussprPoly
from packagedegree
,scale
(dual use)gaussprRadial
from packagesigma
(dual use)Generalized additive model
gam
from packageselect
,method
(dual use)gamLoess
from packagedegree
,span
(dual use)gamSpline
from packagedf
(dual use)Generalized linear model
glm
from packagebayesglm
from packageglmStepAIC
from packageHeteroscedastic Discriminant Analysis
hda
from packagenewdim
,lambda
,gamma
(classification only)High Dimensional Discriminant Analysis
hdda
from packagemodel
,threshold
(classification only)Independent Component Regression
icr
from packagen.comp
(regression only)K Nearest Neighbor
knn
from packagek
(dual use)Learned Vector Quantization
lvq
from packagesize
,k
(classification only)Linear Discriminant Analysis
lda
from packagelda2
from packagedimen
(classification only)Linda
from packagerrlda
from packagelambda
,alpha
(classification only)sda
from packagediagonal
(classification only)sddaLDA
from packageslda
from packagestepLDA
from packagedirection
,maxvar
(classification only)Linear Least Squares
leapBackward
from packagenvmax
(regression only)leapForward
from packagenvmax
(regression only)leapSeq
from packagenvmax
(regression only)lm
from packagelmStepAIC
from packagerlm
from packageLogic Regression
logforest
from packagelogreg
from packagetreesize
,ntrees
(dual use)Logistic Model Trees
LMT
from packageiter
(classification only)Logistic/Multinomial Regression
multinom
from packagedecay
(classification only)plr
from packagecp
,lambda
(classification only)Mixture Discriminant Analysis
mda
from packagesubclasses
(classification only)smda
from packageR
,lambda
,NumVars
(classification only)Multivariate Adaptive Regression Spline
earth
from packagenprune
,degree
(dual use)gcvEarth
from packagedegree
(dual use)Nearest Shrunken Centroids
pam
from packagethreshold
(classification only)Neural Networks
avNNet
from packagesize
,bag
,decay
(dual use)mlp
from packagesize
(dual use)mlpWeightDecay
from packagedecay
,size
(dual use)neuralnet
from packagelayer2
,layer1
,layer3
(regression only)nnet
from packagesize
,decay
(dual use)pcaNNet
from packagesize
,decay
(dual use)qrnn
from packagepenalty
,bag
,n.hidden
(regression only)Partial Least Squares
gpls
from packageK.prov
(classification only)kernelpls
from packagencomp
(dual use)pls
from packagencomp
(dual use)simpls
from packagencomp
(dual use)spls
from packageeta
,kappa
,K
(dual use)widekernelpls
from packagencomp
(dual use)Penalized Discriminant Analysis
pda
from packagelambda
(classification only)pda2
from packagedf
(classification only)Penalized Linear Models
enet
from packagefraction
,lambda
(regression only)foba
from packagelambda
,k
(regression only)krlsPoly
from packagelambda
,degree
(regression only)krlsRadial
from packagesigma
,lambda
(regression only)lars
from packagefraction
(regression only)lars2
from packagestep
(regression only)lasso
from packagefraction
(regression only)penalized
from packagelambda1
,lambda2
(regression only)relaxo
from packagelambda
,phi
(regression only)ridge
from packagelambda
(regression only)Principal Component Regression
pcr
from packagencomp
(regression only)Projection Pursuit Regression
ppr
from packagenterms
(regression only)Quadratic Discriminant Analysis
qda
from packageQdaCov
from packagesddaQDA
from packagestepQDA
from packagemaxvar
,direction
(classification only)Radial Basis Function Networks
rbf
from packagesize
(dual use)rbfDDA
from packagenegativeThreshold
(classification only)Random Forests
Boruta
from packagemtry
(dual use)cforest
from packagemtry
(dual use)ORFlog
from packagemtry
(classification only)ORFpls
from packagemtry
(classification only)ORFridge
from packagemtry
(classification only)ORFsvm
from packagemtry
(classification only)parRF
from packagemtry
(dual use)qrf
from packagemtry
(regression only)rf
from packagemtry
(dual use)rFerns
from packagedepth
(classification only)RRF
from packagemtry
,coefReg
,coefImp
(dual use)RRFglobal
from packagecoefReg
,mtry
(dual use)Recursive Partitioning
C5.0Tree
from packagectree
from packagemincriterion
(dual use)ctree2
from packagemaxdepth
(dual use)evtree
from packagealpha
(dual use)J48
from packageC
(classification only)nodeHarvest
from packagemaxinter
,mode
(dual use)obliqueTree
from packagevariable.selection
,oblique.splits
(dual use)partDSA
from packagecut.off.growth
,MPD
(dual use)rpart
from packagecp
(dual use)rpart2
from packagemaxdepth
(dual use)Regularized Discriminant Analysis
rda
from packagelambda
,gamma
(classification only)Relevance Vector Machines
rvmLinear
from packagervmPoly
from packagescale
,degree
(regression only)rvmRadial
from packagesigma
(regression only)ROC Curves
rocc
from packagexgenes
(classification only)Rule-Based Models
C5.0Rules
from packagecubist
from packagecommittees
,neighbors
(regression only)JRip
from packageNumOpt
(classification only)M5
from packagerules
,pruned
,smoothed
(regression only)M5Rules
from packagepruned
,smoothed
(regression only)OneR
from packagePART
from packagepruned
,threshold
(classification only)Self-Organizing Maps
bdk
from packagetopo
,ydim
,xweight
,xdim
(dual use)xyf
from packagexdim
,ydim
,topo
,xweight
(dual use)Sparse Linear Discriminant Analysis
PenalizedLDA
from packageK
,lambda
(classification only)sparseLDA
from packagelambda
,NumVars
(classification only)Supervised Principal Components
superpc
from packagethreshold
,n.components
(regression only)Support Vector Machines
lssvmRadial
from packagesigma
(classification only)svmLinear
from packageC
(dual use)svmPoly
from packagedegree
,scale
,C
(dual use)svmRadial
from packageC
,sigma
(dual use)svmRadialCost
from packageC
(dual use)
By default, the function createGrid
is used to define the candidate values of the tuning parameters. The user can also specify their own. To do this, a data frame is created with columns for each tuning parameter in the model. The column names must be the same as those listed in the table above with a leading dot. For example, ncomp
would have the column heading .ncomp
. This data frame can then be passed to train via the tuneGrid argument
.
In some cases, models may require control arguments. These can be passed via the three dots argument. Note that some models can specify tuning parameters in the control objects. If specified, these values will be superseded by those given in the tuneGrid
argument.
The formula interface to train
will always convert factor variables to dummy variables. For several models (rpart
, rf
, gbm
, treebag
, nb
, J48
, PART
, JRip
, OneR
, ctree
, cforest
, bag
, cubist
, C5.0
, C5.0Tree
, C5.0Rules
and custom
) factor predictor variables can be passed directly to the underlying modeling function using the interface train(x, y)
. In these cases, it is possible for the models to treat factor variables in a manner different than most (i.e. not as a decomposed set of dummy variables).
The web page
train
can be used with "explicit parallelism", where different resamples (e.g. cross-validation group) and models can be split up and run on multiple machines or processors. By default, train
will use a single processor on the host machine. As of version 4.99 of this package, the framework used for parallel processing uses the foreach package. To run the resamples in parallel, the code for train
does not change; prior to the call to train
, a parallel backend is registered with
trainControl
, update.train
,
modelLookup
, createGrid
,
createFolds
#######################################
## Classification Example
## Fits three classifiers to the iris data through the train(x, y)
## interface: two k-nearest-neighbour fits that differ only in the
## resampling scheme, and one neural network.
data(iris)
## Columns 1-4 are used as the predictors, column 5 as the outcome.
TrainData <- iris[,1:4]
TrainClasses <- iris[,5]
## knn with centered and scaled predictors; resampling is configured
## with trainControl(method = "cv").
knnFit1 <- train(TrainData, TrainClasses,
method = "knn",
preProcess = c("center", "scale"),
tuneLength = 10,
trControl = trainControl(method = "cv"))
## Same model and pre-processing as knnFit1, but resampled with
## trainControl(method = "boot").
knnFit2 <- train(TrainData, TrainClasses,
method = "knn",
preProcess = c("center", "scale"),
tuneLength = 10,
trControl = trainControl(method = "boot"))
## NOTE(review): MASS is attached here although nothing below visibly
## calls it directly -- confirm whether this is still required.
library(MASS)
## trace and maxit are not arguments of train() itself; they travel
## through the three dots argument to the underlying model function.
nnetFit <- train(TrainData, TrainClasses,
method = "nnet",
preProcess = "range",
tuneLength = 2,
trace = FALSE,
maxit = 100)
#######################################
## Regression Example
## Models medv from the BostonHousing data through the formula
## interface, first with a linear model and then with rpart.
library(mlbench)
data(BostonHousing)
## All predictors plus the rm:lstat interaction term. The model name
## "lm" is supplied as an unnamed argument after data.
lmFit <- train(medv ~ . + rm:lstat,
data = BostonHousing,
"lm")
library(rpart)
## Main-effects formula only; tuneLength = 9 replaces the default of 3
## shown in the usage section.
rpartFit <- train(medv ~ .,
data = BostonHousing,
"rpart",
tuneLength = 9)
#######################################
## Example with a custom metric
## Custom resampling summary: the median absolute deviation (stats::mad)
## of the prediction errors.
##
## data  - object with `obs` and `pred` components; errors are obs - pred.
## lev   - accepted but not used here.
## model - accepted but not used here.
##
## Returns a length-one numeric vector named "MAD"; missing errors are
## dropped before the statistic is computed.
madSummary <- function(data, lev = NULL, model = NULL) {
  errors <- data$obs - data$pred
  setNames(mad(errors, na.rm = TRUE), "MAD")
}
## Resampling control that swaps in madSummary as the summary function.
robustControl <- trainControl(summaryFunction = madSummary)
## Hand-built tuning grid: degree fixed at 1 and nprune taking the even
## values 2, 4, ..., 20. Column names carry the leading dot required
## for user-supplied grids.
marsGrid <- expand.grid(.degree = 1,
.nprune = (1:10) * 2)
## metric = "MAD" matches the name madSummary gives its result;
## maximize = FALSE states that this metric is not to be maximized.
earthFit <- train(medv ~ .,
data = BostonHousing,
"earth",
tuneGrid = marsGrid,
metric = "MAD",
maximize = FALSE,
trControl = robustControl)
#######################################
## Parallel Processing Example via multicore package
## The backend registration below is left commented out; uncomment it
## to register a parallel backend before calling train().
## library(doMC)
## registerDoMC(2)
## NOTE: don't run models from RWeka when using
### multicore. The session will crash.
## The code for train() does not change:
## Seed fixed before the fit.
set.seed(1)
usingMC <- train(medv ~ .,
data = BostonHousing,
"glmboost")
## or use:
## library(doMPI) or
## library(doSMP) and so on
Run the code above in your browser using DataLab