## Load the transcription factor binding site data. The examples below use
## the sequences in enhancerFB and the labels in yFB.
data(TFBS)
enhancerFB

## The C-svc implementation from LiblineaR is chosen for most of the
## examples because it is the fastest SVM implementation. With SVMs from
## other packages slightly better results could be achievable.
##
## To get a realistic picture of possible performance values, kernel
## behavior and the speed of grid search together with 10-fold cross
## validation, a reasonable number of sequences is needed. That would exceed
## the runtime restrictions for automatically executed examples, so the
## larger grid search examples must be run manually. In these examples we
## use the full dataset for grid search.

## Randomly permute the indices of all sequences. seq_along() is used
## instead of 1:length() so that an empty input cannot yield c(1, 0).
train <- sample(seq_along(enhancerFB), length(enhancerFB))

## Grid search with a single kernel object and multiple hyperparameter values

## create gappy pair kernel with normalization
gappyK1M3 <- gappyPairKernel(k = 1, m = 3)

## show details of the single gappy pair kernel object
gappyK1M3

## grid search for a single kernel object and multiple values for cost;
## cross = 3 is passed to kbsvm for the cross validation of each grid point
pkg <- "LiblineaR"
svm <- "C-svc"
cost <- c(0.01, 0.1, 1, 10, 100, 1000)
model <- kbsvm(
  x = enhancerFB[train], y = yFB[train], kernel = gappyK1M3,
  pkg = pkg, svm = svm, cost = cost, explicit = "yes", cross = 3
)

## show grid search results
modelSelResult(model)
## Not run:
# ## create the list of spectrum kernel objects with normalization and
# ## kernel parameters values for k from 1 to 5
# specK15 <- spectrumKernel(k=1:5)
# ## show details of the five spectrum kernel objects
# specK15
#
# ## run grid search with several kernel parameter settings for the
# ## spectrum kernel with a single SVM parameter setting
# ## ATTENTION: DO NOT USE THIS VARIANT!
# ## This variant does not give comparable performance for the different
# ## kernel parameter settings, because the best performing hyperparameter
# ## values can be quite different for different kernel parameter settings
# ## or for different kernels. Grid search for multiple kernel objects
# ## should be done as shown in the next example.
# pkg <- "LiblineaR"
# svm <- "C-svc"
# cost <- 2
# model <- kbsvm(x=enhancerFB[train], y=yFB[train], kernel=specK15,
# pkg=pkg, svm=svm, cost=cost, explicit="yes", cross=10)
#
# ## show grid search results
# modelSelResult(model)
#
# ## grid search with multiple kernel objects and multiple values for
# ## hyperparameter cost
# pkg <- "LiblineaR"
# svm <- "C-svc"
# cost <- c(0.01,0.1,1,10,50,100,150,200,500,1000)
# model <- kbsvm(x=enhancerFB, sel=train, y=yFB[train], kernel=specK15,
# pkg=pkg, svm=svm, cost=cost, explicit="yes", cross=10,
# showProgress=TRUE)
#
# ## show grid search results
# modelSelResult(model)
#
# ## grid search for a single kernel object with multiple SVMs
# ## from different packages
# ## here with display of cross validation runtimes for each grid point
# ## pkg, svm and cost vectors must have same length and the corresponding
# ## entry in each of these vectors are one SVM + SVM hyperparameter setting
# pkg <- rep(c("kernlab", "e1071", "LiblineaR"),3)
# svm <- rep("C-svc", 9)
# cost <- rep(c(0.01,0.1,1),each=3)
# model <- kbsvm(x=enhancerFB[train], y=yFB[train], kernel=gappyK1M3,
# pkg=pkg, svm=svm, cost=cost, explicit="yes", cross=3,
# showCVTimes=TRUE)
#
# ## show grid search results
# modelSelResult(model)
#
# ## run grid search for a single kernel with multiple SVMs from same package
# ## here all from LiblineaR: C-SVM, L2 regularized SVM with L2 loss and
# ## SVM with L1 regularization and L2 loss
# ## attention: for different formulations of the SVM objective use different
# ## values for the hyperparameters even if they have the same name
# pkg <- rep("LiblineaR", 9)
# svm <- rep(c("C-svc","l2rl2l-svc","l1rl2l-svc"), each=3)
# cost <- c(1,150,1000,1,40,100,1,40,100)
# model <- kbsvm(x=enhancerFB, sel=train, y=yFB[train], kernel=gappyK1M3,
# pkg=pkg, svm=svm, cost=cost, explicit="yes", cross=3)
#
# ## show grid search results
# modelSelResult(model)
#
# ## create the list of kernel objects for gappy pair kernel
# gappyK1M15 <- gappyPairKernel(k=1, m=1:5)
# ## show details of kernel objects
# gappyK1M15
#
# ## run grid search with progress indication with ten kernels and ten
# ## hyperparameter values for cost and 10 fold cross validation on full
# ## dataset (500 samples)
# pkg <- rep("LiblineaR", 10)
# svm <- rep("C-svc", 10)
# cost <- c(0.0001,0.001,0.01,0.1,1,10,100,1000,10000,100000)
# model <- kbsvm(x=enhancerFB, y=yFB, kernel=c(specK15, gappyK1M15),
# pkg=pkg, svm=svm, cost=cost, cross=10, explicit="yes",
# showCVTimes=TRUE, showProgress=TRUE)
#
# ## show grid search results
# modelSelResult(model)
# ## End(Not run)
## Run the code above in your browser using DataLab