## load transcription factor binding site data
data(TFBS)
enhancerFB
## The C-svc implementation from LiblineaR is chosen for most of the
## examples because it is the fastest SVM implementation. With SVMs from
## other packages slightly better results could be achievable.
## To get a realistic image of possible performance values, kernel behavior
## and speed of grid search together with 10-fold cross validation a
## resonable number of sequences is needed which would exceed the runtime
## restrictions for automatically executed examples. Therefore the grid
## search examples must be run manually. In these examples we use the full
## dataset for grid search.
train <- sample(1:length(enhancerFB), length(enhancerFB))
## grid search with single kernel object and multiple hyperparameter values
## create gappy pair kernel with normalization
gappyK1M3 <- gappyPairKernel(k=1, m=3)
## show details of single gappy pair kernel object
gappyK1M3
## grid search for a single kernel object and multiple values for cost
pkg <- "LiblineaR"
svm <- "C-svc"
cost <- c(0.01,0.1,1,10,100,1000)
model <- kbsvm(x=enhancerFB[train], y=yFB[train], kernel=gappyK1M3,
pkg=pkg, svm=svm, cost=cost, explicit="yes", cross=3)
## show grid search results
modelSelResult(model)
## create the list of spectrum kernel objects with normalization and
## kernel parameters values for k from 1 to 5
specK15 <- spectrumKernel(k=1:5)
## show details of the four spectrum kernel objects
specK15
## run grid search with several kernel parameter settings for the
## spectrum kernel with a single SVM parameter setting
## ATTENTION: DO NOT USE THIS VARIANT!
## This variant does not bring comparable performance for the different
## kernel parameter settings because usually the best performing
## hyperparameter values could be quite different for different kernel
## parameter settings or between different kernels, grid search for
## multiple kernel objects should be done as shown in the next example
pkg <- "LiblineaR"
svm <- "C-svc"
cost <- 2
model <- kbsvm(x=enhancerFB[train], y=yFB[train], kernel=specK15,
pkg=pkg, svm=svm, cost=cost, explicit="yes", cross=10)
## show grid search results
modelSelResult(model)
## grid search with multiple kernel objects and multiple values for
## hyperparameter cost
pkg <- "LiblineaR"
svm <- "C-svc"
cost <- c(0.01,0.1,1,10,50,100,150,200,500,1000)
model <- kbsvm(x=enhancerFB, sel=train, y=yFB[train], kernel=specK15,
pkg=pkg, svm=svm, cost=cost, explicit="yes", cross=10,
showProgress=TRUE)
## show grid search results
modelSelResult(model)
## grid search for a single kernel object with multiple SVMs
## from different packages
## here with display of cross validation runtimes for each grid point
## pkg, svm and cost vectors must have same length and the corresponding
## entry in each of these vectors are one SVM + SVM hyperparameter setting
pkg <- rep(c("kernlab", "e1071", "LiblineaR"),3)
svm <- rep("C-svc", 9)
cost <- rep(c(0.01,0.1,1),each=3)
model <- kbsvm(x=enhancerFB[train], y=yFB[train], kernel=gappyK1M3,
pkg=pkg, svm=svm, cost=cost, explicit="yes", cross=3,
showCVTimes=TRUE)
## show grid search results
modelSelResult(model)
## run grid search for a single kernel with multiple SVMs from same package
## here all from LiblineaR: C-SVM, L2 regularized SVM with L2 loss and
## SVM with L1 regularization and L2 loss
## attention: for different formulation of the SMV objective use different
## values for the hyperparameters even if they have the same name
pkg <- rep("LiblineaR", 9)
svm <- rep(c("C-svc","l2rl2l-svc","l1rl2l-svc"), each=3)
cost <- c(1,150,1000,1,40,100,1,40,100)
model <- kbsvm(x=enhancerFB, sel=train, y=yFB[train], kernel=gappyK1M3,
pkg=pkg, svm=svm, cost=cost, explicit="yes", cross=3)
## show grid search results
modelSelResult(model)
## create the list of kernel objects for gappy pair kernel
gappyK1M15 <- gappyPairKernel(k=1, m=1:5)
## show details of kernel objects
gappyK1M15
## run grid search with progress indication with ten kernels and ten
## hyperparameter values for cost and 10 fold cross validation on full
## dataset (500 samples)
pkg <- rep("LiblineaR", 10)
svm <- rep("C-svc", 10)
cost <- c(0.0001,0.001,0.01,0.1,1,10,100,1000,10000,100000)
model <- kbsvm(x=enhancerFB, y=yFB, kernel=c(specK15, gappyK1M15),
pkg=pkg, svm=svm, cost=cost, cross=10, explicit="yes",
showCVTimes=TRUE, showProgress=TRUE)
## show grid search results
modelSelResult(model)
Run the code above in your browser using DataLab