## Example: k-fold cross validation of a sequence-kernel SVM.
## Objects typed on a line by themselves are auto-printed for inspection.

## load transcription factor binding site data
## (the label vector yFB used below is presumably loaded by this call
## as well - confirm against the data set documentation)
data(TFBS)
enhancerFB

## select a few samples for training - here for demonstration purposes only;
## normally you would use 70 or 80% of the samples for training and
## the rest for testing, e.g.
## train <- sample(seq_along(enhancerFB), 0.7 * length(enhancerFB))
## test <- setdiff(seq_along(enhancerFB), train)
##
## seq_along() is used instead of 1:length() because the latter yields
## c(1, 0) for an empty object; sample() draws at random, so the selected
## subset (and hence the CV result) differs from run to run
train <- sample(seq_along(enhancerFB), 50)

## create a kernel object for the gappy pair kernel with normalization
gappy <- gappyPairKernel(k = 1, m = 4)

## show details of kernel object
gappy

## run cross validation with the kernel on C-svc in LiblineaR for cost=10;
## cross = 3 requests 3-fold cross validation on the training subset
model <- kbsvm(
  x = enhancerFB[train], y = yFB[train], kernel = gappy,
  pkg = "LiblineaR", svm = "C-svc", cost = 10, cross = 3
)

## show cross validation result
cvResult(model)
## Not run:
# ## perform five cross validation runs
# model <- kbsvm(x=enhancerFB[train], y=yFB[train], kernel=gappy,
# pkg="LiblineaR", svm="C-svc", cost=10, cross=10, noCross=5)
#
# ## show cross validation result
# cvResult(model)
#
# ## plot cross validation result
# plot(cvResult(model))
#
#
# ## run Leave-One-Out cross validation
# model <- kbsvm(x=enhancerFB[train], y=yFB[train], kernel=gappy,
# pkg="LiblineaR", svm="C-svc", cost=10, cross=-1)
#
# ## show cross validation result
# cvResult(model)
#
# ## run grouped cross validation with full data
# ## on coiled coil dataset
# ##
# ## In this example the groups were determined through single linkage
# ## clustering of sequence similarities derived from ungapped heptad-specific
# ## pairwise alignment of the sequences. The variable ccgroups contains
# ## the pre-calculated group assignments for the individual sequences.
# data(CCoil)
# ccseq
# head(yCC)
# head(ccgroups)
# gappyK1M6 <- gappyPairKernel(k=1, m=6)
#
# ## run k-fold CV without groups
# model <- kbsvm(x=ccseq, y=as.numeric(yCC), kernel=gappyK1M6,
# pkg="LiblineaR", svm="C-svc", cost=10, cross=3, noCross=2,
# perfObjective="BACC",perfParameters=c("ACC", "BACC"))
#
# ## show result without groups
# cvResult(model)
#
# ## run grouped CV
# model <- kbsvm(x=ccseq, y=as.numeric(yCC), kernel=gappyK1M6,
# pkg="LiblineaR", svm="C-svc", cost=10, cross=3,
# noCross=2, groupBy=ccgroups, perfObjective="BACC",
# perfParameters=c("ACC", "BACC"))
#
# ## show result with groups
# cvResult(model)
#
# ## For grouped CV the samples in the held out fold are from a group which
# ## is not present in training on the other folds. The similar CV error
# ## with and without groups shows that learning is not just assigning
# ## labels based on similarity within the groups but is focusing on features
# ## that are indicative for the class also in the CV without groups. For the
# ## GCV no information about group membership for the samples in the held
# ## out fold is present in the model. This example should show how GCV
# ## is performed. Because of package size limitations no specific dataset is
# ## available in this package where GCV is necessary.
# ## End(Not run)
## Run the code above in your browser using DataLab