## Draw the predefined distance weighting functions over pattern distances
## from -20 to 20. linWeight, expWeight and gaussWeight are evaluated with
## sigma = 10; swdWeight is called without a sigma argument.
curve(
  linWeight(x, sigma = 10),
  from = -20, to = 20,
  col = "green",
  main = "Predefined Distance Weighting Functions",
  xlab = "pattern distance",
  ylab = "weight"
)
## overlay the remaining functions on the existing plot (add = TRUE)
curve(expWeight(x, sigma = 10), from = -20, to = 20, col = "blue", add = TRUE)
curve(gaussWeight(x, sigma = 10), from = -20, to = 20, col = "red", add = TRUE)
curve(swdWeight(x), from = -20, to = 20, col = "orange", add = TRUE)
## legend entries and fill colors are listed in the same order as the curves
legend(
  "topright",
  legend = c("linWeight", "expWeight", "gaussWeight", "swdWeight"),
  fill = c("green", "blue", "red", "orange"),
  title = "Weighting Functions",
  inset = 0.03
)
## annotate the plot with the sigma value used above
text(14, 0.70, "sigma = 10")
## instead of user provided sequences in XStringSet format
## for this example a set of DNA sequences is created
## RNA- or AA-sequences can be used as well with the motif kernel
## (note: the second and the fifth sequence are identical)
dnaseqs <- DNAStringSet(c(
  "AGACTTAAGGGACCTGGTCACCACGCTCGGTGAGGGGGACGGGGTGT",
  "ATAAAGGTTGCAGACATCATGTCCTTTTTGTCCCTAATTATTTCAGC",
  "CAGGAATCAGCACAGGCAGGGGCACGGCATCCCAAGACATCTGGGCC",
  "GGACATATACCCACCGTTACGTGTCATACAGGATAGTTCCACTGCCC",
  "ATAAAGGTTGCAGACATCATGTCCTTTTTGTCCCTAATTATTTCAGC"
))
## name the sequences S1, S2, ...; seq_along() yields an empty index for an
## empty set, whereas 1:length() would produce c(1, 0)
names(dnaseqs) <- paste0("S", seq_along(dnaseqs))
## set up a distance weighted spectrum kernel (k = 3) whose weights
## decrease linearly in a range of 20 bases
spec20 <- spectrumKernel(distWeight = linWeight(sigma = 20), k = 3)
## display the parameters of the kernel object
kernelParameters(spec20)
## the kernel can now be used in a classification or regression task
## in the usual way, or a kernel matrix can be generated for use with
## another learning method; here the matrix is computed for the
## sequences selected via selx (indices 1 to 5)
km <- spec20(x = dnaseqs, selx = seq_len(5))
km[seq_len(5), seq_len(5)]
## Not run:
# ## instead of a distance weighting function also a weight vector can be
# ## passed in the distWeight parameter but the values must be chosen such
# ## that they lead to a positive definite kernel
# ##
# ## in this example only patterns within a 5 base range are considered with
# ## slightly decreasing weights
# specv <- spectrumKernel(k=3, distWeight=c(1,0.95,0.9,0.85,0.8))
# km <- specv(dnaseqs)
# km[1:5,1:5]
#
# ## position specific spectrum kernel
# specps <- spectrumKernel(k=3, distWeight=1)
# km <- specps(dnaseqs)
# km[1:5,1:5]
#
# ## get position specific kernel matrix
# km <- specps(dnaseqs)
# km[1:5,1:5]
#
# ## example with offset to align sequence positions (e.g. the
# ## transcription start site), the value gives the offset to position 1
# positionOne <- c(9,6,3,1,6)
# positionMetadata(dnaseqs) <- positionOne
# ## show position metadata
# positionMetadata(dnaseqs)
# ## generate kernel matrix with position-specific spectrum kernel
# km1 <- specps(dnaseqs)
# km1[1:5,1:5]
#
# ## example for a user defined weighting function
# ## please stick to the order as described in the comments below and
# ## make sure that the resulting kernel is positive definite
#
# expWeightUserDefined <- function(x, sigma=1)
# {
# ## check presence and validity of all parameters except for x
# if (!isSingleNumber(sigma))
# stop("'sigma' must be a number")
#
# ## if x is missing the function returns a closure where all parameters
# ## except for x have a defined value
# if (missing(x))
# return(function(x) expWeightUserDefined(x, sigma=sigma))
#
# ## pattern distance vector x must be numeric
# if (!is.numeric(x))
# stop("'x' must be a numeric vector")
#
# ## create vector of distance weights from the
# ## input vector of pattern distances x
# exp(-abs(x)/sigma)
# }
#
# ## define kernel object with user defined weighting function
# specud <- spectrumKernel(k=3, distWeight=expWeightUserDefined(sigma=5),
# normalized=FALSE)
# ## End(Not run)
## Run the code above in your browser using DataLab