## plot predefined weighting functions for sigma=10
curve(linWeight(x, sigma=10), from=-20, to=20, xlab="pattern distance",
ylab="weight", main="Predefined Distance Weighting Functions", col="green")
curve(expWeight(x, sigma=10), from=-20, to=20, col="blue", add=TRUE)
curve(gaussWeight(x, sigma=10), from=-20, to=20, col="red", add=TRUE)
curve(swdWeight(x), from=-20, to=20, col="orange", add=TRUE)
legend('topright', inset=0.03, title="Weighting Functions", c("linWeight",
"expWeight", "gaussWeight", "swdWeight"),
fill=c("green", "blue", "red", "orange"))
text(14, 0.70, "sigma = 10")
## instead of user provided sequences in XStringSet format
## for this example a set of DNA sequences is created
## RNA- or AA-sequences can be used as well with the motif kernel
dnaseqs <- DNAStringSet(c("AGACTTAAGGGACCTGGTCACCACGCTCGGTGAGGGGGACGGGGTGT",
"ATAAAGGTTGCAGACATCATGTCCTTTTTGTCCCTAATTATTTCAGC",
"CAGGAATCAGCACAGGCAGGGGCACGGCATCCCAAGACATCTGGGCC",
"GGACATATACCCACCGTTACGTGTCATACAGGATAGTTCCACTGCCC",
"ATAAAGGTTGCAGACATCATGTCCTTTTTGTCCCTAATTATTTCAGC"))
names(dnaseqs) <- paste("S", 1:length(dnaseqs), sep="")
## create a distance weighted spectrum kernel with linear decrease of
## weights in a range of 20 bases
spec20 <- spectrumKernel(k=3, distWeight=linWeight(sigma=20))
## show details of kernel object
kernelParameters(spec20)
## this kernel can be now be used in a classification or regression task
## in the usual way or a kernel matrix can be generated for use with
## another learning method
km <- spec20(x=dnaseqs, selx=1:5)
km[1:5,1:5]
## instead of a distance weighting function also a weight vector can be
## passed in the distWeight parameter but the values must be chosen such
## that they lead to a positive definite kernel
##
## in this example only patterns within a 5 base range are considered with
## slightly decreasing weights
specv <- spectrumKernel(k=3, distWeight=c(1,0.95,0.9,0.85,0.8))
km <- specv(dnaseqs)
km[1:5,1:5]
## position specific spectrum kernel
specps <- spectrumKernel(k=3, distWeight=1)
km <- specps(dnaseqs)
km[1:5,1:5]
## get position specific kernel matrix
km <- specps(dnaseqs)
km[1:5,1:5]
## example with offset to align sequence positions (e.g. the
## transcription start site), the value gives the offset to position 1
positionOne <- c(9,6,3,1,6)
positionMetadata(dnaseqs) <- positionOne
## show position metadata
positionMetadata(dnaseqs)
## generate kernel matrix with position-specific spectrum kernel
km1 <- specps(dnaseqs)
km1[1:5,1:5]
## example for a user defined weighting function
## please stick to the order as described in the comments below and
## make sure that the resulting kernel is positive definite
expWeightUserDefined <- function(x, sigma=1)
{
## check presence and validity of all parameters except for x
if (!isSingleNumber(sigma))
stop("'sigma' must be a number")
## if x is missing the function returns a closure where all parameters
## except for x have a defined value
if (missing(x))
return(function(x) expWeightUserDefined(x, sigma=sigma))
## pattern distance vector x must be numeric
if (!is.numeric(x))
stop("'x' must be a numeric vector")
## create vector of distance weights from the
## input vector of pattern distances x
exp(-abs(x)/sigma)
}
## define kernel object with user defined weighting function
specud <- spectrumKernel(k=3, distWeight=expWeightUserDefined(sigma=5),
normalized=FALSE)
Run the code above in your browser using DataLab