## create a set of annotated DNA sequences
## instead of user provided sequences in XStringSet format
## for this example a set of DNA sequences is created
x <- DNAStringSet(c("AGACTTAAGGGACCTGGTCACCACGCTCGGTGAGGGGGACGGGGTGT",
"ATAAAGGTTGCAGACATCATGTCCTTTTTGTCCCTAATTATTTCAGC",
"CAGGAATCAGCACAGGCAGGGGCACGGCATCCCAAGACATCTGGGCC",
"GGACATATACCCACCGTTACGTGTCATACAGGATAGTTCCACTGCCC",
"ATAAAGGTTGCAGACATCATGTCCTTTTTGTCCCTAATTATTTCAGC"))
names(x) <- paste("S", 1:length(x), sep="")
## define the character set used in annotation
## the masking character '-' is is not part of the character set
anncs <- "ei"
## annotation strings for each sequence as character vector
## in the third and fourth sample a part of the sequence is masked
annotStrings <- c("eeeeeeeeeeeeiiiiiiiiieeeeeeeeeeeeeeeeiiiiiiiiii",
"eeeeeeeeeiiiiiiiiiiiiiiiiiiieeeeeeeeeeeeeeeeeee",
"---------eeeeeeeeeeeeeeeeiiiiiiiiiiiiiiiiiiiiii",
"eeeeeeeeeeeeeeeeeeeeeeeiiiiiiiiiiiiiiiiiiii----",
"eeeeeeeeeeeeiiiiiiiiiiiiiiiiiiiiiiieeeeeeeeeeee")
## assign metadata to DNAString object
annotationMetadata(x, annCharset=anncs) <- annotStrings
## show annotation
annotationMetadata(x)
annotationCharset(x)
## show sequence 3 aligned with annotation string
showAnnotatedSeq(x, sel=3)
## create annotation specific spectrum kernel
speca <- spectrumKernel(k=3, annSpec=TRUE, normalized=FALSE)
## show details of kernel object
kernelParameters(speca)
## this kernel object can be now be used in a classification or regression
## task in the usual way or you can use the kernel for example to generate
## the kernel matrix for use with another learning method in another R
## package.
kma <- speca(x)
kma[1:5,1:5]
## generate a dense explicit representation for annotation-specific kernel
era <- getExRep(x, speca, sparse=FALSE)
era[1:5,1:8]
## when a standard spectrum kernel is used with annotated
## sequences the anntotation information is not evaluated
spec <- spectrumKernel(k=3, normalized=FALSE)
km <- spec(x)
km[1:5,1:5]
## finally delete annotation metadata if no longer needed
annotationMetadata(x) <- NULL
## show empty metadata
annotationMetadata(x)
annotationCharset(x)
Run the code above in your browser using DataLab