fas <- system.file("extdata", "50S_ribosomal_protein_L2.fas", package="DECIPHER")
dna <- readDNAStringSet(fas)
# introduce artificial indels
n_ins <- 2 # insertions per sequence
shifted <- replaceAt(dna,
lapply(width(dna),
sample,
n_ins),
sample(DNA_BASES,
n_ins,
replace=TRUE))
n_dels <- 1 # deletions per sequence
shifted <- replaceAt(shifted,
RangesList(lapply(width(shifted),
function(x) {
IRanges(sample(x,
n_dels),
width=1)
})))
# to make frameshift correction more challenging,
# only supply 20 reference amino acid sequences
s <- sample(length(dna), 20)
x <- CorrectFrameshifts(shifted,
translate(dna[s]),
type="both")
# there was a wide range of distances
# to the nearest reference sequence
quantile(unlist(lapply(x[[1]], `[`, "distance")))
# none of the sequences were > rejectDistance
# from the nearest reference sequence
length(which(unlist(lapply(x[[1]], `[`, "index"))==0))
# the number of indels was generally correct
table(unlist(lapply(x[[1]], function(x) {
length(x$insertions)})))/length(shifted)
table(unlist(lapply(x[[1]], function(x) {
length(x$deletions)})))/length(shifted)
# align and display the translations
AA <- AlignTranslation(x$sequences,
readingFrame=1,
asAAStringSet=TRUE)
BrowseSeqs(AA)
Run the code above in your browser using DataLab