## ---------------------------------------------------------------------
## 1. BASIC EXAMPLES
## ---------------------------------------------------------------------
## The sequence classes and functions used throughout this script are
## provided by the Biostrings package. Attach it explicitly so the script
## also runs in a fresh R session; this is a no-op when the package is
## already attached.
library(Biostrings)

## Translate a short DNA sequence without specifying a genetic code.
dna1 <- DNAString("TATAAATGGAGTAGATAA")
translate(dna1)

## Translate the same sequence with an explicitly chosen genetic code.
SGC1 <- getGeneticCode("SGC1")  # Vertebrate Mitochondrial code
translate(dna1, genetic.code = SGC1)

## All codons except 1st are fuzzy:
dna2 <- DNAString("TATANATGRAGYMGRTRA")

## Not run:
## (kept commented out: this call is an error because of the fuzzy codons)
# translate(dna2)
## End(Not run)

## Codons 4 to 6 are non-ambiguous and can be solved. 2nd and 3rd codons
## are ambiguous and are translated to X:
translate(dna2, if.fuzzy.codon = "solve")

## Fuzzy codons that are non-ambiguous with a given genetic code can
## become ambiguous with another genetic code and vice versa:
translate(dna2, genetic.code = SGC1, if.fuzzy.codon = "solve")
## ---------------------------------------------------------------------
## 2. TRANSLATING AN OPEN READING FRAME
## ---------------------------------------------------------------------
## Load the example ORF sequences shipped in the package's 'extdata'
## directory and display them.
file <- system.file("extdata", "someORF.fa", package = "Biostrings")
x <- readDNAStringSet(file)
x

## The first and last 1000 nucleotides are not part of the ORFs, so trim
## them off both ends of every sequence:
x <- DNAStringSet(x, start = 1001, end = -1001)

## Introns, if any, must be masked before calling translate() on an ORF.
## This information can be obtained from the SGD database
## (http://www.yeastgenome.org/).
## According to SGD, the 1st ORF (YAL001C) has an intron at 71..160
## (see http://db.yeastgenome.org/cgi-bin/locus.pl?locus=YAL001C)
y1 <- x[[1L]]
mask1 <- Mask(length(y1), start = 71, end = 160)
masks(y1) <- mask1
y1
translate(y1)

## Codons:
## Extract the codons of the masked sequence once, then display them,
## report which ones do not have a width of 3, and show codons 20 to 28.
y1_codons <- codons(y1)
y1_codons
which(width(y1_codons) != 3)
y1_codons[20:28]
## ---------------------------------------------------------------------
## 3. AN ADVANCED EXAMPLE
## ---------------------------------------------------------------------
## Translation on the '-' strand: translate the reverse complement of
## each sequence.
dna3 <- DNAStringSet(c("ATC", "GCTG", "CGACT"))
dna3_rc <- reverseComplement(dna3)
translate(dna3_rc)

## Translate sequences on both '+' and '-' strand across all
## possible reading frames (i.e., codon position 1, 2 or 3):
## First combine the '+' and '-' strand sequences into one DNAStringSet,
## then, for each reading frame, drop the nucleotides located before the
## frame's start position.
both_strands <- c(dna3, dna3_rc)
dna3_subseqs <- lapply(seq_len(3), function(frame_start) {
  subseq(both_strands, start = frame_start)
})

## Translation of 'dna3_subseqs' produces a list of length 3, each with
## 6 elements (3 '+' strand results followed by 3 '-' strand results).
lapply(dna3_subseqs, translate)

## Note that translate() throws a warning when the length of the sequence
## is not divisible by 3. To avoid this warning wrap the function in
## suppressWarnings().
## Run the code above in your browser using DataLab