## Attach Biostrings so this script can be run on its own; the alignment
## functions used throughout come from that package.
library(Biostrings)

## Create an alignment object from the example Clustal file that ships in
## the "extdata" directory of the Biostrings package
origMAlign <- readDNAMultipleAlignment(
  filepath = system.file("extdata", "msx2_mRNA.aln", package = "Biostrings"),
  format = "clustal"
)

## List the names of the sequences in the alignment
rownames(origMAlign)

## Rename the sequences to be the underlying species for MSX2
rownames(origMAlign) <- c(
  "Human", "Chimp", "Cow", "Mouse", "Rat",
  "Dog", "Chicken", "Salmon"
)
origMAlign

## See a detailed pager view (only when a user is present to read it)
if (interactive()) {
  detail(origMAlign)
}
## Row masking. For columns, use colmask() with the same kinds of operations.
## Work on a copy so that origMAlign itself stays unmasked.
rowMasked <- origMAlign
rowmask(rowMasked) <- IRanges(start = 1, end = 3)
rowMasked

## Remove the row mask again
rowmask(rowMasked) <- NULL
rowMasked

## "Select" rows 4 to 7 by assigning that range with invert = TRUE
rowmask(rowMasked, invert = TRUE) <- IRanges(start = 4, end = 7)
rowMasked

## Or combine the range 1 to 5 with the rows that are already masked,
## using append = "intersect"
rowmask(rowMasked, append = "intersect") <- IRanges(start = 1, end = 5)
rowMasked
## Mask the "TATA" motif, then look at the resulting column mask
tataMasked <- maskMotif(origMAlign, "TATA")
colmask(tataMasked)

## Automatically mask based on consecutive gaps; the result is inspected
## through colmask(), i.e. the mask is placed on alignment columns
autoMasked <- maskGaps(origMAlign, min.fraction = 0.5, min.block.width = 4)
colmask(autoMasked)
autoMasked
## Frequencies computed on the gap-masked alignment
alphabetFrequency(autoMasked)

## Consensus matrix restricted to bases; show only columns 84 through 90
consensusMatrix(autoMasked, baseOnly = TRUE)[, 84:90]

## Consensus values, as a string and as views
consensusString(autoMasked)
consensusViews(autoMasked)
## Cluster the masked alignment: Hamming distances between the sequences,
## then single-linkage hierarchical clustering
sdist <- stringDist(as(autoMasked, "DNAStringSet"), method = "hamming")
clust <- hclust(sdist, method = "single")
plot(clust)

## Cut the tree into four groups and show the group of each sequence
fourgroups <- cutree(clust, k = 4)
fourgroups
## Write out the alignment object (with current masks) to Phylip format.
## The extension is passed through fileext so that the random suffix added by
## tempfile() goes before ".txt" rather than after it.
write.phylip(
  x = autoMasked,
  filepath = tempfile("foo", tempdir(), fileext = ".txt")
)
## Run the code above in your browser using DataLab