bio3d-package: Biological Structure Analysis

Description

Utilities for the analysis of protein structure and sequence data.

Arguments

Details

ll{ Package: bio3d Type: Package Version: 2.1-2 Date: 2014-10-16 License: GPL version 2 or newer URL: http://thegrantlab.org/bio3d/ }

Features include the ability to read and write structure (read.pdb, write.pdb, read.fasta.pdb), sequence (read.fasta, write.fasta) and dynamics trajectory data (read.dcd, read.ncdf, write.ncdf).

Perform sequence and structure database searches (blast.pdb, hmmer), atom summaries (summary.pdb), atom selection (atom.select), alignment (pdbaln, seqaln, mustang) superposition (rot.lsq, fit.xyz), rigid core identification (core.find, plot.core, fit.xyz), dynamic domain analysis (geostas), torsion/dihedral analysis (torsion.pdb, torsion.xyz), clustering (via hclust), principal component analysis (pca.xyz, pca.pdbs, pca.tor, plot.pca, plot.pca.loadings, mktrj.pca), dynamical cross-correlation analysis (dccm, lmi, plot.dccm) and correlation network analysis (cna, plot.cna) of structure data.

Perform conservation analysis of sequence (seqaln, conserv, seqidentity, entropy, consensus) and structural (pdbaln, rmsd, rmsf, core.find) data.

Perform normal mode analysis (nma, build.hessian), ensemble normal mode analysis (nma.pdbs), mode comparison (rmsip) and (overlap), atomic fluctuation prediction (fluct.nma), cross-correlation analysis (dccm.nma), cross-correlation visualization (view.dccm), deformation analysis (deformation.nma), and mode visualization (view.modes), (mktrj.nma).

In addition, various utility functions are provided to facilitate manipulation and analysis of biological sequence and structural data (e.g. get.pdb, get.seq, aa123, aa321, pdbseq, aln2html, atom.select, rot.lsq, fit.xyz, is.gap, gap.inspect, orient.pdb, pairwise, plot.bio3d, plot.nma, plot.blast, etc.).

References

Grant, B.J. et al. (2006) Bioinformatics 22, 2695--2696.

Examples

Run this code

help(package="bio3d")     # list the functions within the package
#lbio3d()                  # list bio3d function names only

## Or visit:
##   http://thegrantlab.org/bio3d/html/

## See the individual functions for further documentation and examples, e.g.
#help(read.pdb)

## Or online:
##    http://thegrantlab.org/bio3d/html/read.pdb.html

##-- See the list of Bio3D demos 
demo(package="bio3d")

## Try some out, e.g: 
demo(pdb) # PDB Reading, Manipulation, Searching and Alignment
demo(pca) # Principal Component Analysis
demo(md)  # Molecular Dynamics Trajectory Analysis
demo(nma) # Normal Mode Analysis


##-- Start by reading a PDB file
pdb <- read.pdb("1BG2")

##-- Print summary information
pdb

##
##-- Distance matrix
##
k <- dm(pdb, selection="calpha")
plot(k)

## Extract SEQRES PDB sequence
s1 <- aa321(pdb$seqres)

## Extract ATOM PDB sequence
s2 <- pdbseq(pdb)

## write a FASTA format sequence file
write.fasta(seqs=seqbind(s1, s2), id=c("seqres","atom"), file="eg.fa")

##----------------------------------------##

##
##-- Select alpha carbon atom subset
##
ca.inds <- atom.select(pdb, "calpha")

## Plot of B-factor values along with secondary structure
plot.bio3d(pdb$atom[ca.inds$atom, "b"], sse=pdb, ylab="B-factor")

## Secondary structure assignment with DSSP
#sse <- dssp(pdb)

##----------------------------------------##

##
##-- Torsion angle analysis and basic Ramachandran plot
##
tor <- torsion.pdb(pdb)
plot(tor$phi, tor$psi)

##----------------------------------------##

##
##-- Search for related structures in the PDB database
blast <- blast.pdb( pdbseq(pdb) )
hits  <- plot.blast(blast)
head(hits$hits)

## Download these with function "get.pdb()"
#rawpdbs <- get.pdb( hits$pdb.id, "PDB_downloads")

## Split by chain with function "pdbsplit()"
#pdbsplit(rawpdbs, path="PDB_downloads/split_chain")

## and then align with "pdbaln()" and superpose with "pdbfit()"
#hitfiles <- paste0("PDB_downloads/split_chain", hits$pdb.id, ".pdb")
#pdbs <- pdbaln(hitfiles)
#xyz  <- pdbfit(pdbs)

##----------------------------------------##

##
##-- Read an example FASTA sequence alignment from PFAM
##
infile <- "http://pfam.sanger.ac.uk/family/PF00071/alignment/seed/format?format=fasta"

aln <- read.fasta( infile )

## Entropy and similarity scores for alignment positions
h <- entropy(aln)   
s <- conserv(aln)   # see other"conserv()" options
plot(h$H.norm, typ="h", ylab="Normalized entropy score", col="gray")
points( s, typ="h", col="red")

## Alignment consensus sequence
con <- consensus(aln)
con$seq

## add consensus sequence to conservation plot
ind <- which(s > 0.6)
text(ind, s[ind], labels=con$seq[ind])

## Render the alignment as coloured HTML
aln2html(aln, append=FALSE, file="eg.html")

##----------------------------------------##

##
##-- Read an alignment of sequences and their corresponding structures
##
aln  <- read.fasta( system.file("examples/kif1a.fa", package="bio3d") )
pdbs <- read.fasta.pdb( aln )

##-- DDM: Difference Distance Matrix
a <- dm(pdbs$xyz[2,])
b <- dm(pdbs$xyz[3,])
ddm <- a - b
plot(ddm,key=FALSE, grid=FALSE)


##-- Superpose structures on non gap positions
xyz <- pdbfit(pdbs)

##-- RMSD of non gap positions
gaps <- gap.inspect(pdbs$xyz)
rmsd(pdbs$xyz[, gaps$f.inds])
rmsd(xyz[, gaps$f.inds])


##-- Rigid 'core' identification
core <- core.find(pdbs)
#plot(core)

## Core fit the structures (superpose on rigid zones)
xyz2 <- pdbfit(pdbs, inds=core$c0.5A.xyz)

## Note larger overall RMSD but lower core-residue RMSF
rmsd(xyz2[, gaps$f.inds])

plot(rmsf(xyz), typ="l", col="blue", ylab="RMSF")
points(rmsf(xyz2), typ="l", col="red")



##
##-- PCA of experimental structures
##
# Ignore gap containing positions
gaps.res <- gap.inspect(pdbs$ali)
gaps.pos <- gap.inspect(pdbs$xyz)


##-- Do PCA
pc.xray <- pca.xyz(xyz[, gaps.pos$f.inds])
## Or more simply
# pc.xray <- pca.xyz(xyz, rm.gaps=TRUE)

## Plot results
plot(pc.xray)

plot.pca.loadings(pc.xray$au)

## Write a PC trajectory (for viewing as tube in VMD)
rn <- pdbs$resno[1, gaps.res$f.inds]
rd <- aa123(pdbs$ali[1, gaps.res$f.inds])

#p1 <- mktrj.pca(pc.xray, pc=1, resno =rn, resid = rd, file="pc1.pdb")
#p2 <- mktrj.pca(pc.xray, pc=2, resno =rn, resid = rd, file="pc2.pdb")
#p3 <- mktrj.pca(pc.xray, pc=3, resno =rn, resid = rd, file="pc3.pdb")


##----------------------------------------##

##
##-- Read a CHARMM/X-PLOR/NAMD trajectory file
##
trtfile <- system.file("examples/hivp.dcd", package="bio3d")
trj <- read.dcd(trtfile)

## Read the starting PDB file to determine atom correspondence
pdbfile <- system.file("examples/hivp.pdb", package="bio3d")
pdb <- read.pdb(pdbfile)

## Fit trj on PDB based on residues 23 to 31 and 84 to 87 in both chains
inds <- atom.select(pdb, resno=c(23:31,84:87), elety="CA")
fit.xyz <- fit.xyz(pdb$xyz, trj, fixed.inds=inds$xyz, mobile.inds=inds$xyz)

##-- RMSD of trj frames from PDB
r <- rmsd(a=pdb, b=fit.xyz)

##-- PCA of trj
pc.trj <- pca.xyz(fit.xyz)

## Plot PCA results
plot(pc.trj)

## Examine residue-wise contributions to PCs
plot.pca.loadings(pc.trj$au)

## cluster in PC1 subspace
hc <- hclust(dist(pc.trj$z[,1]))
plot(pc.trj, col=cutree(hc, k=2))


## Write PC trajectory for viewing as tube in VMD
a <- mktrj.pca(pc.trj, pc=1, file="pc1_trj.pdb")
## Other examples include: normal mode analysis, alignment, clustering etc...
## See: http://thegrantlab.org/bio3d/tutorials

Run the code above in your browser using DataLab