CVA: Canonical Variate Analysis

Description

performs a Canonical Variate Analysis.

Usage

CVA(
  dataarray,
  groups,
  weighting = TRUE,
  tolinv = 1e-10,
  plot = TRUE,
  rounds = 0,
  cv = FALSE,
  p.adjust.method = "none",
  robust = c("classical", "mve", "mcd"),
  prior = NULL,
  ...
)

Value

CV: A matrix containing the Canonical Variates
CVscores: A matrix containing the individual Canonical Variate scores
Grandm: a vector or a matrix containing the Grand Mean (depending on whether the input is an array or a matrix)
groupmeans: a matrix or an array containing the group means (depending on whether the input is an array or a matrix)
Var: Variance explained by the Canonical Variates
CVvis: Canonical Variates projected back into the original space - to be used for visualization purposes, for details see example below
Dist: Mahalanobis Distances between group means - if requested, tested by a permutation test. If the input is an array, it is assumed to be superimposed Landmark Data and Procrustes Distance will be calculated
CVcv: A matrix containing crossvalidated CV scores
groups: factor containing the grouping variable
class: classification results based on posterior probabilities. If cv=TRUE, this will be done by a leaving-one-out procedure
posterior: posterior probabilities
prior: prior probabilities

Arguments

dataarray: Either a k x m x n real array, where k is the number of points, m is the number of dimensions, and n is the sample size. Or alternatively an n x m matrix where n is the number of observations and m the number of variables (this can be PC scores for example)
groups: a character/factor vector containing the grouping variable.
weighting: Logical: Determines whether the between group covariance matrix and Grandmean is to be weighted according to group size.
tolinv: Threshold for the eigenvalues of the pooled within-group covariance matrix to be taken as zero - for calculating the general inverse of the pooled within-group covariance matrix.
plot: Logical: determines whether in the two-sample case a histogram is to be plotted.
rounds: integer: number of permutations if a permutation test of the Mahalanobis distances (from the pooled within-group covariance matrix) and Euclidean distance between group means is requested. If rounds = 0, no test is performed.
cv: logical: requests a Jackknife Crossvalidation.
p.adjust.method: method to adjust p-values for multiple comparisons; see p.adjust.methods for options.
robust: character: determines covariance estimation methods, allowing for robust estimations using MASS::cov.rob
prior: vector assigning each group a prior probability.
...: additional parameters passed to MASS::cov.rob for robust covariance and mean estimations

Author

Stefan Schlager

References

Campbell, N. A. & Atchley, W. R. 1981 The Geometry of Canonical Variate Analysis. Syst. Zool., 30(3), 268-280.

Klingenberg, C. P. & Monteiro, L. R. 2005 Distances and directions in multidimensional shape spaces: implications for morphometric applications. Systematic Biology 54, 678-688.

Examples

Run this code


## all examples are kindly provided by Marta Rufino

if (require(shapes)) {
  # perform Procrustes fit on raw data
  alldat <- procSym(abind(gorf.dat, gorm.dat))
  # create factors
  groups <- as.factor(c(rep("female", 30), rep("male", 29)))
  # perform CVA and test the Mahalanobis distance between groups
  # with a permutation test (10000 rounds)
  cvall <- CVA(alldat$orpdata, groups, rounds = 10000)
  ## visualize a shape change from score -5 to 5:
  ## CVvis[, 1] is reshaped to the dimensions of the grand mean, scaled by
  ## +/-5 and added to the grand mean
  cvvis5 <- 5 * matrix(cvall$CVvis[, 1], nrow(cvall$Grandm), ncol(cvall$Grandm)) +
    cvall$Grandm
  cvvisNeg5 <- -5 * matrix(cvall$CVvis[, 1], nrow(cvall$Grandm), ncol(cvall$Grandm)) +
    cvall$Grandm
  plot(cvvis5, asp = 1)
  points(cvvisNeg5, col = 2)
  # connect corresponding points of both configurations;
  # seq_len() is safe even if there are zero rows
  for (i in seq_len(nrow(cvvisNeg5))) {
    lines(rbind(cvvis5[i, ], cvvisNeg5[i, ]))
  }
}
### Morpho CVA
data(iris)
vari <- iris[, 1:4]
facto <- iris[, 5]

cva.1 <- CVA(vari, groups = facto)
## get the typicality probabilities and resulting classifications - tagging
## all specimens with a probability of < 0.01 as outliers (assigned to no class)
typprobs <- typprobClass(cva.1$CVscores, groups = facto)
print(typprobs)
## visualize the CV scores by their groups estimated from (cross-validated)
## typicality probabilities:
if (require(car)) {
  scatterplot(cva.1$CVscores[, 1], cva.1$CVscores[, 2],
              groups = typprobs$groupaffinCV,
              smooth = FALSE, reg.line = FALSE)
}

# axis labels built from the second column of cva.1$Var (rounded, with a
# percent sign appended); reused by several plots below
xlab_cv1 <- paste("1st canonical axis", paste(round(cva.1$Var[1, 2], 1), "%"))
xlab_cv2 <- paste("2nd canonical axis", paste(round(cva.1$Var[2, 2], 1), "%"))

# plot the CVA (empty frame first, then the group labels as text)
plot(cva.1$CVscores, col = facto, pch = as.numeric(facto), type = "n", asp = 1,
     xlab = xlab_cv1, ylab = xlab_cv2)

text(cva.1$CVscores, as.character(facto), col = as.numeric(facto), cex = .7)

# add chull (merge groups)
for (jj in seq_along(levels(facto))) {
  ii <- levels(facto)[jj]
  kk <- chull(cva.1$CVscores[facto == ii, 1:2])
  # repeat the first hull vertex so the outline is closed
  lines(cva.1$CVscores[facto == ii, 1][c(kk, kk[1])],
        cva.1$CVscores[facto == ii, 2][c(kk, kk[1])], col = jj)
}

# add 80% ellipses
if (require(car)) {
  for (ii in seq_along(levels(facto))) {
    dataEllipse(cva.1$CVscores[facto == levels(facto)[ii], 1],
                cva.1$CVscores[facto == levels(facto)[ii], 2],
                add = TRUE, levels = .80, col = c(1:7)[ii])
  }
}

# histogram per group
# lattice plots are only drawn when printed; inside a braced block only the
# last value would be auto-printed, so both are printed explicitly
if (require(lattice)) {
  print(histogram(~cva.1$CVscores[, 1] | facto,
                  layout = c(1, length(levels(facto))),
                  xlab = xlab_cv1))
  print(histogram(~cva.1$CVscores[, 2] | facto,
                  layout = c(1, length(levels(facto))),
                  xlab = xlab_cv2))
}

# plot Mahalanobis distances as a dendrogram
dendroS <- hclust(cva.1$Dist$GroupdistMaha)
dendroS$labels <- levels(facto)
# remember the previous graphics settings so they can be restored afterwards
oldpar <- par(mar = c(4, 4.5, 1, 1))
dendroS <- as.dendrogram(dendroS)
plot(dendroS, main = "", sub = "", xlab = "Geographic areas",
     ylab = "Mahalanobis distance")

# Variance explained by the canonical roots:
cva.1$Var
# or plot it:
barplot(cva.1$Var[, 2])

# restore the graphics parameters changed above
par(oldpar)

# a further landmark-based example, this time in 3D:
data(boneData)
groups <- name2factor(boneLM, which = 3:4)
proc <- procSym(boneLM)
cvall <- CVA(proc$orpdata, groups)
## visualize a shape change from score -5 to 5:
## the first column of CVvis is reshaped to the dimensions of the grand mean,
## scaled and added to the grand mean
cvvis5 <- 5 * matrix(
  cvall$CVvis[, 1],
  nrow = nrow(cvall$Grandm),
  ncol = ncol(cvall$Grandm)
) + cvall$Grandm
cvvisNeg5 <- -5 * matrix(
  cvall$CVvis[, 1],
  nrow = nrow(cvall$Grandm),
  ncol = ncol(cvall$Grandm)
) + cvall$Grandm
if (FALSE) {
  # visualize it
  deformGrid3d(cvvis5, cvvisNeg5, ngrid = 0)
}

# the same analysis based on PC scores (here: the first 5):
cvall <- CVA(proc$PCscores[, 1:5], groups)
## visualize a shape change from score -5 to 5:
cvvis5 <- 5 * cvall$CVvis[, 1] + cvall$Grandm
cvvisNeg5 <- -5 * cvall$CVvis[, 1] + cvall$Grandm
# pass the scores to restoreShapes() together with the matching PCs and the
# mean shape (presumably yielding landmark configurations - see ?restoreShapes)
cvvis5 <- restoreShapes(cvvis5, proc$PCs[, 1:5], proc$mshape)
cvvisNeg5 <- restoreShapes(cvvisNeg5, proc$PCs[, 1:5], proc$mshape)
if (FALSE) {
  # visualize it
  deformGrid3d(cvvis5, cvvisNeg5, ngrid = 0)
}

Run the code above in your browser using DataLab