### Per-gene regression diagnostics (Cook's distance, DFBETAS, DFFITS,
### leverage) for the model ~score+type fitted to sample.ExpressionSet.
data(sample.ExpressionSet)
layout(1)
lm1 <- lmPerGene(sample.ExpressionSet, ~score + type)

### ---- Cook's distance ----
CD <- CooksDPerGene(lm1)
### How does the distribution of Cook's distances look, sample by sample?
### One box per column of CD; the columns are labelled as samples.
boxplot(log2(CD) ~ col(CD), names = colnames(CD),
        ylab = "Log Cook's\nDistance", xlab = "Sample")
### There are a few gross individual-observation outliers (which is why we
### plot on the log scale), but otherwise no single sample pops out as
### problematic. Here's one commonly-used alert level for problems.
### The sample count is read from CD (26 columns for this data set) instead
### of being hard-coded, and abline() spans the plot whatever its x-range.
abline(h = log2(2 / sqrt(ncol(CD))), col = 2)

### ---- DFBETAS ----
DFB <- dfbetasPerGene(lm1)
### Looking for simultaneous two-effect outliers - 500 genes times 26
### samples makes 13000 data points on this plot.
### Slices 2 and 3 of the third dimension are plotted as the score and type
### effects respectively.
plot(DFB[, , 2], DFB[, , 3],
     main = "DFBETAS for Score and Type (all genes)",
     xlab = "Score Effect\nOffset (normalized units)",
     ylab = "Type Effect Offset (normalized units)",
     pch = "+", cex = 0.5)
### Zero reference lines on both axes.
abline(h = 0, v = 0, col = 2)

### ---- DFFITS ----
DFF <- dffitsPerGene(lm1)
### Summary of the per-column means of DFF.
summary(apply(DFF, 2, mean))

### ---- Leverage ----
Lev <- Leverage(lm1)
table(Lev)
### NOTE(review): the original note here said there "should have only two
### unique values because this is a dichotomous one-factor model", but lm1
### above is fitted with two terms (~score+type). Confirm which model the
### note was written for before relying on that expectation.
### (Web-page text, not R code:) Run the code above in your browser using DataLab