## Central limit theorem
# NOTE(review): samp.dist() is not defined in this script; presumably it comes
# from a package that is already attached -- confirm before running standalone.

# Four sample sizes, one at a time, all drawn from the same exponential parent
exp.parent <- rexp(100000)
samp.dist(parent = exp.parent, s.size = 1, R = 1000)   ## n = 1
samp.dist(parent = exp.parent, s.size = 5, R = 1000)   ## n = 5
samp.dist(parent = exp.parent, s.size = 10, R = 1000)  ## n = 10
samp.dist(parent = exp.parent, s.size = 50, R = 1000)  ## n = 50

# All four at once in a 2 x 2 panel (animation switched off).
# par() returns the settings it replaces; keep them so the layout and margins
# can be put back afterwards instead of leaking into every later plot.
old.par <- par(mfrow = c(2, 2), mar = c(4.4, 4.5, 1, 0.5))
samp.dist(parent = exp.parent, s.size = 1, R = 300, anim = FALSE)   ## n = 1
samp.dist(parent = exp.parent, s.size = 5, R = 300, anim = FALSE)   ## n = 5
samp.dist(parent = exp.parent, s.size = 10, R = 300, anim = FALSE)  ## n = 10
samp.dist(parent = exp.parent, s.size = 50, R = 300, anim = FALSE)  ## n = 50
par(old.par)  # restore the single-panel layout and the previous margins
## n not fixed -- sample mean
# A fresh (smaller) exponential parent is drawn for this demonstration.
exp.parent <- rexp(10000)
samp.dist(
  parent = exp.parent,
  col.anim = "heat.colors",
  fix.n = FALSE,
  interval = 0.3
)
## n not fixed -- sample mean and sample median.
# Both are consistent and unbiased, but which is more efficient for mu?
# This will take a few seconds.
parent <- rnorm(10000, sd = 3)

# Default statistic (stat is not supplied here), shown on its own device
dev.new()
samp.dist(
  parent,
  R = 1000,
  col.anim = "heat.colors",
  fix.n = FALSE,
  interval = 0.1,
  n.seq = seq(1, 100),
  breaks = 50,
  show.SE = TRUE
)

# Same settings with stat = median, on a second device for comparison
dev.new()
samp.dist(
  parent,
  R = 1000,
  col.anim = "heat.colors",
  fix.n = FALSE,
  interval = 0.1,
  stat = median,
  n.seq = seq(1, 100),
  xlab = "Median",
  show.SE = TRUE,
  breaks = 50
)
# How does the efficiency of the median compare with that of the mean in a
# distribution with 10% contamination?
# Parent: 9000 standard normal values plus 1000 values from a normal with
# mean 10.
parent <- c(rnorm(9000), rnorm(1000, mean = 10))

# Default statistic (stat is not supplied here)
samp.dist(
  parent,
  col.anim = "heat.colors",
  fix.n = FALSE,
  interval = 0.3,
  breaks = 50,
  show.SE = TRUE
)

# Median, on a separate device
dev.new()
samp.dist(
  parent,
  col.anim = "heat.colors",
  fix.n = FALSE,
  interval = 0.3,
  stat = median,
  xlab = "Median",
  est.ylim = TRUE,
  show.SE = TRUE,
  breaks = 50
)
## Distribution of t-statistics under valid and invalid assumptions
# Valid case: a single standard normal parent population
parent <- rnorm(n = 100000)
# Pooled-variance two-sample t statistic.
#
# Arguments:
#   s.dist1, s.dist2  values whose difference forms the numerator
#   s.dist3, s.dist4  values pooled (weighted by size - 1) into a common
#                     variance estimate
#   s.size, s.size2   the two sample sizes; s.size2 defaults to s.size
# NOTE(review): elsewhere in this script the function is handed to samp.dist()
# as `func` together with stat/stat2 = mean and stat3/stat4 = var, so the four
# s.dist arguments are presumably sample means and sample variances -- confirm
# against samp.dist()'s documentation.
#
# Returns (s.dist1 - s.dist2) divided by its pooled standard error; the
# arithmetic is elementwise, so vector inputs give a vector result.
t.star <- function(s.dist1, s.dist2, s.dist3, s.dist4,
                   s.size = 6, s.size2 = s.size) {
  weighted.ss <- (s.size - 1) * s.dist3 + (s.size2 - 1) * s.dist4
  pooled.var <- weighted.ss / (s.size + s.size2 - 2)
  std.err <- sqrt(pooled.var) * sqrt((1 / s.size) + (1 / s.size2))
  (s.dist1 - s.dist2) / std.err
}
# Both samples come from the same parent; t.star combines the four statistics.
samp.dist(
  parent,
  parent2 = parent,
  s.size = 6,
  R = 1000,
  breaks = 35,
  stat = mean,
  stat2 = mean,
  stat3 = var,
  stat4 = var,
  xlab = "t*",
  ylab = "Relative frequency",
  func = t.star,
  show.n = FALSE
)
# Overlay the t density with 10 degrees of freedom as the reference curve
curve(dt(x, 10), from = -6, to = 6, add = TRUE, lwd = 2)
legend("topleft", lwd = 2, col = 1, legend = "t(10)")
# Invalid case: same means (null true), but different variances and other
# distributional characteristics.
parent <- runif(100000, min = 0, max = 2)
parent2 <- rexp(100000)
samp.dist(
  parent,
  parent2 = parent2,
  s.size = 6,
  R = 1000,
  breaks = 35,
  stat = mean,
  stat2 = mean,
  stat3 = var,
  stat4 = var,
  xlab = "t*",
  ylab = "Relative frequency",
  func = t.star,
  show.n = FALSE
)
# Same t(10) reference curve as used for the valid case
curve(dt(x, 10), from = -6, to = 6, add = TRUE, lwd = 2)
legend("topleft", lwd = 2, col = 1, legend = "t(10)")
# Interactive GUI; requires package 'tcltk'.
# Launch it only from an interactive session in which tcltk can be loaded, so
# that sourcing this script in batch mode does not try to open a GUI.
if (interactive() && requireNamespace("tcltk", quietly = TRUE)) {
  samp.dist.tck()
} else {
  message("Skipping samp.dist.tck(): interactive session and 'tcltk' required.")
}
# Run the code above in your browser using DataLab