# NOT RUN {
# }
# NOT RUN {
####################################
library(EMMREML)
library(STPGA)
data(WheatData)
svdWheat<-svd(Wheat.K, nu=5, nv=5)
PC50WHeat<-Wheat.K%*%svdWheat$v
plot(PC50WHeat[,1],PC50WHeat[,2])
rownames(PC50WHeat)<-rownames(Wheat.K)
DistWheat<-dist(PC50WHeat)
TreeWheat<-hclust(DistWheat)
TreeWheat<-cutree(TreeWheat, k=4)
Test<-rownames(PC50WHeat)[TreeWheat==4]
length(Test)
Candidates<-setdiff(rownames(PC50WHeat), Test)
###instead of using the algorithm directly using a wrapper to
###implement an for multiple starting points for genetic algorithm.
repeatgenalg<-function(numrepsouter,numrepsinner){
StartingPopulation2=NULL
for (i in 1:numrepsouter){
print("Rep:")
print(i)
StartingPopulation<-lapply(1:numrepsinner, function(x){
GenAlgForSubsetSelection(P=PC50WHeat,Candidates=Candidates,
Test=Test, ntoselect=50, InitPop=StartingPopulation2,
npop=50, nelite=5, mutprob=.5, mutintensity = rpois(1,4),
niterations=10,minitbefstop=5, tabumemsize = 2,plotiters=TRUE,
lambda=1e-9,errorstat="CDMEAN", mc.cores=1)})
StartingPopulation2<-vector(mode="list", length = numrepsouter*1)
ij=1
for (i in 1:numrepsinner){
for (j in 1:1){
StartingPopulation2[[ij]]<-StartingPopulation[[i]][[j]]
ij=ij+1
}
}
}
ListTrain<-GenAlgForSubsetSelection(P=PC50WHeat,Candidates=Candidates,
Test=Test,ntoselect=50, InitPop=StartingPopulation2,npop=100,
nelite=10, mutprob=.5, mutintensity = 1,niterations=300,
minitbefstop=100, tabumemsize = 1,plotiters=T,
lambda=1e-9,errorstat="CDMEAN", mc.cores=1)
return(ListTrain)
}
ListTrain<-repeatgenalg(20, 3)
###test sample
deptestopt<-Wheat.Y[Wheat.Y$id%in%Test,]
##predictions by optimized sample
deptrainopt<-Wheat.Y[(Wheat.Y$id%in%ListTrain[[1]]),]
Ztrain<-model.matrix(~-1+deptrainopt$id)
Ztest<-model.matrix(~-1+deptestopt$id)
modelopt<-emmreml(y=deptrainopt$plant.height,X=matrix(1, nrow=nrow(deptrainopt), ncol=1),
Z=Ztrain, K=Wheat.K)
predictopt<-Ztest%*%modelopt$uhat
corvecrs<-c()
for (rep in 1:300){
###predictions by a random sample of the same size
rs<-sample(Candidates, 50)
deptestrs<-Wheat.Y[Wheat.Y$id%in%Test,]
deptrainrs<-Wheat.Y[(Wheat.Y$id%in%rs),]
Ztrain<-model.matrix(~-1+deptrainrs$id)
Ztest<-model.matrix(~-1+deptestrs$id)
library(EMMREML)
modelrs<-emmreml(y=deptrainrs$plant.height,X=matrix(1, nrow=nrow(deptrainrs), ncol=1),
Z=Ztrain, K=Wheat.K)
predictrs<-Ztest%*%modelrs$uhat
corvecrs<-c(corvecrs,cor(predictrs, deptestrs$plant.height))
}
mean(corvecrs)
cor(predictopt, deptestopt$plant.height)
plot(PC50WHeat[,1],PC50WHeat[,2], col=rownames(PC50WHeat)%in%ListTrain[[1]]+1,
pch=2*rownames(PC50WHeat)%in%Test+1, xlab="pc1", ylab="pc2")
# }
# NOT RUN {
# }
Run the code above in your browser using DataLab