# Create a 2-worker 'parallel' cluster, then pass it to setclsinfo()
# NOTE(review): setclsinfo() presumably prepares the workers for the calls
# below -- confirm against the package documentation
cls <- makeCluster(2)
setclsinfo(cls)

# Simulated test data: n rows, p predictor columns plus one noise column
n <- 10000
p <- 2
tmp <- matrix(rnorm(n * (p + 1)), nrow = n)

# The first p columns of tmp are the "X" values.  The appended "Y" column is
# the row sum of the X's plus the noise column tmp[, p + 1], so in u cols 1,2
# are "X", col 3 is "Y", and the true regression coefficients are (0, 1, 1).
u <- tmp[, seq_len(p)]
u <- cbind(u, u %*% rep(1, p) + tmp[, p + 1])

# Split u across the cluster; the object is passed by name, as a string
distribsplit(cls, "u")

# Fit the regression on the distributed data through the calm() wrapper
# (disabled alternative using explicit column indexing:)
#### calm(cls,"u[,3] ~ u[,1]+u[,2]")$tht
calm(cls, "V3 ~ .,data=u")$tht

# Serial fit for comparison; the estimates should be approximately the same
lm(u[, 3] ~ u[, 1] + u[, 2])

# Same analysis without the wrapper: hand ca() a fitting function directly,
# using coef() for the estimates and vcov() for their covariance matrix.
# NOTE(review): z168 is not defined anywhere in this script; presumably ca()
# makes the data chunk available under that name -- confirm
ovf <- function(dummy = NULL) {
  lm(V3 ~ ., data = z168)
}
ca(cls, u, ovf, estf = coef, estcovf = vcov)$tht
## Not run: ------------------------------------
# # Census data on programmers and engineers; include a quadratic term for
# # age, due to nonmonotone relation to income
# data(prgeng)
# distribsplit(cls,"prgeng")
# caout <- calm(cls,"wageinc ~ age+I(age^2)+sex+wkswrkd,data=prgeng")
# caout$tht
# # compare to nonparallel
# lm(wageinc ~ age+I(age^2)+sex+wkswrkd,data=prgeng)
# # get standard errors of the beta-hats
# sqrt(diag(caout$thtcov))
#
# # find mean age for all combinations of the cit and sex variables
# caagg(cls,"age",c("cit","sex"),"prgeng","mean")
# # compare to nonparallel
# aggregate(age ~ cit+sex,data=prgeng,mean)
#
# data(newadult)
# distribsplit(cls,"newadult")
# caglm(cls," gt50 ~ ., family = binomial,data=newadult")$tht
#
# caprcomp(cls,'newadult,scale=TRUE',5)$sdev
# prcomp(newadult,scale=TRUE)$sdev
#
# cameans(cls,"prgeng")
# cameans(cls,"prgeng[,c('age','wageinc')]")
# caquantile(cls,'prgeng$age')
#
# pe <- prgeng[,c(1,3,8)]
# distribsplit(cls,"pe")
# z1 <- cakm(cls,'pe',3,3); z1$size; z1$centers
# # check whether the algorithm is unstable
# z1$thts # looks unstable
#
# pe <- prgeng
# pe$ms <- as.integer(pe$educ == 14)
# pe$phd <- as.integer(pe$educ == 16)
# pe <- pe[,c(1,7,8,9,12,13)]
# distribsplit(cls,'pe',scramble=TRUE)
# kout <- caknn(cls,'pe[,3]',50,'pe[,-3]')
## ---------------------------------------------
# shut down the cluster that was created at the top of the script
stopCluster(cls)
# Run the code above in your browser using DataLab