# \donttest{
# The following comparison among glm(), bigglm() and speedglm() cannot be considered rigorous
# and exhaustive, but it is only to give an idea of the computation time.
# It may take a long time.
library(biglm)
n<-10000
k<-70
y <- rgamma(n,1.5,1)
x <-round( matrix(rnorm(n*k),n,k),digits=3)
colnames(x) <-paste("s",1:k,sep = "")
da<- data.frame(y,x)
fo <- as.formula(paste("y~",paste(paste("s",1:k,sep=""),collapse="+")))
system.time(m1 <- glm(fo,data=da,family=Gamma(log)))
system.time(m2 <- bigglm(fo,data=da,family=Gamma(log)))
system.time(m3 <- speedglm(fo,data=da,family=Gamma(log)))
# You may also try speedglm when R is linked against an optimized BLAS,
# otherwise try to run the following function. In some computers, it is
# faster for large data sets.
system.time(m4 <- speedglm(fo,data=da,family=Gamma(log),set.default=list(row.chunk=50)))
##################
## An example of function using a connection to an out-memory file
## This is a slightly modified version of the function from the bigglm's help page
make.data<-function(filename, chunksize,...){
conn<-NULL
function(reset=FALSE){
if(reset){
if(!is.null(conn)) close(conn)
conn<<-file(filename,open="r")
} else{
rval<-read.table(conn, nrows=chunksize,...)
if ((nrow(rval)==0)) {
close(conn)
conn<<-NULL
rval<-NULL
}
return(rval)
}
}
}
# data1 is a small toy dataset
data(data1)
tmp_data1<-tempfile("data1",fileext=".txt")
write.table(data1,tmp_data1,row.names=FALSE,col.names=FALSE)
da<-make.data(tmp_data1,chunksize=50,col.names=c("y","fat1","x1","x2"))
# Caution! make sure to close the connection once you have run command #1
da(reset=TRUE) #1: opens the connection to "data1.txt"
da(reset=FALSE) #2: reads the first 50 rows (out of 100) of the dataset
da(reset=FALSE) #3: reads the second 50 rows (out of 100) of the dataset
da(reset=FALSE) #4: is NULL: this latter command closes the connection
# fat1 is a factor with four levels
b1<-shglm(y~factor(fat1)+x1,weights=~I(x2^2),datafun=da,family=Gamma(log))
b2<-bigglm(y~factor(fat1)+x1,weights=~I(x2^2),data=da,family=Gamma(log))
summary(b1)
summary(b2)
# }
Run the code above in your browser using DataLab