## Benchmark: time lm() against lmfreq() on the hflights data set.
## hflights is an optional dependency, so only check that it is installed
## (requireNamespace) rather than attaching the whole package with require().
if (requireNamespace("hflights", quietly = TRUE)) {
  ## Bind the data set under its usual name so the calls below (and any later
  ## code that refers to `hflights`) work without the package being attached.
  hflights <- hflights::hflights
  formula <- ArrDelay ~ DepDelay
  ## ~0.4 seconds
  print(system.time(a <- lm(formula, data = hflights)))
  ## ~0.12 seconds, i.e. roughly 3x faster
  print(system.time(b <- lmfreq(formula, data = hflights)))
}
## Reference fit: ordinary lm() on the raw iris rows.
l0 <- lm(Sepal.Length ~ Sepal.Width, data = iris)
summary(l0)

## Same model fitted through lmfreq() on the tablefreq() output of the two
## columns involved; `freq = "freq"` names the column passed as frequencies.
tfq <- tablefreq(iris[, c("Sepal.Length", "Sepal.Width")])
lf <- lmfreq(Sepal.Length ~ Sepal.Width, data = tfq, freq = "freq")
summary(lf)

## Compare the coefficients and the AIC of both fits.
all.equal(coef(lf), coef(l0))
all.equal(AIC(lf), AIC(l0))

## Predict on new data that contains a missing value.
newdata <- data.frame(Sepal.Width = c(1, NA, 7))
predict(lf, newdata)
## Stepwise selection by AIC on the lmfreq fit, run only when the optional
## MASS package is installed. requireNamespace() checks availability without
## attaching MASS, so none of its exports are added to the search path.
if (requireNamespace("MASS", quietly = TRUE)) {
  MASS::stepAIC(lf)
}
## Time lmfreq() and the dot-prefixed .lmfreq() on the same `tfq` table.
system.time(lmfreq(Sepal.Length ~ Sepal.Width, data = tfq, freq = "freq"))
system.time(.lmfreq(Sepal.Length ~ Sepal.Width, tfq)) # Fast
library(dplyr)
## Fit one lmfreq model per Species and collect the coefficients in a table.
## NOTE(review): do() is superseded in current dplyr releases; kept here so the
## example behaves exactly as before.
igrouped <- group_by(iris, Species)
models <- igrouped %>%
  do(model = lmfreq(Sepal.Length ~ Sepal.Width, data = .))
coefs <- models %>%
  do(
    cbind(
      as.data.frame(rbind(coef(.$model))),
      Species = .$Species
    )
  )
coefs
## Not run: ------------------------------------
# ## If the data is too granular, lmfreq performs worse than lm
# n <- 10^6
# data <- data.frame(y=rnorm(n),x=rnorm(n))
# system.time(lm(y~x,data)) ## ~5 seconds
# system.time(lmfreq(y~x,data)) ## ~ 15 seconds
# system.time(tfq <- tablefreq(data)) ## ~ 5 seconds
# nrow(tfq) # same number of rows as the original data
# system.time(.lmfreq(y~x,tfq)) ## ~ 10 seconds
## ---------------------------------------------
## Run the code above in your browser using DataLab