# NOT RUN {
# Example: fitting generalized linear models with SparkR's spark.glm().
# Variable names deliberately avoid shadowing base/stats functions such as
# t(), df() and fitted().

# start the SparkR session
sparkR.session()

# flatten the Titanic table into a local data frame (strings kept as
# character rather than factor) and copy it into a SparkDataFrame
titanic <- as.data.frame(Titanic, stringsAsFactors = FALSE)
titanic_sdf <- createDataFrame(titanic)

# fit a gaussian model
gaussian_model <- spark.glm(
  titanic_sdf,
  Freq ~ Sex + Age,
  family = "gaussian"
)
summary(gaussian_model)

# fitted values on training data
predictions <- predict(gaussian_model, titanic_sdf)
head(select(predictions, "Freq", "prediction"))

# save fitted model to input path
model_path <- "path/to/model"
write.ml(gaussian_model, model_path)

# can also read back the saved model and print
saved_model <- read.ml(model_path)
summary(saved_model)

# note that the default string encoding is different from R's glm
r_model <- glm(Freq ~ Sex + Age, family = "gaussian", data = titanic)
summary(r_model)

# use stringIndexerOrderType = "alphabetDesc" to force string encoding
# to be consistent with R
r_consistent_model <- spark.glm(
  titanic_sdf,
  Freq ~ Sex + Age,
  family = "gaussian",
  stringIndexerOrderType = "alphabetDesc"
)
summary(r_consistent_model)

# fit tweedie model
tweedie_model <- spark.glm(
  titanic_sdf,
  Freq ~ Sex + Age,
  family = "tweedie",
  var.power = 1.2,
  link.power = 0
)
summary(tweedie_model)

# use the tweedie family from statmod; the arguments are named so the
# correspondence with var.power / link.power above is explicit
library(statmod)
statmod_tweedie_model <- spark.glm(
  titanic_sdf,
  Freq ~ Sex + Age,
  family = tweedie(var.power = 1.2, link.power = 0)
)
summary(statmod_tweedie_model)
# }
# Run the code above in your browser using DataLab