### A simple example: the cars data plus one purely random covariate.
## Fix the RNG seed so that the simulated covariate is reproducible.
set.seed(1234)
## Draw one standard-normal value per observation. The count is taken from
## the data instead of being hard-coded, so z always has one entry per row.
cars$z <- rnorm(nrow(cars))
########################################
## Plot linear models
########################################
## fit a linear model
## NOTE(review): glmboost() and `mstop<-` are not defined in this script;
## presumably they come from the mboost package, which must be attached first.
cars.lm <- glmboost(dist ~ speed + z, data = cars)
## plot coefficient paths of glmboost;
## par() returns the previous settings, which are kept so that they can be
## restored once this section is done
op <- par(mfrow = c(3, 1), mar = c(4, 4, 4, 8))
plot(cars.lm,
     main = "Coefficient paths (offset not included)")
plot(cars.lm, off2int = TRUE,
     main = "Coefficient paths (offset included in intercept)")
## plot coefficient paths only for the first 15 steps,
## i.e., before z is selected
mstop(cars.lm) <- 15
plot(cars.lm, off2int = TRUE, main = "z is not yet selected")
## restore the graphics parameters changed above
par(op)
########################################
## Plot additive models; basics
########################################
## fit an additive model
cars.gam <- gamboost(dist ~ speed + z, data = cars)
## plot effects side by side; the previous graphics settings are kept so
## that they can be restored at the end of this section
op <- par(mfrow = c(1, 2), mar = c(4, 4, 0.1, 0.1))
plot(cars.gam)
## use the same y-limits for every effect
plot(cars.gam, ylim = c(-50, 50))
## plot only the effect of speed
plot(cars.gam, which = "speed")
## as partial matching is used for `which`, an abbreviation works as well
plot(cars.gam, which = "sp")
## restore the graphics parameters changed above
par(op)
########################################
## More complex plots
########################################
## Let us use more boosting iterations and compare the effects.
## We change the plot type and plot both effects in one figure.
## The previous graphics settings are kept so they can be restored afterwards.
op <- par(mfrow = c(1, 1), mar = c(4, 4, 4, 0.1))
mstop(cars.gam) <- 100
plot(cars.gam, which = 1, col = "red", type = "l", rug = FALSE,
     main = "Compare effect for various models")
## Now the same model with 1000 iterations
mstop(cars.gam) <- 1000
lines(cars.gam, which = 1, col = "grey", lty = "dotted")
## There are some gaps in the data. Use newdata to get a smoother curve:
## an equally spaced grid of 200 speed values over the observed range.
## (`length.out` is spelled out instead of relying on partial matching.)
newdata <- data.frame(speed = seq(min(cars$speed), max(cars$speed),
                                  length.out = 200))
lines(cars.gam, which = 1, col = "grey", lty = "dashed",
      newdata = newdata)
## The model with 1000 steps seems to overfit the data.
## Usually one should use e.g. cross-validation to tune the model.
## Finally we refit the model using linear effects as comparison
cars.glm <- gamboost(dist ~ speed + z, baselearner = bols, data = cars)
lines(cars.glm, which = 1, col = "black")
## We see that all effects are more or less linear.
## Add a legend. The grey entry refers to the dashed curve drawn from
## newdata; the dotted curve drawn above has no legend entry of its own.
legend("topleft", title = "Model",
       legend = c("... with mstop = 100", "... with mstop = 1000",
                  "... with linear effects"),
       lty = c("solid", "dashed", "solid"),
       col = c("red", "grey", "black"))
## restore the graphics parameters changed above
par(op)
## Run the code above in your browser using DataLab