# Simulated data: a cubic trend in x plus Gaussian noise.
# MASS is attached here; later examples fit models with rlm from MASS.
library(MASS)
# Fixed seed so that the random noise is reproducible.
set.seed(4321)
x <- seq_len(100)
# Noise standard deviation is one quarter of the mean of x^3.
y <- x + x^2 + x^3 +
  rnorm(n = length(x), mean = 0, sd = mean(x^3) / 4)
my.data <- data.frame(x = x, y = y)
# Linear model of y on x: fitted line, deviations in red, observations on top.
ggplot(data = my.data, mapping = aes(x = x, y = y)) +
  geom_smooth(formula = y ~ x, method = "lm") +
  stat_fit_deviations(formula = y ~ x, method = "lm", colour = "red") +
  geom_point()

# Linear model with y as the explanatory variable: the smoother uses
# orientation = "y" while the deviations are requested with formula x ~ y.
ggplot(data = my.data, mapping = aes(x = x, y = y)) +
  geom_smooth(formula = y ~ x, method = "lm", orientation = "y") +
  stat_fit_deviations(formula = x ~ y, method = "lm", colour = "red") +
  geom_point()

# Same as above, but relying on orientation = "y" in both layers instead of
# passing formulas explicitly.
ggplot(data = my.data, mapping = aes(x = x, y = y)) +
  geom_smooth(method = "lm", orientation = "y") +
  stat_fit_deviations(colour = "red", orientation = "y") +
  geom_point()

# Both regressions in one plot: default orientation in blue deviations,
# orientation = "y" in red.
ggplot(data = my.data, mapping = aes(x = x, y = y)) +
  geom_smooth(method = "lm") +
  stat_fit_deviations(colour = "blue") +
  geom_smooth(method = "lm", colour = "red", orientation = "y") +
  stat_fit_deviations(colour = "red", orientation = "y") +
  geom_point()
# Keep the model formula (third-degree raw polynomial in x) in a variable so
# that the same formula can be handed to every layer below.
my.formula <- y ~ poly(x, 3, raw = TRUE)

# Polynomial fitted with lm; deviations from the fit shown in red.
ggplot(data = my.data, mapping = aes(x = x, y = y)) +
  geom_smooth(formula = my.formula, method = "lm") +
  stat_fit_deviations(colour = "red", formula = my.formula) +
  geom_point()

# Same plot, with the fitting method passed to stat_fit_deviations() as a
# function object (stats::lm) rather than by name.
ggplot(data = my.data, mapping = aes(x = x, y = y)) +
  geom_smooth(formula = my.formula, method = "lm") +
  stat_fit_deviations(method = stats::lm, formula = my.formula,
                      colour = "red") +
  geom_point()
# Robust regression (rlm): fitted curve and deviations in red.
ggplot(data = my.data, mapping = aes(x = x, y = y)) +
  stat_smooth(formula = my.formula, method = "rlm") +
  stat_fit_deviations(method = "rlm", formula = my.formula, colour = "red") +
  geom_point()

# Robust regression with the weights mapped to colour.
# A copy of the data is modified so that observation 6 has its y value
# multiplied by 10.
my.data.outlier <- my.data
my.data.outlier$y[6] <- 10 * my.data.outlier$y[6]

ggplot(data = my.data.outlier, mapping = aes(x = x, y = y)) +
  stat_smooth(formula = my.formula, method = MASS::rlm) +
  stat_fit_deviations(mapping = aes(colour = after_stat(weights)),
                      method = "rlm",
                      formula = my.formula,
                      show.legend = TRUE) +
  scale_color_gradient(limits = c(0, 1), low = "red", high = "blue",
                       guide = "colourbar") +
  geom_point()
# Quantile regression at the 0.5 quantile (= median regression).
ggplot(data = my.data, mapping = aes(x = x, y = y)) +
  stat_quantile(quantiles = 0.5, formula = my.formula) +
  stat_fit_deviations(method = "rq", formula = my.formula, colour = "red") +
  geom_point()

# Quantile regression at the 0.75 quantile (= "quartile" regression); the
# quantile is passed to the fitting method through method.args as tau.
ggplot(data = my.data, mapping = aes(x = x, y = y)) +
  stat_quantile(quantiles = 0.75, formula = my.formula) +
  stat_fit_deviations(method = "rq",
                      method.args = list(tau = 0.75),
                      formula = my.formula,
                      colour = "red") +
  geom_point()
# Inspect the data returned by the statistic using geom_debug().
# gginnards is optional: the remaining examples run only when it is installed.
gginnards.installed <- requireNamespace("gginnards", quietly = TRUE)
if (gginnards.installed) {
  library(gginnards)
}

# lm fit: geom = "debug" is used to explore the after_stat data.
if (gginnards.installed) {
  ggplot(data = my.data, mapping = aes(x = x, y = y)) +
    geom_smooth(formula = my.formula, method = "lm") +
    stat_fit_deviations(geom = "debug", formula = my.formula) +
    geom_point()
}

# rlm fit on the data containing the modified observation, again explored
# with geom = "debug".
if (gginnards.installed) {
  ggplot(data = my.data.outlier, mapping = aes(x = x, y = y)) +
    stat_smooth(formula = my.formula, method = MASS::rlm) +
    stat_fit_deviations(geom = "debug", method = "rlm",
                        formula = my.formula) +
    geom_point()
}
# Run the code above in your browser using DataLab