# NOT RUN {
#
# Residuals for binary GLMs using the jittering method
#
# Simulate logistic regression data with quadratic trend
set.seed(101) # for reproducibility
n <- 1000
x <- runif(n, min = 1, max = 7)
y <- rbinom(n, size = 1, prob = plogis(16 - 8 * x + 1 * x ^ 2))
d <- data.frame("x" = x, "y" = as.factor(y))
# Fit logistic regression models (with and without quadratic trend)
fit1 <- glm(y ~ x + I(x ^ 2), data = d, family = binomial) # correct model
fit2 <- glm(y ~ x, data = d, family = binomial) # missing quadratic trend
# Construct residuals
set.seed(102) # for reproducibility
res1 <- resids(fit1)
res2 <- resids(fit2)
# Residual-vs-covariate plots
par(mfrow = c(1, 2))
scatter.smooth(d$x, res1, lpars = list(lwd = 2, col = "red"),
xlab = expression(x), ylab = "Surrogate residual",
main = "Correct model")
scatter.smooth(d$x, res2, lpars = list(lwd = 2, col = "red"),
xlab = expression(x), ylab = "Surrogate residual",
main = "Incorrect model")
# }
# NOT RUN {
#
# Residuals for cumulative link models using the latent method
#
# Load required packages
library(ggplot2) # for autoplot function
library(MASS) # for polr function
library(ordinal) # for clm function
#
# Detecting a misspecified mean structure
#
# Data simulated from a probit model with a quadratic trend
data(df1)
?df1
# Fit a probit model with/without a quadratic trend
fit.bad <- polr(y ~ x, data = df1, method = "probit")
fit.good <- polr(y ~ x + I(x ^ 2), data = df1, method = "probit")
# Some residual plots
p1 <- autoplot(fit.bad, what = "covariate", x = df1$x)
p2 <- autoplot(fit.bad, what = "qq")
p3 <- autoplot(fit.good, what = "covariate", x = df1$x)
p4 <- autoplot(fit.good, what = "qq")
# Display all four plots together (top row corresponds to bad model)
grid.arrange(p1, p2, p3, p4, ncol = 2)
#
# Detecting heteroscedasticity
#
# Data simulated from a probit model with heteroscedasticity.
data(df2)
?df2
# Fit a probit model with/without a quadratic trend
fit <- polr(y ~ x, data = df2, method = "probit")
# Some residual plots
p1 <- autoplot(fit, what = "covariate", x = df1$x)
p2 <- autoplot(fit, what = "qq")
p3 <- autoplot(fit, what = "fitted")
# Display all three plots together
grid.arrange(p1, p2, p3, ncol = 3)
#
# Detecting a misspecified link function
#
# Data simulated from a log-log model with a quadratic trend.
data(df3)
?df3
# Fit models with correctly specified link function
clm.loglog <- clm(y ~ x + I(x ^ 2), data = df3, link = "loglog")
polr.loglog <- polr(y ~ x + I(x ^ 2), data = df3, method = "loglog")
# Fit models with misspecified link function
clm.probit <- clm(y ~ x + I(x ^ 2), data = df3, link = "probit")
polr.probit <- polr(y ~ x + I(x ^ 2), data = df3, method = "probit")
# Q-Q plots of the residuals (with bootstrapping)
p1 <- autoplot(clm.probit, nsim = 50, what = "qq") +
ggtitle("clm: probit link")
p2 <- autoplot(clm.loglog, nsim = 50, what = "qq") +
ggtitle("clm: log-log link (correct link function)")
p3 <- autoplot(polr.probit, nsim = 50, what = "qq") +
ggtitle("polr: probit link")
p4 <- autoplot(polr.loglog, nsim = 50, what = "qq") +
ggtitle("polr: log-log link (correct link function)")
grid.arrange(p1, p2, p3, p4, ncol = 2)
# We can also try various goodness-of-fit tests
par(mfrow = c(1, 2))
plot(gof(clm.probit, nsim = 50))
plot(gof(clm.loglog, nsim = 50))
# }
Run the code above in your browser using DataLab