resids: Surrogate Residuals

Description

Surrogate-based residuals for cumulative link and general regression models.

Usage

resids(object, ...)
# S3 method for default
resids(object, method = c("latent", "jitter"),
  jitter.scale = c("probability", "response"), nsim = 1L, ...)
# S3 method for lrm
resids(object, method = c("latent", "jitter"),
  jitter.scale = c("probability", "response"), nsim = 1L, ...)
# S3 method for glm
resids(object, jitter.scale = c("probability", "response"),
  nsim = 1L, ...)

Arguments

object

An object of class clm, glm, lrm, orm, polr, or vglm.

...

Additional optional arguments. (Currently ignored.)

method

Character string specifying the type of surrogate to use; for details, see Liu and Zhang (2017). For cumulative link models, the latent variable method is used. For binary GLMs, the jittering approach is employed. (Currently ignored.)

jitter.scale

Character string specifying the scale on which to perform the jittering. Should be one of "probability" or "response". (Currently ignored for cumulative link models.)

nsim

Integer specifying the number of bootstrap replicates to use. Default is 1L meaning no bootstrap samples.

Value

A numeric vector of class c("numeric", "resid") containing the residuals. Additionally, if nsim > 1, then the result will contain the attributes:

boot.reps: A matrix with nsim columns, one for each bootstrap replicate of the residuals. Note, these are random and do not correspond to the original ordering of the data;
boot.id: A matrix with nsim columns. Each column contains the observation number each residual corresponds to in boot.reps. (This is used for plotting purposes.)

References

Liu, Dungang and Zhang, Heping. Residuals and Diagnostics for Ordinal Regression Models: A Surrogate Approach. Journal of the American Statistical Association (accepted). URL http://www.tandfonline.com/doi/abs/10.1080/01621459.2017.1292915?journalCode=uasa20

Nadarajah, Saralees and Kotz, Samuel. R Programs for Truncated Distributions. Journal of Statistical Software, Code Snippet, 16(2), 1-8, 2006. URL https://www.jstatsoft.org/v016/c02.

Examples

Run this code

# NOT RUN {
#
# Residuals for binary GLMs using the jittering method
#

# Simulate logistic regression data with quadratic trend
set.seed(101)  # for reproducibility
n <- 1000
x <- runif(n, min = 1, max = 7)
y <- rbinom(n, size = 1, prob = plogis(16 - 8 * x + 1 * x ^ 2))
d <- data.frame("x" = x, "y" = as.factor(y))

# Fit logistic regression models (with and without quadratic trend)
fit1 <- glm(y ~ x + I(x ^ 2), data = d, family = binomial)  # correct model
fit2 <- glm(y ~ x, data = d, family = binomial)  # missing quadratic trend

# Construct residuals
set.seed(102)  # for reproducibility
res1 <- resids(fit1)
res2 <- resids(fit2)

# Residual-vs-covariate plots
par(mfrow = c(1, 2))
scatter.smooth(d$x, res1, lpars = list(lwd = 2, col = "red"),
               xlab = expression(x), ylab = "Surrogate residual",
               main = "Correct model")
scatter.smooth(d$x, res2, lpars = list(lwd = 2, col = "red"),
               xlab = expression(x), ylab = "Surrogate residual",
               main = "Incorrect model")

# }
# NOT RUN {
#
# Residuals for cumulative link models using the latent method
#

# Load required packages
library(ggplot2)  # for autoplot function
library(MASS)     # for polr function
library(ordinal)  # for clm function

#
# Detecting a misspecified mean structure
#

# Data simulated from a probit model with a quadratic trend
data(df1)
?df1

# Fit a probit model with/without a quadratic trend
fit.bad <- polr(y ~ x, data = df1, method = "probit")
fit.good <- polr(y ~ x + I(x ^ 2), data = df1, method = "probit")

# Some residual plots
p1 <- autoplot(fit.bad, what = "covariate", x = df1$x)
p2 <- autoplot(fit.bad, what = "qq")
p3 <- autoplot(fit.good, what = "covariate", x = df1$x)
p4 <- autoplot(fit.good, what = "qq")

# Display all four plots together (top row corresponds to bad model)
grid.arrange(p1, p2, p3, p4, ncol = 2)

#
# Detecting heteroscedasticity
#

# Data simulated from a probit model with heteroscedasticity.
data(df2)
?df2

# Fit a probit model with/without a quadratic trend
fit <- polr(y ~ x, data = df2, method = "probit")

# Some residual plots
p1 <- autoplot(fit, what = "covariate", x = df1$x)
p2 <- autoplot(fit, what = "qq")
p3 <- autoplot(fit, what = "fitted")

# Display all three plots together
grid.arrange(p1, p2, p3, ncol = 3)

#
# Detecting a misspecified link function
#

# Data simulated from a log-log model with a quadratic trend.
data(df3)
?df3

# Fit models with correctly specified link function
clm.loglog <- clm(y ~ x + I(x ^ 2), data = df3, link = "loglog")
polr.loglog <- polr(y ~ x + I(x ^ 2), data = df3, method = "loglog")

# Fit models with misspecified link function
clm.probit <- clm(y ~ x + I(x ^ 2), data = df3, link = "probit")
polr.probit <- polr(y ~ x + I(x ^ 2), data = df3, method = "probit")

# Q-Q plots of the residuals (with bootstrapping)
p1 <- autoplot(clm.probit, nsim = 50, what = "qq") +
  ggtitle("clm: probit link")
p2 <- autoplot(clm.loglog, nsim = 50, what = "qq") +
  ggtitle("clm: log-log link (correct link function)")
p3 <- autoplot(polr.probit, nsim = 50, what = "qq") +
  ggtitle("polr: probit link")
p4 <- autoplot(polr.loglog, nsim = 50, what = "qq") +
  ggtitle("polr: log-log link (correct link function)")
grid.arrange(p1, p2, p3, p4, ncol = 2)

# We can also try various goodness-of-fit tests
par(mfrow = c(1, 2))
plot(gof(clm.probit, nsim = 50))
plot(gof(clm.loglog, nsim = 50))
# }

Run the code above in your browser using DataLab