require(rms)
require(ggplot2)
set.seed(1)
age <- rnorm(200,40,12)
sex <- factor(sample(c('female','male'),200,TRUE))
logit <- (sex=='male') + (age-40)/5
y <- ifelse(runif(200) <= plogis(logit), 1, 0)
f <- lrm(y ~ pol(age,2)*sex)
anova(f)
# Compare a 30 year old female to a 40 year old male
# (with or without age x sex interaction in the model)
contrast(f, list(sex='female', age=30), list(sex='male', age=40))
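# Illustration of the parenthetical note above (f2 is an added example fit,
# not part of the original sequence): the same contrast() call applies
# unchanged to a model that omits the age x sex interaction
f2 <- lrm(y ~ pol(age,2) + sex)
contrast(f2, list(sex='female', age=30), list(sex='male', age=40))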
# Test for interaction between age and sex, duplicating anova
contrast(f, list(sex='female', age=30),
            list(sex='male',   age=30),
            list(sex='female', age=c(40,50)),
            list(sex='male',   age=c(40,50)), type='joint')
# Duplicate overall sex effect in anova with 3 d.f.
contrast(f, list(sex='female', age=c(30,40,50)),
            list(sex='male',   age=c(30,40,50)), type='joint')
# For females get an array of odds ratios against age=40
k <- contrast(f, list(sex='female', age=30:50),
                 list(sex='female', age=40))
print(k, fun=exp)
# Plot odds ratios with pointwise 0.95 confidence bands using log scale
k <- as.data.frame(k[c('Contrast','Lower','Upper')])
ggplot(k, aes(x=30:50, y=exp(Contrast))) + geom_line() +
  geom_ribbon(aes(ymin=exp(Lower), ymax=exp(Upper)),
              alpha=0.15, linetype=0) +
  scale_y_continuous(trans='log10', n.breaks=10,
                     minor_breaks=c(seq(0.1, 1, by=.1), seq(1, 10, by=.5))) +
  xlab('Age') + ylab('OR against age 40')
# For a model containing two treatments, centers, and treatment
# x center interaction, get 0.95 confidence intervals separately
# by center
center <- factor(sample(letters[1 : 8], 500, TRUE))
treat <- factor(sample(c('a','b'), 500, TRUE))
y <- 8*(treat == 'b') + rnorm(500, 100, 20)
f <- ols(y ~ treat*center)
lc <- levels(center)
contrast(f, list(treat='b', center=lc),
            list(treat='a', center=lc))
# Get 'Type III' contrast: average b - a treatment effect over
# centers, weighting centers equally (which is almost always
# an unreasonable thing to do)
contrast(f, list(treat='b', center=lc),
            list(treat='a', center=lc),
         type='average')
# Get 'Type II' contrast, weighting centers by the number of
# subjects per center. Print the design contrast matrix used.
k <- contrast(f, list(treat='b', center=lc),
                 list(treat='a', center=lc),
              type='average', weights=table(center))
print(k, X=TRUE)
# Note: If other variables had interacted with either treat
# or center, we may want to list settings for those variables
# inside the list()s so that default settings are not used (sketched below)
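# Sketch of that note using a hypothetical continuous covariate 'baseline'
# (simulated here) that also interacts with treat; fixing it inside both
# list()s keeps contrast() from falling back on default settings
baseline <- rnorm(500, 10, 3)
f2 <- ols(y ~ treat * (center + baseline))
contrast(f2, list(treat='b', center=lc, baseline=10),
             list(treat='a', center=lc, baseline=10))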
# For a 4-treatment study, get all comparisons with treatment 'a'
treat <- factor(sample(c('a','b','c','d'), 500, TRUE))
y <- 8*(treat == 'b') + rnorm(500, 100, 20)
dd <- datadist(treat, center); options(datadist='dd')
f <- ols(y ~ treat*center)
lt <- levels(treat)
contrast(f, list(treat=lt[-1]),
            list(treat=lt[ 1]),
         cnames=paste(lt[-1], lt[1], sep=':'), conf.int=1 - .05 / 3)
# Compare each treatment with average of all others
for(i in 1 : length(lt)) {
  cat('Comparing with', lt[i], '\n\n')
  print(contrast(f, list(treat=lt[-i]),
                    list(treat=lt[ i]), type='average'))
}
options(datadist=NULL)
# Six ways to get the same thing, for a variable that
# appears linearly in a model and does not interact with
# any other variables. We estimate the change in y per
# unit change in a predictor x1. Methods 4, 5 also
# provide confidence limits. Method 6 computes nonparametric
# bootstrap confidence limits. Methods 2-6 can work
# for models that are nonlinear or non-additive in x1.
# For that case more care is needed in choice of settings
# for x1 and the variables that interact with x1.
if (FALSE) {
coef(fit)['x1'] # method 1
diff(predict(fit, gendata(x1=c(0,1)))) # method 2
g <- Function(fit) # method 3
g(x1=1) - g(x1=0)
summary(fit, x1=c(0,1)) # method 4
k <- contrast(fit, list(x1=1), list(x1=0)) # method 5
print(k, X=TRUE)
fit <- update(fit, x=TRUE, y=TRUE) # method 6
b <- bootcov(fit, B=500)
contrast(b, list(x1=1), list(x1=0))
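# Hypothetical sketch of the extra care needed when x1 is nonlinear and
# interacts with another predictor x2 (fit2, x1, x2 are illustrative names
# only): specify the x2 setting explicitly and choose the two x1 values
# whose difference is of interest
fit2 <- ols(y ~ rcs(x1, 4) * x2)
contrast(fit2, list(x1=1, x2='b'), list(x1=0, x2='b'))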
# In a model containing age, race, and sex,
# compute an estimate of the mean response for a
# 50 year old male, averaged over the races using
# observed frequencies for the races as weights
f <- ols(y ~ age + race + sex)
contrast(f, list(age=50, sex='male', race=levels(race)),
         type='average', weights=table(race))
# For a Bayesian model get the highest posterior interval for the
# difference in two nonlinear functions of predicted values
# Start with the mean from a proportional odds model
g <- blrm(y ~ x)
M <- Mean(g)
contrast(g, list(x=1), list(x=0), fun=M)
# For the median we have to make sure that contrast can pass the
# per-posterior-draw vector of intercepts through
qu <- Quantile(g)
med <- function(lp, intercepts) qu(0.5, lp, intercepts=intercepts)
contrast(g, list(x=1), list(x=0), fun=med)
}
# Plot the treatment effect (drug - placebo) as a function of age
# and sex in a model in which age nonlinearly interacts with treatment
# for females only
set.seed(1)
n <- 800
treat <- factor(sample(c('drug','placebo'), n,TRUE))
sex <- factor(sample(c('female','male'), n,TRUE))
age <- rnorm(n, 50, 10)
y <- .05*age + (sex=='female')*(treat=='drug')*.05*abs(age-50) + rnorm(n)
f <- ols(y ~ rcs(age,4)*treat*sex)
d <- datadist(age, treat, sex); options(datadist='d')
# show separate estimates by treatment and sex
ggplot(Predict(f, age, treat, sex='female'))
ggplot(Predict(f, age, treat, sex='male'))
ages <- seq(35,65,by=5); sexes <- c('female','male')
w <- contrast(f, list(treat='drug',    age=ages, sex=sexes),
                 list(treat='placebo', age=ages, sex=sexes))
# add conf.type="simultaneous" to adjust for having done 14 contrasts
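# Sketch of that adjustment (assumes the multcomp package is installed, which
# contrast() relies on for simultaneous intervals)
if (requireNamespace('multcomp', quietly=TRUE))
  contrast(f, list(treat='drug',    age=ages, sex=sexes),
              list(treat='placebo', age=ages, sex=sexes),
           conf.type='simultaneous')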
xYplot(Cbind(Contrast, Lower, Upper) ~ age | sex, data=w,
       ylab='Drug - Placebo')
w <- as.data.frame(w[c('age','sex','Contrast','Lower','Upper')])
ggplot(w, aes(x=age, y=Contrast)) + geom_point() + facet_grid(sex ~ .) +
  geom_errorbar(aes(ymin=Lower, ymax=Upper), width=0)
ggplot(w, aes(x=age, y=Contrast)) + geom_line() + facet_grid(sex ~ .) +
  geom_ribbon(aes(ymin=Lower, ymax=Upper), alpha=0.15, linetype=0)
xYplot(Cbind(Contrast, Lower, Upper) ~ age, groups=sex, data=w,
       ylab='Drug - Placebo', method='alt bars')
options(datadist=NULL)
# Examples of type='joint' contrast tests
set.seed(1)
x1 <- rnorm(100)
x2 <- factor(sample(c('a','b','c'), 100, TRUE))
dd <- datadist(x1, x2); options(datadist='dd')
y <- x1 + (x2=='b') + rnorm(100)
# First replicate a test statistic from anova()
f <- ols(y ~ x2)
anova(f)
contrast(f, list(x2=c('b','c')), list(x2='a'), type='joint')
# Repeat with a redundancy; compare a vs b, a vs c, b vs c
contrast(f, list(x2=c('a','a','b')), list(x2=c('b','c','c')), type='joint')
# Get a test of association of a continuous predictor with y
# First assume linearity, then cubic
f <- lrm(y>0 ~ x1 + x2)
anova(f)
contrast(f, list(x1=1), list(x1=0), type='joint') # a minimum set of contrasts
xs <- seq(-2, 2, length=20)
contrast(f, list(x1=0), list(x1=xs), type='joint')
# All contrasts were redundant except for the first, because of
# linearity assumption
f <- lrm(y>0 ~ pol(x1,3) + x2)
anova(f)
contrast(f, list(x1=0), list(x1=xs), type='joint')
print(contrast(f, list(x1=0), list(x1=xs), type='joint'), jointonly=TRUE)
# All contrasts were redundant except for the first 3, because of
# cubic regression assumption
# Now do something that is difficult to do without cryptic contrast
# matrix operations: Allow each of the three x2 groups to have a different
# shape for the x1 effect where x1 is quadratic. Test whether there is
# a difference in mean levels of y for x2='b' vs. 'c' or whether
# the shape or slope of x1 is different between x2='b' and x2='c' regardless
# of how they differ when x2='a'. In other words, test whether the mean
# response differs between group b and c at any value of x1.
# This is a 3 d.f. test (intercept, linear, quadratic effects) and is
# a better approach than subsetting the data to remove x2='a' then
# fitting a simpler model, as it uses a better estimate of sigma from
# all the data.
f <- ols(y ~ pol(x1,2) * x2)
anova(f)
contrast(f, list(x1=xs, x2='b'),
            list(x1=xs, x2='c'), type='joint')
# Note: If using a spline fit, there should be at least one value of
# x1 between any two knots and beyond the outer knots.
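# Hypothetical spline analogue of the quadratic model above (f2 is an added
# example fit): the x1 values in xs span the regions between and beyond the
# knots chosen by rcs()
f2 <- ols(y ~ rcs(x1, 4) * x2)
contrast(f2, list(x1=xs, x2='b'),
             list(x1=xs, x2='c'), type='joint')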
options(datadist=NULL)