## EXAMPLE 1:
## A cross sectional study investigating the relationship between dry cat
## food (DCF) and feline urologic syndrome (FUS) was conducted (Willeberg
## 1977). Counts of individuals in each group were as follows:
## DCF-exposed cats (cases, non-cases) 13, 2163
## Non DCF-exposed cats (cases, non-cases) 5, 3349
## Outcome variable (FUS) as columns:
dat.v01 <- c(13,2163,5,3349); dat.v01
epi.2by2(dat = dat.v01, method = "cross.sectional", digits = 2,
conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns")
## Outcome variable (FUS) as rows:
dat.v01 <- c(13,5,2163,3349); dat.v01
epi.2by2(dat = dat.v01, method = "cross.sectional", digits = 2,
conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.rows")
## The prevalence of FUS in DCF exposed cats was 4.01 (95% CI 1.43 to 11.23)
## times greater than the prevalence of FUS in non-DCF exposed cats.
## In DCF exposed cats, 75% (95% CI 30% to 91%) of the FUS cases were
## attributable to DCF.
## Fifty-four percent of FUS cases in the population was attributable
## to DCF (95% CI 4% to 78%).
## EXAMPLE 2:
## This example shows how the table function in base R can be used to pass
## data to epi.2by2. Here we use the birthwt data set from the MASS package.
library(MASS)
dat.df02 <- birthwt; head(dat.df02)
## Generate a table of cell frequencies. First, set the outcome and exposure
## as factors and set their levels appropriately so the frequencies in the
## 2 by 2 table come out in the conventional format:
dat.df02$low <- factor(dat.df02$low, levels = c(1,0))
dat.df02$smoke <- factor(dat.df02$smoke, levels = c(1,0))
dat.df02$race <- factor(dat.df02$race, levels = c(1,2,3))
dat.tab02 <- table(dat.df02$smoke, dat.df02$low, dnn = c("Smoke", "Low BW"))
print(dat.tab02)
## Compute the odds ratio and other measures of association:
epi.2by2(dat = dat.tab02, method = "cohort.count", digits = 2,
conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns")
## The odds of having a low birth weight child for smokers was 2.02
## (95% CI 1.08 to 3.78) times greater than the odds of having a low birth
## weight child for non-smokers.
## Stratify by race:
dat.tab02 <- table(dat.df02$smoke, dat.df02$low, dat.df02$race,
dnn = c("Smoke", "Low BW", "Race"))
print(dat.tab02)
## Compute the crude odds ratio, the Mantel-Haenszel adjusted odds ratio
## and other measures of association:
dat.epi02 <- epi.2by2(dat = dat.tab02, method = "cohort.count", digits = 2,
conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns")
print(dat.epi02)
## The Mantel-Haenszel test of homogeneity of the strata odds ratios is not
## significant (chi square test statistic 2.800; df 2; p-value = 0.25).
## We accept the null hypothesis and conclude that the odds ratios for
## each strata of race are the same.
## After accounting for the confounding effect of race, the odds of
## having a low birth weight child for smokers was 3.09 (95% CI 1.49 to 6.39)
## times that of non-smokers.
## Compare the Greenland-Robins confidence intervals for the Mantel-Haenszel
## adjusted attributable risk with the Wald confidence intervals for the
## Mantel-Haenszel adjusted attributable risk:
dat.epi02$massoc.detail$ARisk.mh.green
dat.epi02$massoc.detail$ARisk.mh.wald
## How many mothers need to stop smoking to avoid one low birth weight baby?
dat.epi02$massoc.interp$text[dat.epi02$massoc.interp$var ==
"NNTB NNTH (crude)"]
## If we don't account for confounding the number of mothers that need to
## stop smoking to avoid one low birth weight baby (NNTB) is
## 7 (95% CI 3 to 62).
dat.epi02$massoc.interp$text[dat.epi02$massoc.interp$var == "NNTB NNTH (M-H)"]
## After accounting for the confounding effect of race the number of mothers
## that need to stop smoking to avoid one low birth weight baby (NNTB) is
## 5 (95% CI 2 to 71).
## Now turn dat.tab02 into a data frame where the frequencies of individuals in
## each exposure-outcome category are provided. Often your data will be
## presented in this summary format:
dat.df02 <- data.frame(dat.tab02); head(dat.df02)
## Re-format dat.df02 (a summary count data frame) into tabular format using
## the xtabs function:
dat.tab02 <- xtabs(Freq ~ Smoke + Low.BW + Race, data = dat.df02)
print(dat.tab02)
# dat02.tab can now be passed to epi.2by2:
dat.epi02 <- epi.2by2(dat = dat.tab02, method = "cohort.count", digits = 2,
conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns")
print(dat.epi02)
## The Mantel-Haenszel adjusted odds ratio is 3.09 (95% CI 1.49 to 6.39). The
## ratio of the crude odds ratio to the Mantel-Haensel adjusted odds ratio is
## 0.66.
## What are the Cornfield confidence limits, the maximum likelihood
## confidence limits and the score confidence limits for the crude odds ratio?
dat.epi02$massoc.detail$OR.crude.cfield
dat.epi02$massoc.detail$OR.crude.mle
dat.epi02$massoc.detail$OR.crude.score
## Cornfield: 2.02 (95% CI 1.07 to 3.79)
## Maximum likelihood: 2.01 (1.03 to 3.96)
# Score: 2.02 (95% CI 1.08 to 3.77)
## Plot the individual strata-level odds ratios and compare them with the
## Mantel-Haenszel adjusted odds ratio.
if (FALSE) {
library(ggplot2); library(scales)
nstrata <- 1:dim(dat.tab02)[3]
strata.lab <- paste("Strata ", nstrata, sep = "")
y.at <- c(nstrata, max(nstrata) + 1)
y.lab <- c("M-H", strata.lab)
x.at <- c(0.25,0.5,1,2,4,8,16,32)
or.p <- c(dat.epi02$massoc.detail$OR.mh$est,
dat.epi02$massoc.detail$OR.strata.cfield$est)
or.l <- c(dat.epi02$massoc.detail$OR.mh$lower,
dat.epi02$massoc.detail$OR.strata.cfield$lower)
or.u <- c(dat.epi02$massoc.detail$OR.mh$upper,
dat.epi02$massoc.detail$OR.strata.cfield$upper)
dat.df02 <- data.frame(y.at, y.lab, or.p, or.l, or.u)
ggplot(data = dat.df02, aes(x = or.p, y = y.at)) +
geom_point() +
geom_errorbarh(aes(xmax = or.l, xmin = or.u, height = 0.2)) +
labs(x = "Odds ratio", y = "Strata") +
scale_x_continuous(trans = log2_trans(), breaks = x.at,
limits = c(0.25,32)) +
scale_y_continuous(breaks = y.at, labels = y.lab) +
geom_vline(xintercept = 1, lwd = 1) +
coord_fixed(ratio = 0.75 / 1) +
theme(axis.title.y = element_text(vjust = 0))
}
## EXAMPLE 3:
## Same as Example 2 but showing how a 2 by 2 contingency table can be prepared
## using functions from the tidyverse package:
if (FALSE) {
library(MASS); library(tidyverse)
dat.df03 <- birthwt; head(dat.df03)
dat.df03 <- dat.df03 %>%
mutate(low = factor(low, levels = c(1,0), labels = c("yes","no"))) %>%
mutate(smoke = factor(smoke, levels = c(1,0), labels = c("yes","no"))) %>%
group_by(smoke, low) %>%
summarise(n = n())
dat.df03
## View the data in conventional 2 by 2 table format:
pivot_wider(dat.df03, id_cols = c(smoke), names_from = low, values_from = n)
dat.epi03 <- epi.2by2(dat = dat.df03, method = "cohort.count", digits = 2,
conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns")
dat.epi03
}
## The odds of having a low birth weight child for smokers was 2.02
## (95% CI 1.08 to 3.78) times greater than the odds of having a low birth
## weight child for non-smokers.
## Stratify by race:
if (FALSE) {
library(MASS); library(tidyverse)
dat.df04 <- birthwt; head(dat.df04)
dat.df04 <- dat.df04 %>%
mutate(low = factor(low, levels = c(1,0), labels = c("yes","no"))) %>%
mutate(smoke = factor(smoke, levels = c(1,0), labels = c("yes","no"))) %>%
mutate(race = factor(race)) %>%
group_by(race, smoke, low) %>%
summarise(n = n())
dat.df04
## View the data in conventional 2 by 2 table format:
pivot_wider(dat.df04, id_cols = c(race, smoke),
names_from = low, values_from = n)
dat.epi04 <- epi.2by2(dat = dat.df04, method = "cohort.count", digits = 2,
conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns")
dat.epi04
}
## The Mantel-Haenszel test of homogeneity of the strata odds ratios is not
## significant (chi square test statistic 2.800; df 2; p-value = 0.25).
## We accept the null hypothesis and conclude that the odds ratios for
## each strata of race are the same.
## After accounting for the confounding effect of race, the odds of
## having a low birth weight child for smokers was 3.09 (95% CI 1.49 to 6.39)
## times that of non-smokers.
## EXAMPLE 4:
## Sometimes you'll have only event count data for a stratified analysis. This
## example shows how to coerce a three column matrix listing (in order) counts
## of outcome positive individuals, counts of outcome negative individuals (or
## total time at risk, as in the example below) and strata into a three
## dimensional array.
## We assume that two rows are recorded for each strata. The first for those
## exposed and the second for those unexposed. Note that the strata variable
## needs to be numeric (not a factor).
dat.m04 <- matrix(c(1308,884,200,190,4325264,13142619,1530342,5586741,1,1,2,2),
nrow = 4, ncol = 3, byrow = FALSE)
colnames(dat.m04) <- c("obs","tar","grp")
dat.df04 <- data.frame(dat.m04)
## Here we use the apply function to coerce the two rows for each strata into
## tabular format. An array is created of with the length of the third
## dimension of the array equal to the number of strata:
dat.tab04 <- sapply(1:length(unique(dat.df04$grp)), function(x)
as.matrix(dat.df04[dat.df04$grp == x,1:2], ncol = 2, byrow = TRUE),
simplify = "array")
dat.tab04
epi.2by2(dat = dat.tab04, method = "cohort.time", digits = 2,
conf.level = 0.95, units = 1000 * 365.25, interpret = FALSE,
outcome = "as.columns")
## The Mantel-Haenszel adjusted incidence rate ratio was 4.39 (95% CI 4.06
## to 4.75).
## EXAMPLE 5:
## A study was conducted by Feychting et al (1998) comparing cancer occurrence
## among the blind with occurrence among those who were not blind but had
## severe visual impairment. From these data we calculate a cancer rate of
## 136/22050 person-years among the blind compared with 1709/127650 person-
## years among those who were visually impaired but not blind.
dat.v05 <- c(136,22050,1709,127650)
dat.epi05 <- epi.2by2(dat = dat.v05, method = "cohort.time", digits = 2,
conf.level = 0.95, units = 1000, interpret = FALSE, outcome = "as.columns")
summary(dat.epi05)$massoc.detail$ARate.strata.wald
## The incidence rate of cancer was 7.22 (95% CI 6.00 to 8.43) cases per
## 1000 person-years less in the blind, compared with those who were not
## blind but had severe visual impairment.
round(summary(dat.epi05)$massoc.detail$IRR.strata.wald, digits = 2)
## The incidence rate of cancer in the blind group was less than half that
## of the comparison group (incidence rate ratio 0.46, 95% CI 0.38 to 0.55).
## EXAMPLE 6:
## A study has been conducted to assess the effect of a new treatment for
## mastitis in dairy cows. Eight herds took part in the study. The following
## data were obtained. The vectors ai, bi, ci and di list (for each herd) the
## number of cows in the E+D+, E+D-, E-D+ and E-D- groups, respectively.
if (FALSE) {
hid <- 1:8
ai <- c(23,10,20,5,14,6,10,3)
bi <- c(10,2,1,2,2,2,3,0)
ci <- c(3,2,3,2,1,3,3,2)
di <- c(6,4,3,2,6,3,1,1)
dat.df06 <- data.frame(hid, ai, bi, ci, di)
head(dat.df06)
## Re-format data into a format suitable for epi.2by2:
hid <- rep(1:8, times = 4)
exp <- factor(rep(c(1,1,0,0), each = 8), levels = c(1,0))
out <- factor(rep(c(1,0,1,0), each = 8), levels = c(1,0))
dat.df06 <- data.frame(hid, exp, out, n = c(ai,bi,ci,di))
dat.tab06 <- xtabs(n ~ exp + out + hid, data = dat.df06)
print(dat.tab06)
epi.2by2(dat = dat.tab06, method = "cohort.count", digits = 2,
conf.level = 0.95, units = 1000, interpret = FALSE, outcome = "as.columns")
## The Mantel-Haenszel test of homogeneity of the strata odds ratios is not
## significant (chi square test statistic 5.276; df 7; p-value = 0.63).
## We accept the null hypothesis and conclude that the odds ratios for each
## strata of herd are the same.
## After adjusting for the effect of herd, compared to untreated cows, treatment
## increased the odds of recovery by a factor of 5.97 (95% CI 2.72 to 13.13).
}
Run the code above in your browser using DataLab