# Simulate a two-level data set: n persons nested within each of G groups.
# x, y ... level-1 variables (vary within groups)
# v, w ... level-2 variables (constant within groups)
G <- 250 # number of groups
n <- 20 # number of persons per group
beta <- .3 # regression coefficient of y on x
rho <- .30 # residual intraclass correlation
rho.miss <- .10 # correlation of x with the latent missingness propensity of y
missrate <- .50 # missing proportion
# Random group intercept (variance rho) plus person-level noise (variance
# 1 - rho): the residual has unit variance and intraclass correlation rho.
y1 <- rep(rnorm(G, sd = sqrt(rho)), each = n) + rnorm(G * n, sd = sqrt(1 - rho))
# Level-2 covariates: one draw per group, repeated for every person in it.
w <- rep(round(rnorm(G), 2), each = n)
v <- rep(round(runif(G, 0, 3)), each = n)
x <- rnorm(G * n)
y <- y1 + beta * x + .2 * w + .1 * v
# dfr0 keeps the complete data; dfr receives the missing values below.
dfr0 <- dfr <- data.frame(
  group = rep(seq_len(G), each = n),
  x = x, y = y, w = w, v = v
)
# y is set to NA where a latent normal propensity falls below the missrate
# quantile. The noise variance has to be 1 - rho.miss^2 (not 1 - rho.miss) so
# that the propensity has unit variance and correlates rho.miss with x;
# otherwise qnorm(missrate) is only the correct cut-off for missrate = .5.
dfr[rho.miss * x + rnorm(G * n, sd = sqrt(1 - rho.miss^2)) < qnorm(missrate), "y"] <- NA
# w and v are level-2 variables, so they go missing for whole groups at once.
dfr[rep(rnorm(G), each = n) < qnorm(missrate), "w"] <- NA
dfr[rep(rnorm(G), each = n) < qnorm(missrate), "v"] <- NA
# Dry run with zero iterations: nothing is iterated, the call is only used to
# obtain a default predictor matrix and default method vector for dfr.
# NOTE(review): mice() is called unqualified, so the mice package must already
# be attached; the 2l.pan method presumably also needs the pan package.
imp0 <- mice(as.matrix(dfr), maxit = 0)
predM <- imp0[["predictorMatrix"]]
impM <- imp0[["method"]]

# Multilevel set-up, derived from the defaults above.
# One method per incomplete variable:
#   y ... 2l.pan      (imputation using pan)
#   w ... 2lonly.norm (imputation at level 2 using norm)
#   v ... 2lonly.pmm  (imputation at level 2 using pmm)
impM1 <- impM
impM1["y"] <- "2l.pan"
impM1["w"] <- "2lonly.norm"
impM1["v"] <- "2lonly.pmm"

# Predictor roles: x enters the model for y as a fixed effect (code 1), and
# the group column is coded -2 for all three imputed variables (mice's code
# for the cluster identifier - see the mice documentation).
predM1 <- predM
predM1["y", "x"] <- 1
predM1[c("y", "w", "v"), "group"] <- -2

# Single imputed data set, one iteration; paniter is passed on through mice().
imp1 <- mice(
  as.matrix(dfr),
  m = 1,
  maxit = 1,
  method = impM1,
  predictorMatrix = predM1,
  paniter = 500
)
# Toy data for demonstrating that 2lonly.norm aborts when a level-2 variable
# is only partially missing within a cluster; 2lonly.mean is the better
# choice for repairing such data.
# Four patients (patid) with five rows each; sex is constant within patient,
# crp is the repeated measurement and contains some missing values.
data <- data.frame(patid = rep(seq_len(4), each = 5))
data$sex <- rep(c(1, 2), each = 5, times = 2)
data$crp <- c(
  68, 78, 93, NA, 143, # patid 1
  5, 7, 9, 13, NA, # patid 2
  97, NA, 56, 52, 34, # patid 3
  22, 30, NA, NA, 45 # patid 4
)
# Default predictor matrix for the toy data, with the whole patid column
# recoded to -2.
pred <- make.predictorMatrix(data)
pred[, "patid"] <- -2
# Blank out a single sex value (row 3), so that patid == 1 has exactly one of
# its five sex values missing.
data$sex[3] <- NA
if (FALSE) {
  # Not run: this call fails because 2lonly.norm finds partially missing
  # level-2 data.
  imp <- mice(data,
    method = c("", "2lonly.norm", "2l.pan"),
    predictorMatrix = pred, maxit = 1, m = 2
  )
  # Output of the failing call:
  # > iter imp variable
  # > 1 1 sex crpError in .imputation.level2(y = y, ... :
  # > Method 2lonly.norm found the following clusters with partially missing
  # > level-2 data: 1
  # > Method 2lonly.mean can fix such inconsistencies.
}
# By contrast, when every sex value of patid == 1 is missing, the same call
# runs fine - except on r-patched-solaris-x86, which is why the example is
# disabled (originally via dontrun) to evade CRAN errors.
if (FALSE) {
  # Rows 1 to 5 are the five records of patid == 1.
  data[seq_len(5), "sex"] <- NA
  imp <- mice(
    data,
    m = 2,
    maxit = 1,
    method = c("", "2lonly.norm", "2l.pan"),
    predictorMatrix = pred
  )
}
# Run the code above in your browser using DataLab