# mice.impute.pmm() is normally called from within mice().
# It may also be called directly, as shown below (not recommended).
set.seed(53177)
xname <- c("age", "hgt", "wgt")
# Keep only the rows in which all predictors are observed
r <- stats::complete.cases(boys[, xname])
x <- boys[r, xname]
y <- boys[r, "tv"]
# ry is TRUE where the target variable tv is observed
ry <- !is.na(y)
table(ry)
# proportion of missing data in tv
sum(!ry) / length(ry)
# Impute missing tv data
yimp <- mice.impute.pmm(y, ry, x)
length(yimp)
hist(yimp, xlab = "Imputed missing tv")
# Impute all tv data (wy is TRUE for every entry, observed or not)
yimp <- mice.impute.pmm(y, ry, x, wy = rep(TRUE, length(y)))
length(yimp)
hist(yimp, xlab = "Imputed missing and observed tv")
# Only cases with an observed tv can be plotted against their imputation,
# so the count shown in the axis labels is derived from ry
n_obs <- sum(ry)
plot(jitter(y), jitter(yimp),
  main = "Predictive mean matching on age, height and weight",
  xlab = sprintf("Observed tv (n = %d)", n_obs),
  ylab = sprintf("Imputed tv (n = %d)", n_obs)
)
abline(0, 1)
# Correlation over the pairs in which tv is observed
cor(y, yimp, use = "pairwise.complete.obs")
# Use blots to exclude different values per column
# Create blots object
blots <- make.blots(boys)
# Exclude ml 1 through 5 from tv donor pool
blots$tv$exclude <- 1:5
# Exclude 100 random observed heights from hgt donor pool.
# Missing heights are dropped first so that NA cannot be drawn as one of the
# 100 excluded values.
observed_hgt <- boys$hgt[!is.na(boys$hgt)]
blots$hgt$exclude <- sample(unique(observed_hgt), 100)
# printFlag is spelled out in full instead of relying on partial matching
imp <- mice(
  boys,
  method = "pmm",
  printFlag = FALSE,
  blots = blots,
  seed = 123
)
blots$hgt$exclude %in% unlist(imp$imp$hgt) # MUST be all FALSE
blots$tv$exclude %in% unlist(imp$imp$tv) # MUST be all FALSE
# Factor quantification
xname <- c("age", "hgt", "wgt")
# Take a 60-row subset of boys and keep the rows with complete predictors
rows <- c(1:10, 101:110, 501:510, 601:620, 701:710)
boys_sub <- boys[rows, ]
complete <- stats::complete.cases(boys_sub[, xname])
x <- boys_sub[complete, xname]
# Treat tv as a factor; ry flags its observed entries
y <- factor(boys_sub[complete, "tv"])
ry <- !is.na(y)
table(y)
# impute the factor by optimizing the canonical correlation between y and x
mice.impute.pmm(y = y, ry = ry, x = x)
# restrict donors to categories with at least 2 cases
mice.impute.pmm(y = y, ry = ry, x = x, trim = 2L)
# same as above, and also eliminate category 20
mice.impute.pmm(y = y, ry = ry, x = x, trim = 2L, exclude = 20)
# quantify = FALSE restores the old behavior (as.integer(y))
mice.impute.pmm(y = y, ry = ry, x = x, quantify = FALSE)
# Run the code above in your browser using DataLab