### This example shows how inconsistencies in the SD2011 data are picked up
### by syn.passive().

### Working data: five SD2011 columns, then hsq and sex appended, so the
### column order is height, weight, bmi, age, agegr, hsq, sex.
ods <- SD2011[, c("height", "weight", "bmi", "age", "agegr")]
ods[["hsq"]] <- ods[["height"]]^2
ods[["sex"]] <- SD2011[["sex"]]

### One method per column of ods, listed in column order.
### Entries starting with "~" are formulas for passively derived columns.
meth <- c(
  "cart",                                          # height
  "cart",                                          # weight
  "~I(weight / height^2 * 10000)",                 # bmi
  "cart",                                          # age
  "~I(cut(age, c(15, 24, 34, 44, 59, 64, 120)))",  # agegr
  "~I(height^2)",                                  # hsq
  "logreg"                                         # sex
)

### Calls that are expected to fail; kept for reference and never executed.
if (FALSE) {
  ### fails for bmi
  s1 <- syn(
    ods,
    method = meth,
    seed = 6756,
    models = TRUE
  )

  ### fails for agegr (bmi has been recomputed from weight and height)
  ods[["bmi"]] <- with(ods, weight / height^2 * 10000)
  s2 <- syn(
    ods,
    method = meth,
    seed = 6756,
    models = TRUE
  )

  ### fails because of wrong order (columns visited last to first)
  ods[["agegr"]] <- with(ods, cut(age, c(15, 24, 34, 44, 59, 64, 120)))
  s3 <- syn(
    ods,
    method = meth,
    visit.sequence = 7:1,
    seed = 6756,
    models = TRUE
  )
}

### Runs without errors: bmi and agegr are rebuilt from their source columns
### with the same expressions as the passive formulas in meth.
ods[["bmi"]] <- with(ods, weight / height^2 * 10000)
ods[["agegr"]] <- with(ods, cut(age, c(15, 24, 34, 44, 59, 64, 120)))
s4 <- syn(
  ods,
  method = meth,
  seed = 6756,
  models = TRUE
)

### bmi and hsq do not predict sex because of missing values
s4[["models"]][["sex"]]

### hsq with no missing values used to predict sex
### (rows with a missing height are dropped first)
ods2 <- ods[!is.na(ods[["height"]]), , drop = FALSE]
s5 <- syn(
  ods2,
  method = meth,
  seed = 6756,
  models = TRUE
)
s5[["models"]][["sex"]]

### agegr with missing values used to predict sex because not numeric
### (the first four ages are blanked, then agegr is re-derived from age)
ods3 <- ods
ods3[["age"]][1:4] <- NA
ods3[["agegr"]] <- with(ods3, cut(age, c(15, 24, 34, 44, 59, 64, 120)))
s6 <- syn(
  ods3,
  method = meth,
  seed = 6756,
  models = TRUE
)
s6[["models"]][["sex"]]
### Run the code above in your browser using DataLab