# Example: imputing missing values with bagged tree models via
# `step_impute_bag()` from the recipes package.

# recipes supplies recipe(), step_impute_bag(), imp_vars(), prep(), bake(),
# tidy() and the `%>%` pipe used below; without attaching it the script
# fails at the first recipe() call.
library(recipes)

data("credit_data", package = "modeldata")

## Proportion of missing values per column
vapply(credit_data, function(x) mean(is.na(x)), numeric(1))

## Reproducible split: 2000 sampled rows for training, the rest for testing
set.seed(342)
in_training <- sample(seq_len(nrow(credit_data)), 2000)
credit_tr <- credit_data[in_training, ]
credit_te <- credit_data[-in_training, ]

## Test-set row positions compared before and after imputation below
## (presumably rows that contain missing values -- confirm against the data)
missing_examples <- c(14, 394, 565)

rec <- recipe(Price ~ ., data = credit_tr)

## The guarded section never executes as written (`if (FALSE)`); it is kept
## as reference code. Change the condition to run it.
if (FALSE) {
  ## Default: each listed variable is imputed with the step's default
  ## set of predictors
  impute_rec <- rec %>%
    step_impute_bag(Status, Home, Marital, Job, Income, Assets, Debt)

  imp_models <- prep(impute_rec, training = credit_tr)

  imputed_te <- bake(imp_models, new_data = credit_te, everything())

  ## Same rows before and after imputation
  credit_te[missing_examples, ]
  imputed_te[missing_examples, names(credit_te)]

  ## Step summary before and after prep()
  tidy(impute_rec, number = 1)
  tidy(imp_models, number = 1)

  ## Specifying which variables to impute with
  impute_rec <- rec %>%
    step_impute_bag(
      Status, Home, Marital, Job, Income, Assets, Debt,
      impute_with = imp_vars(Time, Age, Expenses),
      # for quick execution, nbagg lowered
      options = list(nbagg = 5, keepX = FALSE)
    )

  imp_models <- prep(impute_rec, training = credit_tr)

  imputed_te <- bake(imp_models, new_data = credit_te, everything())

  ## Same rows before and after imputation
  credit_te[missing_examples, ]
  imputed_te[missing_examples, names(credit_te)]

  ## Step summary before and after prep()
  tidy(impute_rec, number = 1)
  tidy(imp_models, number = 1)
}
# Run the code above in your browser using DataLab