# Example: impute artificially removed Longitude values in the Ames housing
# data with step_impute_linear(), then plot imputed against true values.

# Attach packages up front so the script runs on its own. recipes supplies
# recipe(), step_impute_linear(), imp_vars(), prep() and bake(); attaching it
# also attaches dplyr, which supplies %>% and bind_cols().
library(recipes)
library(ggplot2)

data(ames, package = "modeldata")

# Fixed seed so the same 200 rows are blanked out on every run.
set.seed(393)

# Work on a copy so the untouched `ames` keeps the true Longitude values.
ames_missing <- ames
ames_missing$Longitude[sample(seq_len(nrow(ames)), 200)] <- NA

# Define and train a recipe that imputes Longitude from the listed predictors.
imputed_ames <-
  recipe(Sale_Price ~ ., data = ames_missing) %>%
  step_impute_linear(
    Longitude,
    impute_with = imp_vars(Latitude, Neighborhood, MS_Zoning, Alley)
  ) %>%
  prep(training = ames_missing)

# Put three versions of Longitude side by side:
#   imputed   - output of the trained recipe
#   original  - true value from the untouched data
#   Longitude - the version with NAs, used only to pick the blanked-out rows
imputed <-
  bake(imputed_ames, new_data = ames_missing) %>%
  dplyr::rename(imputed = Longitude) %>%
  bind_cols(ames %>% dplyr::select(original = Longitude)) %>%
  bind_cols(ames_missing %>% dplyr::select(Longitude)) %>%
  dplyr::filter(is.na(Longitude))

# Scatter of true vs. imputed values. geom_abline() with its defaults
# (intercept 0, slope 1) draws the line on which imputed equals original.
ggplot(imputed, aes(x = original, y = imputed)) +
  geom_abline(col = "green") +
  geom_point(alpha = 0.3) +
  coord_equal() +
  labs(title = "Imputed Values")
# Run the code above in your browser using DataLab