library(recipes)
# Load the biomass data and split it by the `dataset` column.
data(biomass, package = "modeldata")
biomass_tr <- biomass[biomass$dataset == "Training", ]
biomass_te <- biomass[biomass$dataset == "Testing", ]

# Keep an untouched copy of the test set so the imputed values can later be
# compared with the true ones.
biomass_te_whole <- biomass_te

# Induce some missing data at random: blank out three rows of `carbon` and
# three rows of `nitrogen` in the test set.
set.seed(9039)
# seq_len() rather than 1:nrow(): an empty data frame yields an empty index
# set instead of c(1, 0). The draws are unchanged for the same seed.
carb_missing <- sample(seq_len(nrow(biomass_te)), 3)
nitro_missing <- sample(seq_len(nrow(biomass_te)), 3)
biomass_te$carbon[carb_missing] <- NA
biomass_te$nitrogen[nitro_missing] <- NA
# Base recipe: HHV is the outcome; the five element columns are predictors.
rec <- recipe(
  HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur,
  data = biomass_tr
)

# Add a nearest-neighbour imputation step (neighbors = 3) covering every
# predictor in the recipe.
ratio_recipe <- step_impute_knn(rec, all_predictors(), neighbors = 3)

# Estimate the step from the training rows ...
ratio_recipe2 <- prep(ratio_recipe, training = biomass_tr)

# ... and apply the trained recipe to the test set.
imputed <- bake(ratio_recipe2, new_data = biomass_te)
# How well did it work? For each column that had values removed, print the
# distribution of the true values, then the true and imputed values side by
# side for the rows that were blanked out.
summary(biomass_te_whole[["carbon"]])
cbind(
  before = biomass_te_whole[["carbon"]][carb_missing],
  after = imputed[["carbon"]][carb_missing]
)

summary(biomass_te_whole[["nitrogen"]])
cbind(
  before = biomass_te_whole[["nitrogen"]][nitro_missing],
  after = imputed[["nitrogen"]][nitro_missing]
)

# Inspect the imputation step: first on the untrained recipe, then on the
# trained one.
tidy(ratio_recipe, number = 1)
tidy(ratio_recipe2, number = 1)
# Run the code above in your browser using DataLab