# NOT RUN {
data(biomass)
set.seed(3535)
biomass$duplicate <- biomass$carbon + rnorm(nrow(biomass))
biomass_tr <- biomass[biomass$dataset == "Training",]
biomass_te <- biomass[biomass$dataset == "Testing",]
rec <- recipe(HHV ~ carbon + hydrogen + oxygen + nitrogen +
sulfur + duplicate,
data = biomass_tr)
corr_filter <- rec %>%
step_corr(all_predictors(), threshold = .5)
filter_obj <- prep(corr_filter, training = biomass_tr)
filtered_te <- bake(filter_obj, biomass_te)
round(abs(cor(biomass_tr[, c(3:7, 9)])), 2)
round(abs(cor(filtered_te)), 2)
tidy(corr_filter, number = 1)
tidy(filter_obj, number = 1)
# }
Run the code above in your browser using DataLab