# NOT RUN {
###############################################
# Simple example: define, train, and apply a recipe on the biomass data.
#
# `recipes` must be attached for recipe(), the step_*() functions, prep(),
# bake(), and the `%>%` pipe used below; `modeldata` supplies `biomass`.
library(recipes)
library(modeldata)
data(biomass)

# Split the data using the `dataset` indicator column.
biomass_tr <- biomass[biomass$dataset == "Training", ]
biomass_te <- biomass[biomass$dataset == "Testing", ]

# When there are only predictors and outcomes, a simplified formula can be
# used.
rec <- recipe(
  HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur,
  data = biomass_tr
)

# Now add preprocessing steps to the recipe. Nothing is estimated yet; the
# steps are only recorded.
sp_signed <- rec %>%
  step_normalize(all_numeric_predictors()) %>%
  step_spatialsign(all_numeric_predictors())
sp_signed

# Now estimate the required parameters from the training set.
sp_signed_trained <- prep(sp_signed, training = biomass_tr)
sp_signed_trained

# Apply the trained preprocessing to a data set.
test_set_values <- bake(sp_signed_trained, new_data = biomass_te)

# Or use pipes for the entire workflow (this overwrites `rec` from above):
rec <- biomass_tr %>%
  recipe(HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur) %>%
  step_normalize(all_numeric_predictors()) %>%
  step_spatialsign(all_numeric_predictors())
###############################################
# Multivariate example: more than one outcome on the formula's left-hand side.
# Outcomes are joined with `+`; there is no need for `cbind(carbon, hydrogen)`.
multi_y <-
  recipe(
    carbon + hydrogen ~ oxygen + nitrogen + sulfur,
    data = biomass_tr
  ) %>%
  step_center(all_numeric_predictors()) %>%
  step_scale(all_numeric_predictors())

# Train on the training rows, then apply to the test rows.
multi_y_trained <- prep(multi_y, training = biomass_tr)
results <- bake(multi_y_trained, new_data = biomass_te)
###############################################
# Example that assigns every role by hand instead of through a formula.
# best choice for high-dimensional data:
rec <- recipe(biomass_tr)

# Each call below relabels the named columns with the given role.
rec <- update_role(
  rec,
  carbon, hydrogen, oxygen, nitrogen, sulfur,
  new_role = "predictor"
)
rec <- update_role(rec, HHV, new_role = "outcome")
rec <- update_role(rec, sample, new_role = "id variable")
rec <- update_role(rec, dataset, new_role = "splitting indicator")
rec
# }
# Run the code above in your browser using DataLab