# NOT RUN {
library(hardhat)
library(recipes)
# ---------------------------------------------------------------------------
# Setup
# Train on the first 100 rows of `iris`; hold out the remaining rows for testing
train <- iris[seq_len(100), ]
test <- iris[-seq_len(100), ]
# ---------------------------------------------------------------------------
# Recipes example
# Build a recipe with two predictors, then add a step that logs one of them
rec <- recipe(Species ~ Sepal.Length + Sepal.Width, data = train)
rec <- step_log(rec, Sepal.Length)
# Apply the recipe to the training data
processed <- mold(rec, data = train)
# In the predictors, Sepal.Length is now on the log scale
processed$predictors
processed$outcomes
# The blueprint keeps hold of the prepped recipe
processed$blueprint$recipe
# Pass the blueprint and the test data to forge() so that the test data
# is preprocessed the same way (Sepal.Length in `new_data` gets logged)
forge(new_data = test, blueprint = processed$blueprint)
# Set `outcomes = TRUE` to also get the preprocessed outcome back
forge(new_data = test, blueprint = processed$blueprint, outcomes = TRUE)
# ---------------------------------------------------------------------------
# With an intercept
# Request an intercept column through the blueprint with `intercept = TRUE`
processed <- mold(
  rec,
  train,
  blueprint = default_recipe_blueprint(intercept = TRUE)
)
processed$predictors
# Alternatively, add the intercept with a recipe step. Mold on `train`
# (not the full `iris`) so the result is comparable to the one above.
rec2 <- step_intercept(rec)
mold(rec2, train)$predictors
# ---------------------------------------------------------------------------
# Non standard roles
# Columns assigned a custom recipe role are processed too; mold() and
# forge() return them in the `$extras$roles` slot of their result.
rec_roles <- recipe(train) %>%
  update_role(Sepal.Width, new_role = "predictor") %>%
  update_role(Species, new_role = "outcome") %>%
  update_role(Sepal.Length, new_role = "custom_role") %>%
  update_role(Petal.Length, new_role = "custom_role2")
processed_roles <- mold(rec_roles, data = train)
# Inspect the extras produced for the training data
processed_roles$extras
# Apply the same blueprint to the test data
forge(new_data = test, blueprint = processed_roles$blueprint)
# ---------------------------------------------------------------------------
# Matrix output for predictors
# The blueprint's `composition` argument controls the type of the returned
# predictors; here a "dgCMatrix" (sparse matrix class from the Matrix
# package) is requested.
bp <- default_recipe_blueprint(composition = "dgCMatrix")
processed <- mold(rec, data = train, blueprint = bp)
# Check the class of the predictors that came back
class(processed$predictors)
# }
# Run the code above in your browser using DataLab