# Example: dummy-coding a factor predictor with recipes::step_dummy().
#
# Compares the default treatment-contrast encoding (one level dropped as the
# reference) with full one-hot encoding (one column per level).

# recipes supplies recipe(), step_dummy(), prep(), bake() and tidy();
# dplyr supplies `%>%`, select() and starts_with().
library(recipes)
library(dplyr)

data(Sacramento, package = "modeldata")

# Original data: city has 37 levels
length(unique(Sacramento$city))
unique(Sacramento$city) %>%
  sort()

rec <- recipe(~ city + sqft + price, data = Sacramento)

# Default dummy coding: 36 dummy variables
dummies <- rec %>%
  step_dummy(city) %>%
  prep(training = Sacramento)

# new_data = NULL returns the processed training set
dummy_data <- bake(dummies, new_data = NULL)

# The first factor level of `city` ("ANTELOPE") is the reference level,
# so it gets no column of its own.
dummy_data %>%
  select(starts_with("city")) %>%
  names()

# Obtain the full set of 37 dummy variables using `one_hot` option
dummies_one_hot <- rec %>%
  step_dummy(city, one_hot = TRUE) %>%
  prep(training = Sacramento)

dummy_data_one_hot <- bake(dummies_one_hot, new_data = NULL)

# No reference level: every city level has its own indicator column
dummy_data_one_hot %>%
  select(starts_with("city")) %>%
  names()

# Inspect the first (and only) step of each prepped recipe
tidy(dummies, number = 1)
tidy(dummies_one_hot, number = 1)
# Run the code above in your browser using DataLab