library(dplyr)
library(recipes)
data(attrition, package = "modeldata")
# Tally the rows of `attrition` for each value of StockOptionLevel.
count(group_by(attrition, StockOptionLevel))

# Labels to assign to the numeric stock option values, in level order.
amnt <- c("nothin", "meh", "some", "copious")

# Build the recipe one step at a time. The transform adds one to every value
# before step_num2factor() matches it against `levels`.
rec <- recipe(Attrition ~ StockOptionLevel, data = attrition)
rec <- step_num2factor(
  rec,
  StockOptionLevel,
  transform = function(x) x + 1,
  levels = amnt
)

# Estimate the recipe and bake it with `new_data = NULL`.
encoded <- bake(prep(rec), new_data = NULL)

# Cross-tabulate the converted factor against the original numeric column.
table(encoded$StockOptionLevel, attrition$StockOptionLevel)
# An example for binning.
#
# Maps each value of `x` to the number of the interval it falls in:
# [0, 5000], (5000, 10000], (10000, 20000]. Values outside these
# intervals (and NA inputs) come back as NA. The result is a double vector.
binner <- function(x) {
  income_breaks <- 1000 * c(0, 5, 10, 20)
  # labels = FALSE makes cut() return the bin numbers instead of a factor
  bin_codes <- cut(
    x,
    breaks = income_breaks,
    labels = FALSE,
    include.lowest = TRUE
  )
  as.numeric(bin_codes)
}
# Labels for the three bin numbers produced by binner(), in order.
inc <- c("low", "med", "high")

# Build the recipe one step at a time and then estimate it. `rec` ends up as
# the prepped recipe, which the bake() calls further down rely on.
rec <- recipe(Attrition ~ MonthlyIncome, data = attrition)
rec <- step_num2factor(
  rec,
  MonthlyIncome,
  transform = binner,
  levels = inc,
  ordered = TRUE
)
rec <- prep(rec)

encoded <- bake(rec, new_data = NULL)

# Cross-tabulate the converted factor against the bin numbers computed
# directly with binner().
table(encoded$MonthlyIncome, binner(attrition$MonthlyIncome))
# What happens when a value is out of range?
# Take the first row of `attrition` and give it a monthly income of 10^10,
# far above the largest break used by binner().
ceo <- mutate(slice(attrition, 1), MonthlyIncome = 10^10)
bake(rec, new_data = ceo)
# Run the code above in your browser using DataLab