# NOT RUN {
data(okc)
orig <- table(okc$diet, useNA = "always")
sort(orig, decreasing = TRUE)
up_rec <- recipe( ~ ., data = okc) %>%
# Bring the minority levels up to about 200 each
# 200/16562 is approx 0.0121
step_upsample(diet, over_ratio = 0.0121) %>%
prep(training = okc, retain = TRUE)
training <- table(juice(up_rec)$diet, useNA = "always")
# Since `skip` defaults to TRUE, baking the step has no effect
baked_okc <- bake(up_rec, new_data = okc)
baked <- table(baked_okc$diet, useNA = "always")
# Note that if the original data contained more rows than the
# target n (= ratio * majority_n), the data are left alone:
data.frame(
level = names(orig),
orig_freq = as.vector(orig),
train_freq = as.vector(training),
baked_freq = as.vector(baked)
)
# }
Run the code above in your browser using DataLab