# Given: a table whose single categorical column holds 1000 random draws
# of "A" / "B" followed by exactly one "C", making "C" the rare value.
library(data.table)
data_set <- data.table(
  cat_col = c(
    sample(x = c("A", "B"), size = 1000, replace = TRUE),
    "C"
  )
)

# When: the rare-category filter is applied to that column.
# NOTE(review): threshold = 0.01 is presumably a minimum share of rows a
# value needs in order to be kept -- confirm against the function's docs.
data_set <- remove_rare_categorical(
  data_set,
  cols = "cat_col",
  threshold = 0.01,
  verbose = TRUE
)

# Then: the remaining distinct values should no longer include "C".
unique(data_set[["cat_col"]])
# Run the code above in your browser using DataLab