library(cheapr)
# Simulate 100 draws from a random subset of 10 letters, while keeping all 26
# letters as the factor's levels so that some levels are guaranteed to be unused
x <- factor_(
  sample(letters[sample.int(26, 10)], size = 100, replace = TRUE),
  levels = letters
)
x
# Levels that do / do not appear in the data
levels_used(x)
levels_unused(x)
# Drop the levels that never appear
levels_drop(x)
# Top 3 letters by frequency
lumped_letters <- levels_lump(x, n = 3)
levels_count(lumped_letters)
# To remove the "Other" category, use `levels_rm()`
levels_count(levels_rm(lumped_letters, "Other"))
# `levels_lump()` can also power a generic "top n" helper for non-factors.
#
# get_top_n(x, n):
#   x - a vector; it is first converted to a factor with
#       `factor_(x, order = FALSE)` (presumably leaving the levels unsorted;
#       confirm against the cheapr docs)
#   n - passed to `levels_lump()` as the number of top levels to keep
#   Returns whatever `levels_count()` produces for the lumped factor.
get_top_n <- function(x, n) {
  as_fct <- factor_(x, order = FALSE)
  top_levels <- levels_lump(as_fct, n = n)
  levels_count(top_levels)
}
get_top_n(x, n = 3)
# A neat way to order a factor's levels by frequency is to lump with `prop`
# and read the resulting levels back:
levels(levels_lump(x, prop = 1)) # Highest to lowest
levels(levels_lump(x, prop = -1)) # Lowest to highest
# Run the code above in your browser using DataLab