library(cheapr)
# `as_discrete()` is very similar to `cut()`
# but more flexible as it allows you to supply
# formatting functions and symbols for the discrete bins
# Here is an example of how to use the formatting functions to
# categorise age groups nicely
ages <- 1:100
age_group <- function(x, breaks){
age_groups <- as_discrete(
x,
breaks = breaks,
intv_sep = "-",
intv_end_fun = function(x) x - 1,
intv_openers = c("", ""),
intv_closers = c("", ""),
include_oob = TRUE,
ordered = TRUE
)
# Below is just renaming the last age group
lvls <- levels(age_groups)
n_lvls <- length(lvls)
max_ages <- paste0(max(breaks), "+")
attr(age_groups, "levels") <- c(lvls[-n_lvls], max_ages)
age_groups
}
age_group(ages, seq(0, 80, 20))
age_group(ages, seq(0, 25, 5))
age_group(ages, 5)
# To closely replicate `cut()` with `as_discrete()` we can use the following
cheapr_cut <- function(x, breaks, right = TRUE,
include.lowest = FALSE,
ordered.result = FALSE){
if (length(breaks) == 1){
breaks <- get_breaks(x, breaks, pretty = FALSE,
expand_min = FALSE, expand_max = FALSE)
adj <- diff(range(breaks)) * 0.001
breaks[1] <- breaks[1] - adj
breaks[length(breaks)] <- breaks[length(breaks)] + adj
}
as_discrete(x, breaks, left_closed = !right,
include_endpoint = include.lowest,
ordered = ordered.result,
intv_start_fun = function(x) formatC(x, digits = 3, width = 1),
intv_end_fun = function(x) formatC(x, digits = 3, width = 1))
}
x <- rnorm(100)
cheapr_cut(x, 10)
identical(cut(x, 10), cheapr_cut(x, 10))
Run the code above in your browser using DataLab