## fmutate() examples ---------------------------------------------------------------
# Please note that expressions are vectorized whenever they contain 'ANY' fast function
mtcars |>
fgroup_by(cyl, vs, am) |>
fmutate(mean_mpg = fmean(mpg), # Vectorized
mean_mpg_base = mean(mpg), # Non-vectorized
mpg_cumpr = fcumsum(mpg) / fsum(mpg), # Vectorized
mpg_cumpr_base = cumsum(mpg) / sum(mpg), # Non-vectorized
mpg_cumpr_mixed = fcumsum(mpg) / sum(mpg)) # Vectorized: division by overall sum
# Using across: here fmean() gets vectorized across both groups and columns (requiring a single
# call to fmean.data.frame which goes to C), whereas weighted.mean needs to be called many times.
mtcars |> fgroup_by(cyl, vs, am) |>
fmutate(across(disp:qsec, list(mu = fmean, mu2 = weighted.mean), w = wt, .names = "flip"))
# Can do more complex things...
mtcars |> fgroup_by(cyl) |>
fmutate(res = resid(lm(mpg ~ carb + hp, weights = wt)))
# Since v1.9.0: supports arbitrary expressions returning suitable lists
if (FALSE)
mtcars |> fgroup_by(cyl) |>
fmutate(broom::augment(lm(mpg ~ carb + hp, weights = wt)))
# Same thing using across() (supported before 1.9.0)
modelfun <- function(data) broom::augment(lm(mpg ~ carb + hp, data, weights = wt))
mtcars |> fgroup_by(cyl) |>
fmutate(across(c(mpg, carb, hp, wt), modelfun, .apply = FALSE))
## ftransform() / fcompute() examples: ----------------------------------------------
## ftransform modifies and returns a data.frame
head(ftransform(airquality, Ozone = -Ozone))
head(ftransform(airquality, new = -Ozone, Temp = (Temp-32)/1.8))
head(ftransform(airquality, new = -Ozone, new2 = 1, Temp = NULL)) # Deleting Temp
head(ftransform(airquality, Ozone = NULL, Temp = NULL)) # Deleting columns
# With collapse's grouped and weighted functions, complex operations are done on the fly
head(ftransform(airquality, # Grouped operations by month:
Ozone_Month_median = fmedian(Ozone, Month, TRA = "fill"),
Ozone_Month_sd = fsd(Ozone, Month, TRA = "replace"),
Ozone_Month_centered = fwithin(Ozone, Month)))
# Grouping by month and above/below average temperature in each month
head(ftransform(airquality, Ozone_Month_high_median =
fmedian(Ozone, list(Month, Temp > fbetween(Temp, Month)), TRA = "fill")))
## ftransformv can be used to modify multiple columns using a function
head(ftransformv(airquality, 1:3, log))
head(`[<-`(airquality, 1:3, value = lapply(airquality[1:3], log))) # Same thing in base R
head(ftransformv(airquality, 1:3, log, apply = FALSE))
head(`[<-`(airquality, 1:3, value = log(airquality[1:3]))) # Same thing in base R
# Using apply = FALSE yields meaningful performance gains with collapse functions
# This calls fwithin.default, and repeates the grouping by month 3 times:
head(ftransformv(airquality, 1:3, fwithin, Month))
# This calls fwithin.data.frame, and only groups one time -> 5x faster!
head(ftransformv(airquality, 1:3, fwithin, Month, apply = FALSE))
# This also works for grouped and panel data frames (calling fwithin.grouped_df)
airquality |> fgroup_by(Month) |>
ftransformv(1:3, fwithin, apply = FALSE) |> head()
# But this gives the WRONG result (calling fwithin.default). Need option apply = FALSE!!
airquality |> fgroup_by(Month) |>
ftransformv(1:3, fwithin) |> head()
# For grouped modification of single columns in a grouped dataset, we can use GRP():
library(magrittr)
airquality |> fgroup_by(Month) %>%
ftransform(W_Ozone = fwithin(Ozone, GRP(.)), # Grouped centering
sd_Ozone_m = fsd(Ozone, GRP(.), TRA = "replace"), # In-Month standard deviation
sd_Ozone = fsd(Ozone, TRA = "replace"), # Overall standard deviation
sd_Ozone2 = fsd(Ozone, TRA = "fill"), # Same, overwriting NA's
sd_Ozone3 = fsd(Ozone)) |> head() # Same thing (calling alloc())
## For more complex mutations we can use ftransform with compound pipes
airquality |> fgroup_by(Month) %>%
ftransform(get_vars(., 1:3) |> fwithin() |> flag(0:2)) |> head()
airquality %>% ftransform(STD(., cols = 1:3) |> replace_na(0)) |> head()
# The list argument feature also allows flexible operations creating multiple new columns
airquality |> # The variance of Wind and Ozone, by month, weighted by temperature:
ftransform(fvar(list(Wind_var = Wind, Ozone_var = Ozone), Month, Temp, "replace")) |> head()
# Same as above using a grouped data frame (a bit more complex)
airquality |> fgroup_by(Month) %>%
ftransform(fselect(., Wind, Ozone) |> fvar(Temp, "replace") |> add_stub("_var", FALSE)) |>
fungroup() |> head()
# This performs 2 different multi-column grouped operations (need c() to make it one list)
ftransform(airquality, c(fmedian(list(Wind_Day_median = Wind,
Ozone_Day_median = Ozone), Day, TRA = "replace"),
fsd(list(Wind_Month_sd = Wind,
Ozone_Month_sd = Ozone), Month, TRA = "replace"))) |> head()
## settransform(v) works like ftransform(v) but modifies a data frame in the global environment..
settransform(airquality, Ratio = Ozone / Temp, Ozone = NULL, Temp = NULL)
head(airquality)
rm(airquality)
# Grouped and weighted centering
settransformv(airquality, 1:3, fwithin, Month, Temp, apply = FALSE)
head(airquality)
rm(airquality)
# Suitably lagged first-differences
settransform(airquality, get_vars(airquality, 1:3) |> fdiff() |> flag(0:2))
head(airquality)
rm(airquality)
# Same as above using magrittr::`%<>%`
airquality %<>% ftransform(get_vars(., 1:3) |> fdiff() |> flag(0:2))
head(airquality)
rm(airquality)
# It is also possible to achieve the same thing via a replacement method (if needed)
ftransform(airquality) <- get_vars(airquality, 1:3) |> fdiff() |> flag(0:2)
head(airquality)
rm(airquality)
## fcompute only returns the modified / computed columns
head(fcompute(airquality, Ozone = -Ozone))
head(fcompute(airquality, new = -Ozone, Temp = (Temp-32)/1.8))
head(fcompute(airquality, new = -Ozone, new2 = 1))
# Can preserve existing columns, computed ones are added to the right if names are different
head(fcompute(airquality, new = -Ozone, new2 = 1, keep = 1:3))
# If given same name as preserved columns, preserved columns are replaced in order...
head(fcompute(airquality, Ozone = -Ozone, new = 1, keep = 1:3))
# Same holds for fcomputev
head(fcomputev(iris, is.numeric, log)) # Same as:
iris |> get_vars(is.numeric) |> dapply(log) |> head()
head(fcomputev(iris, is.numeric, log, keep = "Species")) # Adds in front
head(fcomputev(iris, is.numeric, log, keep = names(iris))) # Preserve order
# Keep a subset of the data, add standardized columns
head(fcomputev(iris, 3:4, STD, apply = FALSE, keep = names(iris)[3:5]))
Run the code above in your browser using DataLab