# NOT RUN {
## ftransform modifies and returns a data.frame
head(ftransform(airquality, Ozone = -Ozone))
head(ftransform(airquality, new = -Ozone, Temp = (Temp-32)/1.8))
head(ftransform(airquality, new = -Ozone, new2 = 1, Temp = NULL)) # Deleting Temp
head(ftransform(airquality, Ozone = NULL, Temp = NULL)) # Deleting columns
# With collapse's grouped and weighted functions, complex operations are done on the fly
head(ftransform(airquality, # Grouped operations by month:
Ozone_Month_median = fmedian(Ozone, Month, TRA = "replace_fill"),
Ozone_Month_sd = fsd(Ozone, Month, TRA = "replace"),
Ozone_Month_centered = fwithin(Ozone, Month)))
# Grouping by month and above/below average temperature in each month
head(ftransform(airquality, Ozone_Month_high_median =
fmedian(Ozone, list(Month, Temp > fbetween(Temp, Month)), TRA = "replace_fill")))
## ftransformv can be used to modify multiple columns using a function
head(ftransformv(airquality, 1:3, log))
head(`[<-`(airquality, 1:3, value = lapply(airquality[1:3], log))) # Same thing in base R
head(ftransformv(airquality, 1:3, log, apply = FALSE))
head(`[<-`(airquality, 1:3, value = log(airquality[1:3]))) # Same thing in base R
# Using apply = FALSE yields meaningful performance gains with collapse functions
# This calls fwithin.default, and repeates the grouping by month 3 times:
head(ftransformv(airquality, 1:3, fwithin, Month))
# This calls fwithin.data.frame, and only groups one time -> 5x faster!
head(ftransformv(airquality, 1:3, fwithin, Month, apply = FALSE))
library(magrittr) # Pipe operators
# This also works for grouped and panel data frames (calling fwithin.grouped_df)
airquality %>% fgroup_by(Month) %>%
ftransformv(1:3, fwithin, apply = FALSE) %>% head
# But this gives the WRONG result (calling fwithin.default). Need option apply = FALSE!!
airquality %>% fgroup_by(Month) %>%
ftransformv(1:3, fwithin) %>% head
# For grouped modification of single columns in a grouped dataset, we can use GRP():
airquality %>% fgroup_by(Month) %>%
ftransform(W_Ozone = fwithin(Ozone, GRP(.)), # Grouped centering
sd_Ozone_m = fsd(Ozone, GRP(.), TRA = "replace"), # In-Month standard deviation
sd_Ozone = fsd(Ozone, TRA = "replace"), # Overall standard deviation
sd_Ozone2 = fsd(Ozone, TRA = "replace_fill"), # Same, overwriting NA's
sd_Ozone3 = fsd(Ozone)) %>% head # Same thing (calling alloc())
rm(airquality)
## For more complex mutations we can use ftransform with compound pipes
airquality %>% fgroup_by(Month) %>%
ftransform(get_vars(., 1:3) %>% fwithin %>% flag(0:2)) %>% head
airquality %>% ftransform(STD(., cols = 1:3) %>% replace_NA(0)) %>% head
# The list argument feature also allows flexible operations creating multiple new columns
airquality %>% # The variance of Wind and Ozone, by month, weighted by temperature:
ftransform(fvar(list(Wind_var = Wind, Ozone_var = Ozone), Month, Temp, "replace")) %>% head
# Same as above using a grouped data frame (a bit more complex)
airquality %>% fgroup_by(Month) %>%
ftransform(fselect(., Wind, Ozone) %>% fvar(Temp, "replace") %>% add_stub("_var", FALSE)) %>%
fungroup %>% head
# This performs 2 different multi-column grouped operations (need c() to make it one list)
ftransform(airquality, c(fmedian(list(Wind_Day_median = Wind,
Ozone_Day_median = Ozone), Day, TRA = "replace"),
fsd(list(Wind_Month_sd = Wind,
Ozone_Month_sd = Ozone), Month, TRA = "replace"))) %>% head
## settransform(v) works like ftransform(v) but modifies a data frame in the global environment..
settransform(airquality, Ratio = Ozone / Temp, Ozone = NULL, Temp = NULL)
head(airquality)
rm(airquality)
# Grouped and weighted centering
settransformv(airquality, 1:3, fwithin, Month, Temp, apply = FALSE)
head(airquality)
rm(airquality)
# Suitably lagged first-differences
settransform(airquality, get_vars(airquality, 1:3) %>% fdiff %>% flag(0:2))
head(airquality)
rm(airquality)
# Same as above using magrittr::`%<>%`
airquality %<>% ftransform(get_vars(., 1:3) %>% fdiff %>% flag(0:2))
head(airquality)
rm(airquality)
# It is also possible to achieve the same thing via a replacement method (if needed)
ftransform(airquality) <- get_vars(airquality, 1:3) %>% fdiff %>% flag(0:2)
head(airquality)
rm(airquality)
## fcompute only returns the modified / computed columns
head(fcompute(airquality, Ozone = -Ozone))
head(fcompute(airquality, new = -Ozone, Temp = (Temp-32)/1.8))
head(fcompute(airquality, new = -Ozone, new2 = 1))
# Can preserve existing columns, computed ones are added to the right if names are different
head(fcompute(airquality, new = -Ozone, new2 = 1, keep = 1:3))
# If given same name as preserved columns, preserved columns are replaced in order...
head(fcompute(airquality, Ozone = -Ozone, new = 1, keep = 1:3))
# Same holds for fcomputev
head(fcomputev(iris, is.numeric, log)) # Same as:
iris %>% get_vars(is.numeric) %>% dapply(log) %>% head()
head(fcomputev(iris, is.numeric, log, keep = "Species")) # Adds in front
head(fcomputev(iris, is.numeric, log, keep = names(iris))) # Preserve order
# Keep a subset of the data, add standardized columns
head(fcomputev(iris, 3:4, STD, apply = FALSE, keep = names(iris)[3:5]))
# }
Run the code above in your browser using DataLab