## Default method ----
# NOTE(review): no library(collapse) call is visible in this script; it
# presumably runs with the collapse package already attached -- confirm.

# Group by a single vector
GRP(mtcars$cyl)

# Group a data frame by several columns via a one-sided formula.
# Equivalent spellings: GRP(mtcars, c("cyl","vs","am")) or GRP(mtcars, c(2,8:9))
GRP(mtcars, by = ~ cyl + vs + am)

# Keep the grouping object around for reuse
g <- GRP(mtcars, by = ~ cyl + vs + am)

# Print and plot methods
print(g)
plot(g)

# Retrieve the group names
GRPnames(g)

# Sum of every mtcars column within the groups defined by cyl, vs and am
fsum(mtcars, g)

# Split a vector according to the grouping object
gsplit(mtcars$mpg, g)

# With NULL as input, the row indices of each group are returned
gsplit(NULL, g)

# greorder() applied to the unlisted pieces undoes the effect of gsplit()
identical(
  mtcars$mpg,
  greorder(unlist(gsplit(mtcars$mpg, g)), g)
)
## Factor <-> GRP conversion ----

# A factor can be turned into a GRP object ...
GRP(iris$Species)

# ... and a GRP object (g, created above) back into a factor
as_factor_GRP(g)
## dplyr integration ----
library(dplyr)

# Obtain a GRP object from a dplyr grouped tibble
mtcars %>%
  group_by(cyl, vs, am) %>%
  GRP()

# Grouped mean computed on top of dplyr grouping
mtcars %>%
  group_by(cyl, vs, am) %>%
  fmean()

# Faster alternative: group with collapse's fgroup_by() instead
mtcars %>%
  fgroup_by(cyl, vs, am) %>%
  fmean()

# Print method for the collapse grouped data frame
mtcars %>%
  fgroup_by(cyl, vs, am)
library(magrittr)

## Adding a column of group sizes ----

# One row per group, with its size
mtcars %>%
  fgroup_by(cyl, vs, am) %>%
  fsummarise(Sizes = GRPN())

# All rows kept, each annotated with the size of its group
mtcars %>%
  fgroup_by(cyl, vs, am) %>%
  fmutate(Sizes = GRPN())

# Note: options(collapse_mask = "n") allows n() to be used instead,
# see help("collapse-options")

# Other usage modes:
# - pass the grouped data frame itself to GRPN() through the pipe placeholder
mtcars %>%
  fgroup_by(cyl, vs, am) %>%
  ftransform(Sizes = GRPN(.))

# - same thing without a prior grouping step (slightly more efficient)
mtcars %>%
  ftransform(Sizes = GRPN(list(cyl, vs, am)))
## Ways of specifying grouping columns (programming and interactive use) ----

# Bare column name plus a column computed on the fly
fgroup_by(GGDC10S, Variable, Decade = floor(Year / 10) * 10) %>%
  head(3)

# Column positions
fgroup_by(GGDC10S, 1:3, 5) %>%
  head(3)

# Character vector of column names
fgroup_by(GGDC10S, c("Variable", "Country")) %>%
  head(3)

# A function applied to the columns to select them
fgroup_by(GGDC10S, is.character) %>%
  head(3)

# A range of columns combined with a single column
fgroup_by(GGDC10S, Country:Variable, Year) %>%
  head(3)

# A range, a renamed column (Var = Variable) and a single column
fgroup_by(GGDC10S, Country:Region, Var = Variable, Year) %>%
  head(3)
## Grouping without materializing the unique grouping columns ----
# return.groups = FALSE creates the grouped data frame without them.

# Scale the columns AGR to SUM within groups
fgroup_by(GGDC10S, Variable, Country, return.groups = FALSE) %>%
  fmutate(across(AGR:SUM, fscale))

# Group means of the columns AGR to SUM
fgroup_by(GGDC10S, Variable, Country, return.groups = FALSE) %>%
  fselect(AGR:SUM) %>%
  fmean()
## Sorted vs. unsorted grouping ----
# When sorted groups are not required, sort = FALSE can be much faster
# on unsorted data.
library(microbenchmark)

microbenchmark(
  gby(GGDC10S, Variable, Country),
  gby(GGDC10S, Variable, Country, sort = FALSE)
)
# Run the code above in your browser using DataLab