# examples from 'dplyr' package
# Load the built-in dataset used throughout the examples below.
# NOTE(review): let(), take() and %>% look like the 'maditr' API; presumably
# the package is attached before this script runs -- confirm.
data(mtcars)

# Columns created earlier in a let() call can be reused later in the same call
mtcars %>%
  let(cyl2 = cyl * 2, cyl4 = cyl2 * 2) %>%
  head()

# let() can also remove a column (assign NULL) and overwrite an existing one
mtcars %>%
  let(
    mpg = NULL,
    disp = disp * 0.0163871  # convert to litres
  ) %>%
  head()

# Window functions are handy for grouped computations
mtcars %>%
  let(
    rank = rank(-mpg, ties.method = "min"),
    by = cyl
  ) %>%
  head()

# Setting a variable to NULL drops it
mtcars %>%
  let(cyl = NULL) %>%
  head()

# let() keeps all existing variables
mtcars %>%
  let(displ_l = disp / 61.0237) %>%
  head()

# take() keeps only the variables you create
mtcars %>%
  take(displ_l = disp / 61.0237)

# Expressions can refer both to contextual variables and to variable names
var <- 100
mtcars %>%
  let(cyl = cyl * var) %>%
  head()
# take() with summary functions and no 'by' argument returns aggregated data
mtcars %>%
  take(
    mean = mean(disp),
    n = .N
  )

# Usually you will want to aggregate within groups
mtcars %>%
  take(
    mean = mean(disp),
    n = .N,
    by = cyl
  )

# Groups can be defined by expressions
mtcars %>%
  take_all(
    mean,
    by = list(vsam = vs + am)
  )

# Modify all non-grouping variables in place
mtcars %>%
  let_all(
    (.x - mean(.x)) / sd(.x),
    by = am
  ) %>%
  head()

# Write the modified non-grouping variables to new variables
mtcars %>%
  let_all(
    scaled = (.x - mean(.x)) / sd(.x),
    by = am
  ) %>%
  head()

# Conditionally modify all variables
iris %>%
  let_all(mean = if (is.numeric(.x)) mean(.x)) %>%
  head()

# Modify all variables conditionally on their name
iris %>%
  let_all(
    mean = if (startsWith(.name, "Sepal")) mean(.x),
    median = if (startsWith(.name, "Petal")) median(.x),
    by = Species
  ) %>%
  head()

# Aggregation with take_all()
mtcars %>%
  take_all(
    mean = mean(.x),
    sd = sd(.x),
    n = .N,
    by = am
  )

# Conditionally aggregate all variables
iris %>%
  take_all(mean = if (is.numeric(.x)) mean(.x))

# Aggregate all variables conditionally on their name
iris %>%
  take_all(
    mean = if (startsWith(.name, "Sepal")) mean(.x),
    median = if (startsWith(.name, "Petal")) median(.x),
    by = Species
  )
# Parametric evaluation: the expression is stored in a variable and
# evaluated inside the call
var <- quote(mean(cyl))
mtcars %>%
  let(mean_cyl = eval(var)) %>%
  head()
take(mtcars, eval(var))

# All together: both the new variable name and its expression come
# from variables
new_var <- "mean_cyl"
mtcars %>%
  let((new_var) := eval(var)) %>%
  head()
take(mtcars, (new_var) := eval(var))
########################################
# Variable selection

# Range selection with %to%
iris %>%
  let(avg = rowMeans(Sepal.Length %to% Petal.Width)) %>%
  head()

# Multi-assignment: several new columns are assigned in one expression
iris %>%
  let(
    # columns whose names start with Sepal or Petal
    multipled1 %to% multipled4 := cols("^(Sepal|Petal)") * 2
  ) %>%
  head()

mtcars %>%
  let(
    # text expansion of the {...} templates
    cols("scaled_{names(mtcars)}") := lapply(cols("{names(mtcars)}"), scale)
  ) %>%
  head()

# Range selection in 'by'; inside cols(): a range plus an additional column
mtcars %>%
  take(
    res = sum(cols(mpg, disp %to% drat)),
    by = vs %to% gear
  )
########################################
# Examples from data.table
dat <- data.table(
  x = rep(c("b", "a", "c"), each = 3),
  y = c(1, 3, 6),
  v = 1:9
)

# Basic row subset operations
take_if(dat, 2)               # 2nd row
take_if(dat, 3:2)             # 3rd and 2nd row
take_if(dat, order(x))        # no need for order(dat$x)
take_if(dat, y > 2)           # all rows where dat$y > 2
take_if(dat, y > 2 & v > 5)   # compound logical expressions
take_if(dat, !2:4)            # all rows other than 2:4
take_if(dat, -(2:4))          # same

# Select | compute columns
take(dat, v)             # v column (as data.table)
take(dat, sum(v))        # data.table with sum of v (column autonamed 'sum(v)')
take(dat, sv = sum(v))   # same, but column named "sv"
take(dat, v, v * 2)      # two-column data.table, v and v*2

# Subset rows and select | compute
take_if(dat, 2:3, sum(v))        # sum(v) over rows 2 and 3
take_if(dat, 2:3, sv = sum(v))   # same, but return data.table with column sv

# Grouping operations
take(dat, sum(v), by = x)      # ad hoc by, order of groups preserved in result
take(dat, sum(v), keyby = x)   # same, but order the result on by cols

# All together now
take_if(dat, x != "a", sum(v), by = x)   # sum(v) by "x" for each x != "a"

# More on special symbols, see also ?"data.table::special-symbols"
take_if(dat, .N)             # last row
take(dat, .N)                # total number of rows in DT
take(dat, .N, by = x)        # number of rows in each group
take(dat, .I[1], by = x)     # row number in DT corresponding to each group

# Add / update / delete by reference.
# The trailing [] is there for autoprinting.
let(dat, grp = .GRP, by = x)[]      # add a group counter column
let(dat, z = 42L)[]                 # add new column by reference
let(dat, z = NULL)[]                # remove column by reference
let_if(dat, x == "a", v = 42L)[]    # subassign to existing v column by reference
let_if(dat, x == "b", v2 = 84L)[]   # subassign to new column by reference (NA padded)
let(dat, m = mean(v), by = x)[]     # add new column by reference by group
# Advanced usage
dat <- data.table(
  x = rep(c("b", "a", "c"), each = 3),
  v = c(1, 1, 1, 2, 2, 1, 1, 2, 2),
  y = c(1, 3, 6),
  a = 1:9,
  b = 9:1
)

take(dat, sum(v), by = list(y %% 2))          # expressions in by
take(dat, sum(v), by = list(bool = y %% 2))   # same, named list renames the by column
take_all(dat, sum, by = x)                    # sum of all (other) columns for each group
take(
  dat,
  MySum = sum(v),
  MyMin = min(v),
  MyMax = max(v),
  by = list(x, y %% 2)  # by 2 expressions
)
take(dat, seq = min(a):max(b), by = x)   # j is not limited to just aggregations

# Compound query
dat %>%
  take(V1 = sum(v), by = x) %>%
  take_if(V1 < 20)

# Ordering results
dat %>%
  take(V1 = sum(v), by = x) %>%
  sort_by(-V1) %>%
  head()
# Run the code above in your browser using DataLab