# NOT RUN {
library(data.table)
DT = data.table(a = LETTERS[c(3L,1:3)], b = 4:7)
DT[, c := 8] # add a numeric column, 8 for all rows
DT[, d := 9L] # add an integer column, 9L for all rows
DT[, c := NULL] # remove column c
DT[2, d := -8L] # subassign by reference to d; 2nd row is -8L now
DT # DT changed by reference
DT[2, d := 10L][] # shorthand for update and print
DT[b > 4, b := d * 2L] # subassign to b with d*2L on those rows where b > 4 is TRUE
DT[b > 4][, b := d * 2L] # different from above: [, := ] is performed on the subset,
# which is a new (ephemeral) data.table. The result needs to be
# assigned to a variable (using `<-`) to be kept.
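# An added sketch (not in the original example; the name DT2 is hypothetical):
# keep the modified subset by assigning it, leaving DT itself untouched.
DT2 <- DT[b > 4][, b := d * 2L]
DT2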
DT[, e := mean(d), by = a] # add new column by group by reference
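# An illustrative extension of the same technique (column names e_min/e_max are
# hypothetical): := can also add several columns at once within each group.
DT[, c("e_min", "e_max") := list(min(d), max(d)), by = a]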
DT["A", b := 0L, on = "a"] # ad-hoc update of column b for group "A" using
# joins-as-subsets with binary search and 'on='
# same as above but using keys
setkey(DT, a)
DT["A", b := 0L] # binary search for group "A" and set column b using keys
DT["B", f := mean(d)] # subassign to new column, NA initialized
# }
# NOT RUN {
# Speed example ...
m = matrix(1, nrow = 2e6L, ncol = 100L)
DF = as.data.frame(m)
DT = as.data.table(m)
system.time(for (i in 1:1000) DF[i, 1] = i)
# 15.856 seconds
system.time(for (i in 1:1000) DT[i, V1 := i])
# 0.279 seconds (57 times faster)
system.time(for (i in 1:1000) set(DT, i, 1L, i))
# 0.002 seconds (7930 times faster, overhead of [.data.table is avoided)
# However, normally, we call [.data.table *once* on *large* data, not many times on small data.
# The above is to demonstrate overhead, not to recommend looping in this way. But the option
# of set() is there if you need it.
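# A hedged sketch of a typical set() use (not from the original example):
# replace NAs column by column in a loop. On this toy data there are no NAs,
# so the loop is a no-op; the replacement value 0 is arbitrary.
for (j in names(DT)) {
  rows <- which(is.na(DT[[j]]))      # rows needing replacement in column j
  if (length(rows)) set(DT, rows, j, 0)
}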
# }