# Type 'example(setkey)' at the prompt to run these examples and browse the output
# Build a small unkeyed table whose rows start out in descending order.
DT <- data.table(A = 5:1, B = letters[5:1])
DT  # rows as constructed, before any key is set
setkey(DT, B)  # re-orders the rows by B and marks the table as sorted on B
DT  # same table after keying
tables()  # the KEY column of this summary reports each table's key columns
key(DT)
keycols <- c("A", "B")
# setkeyv() takes the key columns as a character vector; prefer it to
# key(DT)<-keycols, which copies the entire table.
setkeyv(DT, keycols)
# Plain assignment gives a second name for the same table, not a copy.
DT <- data.table(A = 5:1, B = letters[5:1])
DT2 <- DT  # no copy is made here
setkey(DT2, B)  # no copy-on-write happens for DT2 either
identical(DT, DT2)  # TRUE: DT and DT2 both name the one keyed table
# An independent table requires an explicit copy().
DT <- data.table(A = 5:1, B = letters[5:1])
DT2 <- copy(DT)  # copy() is needed to duplicate a data.table
setkey(DT2, B)  # this time only DT2 is changed
identical(DT, DT2)  # FALSE: DT and DT2 are now different tables
# Base data.frame, used to show how many copies a rename costs (as of R 2.15.1).
DF <- data.frame(a = 1:2, b = 3:4)
# Wrapped in try() for non-Windows builds, where R is faster without memory
# profiling.
try(tracemem(DF))
colnames(DF)[1] <- "A"  # entire object copied 4 times
names(DF)[1] <- "A"  # entire object copied 3 times
names(DF) <- c("A", "b")  # entire object copied once
`names<-`(DF, c("A", "b"))  # entire object copied once
# Now suppose DF were large, say 10GB in RAM: is copying 10GB, even a single
# time, a reasonable price for changing one column name?
# The same kind of renames on a data.table, with memory tracing requested.
DT <- data.table(a = 1:2, b = 3:4, c = 5:6)
try(tracemem(DT))
setnames(DT, "b", "B")  # rename by old name; no match() call required
setnames(DT, 3, "C")  # rename by column position
setnames(DT, 2:3, c("D", "E"))  # several columns at once, by position
setnames(DT, c("a", "E"), c("A", "F"))  # several columns at once, by name
setnames(DT, c("X", "Y", "Z"))  # replace every column name
# setnames() did all of the above without making any copy of DT.
# Run the code above in your browser using DataLab