oldopts <- options(max.print = 70)
## Which value
whichNA(wlddev$PCGDP) # Same as which(is.na(wlddev$PCGDP))
whichNA(wlddev$PCGDP, invert = TRUE) # Same as which(!is.na(wlddev$PCGDP))
whichv(wlddev$country, "Chad") # Same as which(wlddev$county == "Chad")
wlddev$country %==% "Chad" # Same thing
whichv(wlddev$country, "Chad", TRUE) # Same as which(wlddev$county != "Chad")
wlddev$country %!=% "Chad" # Same thing
lvec <- wlddev$country == "Chad" # If we already have a logical vector...
whichv(lvec, FALSE) # is fastver than which(!lvec)
rm(lvec)
# Using the %==% operator can yield tangible performance gains
fsubset(wlddev, iso3c %==% "DEU") # 3x faster than:
fsubset(wlddev, iso3c == "DEU")
# With multiple categories we can use %iin%
fsubset(wlddev, iso3c %iin% c("DEU", "ITA", "FRA"))
## Math by reference: permissible types of operations
x <- alloc(1.0, 1e5) # Vector
x %+=% 1
x %+=% 1:1e5
xm <- matrix(alloc(1.0, 1e5), ncol = 100) # Matrix
xm %+=% 1
xm %+=% 1:1e3
setop(xm, "+", 1:100, rowwise = TRUE)
xm %+=% xm
xm %+=% 1:1e5
xd <- qDF(replicate(100, alloc(1.0, 1e3), simplify = FALSE)) # Data Frame
xd %+=% 1
xd %+=% 1:1e3
setop(xd, "+", 1:100, rowwise = TRUE)
xd %+=% xd
rm(x, xm, xd)
## setv() and copyv()
x <- rnorm(100)
y <- sample.int(10, 100, replace = TRUE)
setv(y, 5, 0) # Faster than y[y == 5] <- 0
setv(y, 4, x) # Faster than y[y == 4] <- x[y == 4]
setv(y, 20:30, y[40:50]) # Faster than y[20:30] <- y[40:50]
setv(y, 20:30, x) # Faster than y[20:30] <- x[20:30]
rm(x, y)
# Working with data frames, here returning copies of the frame
copyv(mtcars, 20:30, ss(mtcars, 10:20))
copyv(mtcars, 20:30, fscale(mtcars))
ftransform(mtcars, new = copyv(cyl, 4, vs))
# Column-wise:
copyv(mtcars, 2:3, fscale(mtcars), xlist = TRUE)
copyv(mtcars, 2:3, mtcars[4:5], xlist = TRUE)
## Missing values
mtc_na <- na_insert(mtcars, 0.15) # Set 15% of values missing at random
fnobs(mtc_na) # See observation count
missing_cases(mtc_na) # Fast equivalent to !complete.cases(mtc_na)
missing_cases(mtc_na, cols = 3:4) # Missing cases on certain columns?
missing_cases(mtc_na, count = TRUE) # Missing case count
missing_cases(mtc_na, prop = 0.8) # Cases with 80% or more missing
missing_cases(mtc_na, cols = 3:4, prop = 1) # Cases mssing columns 3 and 4
missing_cases(mtc_na, cols = 3:4, count = TRUE) # Missing case count on columns 3 and 4
na_omit(mtc_na) # 12x faster than na.omit(mtc_na)
na_omit(mtc_na, prop = 0.8) # Only remove cases missing 80% or more
na_omit(mtc_na, na.attr = TRUE) # Adds attribute with removed cases, like na.omit
na_omit(mtc_na, cols = .c(vs, am)) # Removes only cases missing vs or am
na_omit(qM(mtc_na)) # Also works for matrices
na_omit(mtc_na$vs, na.attr = TRUE) # Also works with vectors
na_rm(mtc_na$vs) # For vectors na_rm is faster ...
rm(mtc_na)
## Efficient vectorization
head(vec(EuStockMarkets)) # Atomic objects: no copy at all
head(vec(mtcars)) # Lists: directly in C
options(oldopts)
Run the code above in your browser using DataLab