## --- simple linear model simulation ---
set.seed(1234)
n <- 200
x <- rnorm(n)
y <- rnorm(n, crossprod(t(model.matrix(~ x)), c(0, 1)), sd = 0.5)
d <- data.frame(y, x)
## model fitting and bootstrap
mod <- lm(y ~ x, d)
ndat <- model.frame(mod)
B <- 100
bid <- sapply(1:B, function(i) sample(nrow(ndat), nrow(ndat), TRUE))
fun <- function(z) {
if (missing(z))
z <- sample(nrow(ndat), nrow(ndat), TRUE)
coef(lm(mod$call$formula, data=ndat[z,]))
}
## standard '*apply' functions
system.time(res1 <- lapply(1:B, function(i) fun(bid[,i])))
system.time(res2 <- sapply(1:B, function(i) fun(bid[,i])))
system.time(res3 <- apply(bid, 2, fun))
system.time(res4 <- replicate(B, fun()))
## 'pb*apply' functions
## try different settings:
## "none", "txt", "tk", "win", "timer"
op <- pboptions(type = "timer") # default
system.time(res1pb <- pblapply(1:B, function(i) fun(bid[,i])))
pboptions(op)
pboptions(type = "txt")
system.time(res2pb <- pbsapply(1:B, function(i) fun(bid[,i])))
pboptions(op)
pboptions(type = "txt", style = 1, char = "=")
system.time(res3pb <- pbapply(bid, 2, fun))
pboptions(op)
pboptions(type = "txt", char = ":")
system.time(res4pb <- pbreplicate(B, fun()))
pboptions(op)
if (FALSE) {
## parallel evaluation using the parallel package
## (n = 2000 and B = 1000 will give visible timing differences)
library(parallel)
cl <- makeCluster(2L)
clusterExport(cl, c("fun", "mod", "ndat", "bid"))
## parallel with no progress bar: snow type cluster
## (RNG is set in the main process to define the object bid)
system.time(res1cl <- parLapply(cl = cl, 1:B, function(i) fun(bid[,i])))
system.time(res2cl <- parSapply(cl = cl, 1:B, function(i) fun(bid[,i])))
system.time(res3cl <- parApply(cl, bid, 2, fun))
## parallel with progress bar: snow type cluster
## (RNG is set in the main process to define the object bid)
system.time(res1pbcl <- pblapply(1:B, function(i) fun(bid[,i]), cl = cl))
system.time(res2pbcl <- pbsapply(1:B, function(i) fun(bid[,i]), cl = cl))
## (RNG needs to be set when not using bid)
parallel::clusterSetRNGStream(cl, iseed = 0L)
system.time(res4pbcl <- pbreplicate(B, fun(), cl = cl))
system.time(res3pbcl <- pbapply(bid, 2, fun, cl = cl))
stopCluster(cl)
if (.Platform$OS.type != "windows") {
## parallel with no progress bar: multicore type forking
## (mc.set.seed = TRUE in parallel::mclapply by default)
system.time(res2mc <- mclapply(1:B, function(i) fun(bid[,i]), mc.cores = 2L))
## parallel with progress bar: multicore type forking
## (mc.set.seed = TRUE in parallel::mclapply by default)
system.time(res1pbmc <- pblapply(1:B, function(i) fun(bid[,i]), cl = 2L))
system.time(res2pbmc <- pbsapply(1:B, function(i) fun(bid[,i]), cl = 2L))
system.time(res4pbmc <- pbreplicate(B, fun(), cl = 2L))
}
}
## --- Examples taken from standard '*apply' functions ---
## --- sapply, lapply, and replicate ---
require(stats); require(graphics)
x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE,FALSE,FALSE,TRUE))
# compute the list mean for each list element
pblapply(x, mean)
pbwalk(x, mean)
# median and quartiles for each list element
pblapply(x, quantile, probs = 1:3/4)
pbsapply(x, quantile)
i39 <- sapply(3:9, seq) # list of vectors
pbsapply(i39, fivenum)
pbvapply(i39, fivenum,
c(Min. = 0, "1st Qu." = 0, Median = 0, "3rd Qu." = 0, Max. = 0))
## sapply(*, "array") -- artificial example
(v <- structure(10*(5:8), names = LETTERS[1:4]))
f2 <- function(x, y) outer(rep(x, length.out = 3), y)
(a2 <- pbsapply(v, f2, y = 2*(1:5), simplify = "array"))
a.2 <- pbvapply(v, f2, outer(1:3, 1:5), y = 2*(1:5))
stopifnot(dim(a2) == c(3,5,4), all.equal(a2, a.2),
identical(dimnames(a2), list(NULL,NULL,LETTERS[1:4])))
summary(pbreplicate(100, mean(rexp(10))))
## use of replicate() with parameters:
foo <- function(x = 1, y = 2) c(x, y)
# does not work: bar <- function(n, ...) replicate(n, foo(...))
bar <- function(n, x) pbreplicate(n, foo(x = x))
bar(5, x = 3)
## --- apply ---
## Compute row and column sums for a matrix:
x <- cbind(x1 = 3, x2 = c(4:1, 2:5))
dimnames(x)[[1]] <- letters[1:8]
pbapply(x, 2, mean, trim = .2)
col.sums <- pbapply(x, 2, sum)
row.sums <- pbapply(x, 1, sum)
rbind(cbind(x, Rtot = row.sums), Ctot = c(col.sums, sum(col.sums)))
stopifnot( pbapply(x, 2, is.vector))
## Sort the columns of a matrix
pbapply(x, 2, sort)
## keeping named dimnames
names(dimnames(x)) <- c("row", "col")
x3 <- array(x, dim = c(dim(x),3),
dimnames = c(dimnames(x), list(C = paste0("cop.",1:3))))
identical(x, pbapply( x, 2, identity))
identical(x3, pbapply(x3, 2:3, identity))
##- function with extra args:
cave <- function(x, c1, c2) c(mean(x[c1]), mean(x[c2]))
pbapply(x, 1, cave, c1 = "x1", c2 = c("x1","x2"))
ma <- matrix(c(1:4, 1, 6:8), nrow = 2)
ma
pbapply(ma, 1, table) #--> a list of length 2
pbapply(ma, 1, stats::quantile) # 5 x n matrix with rownames
stopifnot(dim(ma) == dim(pbapply(ma, 1:2, sum)))
## Example with different lengths for each call
z <- array(1:24, dim = 2:4)
zseq <- pbapply(z, 1:2, function(x) seq_len(max(x)))
zseq ## a 2 x 3 matrix
typeof(zseq) ## list
dim(zseq) ## 2 3
zseq[1,]
pbapply(z, 3, function(x) seq_len(max(x)))
# a list without a dim attribute
## --- mapply and .mapply ---
pbmapply(rep, 1:4, 4:1)
pbmapply(rep, times = 1:4, x = 4:1)
pbmapply(rep, times = 1:4, MoreArgs = list(x = 42))
pbmapply(function(x, y) seq_len(x) + y,
c(a = 1, b = 2, c = 3), # names from first
c(A = 10, B = 0, C = -10))
word <- function(C, k) paste(rep.int(C, k), collapse = "")
utils::str(pbmapply(word, LETTERS[1:6], 6:1, SIMPLIFY = FALSE))
pb.mapply(rep,
dots = list(1:4, 4:1),
MoreArgs = list())
pb.mapply(rep,
dots = list(times = 1:4, x = 4:1),
MoreArgs = list())
pb.mapply(rep,
dots = list(times = 1:4),
MoreArgs = list(x = 42))
pb.mapply(function(x, y) seq_len(x) + y,
dots = list(c(a = 1, b = 2, c = 3), # names from first
c(A = 10, B = 0, C = -10)),
MoreArgs = list())
## --- Map ---
pbMap(`+`, 1, 1 : 3) ; 1 + 1:3
## --- eapply ---
env <- new.env(hash = FALSE)
env$a <- 1:10
env$beta <- exp(-3:3)
env$logic <- c(TRUE, FALSE, FALSE, TRUE)
pbeapply(env, mean)
unlist(pbeapply(env, mean, USE.NAMES = FALSE))
pbeapply(env, quantile, probs = 1:3/4)
pbeapply(env, quantile)
## --- tapply ---
require(stats)
groups <- as.factor(rbinom(32, n = 5, prob = 0.4))
pbtapply(groups, groups, length) #- is almost the same as
table(groups)
## contingency table from data.frame : array with named dimnames
pbtapply(warpbreaks$breaks, warpbreaks[,-1], sum)
pbtapply(warpbreaks$breaks, warpbreaks[, 3, drop = FALSE], sum)
n <- 17; fac <- factor(rep_len(1:3, n), levels = 1:5)
table(fac)
pbtapply(1:n, fac, sum)
pbtapply(1:n, fac, sum, default = 0) # maybe more desirable
pbtapply(1:n, fac, sum, simplify = FALSE)
pbtapply(1:n, fac, range)
pbtapply(1:n, fac, quantile)
pbtapply(1:n, fac, length) ## NA's
pbtapply(1:n, fac, length, default = 0) # == table(fac)
## example of ... argument: find quarterly means
pbtapply(presidents, cycle(presidents), mean, na.rm = TRUE)
ind <- list(c(1, 2, 2), c("A", "A", "B"))
table(ind)
pbtapply(1:3, ind) #-> the split vector
pbtapply(1:3, ind, sum)
## Some assertions (not held by all patch propsals):
nq <- names(quantile(1:5))
stopifnot(
identical(pbtapply(1:3, ind), c(1L, 2L, 4L)),
identical(pbtapply(1:3, ind, sum),
matrix(c(1L, 2L, NA, 3L), 2, dimnames = list(c("1", "2"), c("A", "B")))),
identical(pbtapply(1:n, fac, quantile)[-1],
array(list(`2` = structure(c(2, 5.75, 9.5, 13.25, 17), .Names = nq),
`3` = structure(c(3, 6, 9, 12, 15), .Names = nq),
`4` = NULL, `5` = NULL), dim=4, dimnames=list(as.character(2:5)))))
## --- by ---
pbby(warpbreaks[, 1:2], warpbreaks[,"tension"], summary)
pbby(warpbreaks[, 1], warpbreaks[, -1], summary)
pbby(warpbreaks, warpbreaks[,"tension"],
function(x) lm(breaks ~ wool, data = x))
tmp <- with(warpbreaks,
pbby(warpbreaks, tension,
function(x) lm(breaks ~ wool, data = x)))
sapply(tmp, coef)
Run the code above in your browser using DataLab