set.seed(2345)
n <- 50
x0 <- sample(1:4, n, TRUE)
x1 <- ifelse(x0 %in% 1:2, 1, 0)
x2 <- rnorm(n, 0.5, 1)
x3 <- ifelse(x0 %in% 2:4, 1, 0)
lam1 <- exp(0.5 + 1*x1 + -0.2*x2)
Y1 <- rpois(n, lam1)
lam2 <- exp(1 + 0.5*x3)
Y2 <- rpois(n, lam2)
Y3 <- rpois(n, exp(0))
Y <- cbind(Spp1=Y1, Spp2=Y2, Spp3=Y3)
oc <- opticut(Y ~ x2, strata=x0, dist="poisson", comb="rank")
## asymptotic confidence intervals
(uc1 <- uncertainty(oc, type="asymp", B=999))
summary(uc1)
## bootstrap-based confidence intervals
(uc2 <- uncertainty(oc, type="boot", B=19))
summary(uc2)
## use user-supplied indices
## multi-model bootstrap based uncertainties
B <- replicate(25, sample.int(n, replace=TRUE))
check_strata(oc, B) # check representation
(uc3 <- uncertainty(oc, type="multi", B=B))
summary(uc3)
## best partitions:
## selection frequencies for strata and species
bestpart(uc3)
heatmap(bestpart(uc3), scale="none", col=occolors()(25))
## bootstrap smoothed predictions per strata
bsmooth(uc3)
heatmap(bestpart(uc3), scale="none", col=occolors()(25))
## individual species results
uc3$uncertainty
bestpart(uc3$uncertainty[[1]])
bsmooth(uc3$uncertainty[[1]])
if (FALSE) {
## block bootstrap
block_fun <- function()
unlist(lapply(unique(x0), function(z) if (sum(x0==z) < 2)
which(x0==z) else sample(which(x0==z), sum(x0==z), replace=TRUE)))
B <- replicate(25, block_fun())
check_strata(oc, B) # check representation
summary(uncertainty(oc, type="multi", B=B))
## jackknife
B <- sapply(1:n, function(i) which((1:n) != i))
check_strata(oc, B) # check representation
summary(uncertainty(oc, type="multi", B=B))
## multicut based uncertainty
mc <- multicut(Y ~ x2, strata=x0, dist="poisson")
## asymptotic confidence intervals
(muc1 <- uncertainty(mc, type="asymp", B=999))
summary(muc1)
bestpart(muc1)
## bootstrap-based confidence intervals
(muc2 <- uncertainty(mc, type="boot", B=19))
summary(muc2)
bestpart(muc2)
## dolina example
data(dolina)
## stratum as ordinal
dolina$samp$stratum <- as.integer(dolina$samp$stratum)
## filter species to speed up things a bit
Y <- ifelse(dolina$xtab[,colSums(dolina$xtab > 0) >= 20] > 0, 1, 0)
## opticut results, note the cloglog link function
dol <- opticut(Y ~ stratum + lmoist + method, data=dolina$samp,
strata=dolina$samp$mhab, dist="binomial:cloglog")
## parallel computing for uncertainty
library(parallel)
cl <- makeCluster(2)
ucdol <- uncertainty(dol, type="multi", B=25, cl=cl)
stopCluster(cl)
bestpart(ucdol)
heatmap(t(bestpart(ucdol)), scale="none", col=occolors()(25),
distfun=function(x) dist(x, "manhattan"))
## See how indicator value changes with different partitions
## (and why it is the wrong metric to use in this calse)
with(ucdol$uncertainty[["pvic"]],
boxplot(I ~ best, col="gold", ylab="Indicator value"))
## What we should calculate is the bootstrap smoothed mean of the
## expected value and its confidence intervals
bs <- bsmooth(ucdol$uncertainty[["pvic"]])
boxplot(t(bs), ylab="Expected value")
cbind(Mean=rowMeans(bs), t(apply(bs, 1, quantile, probs=c(0.025, 0.975))))
## A more interesting simulated example for bootstrap smoothing
## and comparing opticut vs. multicut
set.seed(1)
n <- 2000
x <- sort(runif(n, -8, 8))
p <- plogis(0.5 + -0.1 * x + -0.2 * x^2)
y <- rbinom(n, 1, p)
d <- diff(range(x))/10
br <- seq(min(x), max(x), by=d)
g <- cut(x, br, include.lowest=TRUE)
levels(g) <- LETTERS[1:nlevels(g)]
o <- opticut(y ~ 1, strata=g, dist="binomial")
m <- multicut(y ~ 1, strata=g, dist="binomial")
library(parallel)
cl <- makeCluster(2)
uo <- uncertainty(o, type="multi", B=99, cl=cl)
um <- uncertainty(m, type="boot", B=99, cl=cl)
stopCluster(cl)
## bootstrap average for opticut
bs <- bsmooth(uo$uncertainty[[1]])
stat <- cbind(Mean=rowMeans(bs),
t(apply(bs, 1, quantile, probs=c(0.025, 0.975))))
## bootstrap average for multicut
bsm <- as.matrix(um$uncertainty[[1]][,-(1:2)])
statm <- cbind(Mean=colMeans(bsm),
t(apply(bsm, 2, quantile, probs=c(0.025, 0.975))))
op <- par(mfrow=c(2,1))
plot(p ~ x, type="l", ylim=c(0,1), main="Binary partitions (opticut)")
abline(v=br, col="grey", lty=3)
lines(br[-1]-0.5*d, stat[,1], col=4)
lines(br[-1]-0.5*d, stat[,2], col=4, lty=2)
lines(br[-1]-0.5*d, stat[,3], col=4, lty=2)
lines(br[-1]-0.5*d, bs[,1], col=2)
legend("topright", bty="n", lty=c(1,1,2,1), col=c(1,4,4,2),
legend=c("True response","bsmooth","0.95 CI","Best partition"))
plot(p ~ x, type="l", ylim=c(0,1), main="Multi-level model (multicut)")
abline(v=br, col="grey", lty=3)
lines(br[-1]-0.5*d, statm[,1], col=4)
lines(br[-1]-0.5*d, statm[,2], col=4, lty=2)
lines(br[-1]-0.5*d, statm[,3], col=4, lty=2)
legend("topright", bty="n", lty=c(1,1,2), col=c(1,4,4),
legend=c("True response","bsmooth","0.95 CI"))
par(op)
}
Run the code above in your browser using DataLab