bictab: Create Model Selection Tables Based on BIC

Description

This function creates a model selection table based on the Bayesian information criterion (Schwarz 1978, Burnham and Anderson 2002). The table ranks the models based on the BIC and also provides delta BIC and BIC model weights. The function adjusts for overdispersion in model selection by using the QBIC when c.hat > 1. bictab selects the appropriate function to create the model selection table based on the object class. The current version works with lists containing objects of aov, betareg, clm, clmm, clogit, coxme, coxph, fitdist, fitdistr, glm, glmmTMB, gls, gnls, hurdle, lavaan, lm, lme, lmekin, maxlikeFit, mer, merMod, lmerModLmerTest, multinom, nlme, nls, polr, rlm, survreg, vglm, and zeroinfl classes as well as various models of unmarkedFit classes but does not yet allow mixing of different classes.

Usage

bictab(cand.set, modnames = NULL, nobs = NULL,
       sort = TRUE, ...)
# S3 method for AICaov.lm
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, ...)
# S3 method for AICbetareg
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, ...)
# S3 method for AICsclm.clm
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, ...)
# S3 method for AICclmm
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, ...)
# S3 method for AICclm
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, ...)
# S3 method for AICcoxme
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, ...)
# S3 method for AICcoxph
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, ...)
# S3 method for AICfitdist
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, ...)
# S3 method for AICfitdistr
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, ...)
# S3 method for AICglm.lm
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, c.hat = 1, ...)  
# S3 method for AICglmmTMB
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, c.hat = 1, ...)  
# S3 method for AICgls
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, ...)
# S3 method for AICgnls.gls
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)
# S3 method for AIChurdle
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)
# S3 method for AIClavaan
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)
# S3 method for AIClm
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)
# S3 method for AIClme
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)
# S3 method for AIClmekin
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)
# S3 method for AICmaxlikeFit.list
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...) 
# S3 method for AICmer
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)
# S3 method for AIClmerMod
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)
# S3 method for AIClmerModLmerTest
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)
# S3 method for AICglmerMod
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...) 
# S3 method for AICnlmerMod
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)
# S3 method for AICmultinom.nnet
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICnlme.lme
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...) 
# S3 method for AICnls
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)
# S3 method for AICpolr
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)
# S3 method for AICrlm.lm
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)
# S3 method for AICsurvreg
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)
# S3 method for AICunmarkedFitOccu
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitColExt
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitOccuRN
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitPCount
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitPCO
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitDS
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitGDS
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitOccuFP
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitMPois
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitGMM
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitGPC
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitOccuMulti
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitOccuMS
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitOccuTTD
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitMMO
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitDSO
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICvglm
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICzeroinfl
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)

Value

bictab creates an object of class bictab with the following components:

Modname: the name of each model of the candidate model set.
K: the number of estimated parameters for each model.
(Q)BIC: the Bayesian information criterion for each model.
Delta_(Q)BIC: the delta BIC component.
ModelLik: the relative likelihood of the model given the data (exp(-0.5*delta[i])). This is not to be confused with the likelihood of the parameters given the data. The relative likelihood can then be normalized across all models to get the model probabilities.
(Q)BICWt: the BIC model weights, also termed "model probabilities" (Burnham and Anderson 2002, Link and Barker 2006, Anderson 2008). These measures indicate the level of support (i.e., weight of evidence) in favor of any given model being the most parsimonious among the candidate model set.
Cum.Wt: the cumulative BIC weights. These are only meaningful if results in table are sorted in decreasing order of BIC weights (i.e., sort = TRUE).
c.hat: if c.hat was specified as an argument, it is included in the table.
LL: the log-likelihood of each model.
Res.LL: if parameters are estimated by restricted maximum-likelihood (REML), the restricted log-likelihood of each model.

Arguments

cand.set: a list storing each of the models in the candidate model set.
modnames: a character vector of model names to facilitate the identification of each model in the model selection table. If NULL, the function uses the names in the cand.set list of candidate models (i.e., a named list). If no names appear in the list and no character vector is provided, generic names (e.g., Mod1, Mod2) are supplied in the table in the same order as in the list of candidate models.
nobs: this argument allows to specify a numeric value other than total sample size to compute the BIC (i.e., nobs defaults to total number of observations). This is relevant only for mixed models or various models of unmarkedFit classes where sample size is not straightforward. In such cases, one might use total number of observations or number of independent clusters (e.g., sites) as the value of nobs.
sort: logical. If TRUE, the model selection table is ranked according to the BIC values.
c.hat: value of overdispersion parameter (i.e., variance inflation factor) such as that obtained from c_hat. Note that values of c.hat different from 1 are only appropriate for binomial GLM's with trials > 1 (i.e., success/trial or cbind(success, failure) syntax), with Poisson GLM's, single-season occupancy models (MacKenzie et al. 2002), dynamic occupancy models (MacKenzie et al. 2003), or N-mixture models (Royle 2004, Dail and Madsen 2011). If c.hat > 1, bictab will return the quasi-likelihood analogue of the BIC (QBIC) and multiply the variance-covariance matrix of the estimates by this value (i.e., SE's are multiplied by sqrt(c.hat)). This option is not supported for generalized linear mixed models of the mer or merMod classes.
...: additional arguments passed to the function.

Author

Marc J. Mazerolle

Details

BIC tends to favor simpler models than AIC whenever n > 8 (Schwarz 1978, Link and Barker 2006, Anderson 2008). BIC assigns uniform prior probabilities across all models (i.e., equal 1/R), whereas in AIC and AICc, prior probabilities increase with sample size (Burnham and Anderson 2004, Link and Barker 2010). Some authors argue that BIC requires the true model to be included in the model set, whereas AIC or AICc does not (Burnham and Anderson 2002). However, Link and Barker (2006, 2010) consider both as assuming that a model in the model set approximates truth.

bictab internally creates a new class for the cand.set list of candidate models, according to the contents of the list. The current function is implemented for clogit, coxme, coxph, fitdist, fitdistr, glm, glmmTMB, gls, gnls, hurdle, lavaan, lm, lme, lmekin, maxlikeFit, mer, merMod, lmerModLmerTest, multinom, nlme, nls, polr, rlm, survreg, vglm, and zeroinfl classes as well as various unmarkedFit classes. The function constructs a model selection table based on BIC.

References

Anderson, D. R. (2008) Model-based Inference in the Life Sciences: a primer on evidence. Springer: New York.

Burnham, K. P., Anderson, D. R. (2002) Model Selection and Multimodel Inference: a practical information-theoretic approach. Second edition. Springer: New York.

Burnham, K. P., Anderson, D. R. (2004) Multimodel inference: understanding AIC and BIC in model selection. Sociological Methods and Research 33, 261--304.

Dail, D., Madsen, L. (2011) Models for estimating abundance from repeated counts of an open population. Biometrics 67, 577--587.

Link, W. A., Barker, R. J. (2006) Model weights and the foundations of multimodel inference. Ecology 87, 2626--2635.

Link, W. A., Barker, R. J. (2010) Bayesian Inference with Ecological Applications. Academic Press: Boston.

MacKenzie, D. I., Nichols, J. D., Lachman, G. B., Droege, S., Royle, J. A., Langtimm, C. A. (2002) Estimating site occupancy rates when detection probabilities are less than one. Ecology 83, 2248--2255.

MacKenzie, D. I., Nichols, J. D., Hines, J. E., Knutson, M. G., Franklin, A. B. (2003) Estimating site occupancy, colonization, and local extinction when a species is detected imperfectly. Ecology 84, 2200--2207.

Royle, J. A. (2004) N-mixture models for estimating population size from spatially replicated counts. Biometrics 60, 108--115.

Schwarz, G. (1978) Estimating the dimension of a model. Annals of Statistics 6, 461--464.

Examples

Run this code

##Mazerolle (2006) frog water loss example
data(dry.frog)

##setup a subset of models of Table 1
Cand.models <- list( )
Cand.models[[1]] <- lm(log_Mass_lost ~ Shade + Substrate +
                       cent_Initial_mass + Initial_mass2,
                       data = dry.frog)
Cand.models[[2]] <- lm(log_Mass_lost ~ Shade + Substrate +
                       cent_Initial_mass + Initial_mass2 +
                       Shade:Substrate, data = dry.frog)
Cand.models[[3]] <- lm(log_Mass_lost ~ cent_Initial_mass +
                       Initial_mass2, data = dry.frog)
Cand.models[[4]] <- lm(log_Mass_lost ~ Shade + cent_Initial_mass +
                       Initial_mass2, data = dry.frog)
Cand.models[[5]] <- lm(log_Mass_lost ~ Substrate + cent_Initial_mass +
                       Initial_mass2, data = dry.frog)

##create a vector of names to trace back models in set
Modnames <- paste("mod", 1:length(Cand.models), sep = " ")

##generate BIC table
bictab(cand.set = Cand.models, modnames = Modnames, sort = TRUE)
##round to 4 digits after decimal point and give log-likelihood
print(bictab(cand.set = Cand.models, modnames = Modnames, sort = TRUE),
      digits = 4, LL = TRUE)



if (FALSE) {
##Burnham and Anderson (2002) flour beetle data
data(beetle)
##models as suggested by Burnham and Anderson p. 198          
Cand.set <- list( )
Cand.set[[1]] <- glm(Mortality_rate ~ Dose, family =
                     binomial(link = "logit"), weights = Number_tested,
                     data = beetle)
Cand.set[[2]] <- glm(Mortality_rate ~ Dose, family =
                     binomial(link = "probit"), weights = Number_tested,
                     data = beetle)
Cand.set[[3]] <- glm(Mortality_rate ~ Dose, family =
                     binomial(link ="cloglog"), weights = Number_tested,
                     data = beetle)

##set up named list
names(Cand.set) <- c("logit", "probit", "cloglog")

##compare models
##model names will be taken from the list if modnames is not specified
bictab(cand.set = Cand.set)
}


##two-way ANOVA with interaction
data(iron)
##full model
m1 <- lm(Iron ~ Pot + Food + Pot:Food, data = iron)
##additive model
m2 <- lm(Iron ~ Pot + Food, data = iron)
##null model
m3 <- lm(Iron ~ 1, data = iron)

##candidate models
Cand.aov <- list(m1, m2, m3)
Cand.names <- c("full", "additive", "null")
bictab(Cand.aov, Cand.names)



##single-season occupancy model example modified from ?occu
if (FALSE) {
require(unmarked)
##single season example modified from ?occu
data(frogs)
pferUMF <- unmarkedFrameOccu(pfer.bin)
##add fake covariates
siteCovs(pferUMF) <- data.frame(sitevar1 = rnorm(numSites(pferUMF)),
                                sitevar2 = runif(numSites(pferUMF))) 
     
##observation covariates
obsCovs(pferUMF) <- data.frame(obsvar1 = rnorm(numSites(pferUMF) *
                                 obsNum(pferUMF))) 

##set up candidate model set
fm1 <- occu(~ obsvar1 ~ sitevar1, pferUMF)
fm2 <- occu(~ 1 ~ sitevar1, pferUMF)
fm3 <- occu(~ obsvar1 ~ sitevar2, pferUMF)
fm4 <- occu(~ 1 ~ sitevar2, pferUMF)

##assemble models in named list (alternative to using 'modnames' argument)
Cand.mods <- list("fm1" = fm1, "fm2" = fm2, "fm3" = fm3, "fm4" = fm4)

##compute table based on QBIC that accounts for c.hat
bictab(cand.set = Cand.mods, c.hat = 3.9)

detach(package:unmarked)
}

Run the code above in your browser using DataLab