# NOT RUN {
## check examples from Ferro and Stephenson (2011)
## see their Tables 2 - 5
TPR.Table2 <- 55/100
FPR.Table2 <- 45/900
ensemble.SEDI(TPR=TPR.Table2, FPR=FPR.Table2)
TPR.Table4 <- 195/300
FPR.Table4 <- 105/700
ensemble.SEDI(TPR=TPR.Table4, FPR=FPR.Table4)
# }
# NOT RUN {
## Not run:
# get predictor variables
library(dismo)
predictor.files <- list.files(path=paste(system.file(package="dismo"), '/ex', sep=''),
pattern='grd', full.names=TRUE)
predictors <- stack(predictor.files)
# subset based on Variance Inflation Factors
predictors <- subset(predictors, subset=c("bio5", "bio6",
"bio16", "bio17", "biome"))
predictors
predictors@title <- "predictors"
# presence points
presence_file <- paste(system.file(package="dismo"), '/ex/bradypus.csv', sep='')
pres <- read.table(presence_file, header=TRUE, sep=',')[,-1]
# the kfold function randomly assigns data to groups;
# groups are used as calibration (1/4) and training (3/4) data
groupp <- kfold(pres, 4)
pres_train <- pres[groupp != 1, ]
pres_test <- pres[groupp == 1, ]
# choose background points
background <- randomPoints(predictors, n=1000, extf=1.00)
colnames(background)=c('lon', 'lat')
groupa <- kfold(background, 4)
backg_train <- background[groupa != 1, ]
backg_test <- background[groupa == 1, ]
# formulae for random forest and generalized linear model
# compare with: ensemble.formulae(predictors, factors=c("biome"))
rfformula <- as.formula(pb ~ bio5+bio6+bio16+bio17)
glmformula <- as.formula(pb ~ bio5 + I(bio5^2) + I(bio5^3) +
bio6 + I(bio6^2) + I(bio6^3) + bio16 + I(bio16^2) + I(bio16^3) +
bio17 + I(bio17^2) + I(bio17^3) )
# fit four ensemble models (RF, GLM, BIOCLIM, DOMAIN)
# factors removed for BIOCLIM, DOMAIN, MAHAL
ensemble.nofactors <- ensemble.calibrate.models(x=predictors, p=pres_train, a=backg_train,
pt=pres_test, at=backg_test,
species.name="Bradypus",
ENSEMBLE.tune=TRUE,
ENSEMBLE.min = 0.65,
MAXENT=0, MAXNET=0, MAXLIKE=0, GBM=0, GBMSTEP=0, RF=1, CF=0,
GLM=1, GLMSTEP=0, GAM=0, GAMSTEP=0, MGCV=0, MGCVFIX=0,
EARTH=0, RPART=0, NNET=0, FDA=0, SVM=0, SVME=0, GLMNET=0,
BIOCLIM.O=0, BIOCLIM=1, DOMAIN=1, MAHAL=0, MAHAL01=0,
Yweights="BIOMOD",
factors="biome",
evaluations.keep=TRUE, models.keep=FALSE,
RF.formula=rfformula,
GLM.formula=glmformula)
# with option evaluations.keep, all model evaluations are saved in the ensemble object
attributes(ensemble.nofactors$evaluations)
# Get evaluation statistics for the ENSEMBLE model
eval.ENSEMBLE <- ensemble.nofactors$evaluations$ENSEMBLE.T
eval.calibrate.ENSEMBLE <- ensemble.nofactors$evaluations$ENSEMBLE.C
ensemble.evaluate(eval=eval.ENSEMBLE, eval.train=eval.calibrate.ENSEMBLE)
# TSS is maximum where specificity + sensitivity is maximum
threshold.specsens <- threshold(eval.ENSEMBLE, stat="spec_sens")
ensemble.evaluate(eval=eval.ENSEMBLE, fixed.threshold=threshold.specsens,
eval.train=eval.calibrate.ENSEMBLE)
# usual practice to calculate threshold from calibration data
ensemble.evaluate(eval=eval.ENSEMBLE, eval.train=eval.calibrate.ENSEMBLE)
# }
# NOT RUN {
# }
Run the code above in your browser using DataLab