# Classification
data(iris)
# force predict to return class labels only
mypredict.lda <- function(object, newdata)
predict(object, newdata = newdata)$class
# 10-fold cv of LDA for Iris data
errorest(Species ~ ., data=iris, model=lda,
estimator = "cv", predict= mypredict.lda)
data(PimaIndiansDiabetes)
# 632+ bootstrap of LDA for Diabetes data
errorest(diabetes ~ ., data=PimaIndiansDiabetes, model=lda,
estimator = "632plus", predict= mypredict.lda)
data(Glass)
# LDA has cross-validated misclassification error of
# 38\% (Ripley, 1996, page 98)
# Pruned trees about 32\% (Ripley, 1996, page 230)
# use stratified sampling here, i.e. preserve the class proportions
errorest(Type ~ ., data=Glass, model=lda,
predict=mypredict.lda, est.para=control.errorest(strat=TRUE))
# force predict to return class labels
mypredict.rpart <- function(object, newdata)
predict(object, newdata = newdata,type="class")
pruneit <- function(formula, ...)
prune(rpart(formula, ...), cp =0.01)
errorest(Type ~ ., data=Glass, model=pruneit,
predict=mypredict.rpart, est.para=control.errorest(strat=TRUE))
# compute sensitivity and specifity for stabilised LDA
data(GlaucomaM)
error <- errorest(Class ~ ., data=GlaucomaM, model=slda,
predict=mypredict.lda, est.para=control.errorest(predictions=TRUE))
# sensitivity
mean(error$predictions[GlaucomaM$Class == "glaucoma"] == "glaucoma")
# specifity
mean(error$predictions[GlaucomaM$Class == "normal"] == "normal")
# Indirect Classification: Smoking data
data(Smoking)
# Set three groups of variables:
# 1) explanatory variables are: TarY, NicY, COY, Sex, Age
# 2) intermediate variables are: TVPS, BPNL, COHB
# 3) response (resp) is defined by:
resp <- function(data){
res <- t(t(data) > c(4438, 232.5, 58))
res <- as.factor(ifelse(apply(res, 1, sum) > 2, 1, 0))
res
}
response <- resp(Smoking[ ,c("TVPS", "BPNL", "COHB")])
smoking <- cbind(Smoking, response)
formula <- TVPS+BPNL+COHB~TarY+NicY+COY+Sex+Age
mypredict.inclass <- function(object, newdata){
res <- predict.inclass(object = object, cFUN = resp, newdata = newdata)
return(res)
}
# Estimation per leave-one-out estimate for the misclassification is
# 36.36\% (Hand et al., 2001), using indirect classification with
# linear models
errorest(formula, data = smoking, model = inclass, predict = mypredict.inclass,
estimator = "cv", iclass = "response", pFUN = lm,
est.para=control.errorest(k=nrow(smoking)))
# Regression
data(BostonHousing)
# 10-fold cv of lm for Boston Housing data
errorest(medv ~ ., data=BostonHousing, model=lm,
est.para=control.errorest(random=FALSE))
# the same, with "model" returning a function for prediction
# instead of an object of class "lm"
mylm <- function(formula, data) {
mod <- lm(formula, data)
function(newdata) predict(mod, newdata)
}
errorest(medv ~ ., data=BostonHousing, model=mylm,
est.para=control.errorest(random=FALSE))
# Survival data
data(GBSG2)
# prediction is fitted Kaplan-Meier
predict.survfit <- function(object, newdata) object
# 5-fold cv of Kaplan-Meier for GBSG2 study
errorest(Surv(time, cens) ~ 1, data=GBSG2, model=survfit,
predict=predict.survfit, est.para=control.errorest(k=5))
Run the code above in your browser using DataLab