# Simulated learning sample: 1000 standard-normal draws arranged as 100 rows
# of 10 predictors (columns V1, ..., V10).
X <- as.data.frame(matrix(rnorm(1000), ncol = 10))
# Class label: 1 when the mean of a row's predictors is positive, 0 otherwise.
# rowMeans() is the vectorised replacement for apply(X, 1, mean).
y <- factor(ifelse(rowMeans(X) > 0, 1, 0))
# Label first, predictors after, so that `y ~ .` regresses on every predictor.
learn <- cbind(y, X)
# Prediction wrapper handed to errorest() together with model = lda.
# Calls the predict() generic on the fitted model and returns only the
# `class` component of the result.
#   object  - fitted model
#   newdata - observations to predict
mypredict.lda <- function(object, newdata) {
  prediction <- predict(object, newdata = newdata)
  prediction$class
}
# Error estimate for LDA on the simulated sample: cross-validation with the
# estimator's own default settings.
errorest(
  y ~ .,
  data = learn,
  model = lda,
  estimator = "cv",
  predict = mypredict.lda
)

# Leave-one-out: cross-validation with as many folds as there are observations.
errorest(
  y ~ .,
  data = learn,
  model = lda,
  estimator = "cv",
  est.para = list(k = nrow(learn)),
  predict = mypredict.lda
)

# Bootstrap estimator.
errorest(
  y ~ .,
  data = learn,
  model = lda,
  estimator = "boot",
  predict = mypredict.lda
)

# "632plus" estimator.
errorest(
  y ~ .,
  data = learn,
  model = lda,
  estimator = "632plus",
  predict = mypredict.lda
)
# Cross-validated error for LDA using only the first three predictors.
# The data frame is passed through `data =` rather than attach()/detach():
# attaching `learn` puts a second `y` on the search path behind the global
# `y`, and leaves `learn` attached if errorest() stops with an error.
errorest(
  y ~ V1 + V2 + V3,
  data = learn,
  model = lda,
  estimator = "cv",
  predict = mypredict.lda
)
# Prediction wrapper handed to errorest() together with tree models.
# Calls the predict() generic with type = "class" and returns its result.
#   object  - fitted model
#   newdata - observations to predict
mypredict.rpart <- function(object, newdata) {
  predict(object, newdata = newdata, type = "class")
}
# The same three estimators, now with rpart as the model.
errorest(
  y ~ .,
  data = learn,
  model = rpart,
  estimator = "cv",
  predict = mypredict.rpart
)

errorest(
  y ~ .,
  data = learn,
  model = rpart,
  estimator = "boot",
  predict = mypredict.rpart
)

errorest(
  y ~ .,
  data = learn,
  model = rpart,
  estimator = "632plus",
  predict = mypredict.rpart
)

# Bagging, cross-validated. No predict wrapper is supplied here; nbagg = 10 is
# presumably forwarded to the model function -- confirm against errorest().
errorest(
  y ~ .,
  data = learn,
  model = bagging,
  estimator = "cv",
  nbagg = 10
)
data("Glass")
# Reference values for the Glass data:
# - LDA has a cross-validated misclassification error of 38%
#   (Ripley, 1996, page 98).
# - Pruned trees reach about 32% (Ripley, 1996, page 230).
# Model function for errorest(): grows a tree with rpart() and prunes it at
# a fixed complexity parameter of 0.01.
#   formula - model formula passed to rpart()
#   ...     - further arguments passed on to rpart()
pruneit <- function(formula, ...) {
  grown_tree <- rpart(formula, ...)
  prune(grown_tree, cp = 0.01)
}
# Cross-validated error of the pruned tree on the Glass data.
errorest(
  Type ~ .,
  data = Glass,
  model = pruneit,
  estimator = "cv",
  predict = mypredict.rpart
)

data("smoking")
# The variables fall into three groups:
# 1) explanatory variables: TarY, NicY, COY, Sex, Age
# 2) intermediate variables: TVPS, BPNL, COHB
# 3) the response (resp), defined by the function below:
# Derive the binary class label from the three intermediate variables.
#   data - data frame or matrix with three numeric columns, in the order used
#          at the call site (TVPS, BPNL, COHB)
# Returns a factor with levels "0" and "1": "1" for rows in which every column
# exceeds its cutoff, "0" otherwise.
resp <- function(data) {
  # One cutoff per column, in column order.
  cutoffs <- c(4438, 232.5, 58)
  # t(data) puts the variables in rows, so the length-3 cutoff vector is
  # recycled down each column, i.e. across the variables of one observation.
  exceeds <- t(t(data) > cutoffs)
  # A row sum above 2 means all three comparisons are TRUE.
  all_exceed <- rowSums(exceeds) > 2
  # Fix the level set so that a single row, or an input containing only one
  # class, still yields a factor with both levels.
  factor(ifelse(all_exceed, 1, 0), levels = c(0, 1))
}
# Observed class label, computed from the three intermediate variables.
response <- resp(smoking[, c("TVPS", "BPNL", "COHB")])
# Append the label to the data as column `response`.
smoking <- cbind(smoking, response = response)
# Intermediate variables on the left-hand side, explanatory variables on the
# right-hand side.
formula <- TVPS + BPNL + COHB ~ TarY + NicY + COY + Sex + Age
# Prediction wrapper handed to errorest() together with model = inclass.
# Delegates to predict.inclass(), supplying resp() as the classifying function.
#   object  - fitted model
#   newdata - observations to predict
mypredict.inclass <- function(object, newdata) {
  predict.inclass(object = object, cFUN = resp, newdata = newdata)
}
# The leave-one-out estimate of the misclassification error is 36.36%
# (Hand et al., 2001) for indirect classification with linear models.
errorest(
  formula,
  data = smoking,
  model = inclass,
  predict = mypredict.inclass,
  estimator = "cv",
  iclass = "response",
  pFUN = lm,
  est.para = list(k = nrow(smoking))
)
# Run the code above in your browser using DataLab