# Enable parallelization in examples
# Optional parallel backend. The guard is deliberately FALSE so that nothing
# here runs automatically; execute the three lines by hand to enable it.
if (FALSE) {
  library(doParallel)
  cl <- makeCluster(2)
  registerDoParallel(cl)
}
# Close at the end
# stopCluster(cl) #nolint
# registerDoSEQ() #nolint
if(require("rpart")) {
# 1) Evaluator from FSelector package.
# Cross-validated accuracy of an rpart classification tree.
#
# Arguments:
#   subset    - character vector of attribute names used as predictors.
#   data      - data.frame holding the predictors and the dependent column.
#   dependent - name of the dependent (class) column; defaults to the fifth
#               column name of `iris`.
#
# Returns the mean accuracy (1 - error rate) over the folds that could be
# evaluated; higher is better. NaN is returned only when no fold is usable.
evaluator <- function(subset, data, dependent = names(iris)[5]) {
  k <- 5
  # Each row gets a uniform random number; fold i holds the rows whose number
  # lies in [(i - 1) / k, i / k). Fold sizes are therefore random and a fold
  # can be empty when `data` has few rows.
  splits <- runif(nrow(data))
  in_fold <- function(i) {
    (splits >= (i - 1) / k) & (splits < i / k)
  }
  # A fold with no test rows yields 0 / 0 = NaN, and a fold that swallows every
  # row leaves nothing to train on. Skip both instead of poisoning the mean.
  usable <- vapply(seq_len(k), function(i) {
    test.idx <- in_fold(i)
    any(test.idx) && !all(test.idx)
  }, logical(1))
  # The formula does not depend on the fold, so build it once.
  model_formula <- to_formula(subset, dependent)
  results <- vapply(which(usable), function(i) {
    test.idx <- in_fold(i)
    test <- data[test.idx, , drop = FALSE]
    train <- data[!test.idx, , drop = FALSE]
    # rpart:: loads the namespace (and its predict method) without attaching
    # the package to the caller's search path.
    tree <- rpart::rpart(model_formula, train)
    predicted <- predict(tree, test, type = "class")
    error.rate <- sum(test[[dependent]] != predicted) / nrow(test)
    1 - error.rate
  }, numeric(1))
  mean(results)
}
# Fix the RNG seed before the searches; evaluator() draws random numbers to
# assign rows to folds.
set.seed(123)

# Greedy search (the default mode), timed.
system.time(
  feature_search(
    attributes = names(iris)[-5],
    fun = evaluator,
    data = iris
  )
)
# The same greedy search with parallel = FALSE, timed for comparison.
system.time(
  feature_search(
    attributes = names(iris)[-5],
    fun = evaluator,
    data = iris,
    parallel = FALSE
  )
)

# Exhaustive search, selected explicitly through `mode`, timed.
system.time(
  feature_search(
    attributes = names(iris)[-5],
    fun = evaluator,
    data = iris,
    mode = "exhaustive"
  )
)
# The same exhaustive search with parallel = FALSE, timed for comparison.
system.time(
  feature_search(
    attributes = names(iris)[-5],
    fun = evaluator,
    data = iris,
    mode = "exhaustive",
    parallel = FALSE
  )
)
}
# 2) Maximize the R^2 statistic of a linear regression model.
# R^2 of a linear model fitted on the given attributes.
#
# Arguments:
#   attributes - character vector of attribute names used as predictors.
#   data       - data.frame holding the predictors and the dependent column.
#   dependent  - name of the response column; defaults to the first column
#                name of `iris`.
#
# Returns the `r.squared` element of the fitted model's summary.
evaluator_R2_lm <- function(attributes, data, dependent = names(iris)[1]) {
  model_formula <- to_formula(attributes, dependent)
  fit <- lm(model_formula, data = data)
  fit_summary <- summary(fit)
  fit_summary$r.squared
}
# Exhaustive search over every iris column except the first, scored with
# evaluator_R2_lm.
feature_search(
  attributes = names(iris)[-1],
  fun = evaluator_R2_lm,
  data = iris,
  mode = "exhaustive"
)
# 3) Optimize the BIC criterion in a generalized linear model.
# The aim of the Bayesian approach is to identify the model with the highest
# probability of being the true model. - Kuha 2004
if(require("MASS")) {
# Load the `anorexia` data set from the MASS package; it is passed as `data`
# to the search below.
utils::data(anorexia, package = "MASS")
# Score a Gaussian GLM by its negated BIC, so that a higher score means a
# better model.
#
# Arguments:
#   attributes - character vector of model terms used as predictors.
#   data       - data.frame holding the predictors and the dependent column.
#   dependent  - name of the response column; defaults to "Postwt".
#
# Returns a numeric scalar: minus the criterion reported by extractAIC() with
# the penalty weight k = log(number of observations), i.e. -BIC.
evaluator_BIC_glm <- function(attributes, data, dependent = "Postwt") {
  fit <- glm(to_formula(attributes, dependent), family = gaussian,
             data = data)
  # Use the number of observations actually used in the fit, so rows dropped
  # because of missing values do not inflate the penalty.
  bic <- extractAIC(fit = fit, k = log(nobs(fit)))[2]
  # Lower BIC is better, while the other evaluators in these examples
  # (accuracy, R^2) are "higher is better" scores. Negate so this score points
  # the same way.
  # NOTE(review): assumes feature_search() keeps the subset with the highest
  # score - confirm against its documentation.
  -bic
}
# Exhaustive search over the candidate model terms, scored with
# evaluator_BIC_glm on the anorexia data.
feature_search(
  attributes = c("Prewt", "Treat", "offset(Prewt)"),
  fun = evaluator_BIC_glm,
  data = anorexia,
  mode = "exhaustive"
)
}
# Close parallelization
# Optional teardown of the parallel backend. Guarded by FALSE like the setup;
# run by hand only if the cluster `cl` was actually created.
if (FALSE) {
  stopCluster(cl)
  registerDoSEQ()
}
# Run the code above in your browser using DataLab