# \donttest{
# Attach packages
library(cvms)
library(groupdata2) # fold()
library(dplyr) # %>% arrange() mutate()
# Note: More examples of custom functions can be found at:
# model_fn: model_functions()
# predict_fn: predict_functions()
# preprocess_fn: preprocess_functions()
# Data is part of cvms
data <- participant.scores
# Set seed for reproducibility
set.seed(7)
# Fold data
data <- partition(
data,
p = 0.8,
cat_col = "diagnosis",
id_col = "participant",
list_out = FALSE
) %>%
mutate(diagnosis = as.factor(diagnosis)) %>%
arrange(.partitions)
# Formulas to validate
formula_gaussian <- "score ~ diagnosis"
formula_binomial <- "diagnosis ~ score"
#
# Gaussian
#
# Create model function that returns a fitted model object
lm_model_fn <- function(train_data, formula, hyperparameters) {
lm(formula = formula, data = train_data)
}
# Create predict function that returns the predictions
lm_predict_fn <- function(test_data, model, formula,
hyperparameters, train_data) {
stats::predict(
object = model,
newdata = test_data,
type = "response",
allow.new.levels = TRUE
)
}
# Validate the model function
v <- validate_fn(
data,
formulas = formula_gaussian,
type = "gaussian",
model_fn = lm_model_fn,
predict_fn = lm_predict_fn,
partitions_col = ".partitions"
)
v
# Extract model object
v$Model[[1]]
#
# Binomial
#
# Create model function that returns a fitted model object
glm_model_fn <- function(train_data, formula, hyperparameters) {
glm(formula = formula, data = train_data, family = "binomial")
}
# Create predict function that returns the predictions
glm_predict_fn <- function(test_data, model, formula,
hyperparameters, train_data) {
stats::predict(
object = model,
newdata = test_data,
type = "response",
allow.new.levels = TRUE
)
}
# Validate the model function
validate_fn(
data,
formulas = formula_binomial,
type = "binomial",
model_fn = glm_model_fn,
predict_fn = glm_predict_fn,
partitions_col = ".partitions"
)
#
# Support Vector Machine (svm)
# with known hyperparameters
#
# Only run if the `e1071` package is installed
if (requireNamespace("e1071", quietly = TRUE)){
# Create model function that returns a fitted model object
# We use the hyperparameters arg to pass in the kernel and cost values
# These will usually have been found with cross_validate_fn()
svm_model_fn <- function(train_data, formula, hyperparameters) {
# Expected hyperparameters:
# - kernel
# - cost
if (!"kernel" %in% names(hyperparameters))
stop("'hyperparameters' must include 'kernel'")
if (!"cost" %in% names(hyperparameters))
stop("'hyperparameters' must include 'cost'")
e1071::svm(
formula = formula,
data = train_data,
kernel = hyperparameters[["kernel"]],
cost = hyperparameters[["cost"]],
scale = FALSE,
type = "C-classification",
probability = TRUE
)
}
# Create predict function that returns the predictions
svm_predict_fn <- function(test_data, model, formula,
hyperparameters, train_data) {
predictions <- stats::predict(
object = model,
newdata = test_data,
allow.new.levels = TRUE,
probability = TRUE
)
# Extract probabilities
probabilities <- dplyr::as_tibble(
attr(predictions, "probabilities")
)
# Return second column
probabilities[[2]]
}
# Specify hyperparameters to use
# We found these in the examples in ?cross_validate_fn()
svm_hparams <- list(
"kernel" = "linear",
"cost" = 10
)
# Validate the model function
validate_fn(
data,
formulas = formula_binomial,
type = "binomial",
model_fn = svm_model_fn,
predict_fn = svm_predict_fn,
hyperparameters = svm_hparams,
partitions_col = ".partitions"
)
} # closes `e1071` package check
# }
Run the code above in your browser using DataLab