# \donttest{
# Attach packages
library(cvms)
library(dplyr)
##
## Multinomial
##
# Identify the hardest-to-predict observations (per classifier)
# in the `predicted.musicians` dataset, which resembles the
# "Predictions" tibble from the evaluation results

# -- Predicted probabilities as input --

# The 30% of observations with the highest MAE scores
most_challenging(
  predicted.musicians,
  type = "multinomial",
  obs_id_col = "ID",
  prediction_cols = c("A", "B", "C", "D"),
  threshold = 0.30
)

# The 25% of observations with the highest Cross Entropy scores
most_challenging(
  predicted.musicians,
  type = "multinomial",
  obs_id_col = "ID",
  prediction_cols = c("A", "B", "C", "D"),
  metric = "Cross Entropy",
  threshold = 0.25
)

# -- Predicted classes as input --

# The 30% of observations with the lowest Accuracy scores
most_challenging(
  predicted.musicians,
  type = "multinomial",
  obs_id_col = "ID",
  prediction_cols = "Predicted Class",
  threshold = 0.30
)

# Group by classifier first to get the 40% lowest-scoring
# observations (on accuracy) within each classifier
predicted.musicians %>%
  dplyr::group_by(Classifier) %>%
  most_challenging(
    type = "multinomial",
    obs_id_col = "ID",
    prediction_cols = "Predicted Class",
    threshold = 0.40
  )

# Interpret the threshold as a score rather than a percentage:
# observations with Accuracy scores below 0.05
# (`prediction_cols` is left at the function's default here)
most_challenging(
  predicted.musicians,
  type = "multinomial",
  obs_id_col = "ID",
  threshold = 0.05,
  threshold_is = "score"
)
##
## Binomial
##
# Build a two-class subset of `predicted.musicians`:
# keep only rows whose target is "A" or "B", and rename the
# `B` column to `Prediction` so it can be passed as the
# prediction column in the binomial examples
binom_data <- predicted.musicians %>%
  dplyr::filter(Target %in% c("A", "B")) %>%
  dplyr::rename(Prediction = B)
# -- Probabilities as input --

# The 30% of observations with the highest MAE
most_challenging(
  binom_data,
  type = "binomial",
  obs_id_col = "ID",
  prediction_cols = "Prediction",
  threshold = 0.30
)

# The 30% of observations with the highest Cross Entropy
most_challenging(
  binom_data,
  type = "binomial",
  obs_id_col = "ID",
  prediction_cols = "Prediction",
  metric = "Cross Entropy",
  threshold = 0.30
)

# -- Predicted classes as input --

# The 30% of observations with the lowest Accuracy scores
most_challenging(
  binom_data,
  type = "binomial",
  obs_id_col = "ID",
  prediction_cols = "Predicted Class",
  threshold = 0.30
)
##
## Gaussian
##
# Simulate a regression-style dataset: 10 observation IDs, each
# appearing 3 times (30 rows), with normally distributed targets
# and predictions. Seeded so the example output is reproducible.
set.seed(1)
df <- data.frame(
  # `rep()` has no `n` argument; spell out `times` so the ID column
  # is explicitly 30 long instead of relying on `data.frame()`
  # recycling a length-10 vector to match the other columns.
  "Observation" = rep(1:10, times = 3),
  "Target" = rnorm(n = 30, mean = 25, sd = 5),
  "Prediction" = rnorm(n = 30, mean = 27, sd = 7)
)
# Threshold as a percentage (the default interpretation):
# observations with the 20% highest RMSE scores
most_challenging(
  df,
  type = "gaussian",
  threshold = 0.2
)

# Threshold as a raw score:
# observations with RMSE scores above 9
most_challenging(
  df,
  type = "gaussian",
  threshold = 9,
  threshold_is = "score"
)
# }
# Run the code above in your browser using DataLab