# Packages used by this first section. yardstick is attached explicitly
# because conf_mat() is called below without a namespace prefix and the
# `hpc_cv` example data is loaded from it; without it the script cannot run
# on its own.
library(dplyr)
library(yardstick)

data("hpc_cv")

# The confusion matrix from a single assessment set (i.e. fold):
# keep only the rows belonging to "Fold01", then cross-tabulate the
# `obs` and `pred` columns.
cm <- hpc_cv %>%
  filter(Resample == "Fold01") %>%
  conf_mat(obs, pred)

cm
# Average confusion matrix over all folds, where every cell is expressed as
# the proportion of its fold's data that falls into it.

# Step 1: build one confusion matrix per fold and flatten each of them with
# the `tidy` method, so every cell becomes a row (the `name` and `value`
# columns are used further down).
library(tidyr)
cells_per_resample <- hpc_cv %>%
  group_by(Resample) %>%
  conf_mat(obs, pred) %>%
  mutate(tidied = lapply(conf_mat, tidy)) %>%
  unnest(cols = tidied)

# Step 2: count the rows in each fold and attach that total to the fold's
# cells.
counts_per_resample <- hpc_cv %>%
  group_by(Resample) %>%
  summarise(total = n()) %>%
  left_join(cells_per_resample, by = "Resample") %>%
  # Step 3: convert each cell count into a within-fold proportion.
  mutate(prop = value / total) %>%
  # Step 4: average every cell over the folds. The result has one row per
  # cell `name`; the matrix reshape that follows reads `prop` in this row
  # order.
  group_by(name) %>%
  summarise(prop = mean(prop))

counts_per_resample
# Now reshape the averaged proportions into a square matrix.
# The matrix is filled row by row, and its size is taken from the number of
# class levels instead of a hard-coded 4, so it stays in step with the labels
# used for the row and column names.
# NOTE(review): the fill order relies on the row order of
# `counts_per_resample` (sorted by cell name); presumably this stops being
# row-major once there are 10 or more classes -- confirm before reusing on
# such data.
mean_cmat <- matrix(
  counts_per_resample$prop,
  byrow = TRUE,
  ncol = nlevels(hpc_cv$obs),
  dimnames = list(levels(hpc_cv$obs), levels(hpc_cv$obs))
)

round(mean_cmat, 3)
# The confusion matrix can quickly be visualized using autoplot().
# Both calls below draw the single-fold matrix `cm` created earlier in this
# script, once as a mosaic plot and once as a heatmap.
library(ggplot2)
# Mosaic display of `cm`.
autoplot(cm, type = "mosaic")
# Heatmap display of the same matrix.
autoplot(cm, type = "heatmap")
# Run the code above in your browser using DataLab