library("ggplot2")
# Fix the RNG state so the simulated scores below are reproducible.
set.seed(1)
# Simulated data: 500 "second" rows followed by 500 "first" rows. Each group's
# score is drawn from its own normal distribution (mean 400 / sd 40 for
# "second", mean 600 / sd 100 for "first").
custom_data <- data.frame(
  kind = factor(rep(c("second", "first"), each = 500)),
  score = c(
    rnorm(n = 500, mean = 400, sd = 40),
    rnorm(n = 500, mean = 600, sd = 100)
  )
)
# Density of score for the untransformed data, one semi-transparent filled
# curve per level of kind.
ggplot(data = custom_data, mapping = aes(x = score, fill = kind)) +
  geom_density(alpha = 0.5)
# Transform the score column with lambda = 0.8, passing kind as the protected
# attribute.
# NOTE(review): disparate_impact_remover() is not defined in the visible part
# of this script and only ggplot2 is attached here; presumably it comes from
# the fairmodels package. Confirm it is loaded before running.
fixed_data <- disparate_impact_remover(
  data = custom_data,
  protected = custom_data$kind,
  features_to_transform = "score",
  lambda = 0.8
)
# Same density plot as for the raw data, now drawn from the transformed result.
ggplot(data = fixed_data, mapping = aes(x = score, fill = kind)) +
  geom_density(alpha = 0.5)
# The two extremes of lambda: lambda = 1 gives identical distributions, while
# lambda = 0 leaves the distributions (almost) as they were originally.
# Here: lambda = 0.
fixed_data_unchanged <- disparate_impact_remover(
  data = custom_data,
  protected = custom_data$kind,
  features_to_transform = "score",
  lambda = 0
)
# Per-kind density of score for the lambda = 0 result.
ggplot(data = fixed_data_unchanged, mapping = aes(x = score, fill = kind)) +
  geom_density(alpha = 0.5)
# Transform the score column with lambda = 1 (the opposite extreme of the
# lambda range from lambda = 0).
fixed_data_fully_changed <- disparate_impact_remover(
  data = custom_data,
  protected = custom_data$kind,
  features_to_transform = "score",
  lambda = 1
)
# Per-kind density of score, with each kind drawn in its own panel and the
# panels laid out in two rows.
ggplot(
  data = fixed_data_fully_changed,
  mapping = aes(x = score, fill = kind)
) +
  geom_density(alpha = 0.5) +
  facet_wrap(kind ~ ., nrow = 2)
# Run the code above in your browser using DataLab