library(ggplot2)
library(dplyr)
library(tidyr)
library(forcats)
# Distribution of fact_correct by age group
ggplot(fact_opinion, aes(x = age_group, y = fact_correct)) +
geom_boxplot() +
labs(
x = "Age group",
y = "Number correct (factual)",
title = "Number of factual statements classified correctly by age group"
)
# Distribution of opinion_correct by age group
ggplot(fact_opinion, aes(x = age_group, y = opinion_correct)) +
geom_boxplot() +
labs(
x = "Age group",
y = "Number correct (opinion)",
title = "Number of opinion statements classified correctly by age group"
)
# Replicating the figure from Pew report (see source for link)
fact_opinion %>%
mutate(
facts = case_when(
fact_correct <= 2 ~ "Two or fewer",
fact_correct %in% c(3, 4) ~ "Three or four",
fact_correct == 5 ~ "All five"
),
facts = fct_relevel(facts, "Two or fewer", "Three or four", "All five"),
opinions = case_when(
opinion_correct <= 2 ~ "Two or fewer",
opinion_correct %in% c(3, 4) ~ "Three or four",
opinion_correct == 5 ~ "All five"
),
opinions = fct_relevel(opinions, "Two or fewer", "Three or four", "All five")
) %>%
select(-fact_correct, -opinion_correct) %>%
pivot_longer(cols = -age_group, names_to = "question_type", values_to = "n_correct") %>%
ggplot(aes(y = fct_rev(age_group), fill = n_correct)) +
geom_bar(position = "fill") +
facet_wrap(~question_type, ncol = 1) +
scale_fill_viridis_d(guide = guide_legend(reverse = TRUE)) +
labs(
x = "Proportion",
y = "Age group",
fill = "Number of\ncorrect\nclassifications"
)
Run the code above in your browser using DataLab