# Attach srvyr, which supplies `%>%`, as_survey_design() and the survey_*()
# summary helpers used throughout this example.
library(srvyr)

# Load the `api` example datasets shipped with the survey package; `apistrat`
# is the one used to build the design below.
data(api, package = "survey")

# Stratified survey design: strata are taken from `stype` and sampling
# weights from `pw`.
dstrata <- apistrat %>%
  as_survey_design(strata = stype, weights = pw)
# Ungrouped summary: survey-weighted means of api99 and api00, plus the mean
# of their row-wise difference (api00 - api99).
summarise(
  dstrata,
  api99_mn = survey_mean(api99),
  api00_mn = survey_mean(api00),
  api_diff = survey_mean(api00 - api99)
)
# Same three summaries as above, but computed separately within each level
# of `stype` by grouping the design first.
dstrata_grp <- group_by(dstrata, stype)

summarise(
  dstrata_grp,
  api99_mn = survey_mean(api99),
  api00_mn = survey_mean(api00),
  api_diff = survey_mean(api00 - api99)
)
# `dplyr::across()` applies one summary function to several columns
# programmatically; see https://dplyr.tidyverse.org/articles/colwise.html
# for details.
# Basic example: compute the survey total of two columns in a single call.
total_vars <- c("enroll", "api.stu")
summarize(dstrata, across(all_of(total_vars), survey_total))
# Expressions may be used both in summarize() arguments and inside the
# survey_*() functions. Here a binary indicator (api99 > 700) is built on the
# fly and the resulting proportion is multiplied by 100 to give a percentage.
summarize(
  dstrata,
  api99_over_700_pct = 100 * survey_mean(api99 > 700)
)

# Caution: a variance does not scale linearly with the estimate (it scales
# with the square of the multiplier), so multiplying the result by 100 when
# vartype = "var" reports the wrong variance. The call below is kept as a
# deliberate demonstration of that mistake.
summarize(
  dstrata,
  api99_over_700_pct = 100 * survey_mean(api99 > 700, vartype = "var")
)
# Wrong variance!
# Run the code above in your browser using DataLab