library(dplyr)
library(ggplot2)
library(infer)
library(tibble)
# Draw 1000 replicates from `gss`, each a sample of 50 rows taken
# without replacement, then print the result.
slices <- rep_slice_sample(gss, n = 50, reps = 1000)
slices
# For every replicate, compute the share of sampled respondents whose
# `college` value is "degree".
p_hats <- summarize(
  group_by(slices, replicate),
  prop_college = mean(college == "degree")
)
# Plot the sampling distribution of p_hat as a kernel density estimate.
# geom_density() draws an estimated density curve rather than a count of
# samples, so the y-axis is labelled "Density".
ggplot(p_hats, aes(x = prop_college)) +
  geom_density() +
  labs(
    x = "p_hat",
    y = "Density",
    title = "Sampling distribution of p_hat"
  )
# Weighted sampling: supply one weight per row via `weight_by`.
# The weights do not have to sum to 1; they are renormalized automatically.
df <- tibble(
  id = seq_len(5),
  letter = factor(letters[1:5])
)
df %>%
  rep_slice_sample(
    n = 2,
    reps = 5,
    weight_by = c(0.5, 0.4, 0.3, 0.2, 0.1)
  )
# Equivalent approach: store the weights as a column of the data and
# refer to that column by its unquoted name in `weight_by`.
df <- mutate(df, wts = c(0.5, 0.4, 0.3, 0.2, 0.1))
df %>%
  rep_slice_sample(n = 2, reps = 5, weight_by = wts)
# Run the code above in your browser using DataLab