# simulation-based interval, default method ---------------------------
# Build a bootstrap distribution for the mean of `hours` (hours worked
# per week): draw 1000 bootstrap resamples of `gss` and take the mean
# of each one.
boot_dist <- gss %>%
  specify(response = hours) %>%
  generate(reps = 1000, type = "bootstrap") %>%
  calculate(stat = "mean")

# 95% confidence interval from the bootstrap distribution. No `type` is
# supplied, so the package's default method is used (the percentile
# method) and no point estimate is needed.
get_confidence_interval(boot_dist, level = 0.95)
# The "se" and "bias-corrected" interval types need the observed
# statistic, so compute the mean of `hours` on the original sample.
sample_mean <- gss %>%
  specify(response = hours) %>%
  calculate(stat = "mean")

# 95% confidence interval around `sample_mean` using the standard error
# method, based on the bootstrap distribution `boot_dist`.
get_confidence_interval(
  boot_dist,
  level = 0.95,
  type = "se",
  point_estimate = sample_mean
)
# using a theoretical distribution -----------------------------------
# Describe the sampling distribution of `hours` with a theoretical
# t distribution instead of simulated resamples.
sampling_dist <- gss %>%
  specify(response = hours) %>%
  assume("t")

# 95% confidence interval from the theoretical distribution. The point
# estimate (`sample_mean`, computed earlier) is required in this case.
sampling_dist %>%
  get_confidence_interval(
    point_estimate = sample_mean,
    level = 0.95
  )
# using a model fitting workflow -----------------------
# Fit a linear model for weekly hours worked (`hours`) with respondent
# age (`age`) and degree status (`college`) as predictors.
observed_fit <- gss %>%
  specify(hours ~ age + college) %>%
  fit()

# Print the fit to the observed data.
observed_fit

# Fit the same model to 100 resamples of `gss` in which the response
# `hours` has been permuted. Compared with the pipeline above, the only
# additions are the `hypothesize()` and `generate()` steps.
null_fits <- gss %>%
  specify(hours ~ age + college) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  fit()

# Print the fits to the permuted resamples.
null_fits

# 95% confidence intervals computed from the resampled fits, with the
# observed fit supplied as the point estimate.
null_fits %>%
  get_confidence_interval(
    point_estimate = observed_fit,
    level = 0.95
  )
# For a more in-depth explanation of how to use the infer package, open
# the package vignette. The call sits behind `if (FALSE)` so it is never
# executed when this script is run.
if (FALSE) {
  vignette("infer")
}
# Run the code above in your browser using DataLab