# NOT RUN {
# Build a simulation-based null distribution for weekly hours worked.
null_dist <- gss %>%
  # response variable: hours worked per week
  specify(response = hours) %>%
  # point null hypothesis: the mean equals 40
  hypothesize(null = "point", mu = 40) %>%
  # draw 1000 bootstrap replicates for the null distribution
  generate(reps = 1000, type = "bootstrap") %>%
  # reduce each replicate to a t statistic
  calculate(stat = "t")
# Plot the simulated null distribution; calling visualize() directly is
# equivalent to piping null_dist into it.
visualize(null_dist)
# we can add layers to the plot as in ggplot, as well...
# Compute the observed statistic from the original sample. Because
# calculate(stat = "t") is used, `point_estimate` holds a t statistic for
# the null mu = 40, not the raw mean of hours.
point_estimate <- gss %>%
  specify(response = hours) %>%
  hypothesize(null = "point", mu = 40) %>%
  calculate(stat = "t")
# 95% confidence interval around the point estimate, computed from
# null_dist using the standard error method.
ci <- get_confidence_interval(
  null_dist,
  point_estimate = point_estimate,
  level = 0.95,
  type = "se"
)
# Overlay the p-value region on the null distribution: shading is drawn
# relative to the observed statistic, in both tails.
visualize(null_dist) +
  shade_p_value(obs_stat = point_estimate, direction = "two-sided")

# Overlay the confidence interval on a plot of the same null distribution.
visualize(null_dist) +
  shade_confidence_interval(ci)
# A theory-based null distribution needs no generate() step: compute the
# statistic straight after hypothesize() and pass
# `method = "theoretical"` to `visualize()`.
null_dist_theoretical <- gss %>%
  specify(response = hours) %>%
  hypothesize(null = "point", mu = 40) %>%
  calculate(stat = "t")

null_dist_theoretical %>%
  visualize(method = "theoretical")
# Passing `method = "both"` together with the simulation-based null
# distribution plots the theory-based and simulation-based null
# distributions at once.
null_dist %>%
  visualize(method = "both")
# More in-depth explanation of how to use the infer package
# }
# NOT RUN {
# Open the vignette registered under the topic "infer".
utils::vignette("infer")
# }
# NOT RUN {
# }
# Run the code above in your browser using DataLab