# generate a null distribution
null_dist <- gss %>%
  # we're interested in the number of hours worked per week
  specify(response = hours) %>%
  # hypothesizing that the mean is 40
  hypothesize(null = "point", mu = 40) %>%
  # generating data points for a null distribution
  generate(reps = 1000, type = "bootstrap") %>%
  # calculating a distribution of means
  calculate(stat = "mean")
  
# or a bootstrap distribution, omitting the hypothesize() step,
# for use in confidence intervals
boot_dist <- gss %>%
  specify(response = hours) %>%
  generate(reps = 1000, type = "bootstrap") %>%
  calculate(stat = "mean")
  
# we can easily plot the null distribution by piping into visualize
null_dist %>%
  visualize()
# we can add layers to the plot as in ggplot, as well... 
# find the point estimate---mean number of hours worked per week
point_estimate <- gss %>%
  specify(response = hours) %>%
  calculate(stat = "mean")
  
# find a confidence interval around the point estimate
ci <- boot_dist %>%
  get_confidence_interval(point_estimate = point_estimate,
                          # at the 95% confidence level
                          level = .95,
                          # using the standard error method
                          type = "se")  
  
# display a shading of the area beyond the p-value on the plot
null_dist %>%
  visualize() +
  shade_p_value(obs_stat = point_estimate, direction = "two-sided")
# ...or within the bounds of the confidence interval
null_dist %>%
  visualize() +
  shade_confidence_interval(ci)
  
# plot a theoretical sampling distribution by creating
# a theory-based distribution with `assume()`
sampling_dist <- gss %>%
  specify(response = hours) %>%
  assume(distribution = "t") 
  
visualize(sampling_dist)
# you can shade confidence intervals on top of
# theoretical distributions, too---the theoretical
# distribution will be recentered and rescaled to
# align with the confidence interval
visualize(sampling_dist) +
  shade_confidence_interval(ci)
# to plot both a theory-based and simulation-based null distribution,
# use a theorized statistic (i.e. one of t, z, F, or Chisq)
# and supply the simulation-based null distribution
null_dist_t <- gss %>%
  specify(response = hours) %>%
  hypothesize(null = "point", mu = 40) %>%
  generate(reps = 1000, type = "bootstrap") %>%
  calculate(stat = "t")
  
obs_stat <- gss %>%
  specify(response = hours) %>%
  hypothesize(null = "point", mu = 40) %>%
  calculate(stat = "t")
visualize(null_dist_t, method = "both")
visualize(null_dist_t, method = "both") +
  shade_p_value(obs_stat, "both")
# \donttest{
# to visualize distributions of coefficients for multiple
# explanatory variables, use a `fit()`-based workflow
# fit 1000 models with the `hours` variable permuted
null_fits <- gss %>%
 specify(hours ~ age + college) %>%
 hypothesize(null = "independence") %>%
 generate(reps = 1000, type = "permute") %>%
 fit()
 
null_fits
# visualize distributions of resulting coefficients
visualize(null_fits)
# the interface to add themes and other elements to patchwork
# plots (outputted by `visualize` when the inputted data
# is from the `fit()` function) is a bit different than adding
# them to ggplot2 plots.
library(ggplot2)
# to add a ggplot2 theme to a `calculate()`-based visualization, use `+`
null_dist %>% visualize() + theme_dark()
  
# to add a ggplot2 theme to a `fit()`-based visualization, use `&`
null_fits %>% visualize() & theme_dark()
# }
# More in-depth explanation of how to use the infer package
if (FALSE) {
vignette("infer")
}
Run the code above in your browser using DataLab