Learn R Programming

broom.helpers

The broom.helpers package provides a suite of functions to work with regression model broom::tidy() tibbles.

The suite includes functions to group regression model terms by variable, insert reference and header rows for categorical variables, add variable labels, and more.

broom.helpers is used, in particular, by gtsummary::tbl_regression() for producing nicely formatted tables of model coefficients and by ggstats::ggcoef_model() for plotting model coefficients.

Installation & Documentation

To install the stable version:

install.packages("broom.helpers")

Documentation of stable version: https://larmarange.github.io/broom.helpers/

To install the development version:

remotes::install_github("larmarange/broom.helpers")

Documentation of development version: https://larmarange.github.io/broom.helpers/dev/

Examples

all-in-one wrapper

mod1 <- lm(Sepal.Length ~ Sepal.Width + Species, data = iris)
library(broom.helpers)
ex1 <- mod1 |> tidy_plus_plus()
ex1
#> # A tibble: 4 × 17
#>   term              variable  var_label var_class var_type var_nlevels contrasts
#>   <chr>             <chr>     <chr>     <chr>     <chr>          <int> <chr>    
#> 1 Sepal.Width       Sepal.Wi… Sepal.Wi… numeric   continu…          NA <NA>     
#> 2 Speciessetosa     Species   Species   factor    categor…           3 contr.tr…
#> 3 Speciesversicolor Species   Species   factor    categor…           3 contr.tr…
#> 4 Speciesvirginica  Species   Species   factor    categor…           3 contr.tr…
#> # ℹ 10 more variables: contrasts_type <chr>, reference_row <lgl>, label <chr>,
#> #   n_obs <dbl>, estimate <dbl>, std.error <dbl>, statistic <dbl>,
#> #   p.value <dbl>, conf.low <dbl>, conf.high <dbl>
dplyr::glimpse(ex1)
#> Rows: 4
#> Columns: 17
#> $ term           <chr> "Sepal.Width", "Speciessetosa", "Speciesversicolor", "S…
#> $ variable       <chr> "Sepal.Width", "Species", "Species", "Species"
#> $ var_label      <chr> "Sepal.Width", "Species", "Species", "Species"
#> $ var_class      <chr> "numeric", "factor", "factor", "factor"
#> $ var_type       <chr> "continuous", "categorical", "categorical", "categorica…
#> $ var_nlevels    <int> NA, 3, 3, 3
#> $ contrasts      <chr> NA, "contr.treatment", "contr.treatment", "contr.treatm…
#> $ contrasts_type <chr> NA, "treatment", "treatment", "treatment"
#> $ reference_row  <lgl> NA, TRUE, FALSE, FALSE
#> $ label          <chr> "Sepal.Width", "setosa", "versicolor", "virginica"
#> $ n_obs          <dbl> 150, 50, 50, 50
#> $ estimate       <dbl> 0.8035609, 0.0000000, 1.4587431, 1.9468166
#> $ std.error      <dbl> 0.1063390, NA, 0.1121079, 0.1000150
#> $ statistic      <dbl> 7.556598, NA, 13.011954, 19.465255
#> $ p.value        <dbl> 4.187340e-12, NA, 3.478232e-26, 2.094475e-42
#> $ conf.low       <dbl> 0.5933983, NA, 1.2371791, 1.7491525
#> $ conf.high      <dbl> 1.013723, NA, 1.680307, 2.144481

mod2 <- glm(
  response ~ poly(age, 3) + stage + grade * trt,
  na.omit(gtsummary::trial),
  family = binomial,
  contrasts = list(
    stage = contr.treatment(4, base = 3),
    grade = contr.sum
  )
)
ex2 <- mod2 |>
  tidy_plus_plus(
    exponentiate = TRUE,
    variable_labels = c(age = "Age (in years)"),
    add_header_rows = TRUE,
    show_single_row = "trt"
  )
ex2
#> # A tibble: 17 × 19
#>    term   variable var_label var_class var_type var_nlevels header_row contrasts
#>    <chr>  <chr>    <chr>     <chr>     <chr>          <int> <lgl>      <chr>    
#>  1 <NA>   age      Age (in … nmatrix.3 continu…          NA TRUE       <NA>     
#>  2 poly(… age      Age (in … nmatrix.3 continu…          NA FALSE      <NA>     
#>  3 poly(… age      Age (in … nmatrix.3 continu…          NA FALSE      <NA>     
#>  4 poly(… age      Age (in … nmatrix.3 continu…          NA FALSE      <NA>     
#>  5 <NA>   stage    T Stage   factor    categor…           4 TRUE       contr.tr…
#>  6 stage1 stage    T Stage   factor    categor…           4 FALSE      contr.tr…
#>  7 stage2 stage    T Stage   factor    categor…           4 FALSE      contr.tr…
#>  8 stage3 stage    T Stage   factor    categor…           4 FALSE      contr.tr…
#>  9 stage4 stage    T Stage   factor    categor…           4 FALSE      contr.tr…
#> 10 <NA>   grade    Grade     factor    categor…           3 TRUE       contr.sum
#> 11 grade1 grade    Grade     factor    categor…           3 FALSE      contr.sum
#> 12 grade2 grade    Grade     factor    categor…           3 FALSE      contr.sum
#> 13 grade3 grade    Grade     factor    categor…           3 FALSE      contr.sum
#> 14 trtDr… trt      Chemothe… character dichoto…           2 NA         contr.tr…
#> 15 <NA>   grade:t… Grade * … <NA>      interac…          NA TRUE       <NA>     
#> 16 grade… grade:t… Grade * … <NA>      interac…          NA FALSE      <NA>     
#> 17 grade… grade:t… Grade * … <NA>      interac…          NA FALSE      <NA>     
#> # ℹ 11 more variables: contrasts_type <chr>, reference_row <lgl>, label <chr>,
#> #   n_obs <dbl>, n_event <dbl>, estimate <dbl>, std.error <dbl>,
#> #   statistic <dbl>, p.value <dbl>, conf.low <dbl>, conf.high <dbl>
dplyr::glimpse(ex2)
#> Rows: 17
#> Columns: 19
#> $ term           <chr> NA, "poly(age, 3)1", "poly(age, 3)2", "poly(age, 3)3", …
#> $ variable       <chr> "age", "age", "age", "age", "stage", "stage", "stage", …
#> $ var_label      <chr> "Age (in years)", "Age (in years)", "Age (in years)", "…
#> $ var_class      <chr> "nmatrix.3", "nmatrix.3", "nmatrix.3", "nmatrix.3", "fa…
#> $ var_type       <chr> "continuous", "continuous", "continuous", "continuous",…
#> $ var_nlevels    <int> NA, NA, NA, NA, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, NA, NA, NA
#> $ header_row     <lgl> TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, F…
#> $ contrasts      <chr> NA, NA, NA, NA, "contr.treatment(base=3)", "contr.treat…
#> $ contrasts_type <chr> NA, NA, NA, NA, "treatment", "treatment", "treatment", …
#> $ reference_row  <lgl> NA, NA, NA, NA, NA, FALSE, FALSE, TRUE, FALSE, NA, FALS…
#> $ label          <chr> "Age (in years)", "Age (in years)", "Age (in years)²", …
#> $ n_obs          <dbl> NA, 92, 56, 80, NA, 46, 50, 35, 42, NA, 63, 53, 57, 90,…
#> $ n_event        <dbl> NA, 31, 17, 22, NA, 17, 12, 13, 12, NA, 20, 16, 18, 30,…
#> $ estimate       <dbl> NA, 20.2416394, 1.2337899, 0.4931553, NA, 1.0047885, 0.…
#> $ std.error      <dbl> NA, 2.3254455, 2.3512842, 2.3936657, NA, 0.4959893, 0.5…
#> $ statistic      <dbl> NA, 1.29340459, 0.08935144, -0.29533409, NA, 0.00963137…
#> $ p.value        <dbl> NA, 0.1958712, 0.9288026, 0.7677387, NA, 0.9923154, 0.1…
#> $ conf.low       <dbl> NA, 0.225454425, 0.007493208, 0.004745694, NA, 0.379776…
#> $ conf.high      <dbl> NA, 2315.587655, 100.318341, 74.226179, NA, 2.683385, 1…

fine control

ex3 <- mod1 |>
  # perform initial tidying of model
  tidy_and_attach() |>
  # add reference row
  tidy_add_reference_rows() |>
  # add term labels
  tidy_add_term_labels() |>
  # remove intercept
  tidy_remove_intercept()
ex3
#> # A tibble: 4 × 16
#>   term              variable  var_label var_class var_type var_nlevels contrasts
#>   <chr>             <chr>     <chr>     <chr>     <chr>          <int> <chr>    
#> 1 Sepal.Width       Sepal.Wi… Sepal.Wi… numeric   continu…          NA <NA>     
#> 2 Speciessetosa     Species   Species   factor    categor…           3 contr.tr…
#> 3 Speciesversicolor Species   Species   factor    categor…           3 contr.tr…
#> 4 Speciesvirginica  Species   Species   factor    categor…           3 contr.tr…
#> # ℹ 9 more variables: contrasts_type <chr>, reference_row <lgl>, label <chr>,
#> #   estimate <dbl>, std.error <dbl>, statistic <dbl>, p.value <dbl>,
#> #   conf.low <dbl>, conf.high <dbl>
dplyr::glimpse(ex3)
#> Rows: 4
#> Columns: 16
#> $ term           <chr> "Sepal.Width", "Speciessetosa", "Speciesversicolor", "S…
#> $ variable       <chr> "Sepal.Width", "Species", "Species", "Species"
#> $ var_label      <chr> "Sepal.Width", "Species", "Species", "Species"
#> $ var_class      <chr> "numeric", "factor", "factor", "factor"
#> $ var_type       <chr> "continuous", "categorical", "categorical", "categorica…
#> $ var_nlevels    <int> NA, 3, 3, 3
#> $ contrasts      <chr> NA, "contr.treatment", "contr.treatment", "contr.treatm…
#> $ contrasts_type <chr> NA, "treatment", "treatment", "treatment"
#> $ reference_row  <lgl> NA, TRUE, FALSE, FALSE
#> $ label          <chr> "Sepal.Width", "setosa", "versicolor", "virginica"
#> $ estimate       <dbl> 0.8035609, NA, 1.4587431, 1.9468166
#> $ std.error      <dbl> 0.1063390, NA, 0.1121079, 0.1000150
#> $ statistic      <dbl> 7.556598, NA, 13.011954, 19.465255
#> $ p.value        <dbl> 4.187340e-12, NA, 3.478232e-26, 2.094475e-42
#> $ conf.low       <dbl> 0.5933983, NA, 1.2371791, 1.7491525
#> $ conf.high      <dbl> 1.013723, NA, 1.680307, 2.144481

ex4 <- mod2 |>
  # perform initial tidying of model
  tidy_and_attach(exponentiate = TRUE) |>
  # add variable labels, including a custom value for age
  tidy_add_variable_labels(labels = c(age = "Age in years")) |>
  # add reference rows for categorical variables
  tidy_add_reference_rows() |>
  # add an estimate value to the reference rows
  tidy_add_estimate_to_reference_rows(exponentiate = TRUE) |>
  # add header rows for categorical variables
  tidy_add_header_rows()
ex4
#> # A tibble: 20 × 17
#>    term   variable var_label var_class var_type var_nlevels header_row contrasts
#>    <chr>  <chr>    <chr>     <chr>     <chr>          <int> <lgl>      <chr>    
#>  1 (Inte… (Interc… (Interce… <NA>      interce…          NA NA         <NA>     
#>  2 <NA>   age      Age in y… nmatrix.3 continu…          NA TRUE       <NA>     
#>  3 poly(… age      Age in y… nmatrix.3 continu…          NA FALSE      <NA>     
#>  4 poly(… age      Age in y… nmatrix.3 continu…          NA FALSE      <NA>     
#>  5 poly(… age      Age in y… nmatrix.3 continu…          NA FALSE      <NA>     
#>  6 <NA>   stage    T Stage   factor    categor…           4 TRUE       contr.tr…
#>  7 stage1 stage    T Stage   factor    categor…           4 FALSE      contr.tr…
#>  8 stage2 stage    T Stage   factor    categor…           4 FALSE      contr.tr…
#>  9 stage3 stage    T Stage   factor    categor…           4 FALSE      contr.tr…
#> 10 stage4 stage    T Stage   factor    categor…           4 FALSE      contr.tr…
#> 11 <NA>   grade    Grade     factor    categor…           3 TRUE       contr.sum
#> 12 grade1 grade    Grade     factor    categor…           3 FALSE      contr.sum
#> 13 grade2 grade    Grade     factor    categor…           3 FALSE      contr.sum
#> 14 grade3 grade    Grade     factor    categor…           3 FALSE      contr.sum
#> 15 <NA>   trt      Chemothe… character dichoto…           2 TRUE       contr.tr…
#> 16 trtDr… trt      Chemothe… character dichoto…           2 FALSE      contr.tr…
#> 17 trtDr… trt      Chemothe… character dichoto…           2 FALSE      contr.tr…
#> 18 <NA>   grade:t… Grade * … <NA>      interac…          NA TRUE       <NA>     
#> 19 grade… grade:t… Grade * … <NA>      interac…          NA FALSE      <NA>     
#> 20 grade… grade:t… Grade * … <NA>      interac…          NA FALSE      <NA>     
#> # ℹ 9 more variables: contrasts_type <chr>, reference_row <lgl>, label <chr>,
#> #   estimate <dbl>, std.error <dbl>, statistic <dbl>, p.value <dbl>,
#> #   conf.low <dbl>, conf.high <dbl>
dplyr::glimpse(ex4)
#> Rows: 20
#> Columns: 17
#> $ term           <chr> "(Intercept)", NA, "poly(age, 3)1", "poly(age, 3)2", "p…
#> $ variable       <chr> "(Intercept)", "age", "age", "age", "age", "stage", "st…
#> $ var_label      <chr> "(Intercept)", "Age in years", "Age in years", "Age in …
#> $ var_class      <chr> NA, "nmatrix.3", "nmatrix.3", "nmatrix.3", "nmatrix.3",…
#> $ var_type       <chr> "intercept", "continuous", "continuous", "continuous", …
#> $ var_nlevels    <int> NA, NA, NA, NA, NA, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2,…
#> $ header_row     <lgl> NA, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALS…
#> $ contrasts      <chr> NA, NA, NA, NA, NA, "contr.treatment(base=3)", "contr.t…
#> $ contrasts_type <chr> NA, NA, NA, NA, NA, "treatment", "treatment", "treatmen…
#> $ reference_row  <lgl> NA, NA, NA, NA, NA, NA, FALSE, FALSE, TRUE, FALSE, NA, …
#> $ label          <chr> "(Intercept)", "Age in years", "Age in years", "Age in …
#> $ estimate       <dbl> 0.5266376, NA, 20.2416394, 1.2337899, 0.4931553, NA, 1.…
#> $ std.error      <dbl> 0.4130930, NA, 2.3254455, 2.3512842, 2.3936657, NA, 0.4…
#> $ statistic      <dbl> -1.55229592, NA, 1.29340459, 0.08935144, -0.29533409, N…
#> $ p.value        <dbl> 0.1205914, NA, 0.1958712, 0.9288026, 0.7677387, NA, 0.9…
#> $ conf.low       <dbl> 0.227717775, NA, 0.225454425, 0.007493208, 0.004745694,…
#> $ conf.high      <dbl> 1.164600, NA, 2315.587655, 100.318341, 74.226179, NA, 2…

Copy Link

Version

Install

install.packages('broom.helpers')

Monthly Downloads

20,179

Version

1.17.0

License

GPL (>= 3)

Issues

Pull Requests

Stars

Forks

Last Published

August 28th, 2024

Functions in broom.helpers (1.17.0)

model_get_contrasts

Get contrasts used in the model
model_get_model

Get the model from model objects
model_get_response_variable

Get the name of the response variable
model_get_weights

Get sampling weights used by a model
model_get_xlevels

Get xlevels used in the model
model_get_terms

Get the terms of a model
model_get_model_matrix

Get the model matrix of a model
model_get_assign

Get the assign attribute of model matrix of a model
.select_to_varnames

Variable selector
model_get_pairwise_contrasts

Get pairwise comparison of the levels of a categorical variable
model_get_nlevels

Get the number of levels for each factor used in xlevels
model_get_offset

Get model offset
model_get_n

Get the number of observations
model_identify_variables

Identify for each coefficient of a model the corresponding variable
scope_tidy

Scoping a tidy tibble allowing to tidy select
model_get_response

Get model response
reexports

Objects exported from other packages
model_list_variables

List all the variables used in a model
seq_range

Sequence generation between min and max
supported_models

Listing of Supported Models
model_list_higher_order_variables

List higher order variables of a model
model_list_terms_levels

List levels of categorical terms
select_helpers

Select helper functions
model_list_contrasts

List contrasts used by a model
tidy_add_variable_labels

Add variable labels
tidy_add_reference_rows

Add reference rows for categorical variables
tidy_add_term_labels

Add term labels
tidy_disambiguate_terms

Disambiguate terms
model_get_model_frame

Get the model frame of a model
tidy_marginal_means

Marginal Means with marginaleffects::marginal_means()
tidy_ggpredict

Marginal Predictions with ggeffects::ggpredict()
tidy_parameters

Tidy a model with parameters package
tidy_add_estimate_to_reference_rows

Add an estimate value to reference rows for categorical variables
tidy_remove_intercept

Remove intercept(s)
tidy_all_effects

Marginal Predictions at the mean with effects::allEffects()
tidy_marginal_predictions

Marginal Predictions with marginaleffects::avg_predictions()
tidy_add_coefficients_type

Add coefficients type and label as attributes
tidy_select_variables

Select variables to keep/drop
tidy_add_pairwise_contrasts

Add pairwise contrasts for categorical variables
tidy_attach_model

Attach a full model to the tibble of model terms
tidy_add_n

Add the (weighted) number of observations
tidy_plus_plus

Tidy a model and compute additional information
tidy_avg_comparisons

Marginal Contrasts with marginaleffects::avg_comparisons()
tidy_identify_variables

Identify the variable corresponding to each model coefficient
tidy_add_contrasts

Add contrasts type for categorical variables
tidy_margins

Average Marginal Effects with margins::margins()
tidy_multgee

Tidy a multgee model
tidy_marginal_contrasts

Marginal Contrasts with marginaleffects::avg_comparisons()
tidy_broom

Tidy with broom::tidy() and checks that all arguments are used
tidy_avg_slopes

Marginal Slopes / Effects with marginaleffects::avg_slopes()
tidy_add_header_rows

Add header rows for variables with several terms
tidy_with_broom_or_parameters

Tidy a model with broom or parameters
tidy_zeroinfl

Tidy a zeroinfl or a hurdle model
assert_package

Check a package installation status or minimum required version
.clean_backticks

Remove backticks around variable names
model_compute_terms_contributions

Compute a matrix of terms contributions
model_get_coefficients_type

Get coefficient type
.escape_regex

Escapes any characters that would have special meaning in a regular expression
.formula_list_to_named_list

Convert formula selector to a named list
.generic_selector

Generate a custom selector function