# NOT RUN {
# This walkthrough runs a data quality
# analysis on a very small table: the
# `small_table` dataset that is available
# in this package; start by printing it
small_table
# Decide up front how much failure is
# tolerable in terms of data quality:
# `action_levels()` is given proportional
# failure thresholds for the `warn`,
# `stop`, and `notify` states
al <- action_levels(warn_at = 0.10, stop_at = 0.25, notify_at = 0.35)
# Build the pointblank `agent` object;
# passing `al` as `actions` makes those
# static thresholds the default for all
# validation steps (a step can still
# override them), which makes the
# reporting a bit more useful
agent <- create_agent(
  read_fn = ~ small_table,
  tbl_name = "small_table",
  label = "An example.",
  actions = al
)
# As with any `agent` object, steps are
# added to the validation plan by piping
# the agent through as many validation
# functions as needed; the chain ends
# with `interrogate()`, which actually
# performs the validations and gathers
# the results
agent <- agent %>%
  col_exists(vars(date, date_time)) %>%
  col_vals_regex(vars(b), regex = "[0-9]-[a-z]{3}-[0-9]{3}") %>%
  rows_distinct() %>%
  col_vals_gt(vars(d), value = 100) %>%
  col_vals_lte(vars(c), value = 5) %>%
  col_vals_equal(vars(d), value = vars(d), na_pass = TRUE) %>%
  col_vals_between(
    vars(c),
    left = vars(a),
    right = vars(d),
    na_pass = TRUE
  ) %>%
  interrogate()
# Printing `agent` in the console shows
# the agent's report; to get hold of it
# as a `gt_tbl` object instead, use
# `get_agent_report()`
report <- agent %>% get_agent_report()
class(report)
# What can you do with the report?
# Print it from an R Markdown code
# chunk, use it in a **blastula** email,
# put it in a webpage, or further
# modify it with the **gt** package
# The report tells us that Step 4 had
# two failing test units (rows, really);
# `get_data_extracts()` lets us see
# those rows
get_data_extracts(agent, i = 4)
# `get_agent_x_list()` gives an x-list
# for the whole validation (8 steps) or,
# via `i`, for a single step; here we
# ask for the 4th step only
xl_step_4 <- get_agent_x_list(agent, i = 4)
# The different parts of the list can
# then be inspected one by one, e.g.,
# the fraction of test units that failed
xl_step_4$f_failed
# Printing the x-list itself shows
# everything that is available in it
xl_step_4
# An x-list not specific to any step
# will have way more information and a
# slightly different structure; see
# `help(get_agent_x_list)` for more info
# get_agent_x_list(agent)
# }
# Run the code above in your browser using DataLab