# Work on a copy of mtcars so the built-in data set itself is not modified.
df <- mtcars

# Set columns 1 through 11 of the first row to NA to make things interesting,
# and rename that row so it is easy to spot.
df[1, 1:11] <- NA
rownames(df)[1] <- "Missing Car"

# Add value labels for "am".
df <- add_val_labs(
  data = df,
  vars = "am",
  vals = c(0, 1),
  labs = c("automatic", "manual")
)

# Add value labels for "carb".
df <- add_val_labs(
  data = df,
  vars = "carb",
  vals = c(1, 2, 3, 4, 6, 8),
  labs = c("1-carb", "2-carbs", "3-carbs", "4-carbs", "6-carbs", "8-carbs")
)

# With add_val1() the variable name may be given unquoted.
# Note: this is add_val1(), not add_val_labs(); its argument is "var"
# (singular) rather than "vars".
df <- add_val1(
  data = df,
  var = cyl,
  vals = c(4, 6, 8),
  labs = c("four-cyl", "six-cyl", "eight-cyl")
)

# Add value labels for "gear".
df <- add_val_labs(
  data = df,
  vars = "gear",
  vals = 3:5,
  labs = c("3-speed", "4-speed", "5-speed")
)

# Look up the value-label mapping attached so far.
get_val_labs(df)
# Introduce other "irregular" values: NA, NaN and -Inf in the first three
# rows of "am" (row 1 was already NA from the earlier whole-row assignment).
df$am[1:3] <- c(NA, NaN, -Inf)

# Writing the string "NAN" into "cyl" coerces that whole column to character.
df$cyl[5] <- "NAN"

# Take a look at the first rows.
head(df)
# Demonstrate tabl(), the frequency tabulation function.
# This is the "first call" that the later examples are compared against:
# labels on, sorted by variable values, NA/irregular values excluded,
# counts returned.
tabl(
  df,
  vars = c("cyl", "am"),
  labs.on = TRUE, # use variable value labels
  sort.freq = FALSE, # sort by the vars' values, not by frequency
  irreg.rm = TRUE, # NAs and the like are suppressed
  prop.digits = NULL # return counts, not proportions
)

# Same as the "first call", except value labels are switched off.
tabl(
  df,
  vars = c("cyl", "am"),
  labs.on = FALSE, # use the raw variable values
  sort.freq = FALSE, # sort by the vars' values, not by frequency
  irreg.rm = TRUE, # NAs and the like are suppressed
  prop.digits = NULL # return counts, not proportions
)

# Same as the "first call", except proportions are returned instead of counts.
tabl(
  df,
  vars = c("cyl", "am"),
  labs.on = TRUE, # use variable value labels
  sort.freq = FALSE, # sort by the vars' values, not by frequency
  irreg.rm = TRUE, # NAs and the like are suppressed
  prop.digits = 3 # return proportions, rounded to the 3rd decimal
)

# Same as the "first call", except rows are sorted by frequency count.
tabl(
  df,
  vars = c("cyl", "am"),
  labs.on = TRUE, # use variable value labels
  sort.freq = TRUE, # sort in order of descending frequency
  irreg.rm = TRUE, # NAs and the like are suppressed
  prop.digits = NULL # return counts, not proportions
)
# Like the "first call", but now with frequency weights; note that here
# NA/irregular values are kept in the table (irreg.rm = FALSE).
set.seed(2944) # for reproducibility

# Create (fake) frequency weights, one per row, drawn from 10..50.
df$freqwt <- sample(10:50, nrow(df), replace = TRUE)

tabl(
  df,
  vars = c("cyl", "am"),
  wt = "freqwt", # use frequency weights
  labs.on = TRUE, # use variable value labels
  sort.freq = FALSE, # sort by the vars' values, not by frequency
  irreg.rm = FALSE, # NAs and the like are included/shown
  prop.digits = NULL # return counts, not proportions
)

# The weight column is no longer needed.
df$freqwt <- NULL
# Now use extremely large weights to illustrate the div.by argument.
set.seed(428441) # for reproducibility

# Large (fake) frequency weights, one per row, drawn from 1e6..1e7.
df$freqwt <- sample(1000000:10000000, nrow(df), replace = TRUE)

# Baseline: weighted counts with no div.by applied.
tabl(
  df,
  vars = c("cyl", "am"),
  wt = "freqwt", # use frequency weights
  labs.on = TRUE, # use variable value labels
  sort.freq = FALSE, # sort by the vars' values, not by frequency
  irreg.rm = FALSE, # NAs and the like are included/shown
  prop.digits = NULL # return counts, not proportions
)

# div.by in millions.
tabl(
  df,
  vars = c("cyl", "am"),
  wt = "freqwt", # use frequency weights
  labs.on = TRUE, # use variable value labels
  sort.freq = FALSE, # sort by the vars' values, not by frequency
  irreg.rm = FALSE, # NAs and the like are included/shown
  prop.digits = NULL, # return counts, not proportions
  div.by = "1M" # one million
)

# div.by in tens of millions.
tabl(
  df,
  vars = c("cyl", "am"),
  wt = "freqwt", # use frequency weights
  labs.on = TRUE, # use variable value labels
  sort.freq = FALSE, # sort by the vars' values, not by frequency
  irreg.rm = FALSE, # NAs and the like are included/shown
  prop.digits = NULL, # return counts, not proportions
  div.by = "10M" # ten million
)

# div.by of 10000, given as a number.
tabl(
  df,
  vars = c("cyl", "am"),
  wt = "freqwt", # use frequency weights
  labs.on = TRUE, # use variable value labels
  sort.freq = FALSE, # sort by the vars' values, not by frequency
  irreg.rm = FALSE, # NAs and the like are included/shown
  prop.digits = NULL, # return counts, not proportions
  div.by = 10000 # ten thousand; could've used div.by = "10K"
)

# div.by of 10000 again, this time using the string shorthand.
tabl(
  df,
  vars = c("cyl", "am"),
  wt = "freqwt", # use frequency weights
  labs.on = TRUE, # use variable value labels
  sort.freq = FALSE, # sort by the vars' values, not by frequency
  irreg.rm = FALSE, # NAs and the like are included/shown
  prop.digits = NULL, # return counts, not proportions
  div.by = "10K" # ten thousand; could've used div.by = 10000
)

# The weight column is no longer needed.
df$freqwt <- NULL
# Turn labels off to make the output more compact.
# Zero-count combinations are not shown (zero.rm = TRUE).
# NA/irregular values are not shown (irreg.rm = TRUE).
# Many-valued numeric variables are converted to quantile categories via the
# qtiles argument.
tabl(
  df,
  vars = c("am", "gear", "carb", "mpg"),
  qtiles = 4, # many-valued numerics converted to quantiles
  labs.on = FALSE, # use values, not variable value labels
  sort.freq = FALSE, # sort by the vars' values, not by frequency
  irreg.rm = TRUE, # NAs and the like are suppressed
  zero.rm = TRUE, # combinations that never occur are suppressed
  prop.digits = NULL, # return counts, not proportions
  max.unique.vals = 10 # drop from table any var with >10 distinct values
)

# Same variables as above, but now NA/irregular category values are included,
# zero.rm is FALSE so unobserved (zero-count) combinations are included too,
# and rows are sorted by frequency.
tabl(
  df,
  vars = c("am", "gear", "carb", "mpg"),
  qtiles = 4,
  labs.on = FALSE, # use values, not variable value labels
  sort.freq = TRUE, # sort by frequency
  irreg.rm = FALSE, # preserve/include NAs and irregular values
  zero.rm = FALSE, # include non-observed combinations
  prop.digits = NULL, # return counts, not proportions
  max.unique.vals = 10 # drop from table any var with >10 distinct values
)
# Show a cross-tab view using the wide.col argument.

# "am" supplies the columns of the cross-tab.
tabl(
  df,
  vars = c("cyl", "am"),
  labs.on = TRUE, # use variable value labels
  sort.freq = TRUE, # sort by frequency
  irreg.rm = TRUE, # NAs and the like are suppressed
  prop.digits = NULL, # return counts, not proportions
  wide.col = "am" # use "am" as the column variable
)

# "cyl" supplies the columns of the cross-tab.
tabl(
  df,
  vars = c("cyl", "am"),
  labs.on = TRUE, # use variable value labels
  sort.freq = TRUE, # sort by frequency
  irreg.rm = TRUE, # NAs and the like are suppressed
  prop.digits = NULL, # return counts, not proportions
  wide.col = "cyl" # use "cyl" as the column variable
)
# Verify selected cell counts with base::subset(); rows whose condition
# evaluates to NA are not selected by subset().
# "cyl" holds character data at this point (the earlier "NAN" assignment
# coerced it), so R compares the numeric literal to it as a string.
nrow(subset(df, cyl == 4 & am == 0))
nrow(subset(df, cyl == 8 & am == 0))
nrow(subset(df, cyl == 8 & am == 1))
nrow(subset(df, cyl == 6 & am == 0))
nrow(subset(df, cyl == 6 & am == 1))

# tabl() also works on a data.frame that carries no labels.
tabl(
  mtcars,
  vars = c("am", "gear", "carb", "mpg")
)
# Run the code above in your browser using DataLab