numform
numform contains tools to assist in the formatting of numbers and
plots for publication. Tools include the removal of leading zeros,
standardization of number of digits, addition of affixes, and a p-value
formatter. These tools combine the functionality of several 'base'
functions such as paste()
, format()
, and sprintf()
into specific
use case functions that are named in a way that is consistent with
usage, making their names easy to remember and easy to deploy.
Installation
To download the development version of numform:
Download the zip
ball or tar
ball, decompress and
run R CMD INSTALL
on it, or use the pacman package to install the
development version:
if (!require("pacman")) install.packages("pacman")
pacman::p_load_current_gh("trinker/numform")
pacman::p_load(tidyverse, gridExtra)
Table of Contents
Contact
You are welcome to:
- submit suggestions and bug-reports at: https://github.com/trinker/numform/issues
- send a pull request on: https://github.com/trinker/numform/
- compose a friendly e-mail to: tyler.rinker@gmail.com
Available Functions
Below is a table of available numform functions. Note that f_
is
read as "format" whereas fv_
is read as "format vector". The former
formats individual values in the vector while the latter uses the vector
to compute a calculation on each of the values and then formats them.
Additionally, all numform f_
functions have a closure, function
retuning, version that is prefixed with an additional f
(read "format
function"). For example, f_num
has ff_num
which has the same
arguments but returns a function instead. This is useful for passing in
to ggplot2 scale_x/y_type
functions (see Plotting for
usage).
Demonstration
Load Packages
if (!require("pacman")) install.packages("pacman")
pacman::p_load_gh("trinker/numform")
pacman::p_load(dplyr)
Numbers
f_num(c(0.0, 0, .2, -00.02, 1.122222, pi, "A"))
## [1] ".0" ".0" ".2" "-.0" "1.1" "3.1" NA
Abbreviated Numbers
f_thous(1234)
## [1] "1K"
f_thous(12345)
## [1] "12K"
f_thous(123456)
## [1] "123K"
f_mills(1234567)
## [1] "1M"
f_mills(12345678)
## [1] "12M"
f_mills(123456789)
## [1] "123M"
f_bills(1234567891)
## [1] "1B"
f_bills(12345678912)
## [1] "12B"
f_bills(123456789123)
## [1] "123B"
...or auto-detect:
f_denom(1234)
## [1] "1K"
f_denom(12345)
## [1] "12K"
f_denom(123456)
## [1] "123K"
f_denom(1234567)
## [1] "1M"
f_denom(12345678)
## [1] "12M"
f_denom(123456789)
## [1] "123M"
f_denom(1234567891)
## [1] "1B"
f_denom(12345678912)
## [1] "12B"
f_denom(123456789123)
## [1] "123B"
Commas
f_comma(c(1234.12345, 1234567890, .000034034, 123000000000, -1234567))
## [1] "1,234.123" "1,234,567,890" ".000034034" "123,000,000,000"
## [5] "-1,234,567"
Percents
f_percent(c(30, 33.45, .1), digits = 1)
## [1] "30.0%" "33.5%" ".1%"
f_percent(c(0.0, 0, .2, -00.02, 1.122222, pi))
## [1] ".0%" ".0%" ".2%" "-.0%" "1.1%" "3.1%"
f_prop2percent(c(.30, 1, 1.01, .33, .222, .01))
## [1] "30.0%" "100.0%" "101.0%" "33.0%" "22.2%" "1.0%"
f_prop2percent(c(.30, 1, 1.01, .33, .222, .01), digits = 0)
## [1] "30%" "100%" "101%" "33%" "22%" "1%"
f_pp(c(.30, 1, 1.01, .33, .222, .01)) # same as f_prop2percent(digits = 0)
## [1] "30%" "100%" "101%" "33%" "22%" "1%"
Dollars
f_dollar(c(0, 30, 33.45, .1))
## [1] "$0.00" "$30.00" "$33.45" "$0.10"
f_dollar(c(0.0, 0, .2, -00.02, 1122222, pi)) %>%
f_comma()
## [1] "$0.00" "$0.00" "$0.20" "$-.02"
## [5] "$1,122,222.00" "$3.14"
Sometimes one wants to lop off digits of money in order to see the
important digits, the real story. The f_denom
family of functions can
do job.
f_denom(c(12345267, 98765433, 658493021), prefix = '$')
## [1] "$ 12M" "$ 99M" "$658M"
f_denom(c(12345267, 98765433, 658493021), relative = 1, prefix = '$')
## [1] "$ 12.3M" "$ 98.8M" "$658.5M"
Tables
Notice the use of the alignment
function to detect the column
alignment.
pacman::p_load(dplyr, pander)
set.seed(10)
dat <- data_frame(
Team = rep(c("West Coast", "East Coast"), each = 4),
Year = rep(2012:2015, 2),
YearStart = round(rnorm(8, 2e6, 1e6) + sample(1:10/100, 8, TRUE), 2),
Won = round(rnorm(8, 4e5, 2e5) + sample(1:10/100, 8, TRUE), 2),
Lost = round(rnorm(8, 4.4e5, 2e5) + sample(1:10/100, 8, TRUE), 2),
WinLossRate = Won/Lost,
PropWon = Won/YearStart,
PropLost = Lost/YearStart
)
dat %>%
group_by(Team) %>%
mutate(
`%ΔWinLoss` = fv_percent_diff(WinLossRate, 0),
`ΔWinLoss` = f_sign(Won - Lost, '<b>+</b>', '<b>–</b>')
) %>%
ungroup() %>%
mutate_at(vars(Won:Lost), .funs = ff_denom(relative = -1, prefix = '$')) %>%
mutate_at(vars(PropWon, PropLost), .funs = ff_prop2percent(digits = 0)) %>%
mutate(
YearStart = f_denom(YearStart, 1, prefix = '$'),
Team = fv_runs(Team),
WinLossRate = f_num(WinLossRate, 1)
) %>%
data.frame(stringsAsFactors = FALSE, check.names = FALSE) %>%
pander::pander(split.tables = Inf, justify = alignment(.), style = 'simple')
pacman::p_load(dplyr, pander)
data_frame(
Event = c('freezing water', 'room temp', 'body temp', 'steak\'s done', 'hamburger\'s done', 'boiling water', 'sun surface', 'lighting'),
F = c(32, 70, 98.6, 145, 160, 212, 9941, 50000)
) %>%
mutate(
Event = f_title(Event),
C = (F - 32) * (5/9)
) %>%
mutate(
F = f_degree(F, measure = 'F', type = 'string'),
C = f_degree(C, measure = 'C', type = 'string', zero = '0.0')
) %>%
data.frame(stringsAsFactors = FALSE, check.names = FALSE) %>%
pander::pander(split.tables = Inf, justify = alignment(.), style = 'simple')
if (!require("pacman")) install.packages("pacman")
pacman::p_load(tidyverse)
set.seed(11)
data_frame(
date = sample(seq(as.Date("1990/1/1"), by = "day", length.out = 2e4), 12)
) %>%
mutate(
year_4 = f_year(date, 4),
year_2 = f_year(date, 2),
quarter = f_quarter(date),
month_name = f_month_name(date) %>%
numform::as_factor(),
month_abbreviation = f_month_abbreviation(date) %>%
numform::as_factor(),
month_short = f_month(date),
weekday_name = f_weekday_name(date),
weekday_abbreviation = f_weekday_abbreviation(date),
weekday_short = f_weekday(date),
weekday_short_distinct = f_weekday(date, distinct = TRUE)
) %>%
data.frame(stringsAsFactors = FALSE, check.names = FALSE) %>%
pander::pander(split.tables = Inf, justify = alignment(.), style = 'simple')
mtcars %>%
count(cyl, gear) %>%
group_by(cyl) %>%
mutate(
p = numform::f_pp(n/sum(n))
) %>%
ungroup() %>%
mutate(
cyl = numform::fv_runs(cyl),
` ` = f_text_bar(n) ## Overall
) %>%
as.data.frame()
cyl gear n p
1 4 3 1 9% _
2 4 8 73% ______
3 5 2 18% __
4 6 3 2 29% __
5 4 4 57% ___
6 5 1 14% _
7 8 3 12 86% _________
8 5 2 14% __
Plotting
library(tidyverse); library(viridis)
set.seed(10)
data_frame(
revenue = rnorm(10000, 500000, 50000),
date = sample(seq(as.Date('1999/01/01'), as.Date('2000/01/01'), by="day"), 10000, TRUE),
site = sample(paste("Site", 1:5), 10000, TRUE)
) %>%
mutate(
dollar = f_comma(f_dollar(revenue, digits = -3)),
thous = f_denom(revenue),
thous_dollars = f_denom(revenue, prefix = '$'),
abb_month = f_month(date),
abb_week = numform::as_factor(f_weekday(date, distinct = TRUE))
) %>%
group_by(site, abb_week) %>%
mutate(revenue = {if(sample(0:1, 1) == 0) `-` else `+`}(revenue, sample(1e2:1e5, 1))) %>%
ungroup() %T>%
print() %>%
ggplot(aes(abb_week, revenue)) +
geom_jitter(width = .2, height = 0, alpha = .2, aes(color = revenue)) +
scale_y_continuous(label = ff_denom(prefix = '$'))+
facet_wrap(~site) +
theme_bw() +
scale_color_viridis() +
theme(
strip.text.x = element_text(hjust = 0, color = 'grey45'),
strip.background = element_rect(fill = NA, color = NA),
panel.border = element_rect(fill = NA, color = 'grey75'),
panel.grid = element_line(linetype = 'dotted'),
axis.ticks = element_line(color = 'grey55'),
axis.text = element_text(color = 'grey55'),
axis.title.x = element_text(color = 'grey55', margin = margin(t = 10)),
axis.title.y = element_text(color = 'grey55', angle = 0, margin = margin(r = 10)),
legend.position = 'none'
) +
labs(
x = 'Day of Week',
y = 'Revenue',
title = 'Site Revenue by Day of Week',
subtitle = f_wrap(c(
'This faceted dot plot shows the distribution of revenues within sites',
'across days of the week. Notice the consistently increasing revenues for',
'Site 2 across the week.'
), width = 85, collapse = TRUE)
)
## # A tibble: 10,000 x 8
## revenue date site dollar thous thous_dollars abb_month abb_week
## <dbl> <date> <chr> <chr> <chr> <chr> <chr> <fct>
## 1 449648. 1999-11-29 Site 1 $501,0~ 501K $501K N M
## 2 560514. 1999-07-07 Site 4 $491,0~ 491K $491K J W
## 3 438891. 1999-08-06 Site 2 $431,0~ 431K $431K A F
## 4 528543. 1999-05-04 Site 3 $470,0~ 470K $470K M T
## 5 462758. 1999-07-08 Site 4 $515,0~ 515K $515K J Th
## 6 553879. 1999-07-22 Site 2 $519,0~ 519K $519K J Th
## 7 473985. 1999-05-20 Site 2 $440,0~ 440K $440K M Th
## 8 533825. 1999-05-28 Site 5 $482,0~ 482K $482K M F
## 9 426124. 1999-01-15 Site 2 $419,0~ 419K $419K J F
## 10 406613. 1999-08-19 Site 3 $487,0~ 487K $487K A Th
## # ... with 9,990 more rows
library(tidyverse); library(viridis)
set.seed(10)
dat <- data_frame(
revenue = rnorm(144, 500000, 10000),
date = seq(as.Date('2005/01/01'), as.Date('2016/12/01'), by="month")
) %>%
mutate(
quarter = f_quarter(date),
year = f_year(date, 4)
) %>%
group_by(year, quarter) %>%
summarize(revenue = sum(revenue)) %>%
ungroup() %>%
mutate(quarter = as.integer(gsub('Q', '', quarter)))
year_average <- dat %>%
group_by(year) %>%
summarize(revenue = mean(revenue)) %>%
mutate(x1 = .8, x2 = 4.2)
dat %>%
ggplot(aes(quarter, revenue, group = year)) +
geom_segment(
linetype = 'dashed',
data = year_average, color = 'grey70', size = 1,
aes(x = x1, y = revenue, xend = x2, yend = revenue)
) +
geom_line(size = .85, color = '#009ACD') +
geom_point(size = 1.5, color = '#009ACD') +
facet_wrap(~year, nrow = 2) +
scale_y_continuous(label = ff_denom(relative = 2)) +
scale_x_continuous(breaks = 1:4, label = f_quarter) +
theme_bw() +
theme(
strip.text.x = element_text(hjust = 0, color = 'grey45'),
strip.background = element_rect(fill = NA, color = NA),
panel.border = element_rect(fill = NA, color = 'grey75'),
panel.grid.minor = element_blank(),
panel.grid.major = element_line(linetype = 'dotted'),
axis.ticks = element_line(color = 'grey55'),
axis.text = element_text(color = 'grey55'),
axis.title.x = element_text(color = 'grey55', margin = margin(t = 10)),
axis.title.y = element_text(color = 'grey55', angle = 0, margin = margin(r = 10)),
legend.position = 'none'
) +
labs(
x = 'Quarter',
y = 'Revenue ($)',
title = 'Quarterly Revenue Across Years',
subtitle = f_wrap(c(
'This faceted line plot shows the change in quarterly revenue across',
'years.'
), width = 85, collapse = TRUE)
)
library(tidyverse); library(gridExtra)
set.seed(10)
dat <- data_frame(
level = c("not_involved", "somewhat_involved_single_group",
"somewhat_involved_multiple_groups", "very_involved_one_group",
"very_involved_multiple_groups"
),
n = sample(1:10, length(level))
) %>%
mutate(
level = factor(level, levels = unique(level)),
`%` = n/sum(n)
)
gridExtra::grid.arrange(
gridExtra::arrangeGrob(
dat %>%
ggplot(aes(level, `%`)) +
geom_col() +
labs(title = 'Very Sad', y = NULL) +
theme(
axis.text = element_text(size = 7),
title = element_text(size = 9)
),
dat %>%
ggplot(aes(level, `%`)) +
geom_col() +
scale_x_discrete(labels = function(x) f_replace(x, '_', '\n')) +
scale_y_continuous(labels = ff_prop2percent(digits = 0)) +
labs(title = 'Underscore Split (Readable)', y = NULL) +
theme(
axis.text = element_text(size = 7),
title = element_text(size = 9)
),
ncol = 2
),
gridExtra::arrangeGrob(
dat %>%
ggplot(aes(level, `%`)) +
geom_col() +
scale_x_discrete(labels = function(x) f_title(f_replace(x))) +
scale_y_continuous(labels = ff_prop2percent(digits = 0)) +
labs(title = 'Underscore Replaced & Title (Capitalized Sadness)', y = NULL) +
theme(
axis.text = element_text(size = 7),
title = element_text(size = 9)
),
dat %>%
ggplot(aes(level, `%`)) +
geom_col() +
scale_x_discrete(labels = function(x) f_wrap(f_title(f_replace(x)))) +
scale_y_continuous(labels = ff_prop2percent(digits = 0)) +
labs(title = 'Underscore Replaced, Title, & Wrapped (Happy)', y = NULL) +
theme(
axis.text = element_text(size = 7),
title = element_text(size = 9)
),
ncol = 2
), ncol = 1
)
set.seed(10)
dat <- data_frame(
state = sample(state.name, 10),
value = sample(10:20, 10) ^ (7),
cols = sample(colors()[1:150], 10)
) %>%
arrange(desc(value)) %>%
mutate(state = factor(state, levels = unique(state)))
dat %>%
ggplot(aes(state, value, fill = cols)) +
geom_col() +
scale_x_discrete(labels = f_state) +
scale_fill_identity() +
scale_y_continuous(labels = ff_denom(prefix = '$'), expand = c(0, 0),
limits = c(0, max(dat$value) * 1.05)
) +
theme_minimal() +
theme(
panel.grid.major.x = element_blank(),
axis.title.y = element_text(angle = 0)
) +
labs(x = 'State', y = 'Cash\nFlow',
title = f_title("look at how professional i look"),
subtitle = 'Subtitles: For that extra professional look.'
)
library(tidyverse); library(viridis)
data_frame(
Event = c('freezing water', 'room temp', 'body temp', 'steak\'s done', 'hamburger\'s done', 'boiling water'),
F = c(32, 70, 98.6, 145, 160, 212)
) %>%
mutate(
C = (F - 32) * (5/9),
Event = f_title(Event),
Event = factor(Event, levels = unique(Event))
) %>%
ggplot(aes(Event, F, fill = F)) +
geom_col() +
geom_text(aes(y = F + 4, label = f_fahrenheit(F, digits = 1, type = 'text')), parse = TRUE, color = 'grey60') +
scale_y_continuous(
labels = f_fahrenheit, limits = c(0, 220), expand = c(0, 0),
sec.axis = sec_axis(trans = ~(. - 32) * (5/9), labels = f_celcius, name = f_celcius(prefix = 'Temperature ', type = 'title'))
) +
scale_x_discrete(labels = ff_replace(pattern = ' ', replacement = '\n')) +
scale_fill_viridis(option = "magma", labels = f_fahrenheit, name = NULL) +
theme_bw() +
labs(
y = f_fahrenheit(prefix = 'Temperature ', type = 'title'),
title = f_fahrenheit(prefix = 'Temperature of Common Events ', type = 'title')
) +
theme(
axis.ticks.x = element_blank(),
panel.border = element_rect(fill = NA, color = 'grey80'),
panel.grid.minor.x = element_blank(),
panel.grid.major.x = element_blank()
)
library(tidyverse); library(maps)
world <- map_data(map="world")
ggplot(world, aes(map_id = region, x = long, y = lat)) +
geom_map(map = world, aes(map_id = region), fill = "grey40", colour = "grey70", size = 0.25) +
scale_y_continuous(labels = f_latitude) +
scale_x_continuous(labels = f_longitude)
mtcars %>%
mutate(mpg2 = cut(mpg, 10, right = FALSE)) %>%
ggplot(aes(mpg2)) +
geom_bar(fill = '#33A1DE') +
scale_x_discrete(labels = function(x) f_wrap(f_bin_text_right(x, l = 'up to'), width = 8)) +
scale_y_continuous(breaks = seq(0, 14, by = 2), limits = c(0, 7)) +
theme_minimal() +
theme(
panel.grid.major.x = element_blank(),
axis.text.x = element_text(size = 14, margin = margin(t = -12)),
axis.text.y = element_text(size = 14),
plot.title = element_text(hjust = .5)
) +
labs(title = 'Histogram', x = NULL, y = NULL)
dat <- data_frame(
Value = c(111, 2345, 34567, 456789, 1000001, 1000000001),
Time = 1:6
)
gridExtra::grid.arrange(
ggplot(dat, aes(Time, Value)) +
geom_line() +
scale_y_continuous(labels = ff_denom( prefix = '$')) +
labs(title = "Single Denominational Unit"),
ggplot(dat, aes(Time, Value)) +
geom_line() +
scale_y_continuous(
labels = ff_denom(mix.denom = TRUE, prefix = '$', pad.char = '')
) +
labs(title = "Mixed Denominational Unit"),
ncol = 2
)
Modeling
We can see its use in actual model reporting as well:
mod1 <- t.test(1:10, y = c(7:20))
sprintf(
"t = %s (%s)",
f_num(mod1$statistic),
f_pval(mod1$p.value)
)
## [1] "t = -5.4 (p < .05)"
mod2 <- t.test(1:10, y = c(7:20, 200))
sprintf(
"t = %s (%s)",
f_num(mod2$statistic, 2),
f_pval(mod2$p.value, digits = 2)
)
## [1] "t = -1.63 (p = .12)"
We can build a function to report model statistics:
report <- function(mod, stat = NULL, digits = c(0, 2, 2)) {
stat <- if (is.null(stat)) stat <- names(mod[["statistic"]])
sprintf(
"%s(%s) = %s, %s",
gsub('X-squared', 'Χ<sup>2</sup>', stat),
paste(f_num(mod[["parameter"]], digits[1]), collapse = ", "),
f_num(mod[["statistic"]], digits[2]),
f_pval(mod[["p.value"]], digits = digits[3])
)
}
report(mod1)
## [1] "t(22) = -5.43, p < .05"
report(oneway.test(count ~ spray, InsectSprays))
## [1] "F(5, 30) = 36.07, p < .05"
report(chisq.test(matrix(c(12, 5, 7, 7), ncol = 2)))
## [1] "Χ<sup>2</sup>(1) = .64, p = .42"
This enables in-text usage as well. First set up the models in a code chunk:
mymod <- oneway.test(count ~ spray, InsectSprays)
mymod2 <- chisq.test(matrix(c(12, 5, 7, 7), ncol = 2))
And then use `r report(mymod)`
resulting in
a report that looks like this: F(5, 30) = 36.07, p < .05. For
Χ2 using proper HTML leads to Χ2(1) = .64, p =
.42.