to_long: Convert data to long or to wide form

Description

to_long increases number of rows in the dataset and reduce number of columns. to_wide makes invert transformation. You can use cols for selecting variables in the arguments. See examples.

Usage

to_long(
  data,
  columns = NULL,
  keep = NULL,
  names_in = "variable",
  values_in = "value",
  drop_na = FALSE,
  names_factor = TRUE,
  value_factor = FALSE,
  ...
)
to_wide(
  data,
  keep = NULL,
  names_in = variable,
  values_in = value,
  fun = identity,
  sep = "_",
  fill = NA,
  missing_comb = c("none", "rows", "columns", "all"),
  ...
)

Value

data.table in the wide or long form.

Arguments

data: A data.frame to convert
columns: unquoted names of variables for stacking. When missing, we will stack all columns outside keep columns.
keep: unquoted names of columns which will be kept as is, e. g. only recycled or deduplicated. If missing, it is all columns except stacked or unstacked. If FALSE then nothing will be kept.
names_in: name of the stacked variable names column. The default name is 'variable'. It is quoted in the to_long and unquoted in to_wide. If FALSE in the to_wide than nothing will be widening.
values_in: name(-s) of the stacked data values column(s). The default name is 'value'. Multiple names can be provided here for the case when columns is a list, though note well that the names provided in columns take precedence. It is quoted in the to_long and unqoted in to_wide
drop_na: If TRUE, NA values will be removed from the stacked data.
names_factor: If TRUE, the column with names will be converted to factor, else it will be a character column. TRUE by default.
value_factor: If TRUE, the value column will be converted to factor, else the stacked values type is left unchanged. FALSE by default.
...: other arguments passed to data.table::melt/data.table::dcast
fun: Should the data be aggregated before casting? By default, it is identity - no aggregation. To use multiple aggregation functions, pass a list; see Examples.
sep: Character vector of length 1, indicating the separating character in variable names generated during casting. Default is "_".
fill: Value with which to fill missing cells. NA by default. If fun is present, takes the value by applying the function on a 0-length vector.
missing_comb: One of "none" (the default), "rows" - include missing combinations in rows, "columns" - include missing combinations in columns, and "all" include all missing combinations.

Examples

Run this code

data(iris)

# 'to_long'

long_iris = iris %>%
    to_long(keep = Species)

long_iris

iris_with_stat = long_iris %>%
    take(mean = mean(value),
         sd = sd(value),
         n = .N*1.0,
         by = .(Species, variable)
    ) %>%
    to_long(columns = c(mean, sd, n), names_in = "stat")

# 'to_wide' - table with multiple stats
iris_with_stat %>%
    to_wide()


iris_with_stat %>%
    to_wide(names_in = c(variable, stat))

iris_with_stat %>%
    to_wide(names_in = c(variable, Species))

# 'to_wide' - aggregation function
long_iris %>%
    to_wide(fun = list(Mean = mean, SD = sd, N = length))

# multiple variables
iris %>%
    to_long(list(Sepal = cols("^Sepal"), Petal = cols("^Petal"))) %>%
    let(
        variable = factor(variable, levels = 1:2, labels = c("Length", "Width"))
    ) %>%
    to_wide(values_in = c(Sepal, Petal))

# '%to%' selector - example from tidyr::pivot_longer

data(anscombe)
anscombe %>%
    to_long(
        list(x = x1 %to% x4, y = y1 %to% y4),
        names_in = "set"
    )

######################################
## Examples from data.table melt/dcast
######################################

set.seed(45)
DT = data.table(
    i_1 = c(1:5, NA)*1.0,
    i_2 = c(NA,6,7,8,9,10)*1.0,
    f_1 = factor(sample(c(letters[1:3], NA), 6, TRUE)),
    f_2 = factor(c("z", "a", "x", "c", "x", "x"), ordered=TRUE),
    c_1 = sample(c(letters[1:3], NA), 6, TRUE),
    d_1 = as.Date(c(1:3,NA,4:5), origin="2013-09-01"),
    d_2 = as.Date(6:1, origin="2012-01-01")
)

# id, values as character/integer/numeric vectors

to_long(DT, f_1, keep = 1:2)
to_long(DT, f_1, keep = c(i_1, i_2))
to_long(DT, f_1, keep = i_1 %to% i_2)
to_long(DT, f_1, keep = cols(i_1:i_2), names_factor = FALSE)
to_long(DT, f_1, keep = cols("i_{1:2}"))
to_long(DT, f_1, keep = cols("^i_"))
to_long(DT, f_1, keep = cols("^i_"), names_in = "var", values_in = "val")

col_var = "^i_"
to_long(DT, 3, keep = cols(col_var))

to_long(DT, cols("^f_"), keep = cols("^i_"), value_factor = TRUE)

to_long(mtcars)
to_long(mtcars, keep = am)
to_long(mtcars, columns = c(am, vs, mpg))
to_long(mtcars, columns = c(am, vs, mpg), keep = FALSE)
to_long(DT, keep = f_1, columns = c(i_1, i_2), drop_na = TRUE)
to_long(DT, keep=1:2, columns = list(cols("^f_"), cols("^d_")), value_factor=TRUE)

data("ChickWeight")
names(ChickWeight) = tolower(names(ChickWeight))
DT = to_long(ChickWeight, keep=2:4)

to_wide(DT, keep = time, fun = mean)
to_wide(DT, keep = FALSE, fun = mean)
to_wide(DT, keep = diet, fun = mean)
to_wide(DT, keep = c(diet, chick), names_in = time, missing_comb = "all")
to_wide(DT, keep = c(diet, chick), names_in = time, missing_comb = "all", fill = 0)
to_wide(DT, chick, time, fun = mean)



# using FALSE
DT = data.table(v1 = rep(1:2, each = 6),
                v2 = rep(rep(1:3, 2), each = 2),
                v3 = rep(1:2, 6),
                v4 = rnorm(6))

## for each combination of (v1, v2), add up all values of v4
to_wide(DT,
        cols("^v(1|2)"),
        names_in = FALSE,
        values_in = v4,
        fun = sum
)

# multiple values_in and multiple fun
DT = data.table(x=sample(5,20,TRUE),
                y=sample(2,20,TRUE),
                z=sample(letters[1:2], 20,TRUE),
                d1 = runif(20),
                d2=1L)

# multiple values_in
to_wide(DT,
        keep = c(x, y),
        names_in = z,
        values_in = c(d1, d2),
        fun = sum,
        fill = 0)

# multiple funs
to_wide(DT,
        keep = c(x, y),
        names_in = z,
        values_in = d1,
        fun = list(sum = sum, mean = mean),
        fill = NULL)

# multiple fun and values_in (all combinations)
to_wide(DT,
        keep = c(x, y),
        names_in = z,
        values_in = c(d1, d2),
        fun = list(sum = sum, mean = mean)
)

# multiple fun and values_in (one-to-one)
to_wide(DT,
        keep = c(x, y),
        names_in = z,
        values_in = list(d1, d2),
        fun = list(sum = sum, mean = mean)
)

Run the code above in your browser using DataLab