# Small repeated-measures example in long format: one row per subject and
# condition, with the observed value stored in "measurement".
data_long <- read.table(
  text = "
subject sex condition measurement
1 M control 7.9
1 M cond1 12.3
1 M cond2 10.7
2 F control 6.3
2 F cond1 10.6
2 F cond2 11.1
3 F control 9.5
3 F cond1 13.1
3 F cond2 13.8
4 M control 11.5
4 M cond1 13.4
4 M cond2 12.9",
  header = TRUE
)

# long -> wide: one row per subject, one column per level of "condition"
data_to_wide(
  data_long,
  id_cols = "subject",
  names_from = "condition",
  values_from = "measurement"
)

# long -> wide again, this time customising the names of the new columns
data_to_wide(
  data_long,
  id_cols = "subject",
  names_from = "condition",
  values_from = "measurement",
  names_prefix = "Var.",
  names_sep = "."
)
# converting long data into wide format, combining multiple columns

# every combination of product, country and year
production <- expand.grid(
  product = c("A", "B"),
  country = c("AI", "EI"),
  year = 2000:2014
)

# keep product "A" only for country "AI", but keep every row of product "B"
production <- data_filter(
  production,
  (product == "A" & country == "AI") | product == "B"
)

# fix the RNG seed so that the simulated values (and therefore the printed
# example output) are the same on every run
set.seed(123)
production$production <- rnorm(nrow(production))

# both "product" and "country" feed the new column names; `names_glue`
# supplies the template used to combine them
data_to_wide(
  production,
  names_from = c("product", "country"),
  values_from = "production",
  names_glue = "prod_{product}_{country}"
)
# using the "sleepstudy" dataset from the lme4 package
data(sleepstudy, package = "lme4")

# "sleepstudy" holds repeated measurements of the average reaction time of
# each subject over multiple days of a sleep deprivation study. The data is
# in long format, i.e. every row is a single measurement: "Days" gives the
# time point of the measurement and "Reaction" the measured value.
# Converting to wide format creates one column per day, filled with the
# reaction times.
head(sleepstudy)

data_to_wide(
  sleepstudy,
  id_cols = "Subject",
  names_from = "Days",
  values_from = "Reaction"
)

# same conversion, with a prefix that makes the column names clearer
data_to_wide(
  sleepstudy,
  id_cols = "Subject",
  names_from = "Days",
  values_from = "Reaction",
  names_prefix = "Reaction_Day_"
)
# With unequal group sizes, missing information is filled with NA.
# Restrict the data to days 0-4, then pick a subset of rows so that the
# subjects no longer all have the same number of measurements.
d <- subset(sleepstudy, Days %in% 0:4)[c(1:9, 11:13, 16, 17, 21), ]

# long format, with a different number of rows per subject
d

data_to_wide(
  d,
  id_cols = "Subject",
  names_from = "Days",
  values_from = "Reaction",
  names_prefix = "Reaction_Day_"
)

# same conversion, but missing values are filled with 0
data_to_wide(
  d,
  id_cols = "Subject",
  names_from = "Days",
  values_from = "Reaction",
  names_prefix = "Reaction_Day_",
  values_fill = 0
)
# Run the code above in your browser using DataLab