# Attach packages
library(rearrr)
library(dplyr)
# Fix the RNG state so the random columns below are reproducible
set.seed(1)

# Build the 10-row example data frame used throughout.
# Column arguments are evaluated in order, so `A` (a permutation of 1..10)
# consumes the random stream before `B` (uniform draws) does.
df <- data.frame(
  index = seq_len(10),
  A = sample(seq_len(10)),
  B = runif(10),
  C = LETTERS[seq_len(10)],
  # Grouping factor: three rows each for 1 and 2, four rows for 3
  G = rep(c(1, 2, 3), times = c(3, 3, 4)),
  stringsAsFactors = FALSE
)
# With no column given, the extremes are paired by index (row number)
df %>%
  pair_extremes()

# Pair the extremes of one column at a time and show just that column
pair_extremes(df, col = "A")[["A"]]
pair_extremes(df, col = "B")[["B"]]
pair_extremes(df, col = "C")[["C"]]
# Shuffle the members within each pair:
# rows inside a pair are shuffled,
# while the `.pair` column maintains its order
df %>%
  pair_extremes(col = "A", shuffle_members = TRUE)

# Shuffle the order of the pairs instead:
# rows inside a pair keep their order and stay together,
# but the `.pair` column is shuffled
df %>%
  pair_extremes(col = "A", shuffle_pairs = TRUE)
# Recursive pairing (two rounds of pairing)
# Mostly meaningful with much larger datasets

# Order the initial grouping by pair identifiers
df %>%
  pair_extremes(col = "A", num_pairings = 2)

# Order the initial grouping by aggregate values
df %>%
  pair_extremes(
    col = "A",
    num_pairings = 2,
    order_by_aggregates = TRUE
  )
# Pair within each G group separately
# G groups 1 and 2 hold 3 rows each, so each yields
# 1 pair plus a group with the single excess element;
# G group 3 holds 4 rows, an even count
df[c("G", "A")] %>% # Keep only G and A, for clarity
  dplyr::group_by(G) %>%
  pair_extremes(col = "A")
# Plot the extreme pairs; consecutive rows share a colour
# NOTE: the `x` and `y` expressions are kept inline as written because
# base `plot()` derives its default axis labels from them
plot(
  x = 1:10,
  y = pair_extremes(df, col = "B")$B,
  col = rep(as.character(1:5), each = 2)
)

# Same plot with shuffled pair members (run a few times)
plot(
  x = 1:10,
  y = pair_extremes(df, col = "B", shuffle_members = TRUE)$B,
  col = rep(as.character(1:5), each = 2)
)

# With shuffled pairs (run a few times)
# Here both rows of a pair share an x position
plot(
  x = rep(1:5, each = 2),
  y = pair_extremes(df, col = "B", shuffle_pairs = TRUE)$B,
  col = rep(as.character(1:5), each = 2)
)
# Run the code above in your browser using DataLab