x <- c(1, 2, NA, 3, NaN)
y <- c(2, 1, 4, NA, 1, 2, NaN)

# With the default settings, every location in `y` that matches a given
# value of `x` is reported
matches <- vec_locate_matches(needles = x, haystack = y)
matches

# Slicing both inputs by the returned locations lines them up side by side
data_frame(
  x = vec_slice(x, matches$needles),
  y = vec_slice(y, matches$haystack)
)
# When a needle has several matches, `multiple` chooses which of them is
# returned
vec_locate_matches(needles = x, haystack = y, multiple = "first")
vec_locate_matches(needles = x, haystack = y, multiple = "last")
vec_locate_matches(needles = x, haystack = y, multiple = "any")

# `relationship` adds constraints on the matches and raises an error when
# multiple matches show up where they aren't expected
try(vec_locate_matches(needles = x, haystack = y, relationship = "one-to-one"))

# Here the `NA` in `y` matches two rows of `x`, which violates the constraint
try(vec_locate_matches(needles = x, haystack = y, relationship = "one-to-many"))

# `NA` and `NaN` are considered identical by default.
# With `nan_distinct = TRUE` they are treated as different values, so `NA`
# only matches `NA` and `NaN` only matches `NaN`.
vec_locate_matches(needles = x, haystack = y, nan_distinct = TRUE)

# To prevent missing values from ever matching, use `incomplete = NA`. The
# `haystack` column is then `NA` wherever `needles` held an incomplete value.
vec_locate_matches(needles = x, haystack = y, incomplete = NA)

# With `incomplete = NA` in place, the one-to-many relationship that failed
# above can now be enforced
vec_locate_matches(
  needles = x,
  haystack = y,
  relationship = "one-to-many",
  incomplete = NA
)

# `no_match` sets the value returned for a needle that has zero matches.
# This is a separate case from an incomplete value, so supplying `no_match`
# makes it possible to tell incomplete values apart from unmatched ones.
vec_locate_matches(needles = x, haystack = y, incomplete = NA, no_match = 0L)

# Set `no_match` to `"error"` to require that every `needle` has at least
# 1 match:
try(vec_locate_matches(
  needles = x,
  haystack = y,
  incomplete = NA,
  no_match = "error"
))
# `vec_locate_matches()` looks for equality between `needles` and `haystack`
# unless told otherwise. Supplying `condition` switches to an inequality
# instead. For example, to find every location where `x[[i]] >= y`:
matches <- vec_locate_matches(needles = x, haystack = y, condition = ">=")

data_frame(
  x = vec_slice(x, matches$needles),
  y = vec_slice(y, matches$haystack)
)

# A `filter` narrows down which matches are returned. Continuing the example
# above, the matches produced by `x[[i]] >= y` can be reduced to only those
# holding the maximum `y` value among them.
matches <- vec_locate_matches(
  needles = x,
  haystack = y,
  condition = ">=",
  filter = "max"
)

# The matches for the `3` needle value are now limited to the maximum
# haystack value among them, `2`. This is often referred to as a rolling
# join.
data_frame(
  x = vec_slice(x, matches$needles),
  y = vec_slice(y, matches$haystack)
)
# A cross match forces every value of `x` to match every value of `y`,
# whatever the actual values are. It is very rarely needed, but the
# locations can be generated by swapping `x` and `y` for integer vectors of
# the same sizes that hold a single repeated value, and matching on those.
x_proxy <- vec_rep(1L, times = vec_size(x))
y_proxy <- vec_rep(1L, times = vec_size(y))

# Compare the number of matches with the product of the input sizes
nrow(vec_locate_matches(needles = x_proxy, haystack = y_proxy))
vec_size(x) * vec_size(y)
# Under the `==`, `>=`, and `<=` conditions, missing values match other
# missing values by default; under `>` and `<` they do not.
# This is similar to how `vec_compare(x, y, na_equal = TRUE)` works.
x <- c(1, NA)
y <- c(NA, 2)

vec_locate_matches(needles = x, haystack = y, condition = "<=")
vec_locate_matches(needles = x, haystack = y, condition = "<")

# `incomplete = "match"` forces missing values to match whatever the
# `condition` is
vec_locate_matches(
  needles = x,
  haystack = y,
  condition = "<",
  incomplete = "match"
)
# `needles` and `haystack` may also be data frames. `condition` is recycled
# to the number of columns in `needles`, or a separate condition can be given
# for each column. In this example, we take a vector of date `values` and
# find all locations where each value is between the lower and upper bounds
# specified by the `haystack`.
values <- as.Date("2019-01-01") + 0:9
needles <- data_frame(lower = values, upper = values)

# Fix the seed so the randomly generated bounds are reproducible
set.seed(123)
lower <- as.Date("2019-01-01") + sample(10, 10, replace = TRUE)
upper <- lower + sample(3, 10, replace = TRUE)
haystack <- data_frame(lower = lower, upper = upper)

# (values >= lower) & (values <= upper)
matches <- vec_locate_matches(needles, haystack, condition = c(">=", "<="))

# Refer to the `needles` column by its full name rather than relying on
# partial matching by `$`
data_frame(
  lower = vec_slice(lower, matches$haystack),
  value = vec_slice(values, matches$needles),
  upper = vec_slice(upper, matches$haystack)
)
# Run the code above in your browser using DataLab