# Two small data frames that share the columns "c" and "id".
x <- data.frame(
  a = 1:3,
  b = c("a", "b", "c"),
  c = 5:7,
  id = 1:3
)
y <- data.frame(
  c = 6:8,
  d = c("f", "g", "h"),
  e = 100:102,
  id = 2:4
)
x
y
# Without an explicit "by", all columns common to both data frames are used
# as keys -- here "c" and "id". The new columns "d" and "e" are copied from
# "y" to "x". Only two rows of "x" have a ("c", "id") combination that also
# occurs in "y"; those rows receive values in the copied columns, the
# remaining row gets "NA".
data_merge(x, y, join = "left")
# Same data as before, except that the "id" values in "y" are now 3 to 5.
x <- data.frame(
  a = 1:3,
  b = c("a", "b", "c"),
  c = 5:7,
  id = 1:3
)
y <- data.frame(
  c = 6:8,
  d = c("f", "g", "h"),
  e = 100:102,
  id = 3:5
)
x
y
# No row of "y" has the same combination of "c" and "id" as a row of "x",
# so the variables copied from "y" to "x" contain no values: all are NA.
data_merge(x, y, join = "left")
# When only "id" is used as key, exactly one row of "y" matches a row of "x".
# For that row the remaining variables of "y" are copied to "x"; for every
# other row those variables are NA.
data_merge(x, y, join = "left", by = "id")
data(mtcars)
# "x": first five cars, columns 1-3; "y": last five cars, columns 4-6.
# The two subsets have no variable in common yet.
x <- mtcars[seq_len(5), c("mpg", "cyl", "disp")]
y <- mtcars[28:32, c("hp", "drat", "wt")]
# give both data frames a common "id" column (ids 3 to 5 overlap)
x[["id"]] <- 1:5
y[["id"]] <- 3:7
# left join: new variables are added to x and filled with the values
# from y wherever the "id" values match
data_merge(x, y)
# right join: new variables are added to y and filled with the values
# from x wherever the "id" values match
data_merge(x, y, join = "right")
# full join
data_merge(x, y, join = "full")
data(mtcars)
# "x" holds mpg, cyl, disp; "y" holds mpg, hp, drat -- so "mpg" is part of
# both data frames.
x <- mtcars[1:5, 1:3]
y <- mtcars[28:32, c(1, 4:5)]
# add ID common column
x$id <- 1:5
y$id <- 3:7
# left-join, no matching rows (because all shared columns, i.e. "mpg" and
# "id", are used and no row agrees on both), new variables get all NA values
data_merge(x, y)
# one common value in "mpg", so one row from y is copied to x
data_merge(x, y, by = "mpg")
# only keep rows with matching values in by-column
data_merge(x, y, join = "semi", by = "mpg")
# only keep rows with non-matching values in by-column
data_merge(x, y, join = "anti", by = "mpg")
# Merge a list of data frames; the data frames may differ in their number
# of rows (here 5, 4 and 8).
x <- mtcars[seq_len(5), c("mpg", "cyl", "disp")]
y <- mtcars[28:31, c("disp", "hp", "drat")]
z <- mtcars[11:18, c("mpg", "disp", "hp", "wt", "qsec", "vs")]
x[["id"]] <- 1:5
y[["id"]] <- 4:7
z[["id"]] <- 3:10
# NOTE(review): id = "source" presumably names a column that records which
# data frame each row came from -- confirm against the data_merge() docs.
data_merge(
  list(x, y, z),
  join = "bind",
  by = "id",
  id = "source"
)
# Run the code above in your browser using DataLab