if (require("RSQLite") && has_lahman("sqlite")) {
# Left joins ----------------------------------------------------------------
lahman_s <- lahman_sqlite()
batting <- tbl(lahman_s, "Batting")
team_info <- select(tbl(lahman_s, "Teams"), yearID, lgID, teamID, G, R:H)
# Combine player and whole team statistics
first_stint <- select(filter(batting, stint == 1), playerID:H)
both <- left_join(first_stint, team_info, type = "inner", by = c("yearID", "teamID", "lgID"))
head(both)
explain(both)
# Join with a local data frame
grid <- expand.grid(
teamID = c("WAS", "ATL", "PHI", "NYA"),
yearID = 2010:2012)
top4a <- left_join(batting, grid, copy = TRUE)
explain(top4a)
# Indices don't really help here because there's no matching index on
# batting
top4b <- left_join(batting, grid, copy = TRUE, auto_index = TRUE)
explain(top4b)
# Semi-joins ----------------------------------------------------------------
people <- tbl(lahman_s, "Master")
# All people in half of fame
hof <- tbl(lahman_s, "HallOfFame")
semi_join(people, hof)
# All people not in the hall of fame
anti_join(people, hof)
# Find all managers
manager <- tbl(lahman_s, "Managers")
semi_join(people, manager)
# Find all managers in hall of fame
famous_manager <- semi_join(semi_join(people, manager), hof)
famous_manager
explain(famous_manager)
# Anti-joins ----------------------------------------------------------------
# batters without person covariates
anti_join(batting, people)
}
Run the code above in your browser using DataLab