library(janeaustenr)
library(dplyr)
# Build a tidy table of the words in "Pride and Prejudice": one row per word,
# tagged with the number of the source line it came from, stop words removed.
# NOTE(review): unnest_tokens() and stop_words are not provided by the two
# packages attached above; presumably tidytext is attached elsewhere -- confirm.
pride_prejudice_words <- tibble(text = prideprejudice) %>%
  # number the lines before tokenising so every word keeps its line id
  mutate(line = row_number()) %>%
  unnest_tokens(word, text) %>%
  # explicit join key: same result as the implicit natural join, no message
  anti_join(stop_words, by = "word")
# Count pairs of words that appear together on the same line,
# most frequent pairs first.
pair_count(pride_prejudice_words, line, word, sort = TRUE)
# To see which words most often occur alongside one particular word, pass
# unique_pair = FALSE and then filter on the first member of the pair
# (presumably each pair is then reported in both orders -- see the
# pair_count() documentation).
pride_prejudice_words %>%
  pair_count(
    line,
    word,
    sort = TRUE,
    unique_pair = FALSE
  ) %>%
  filter(value1 == "elizabeth")
# Run the code above in your browser using DataLab