library(janeaustenr)
library(dplyr)
library(tidytext)
# Comparing Jane Austen novels
austen_words <- austen_books() %>%
unnest_tokens(word, text) %>%
anti_join(stop_words, by = "word") %>%
count(book, word) %>%
ungroup()
# closest books to each other
closest <- austen_words %>%
pairwise_similarity(book, word, n) %>%
arrange(desc(similarity))
closest
closest %>%
filter(item1 == "Emma")
Run the code above in your browser using DataLab