library(dplyr)
library(janeaustenr)
# Tokenise every Austen novel into one word per row, then count how often
# each word occurs within each book (sort = TRUE puts the largest counts
# first).
# unnest_tokens() and bind_tf_idf() are exported by tidytext, which this
# script never attaches, so both calls are namespace-qualified.
book_words <- austen_books() %>%
  tidytext::unnest_tokens(word, text) %>%
  count(book, word, sort = TRUE)

book_words

# Find the words most distinctive to each document: compute tf-idf with
# word as the term, book as the document and n as the count, then rank
# rows by descending tf_idf.
book_words %>%
  tidytext::bind_tf_idf(word, book, n) %>%
  arrange(desc(tf_idf))
# Run the code above in your browser using DataLab