# Examples of splitting a text column into tokens with
# tidytext::unnest_tokens(), using the text of "Pride and Prejudice".
library(dplyr)
library(janeaustenr)
# tidytext provides unnest_tokens(); it must be attached for the calls below.
library(tidytext)

# One-column tibble holding the `prideprejudice` text in `txt`.
# tibble() replaces the deprecated data_frame().
d <- tibble(txt = prideprejudice)
d

# Default tokenizer: output column `word`.
d %>%
  unnest_tokens(word, txt)

# Sentence tokens.
d %>%
  unnest_tokens(sentence, txt, token = "sentences")

# n-grams of length 2 (bigrams).
d %>%
  unnest_tokens(ngram, txt, token = "ngrams", n = 2)

# Skip n-grams: `n` and `k` are forwarded to the tokenizer.
d %>%
  unnest_tokens(ngram, txt, token = "skip_ngrams", n = 4, k = 2)

# Split the text wherever the regular expression matches.
d %>%
  unnest_tokens(chapter, txt, token = "regex", pattern = "Chapter [\\d]")

# Custom function: any tokenizing function can be supplied as `token`;
# extra arguments (here `pattern`) are passed on to it.
d %>%
  unnest_tokens(word, txt, token = stringr::str_split, pattern = " ")
# Run the code above in your browser using DataLab