# NOT RUN {
library(janeaustenr)
library(sparklyr)
library(dplyr)
# Example: fit an LDA model with k = 4 on a small sample of the Jane Austen
# corpus, using a local Spark connection.
sc <- spark_connect(master = "local")

# Use a distinct local name so the `austen_books()` function is not shadowed;
# `name` is given explicitly so the Spark table is still called "austen_books".
austen_df <- austen_books()
books_tbl <- sdf_copy_to(
  sc,
  austen_df,
  name = "austen_books",
  overwrite = TRUE
)

# Keep only rows whose text is non-empty, then take 100 of them.
first_tbl <- books_tbl %>%
  filter(nchar(text) > 0) %>%
  head(100)

# Pipeline: "text" -> "tokens" -> "features", then fit LDA on the result.
# The fitted model is left as a top-level value so it is printed.
first_tbl %>%
  ft_tokenizer("text", "tokens") %>%
  ft_count_vectorizer("tokens", "features") %>%
  ml_lda("features", k = 4)

# Release the Spark connection once the example is done.
spark_disconnect(sc)
# }
# Run the code above in your browser using DataLab