## Not run:
library(text2vec)
library(magrittr)  # for the %>% pipe used below
data("movie_review")
N = 500
# lowercase and tokenize the first N reviews
tokens = movie_review$review[1:N] %>% tolower %>% word_tokenizer
# build a token iterator, keeping the original document ids
it = itoken(tokens, ids = movie_review$id[1:N])
v = create_vocabulary(it) %>%
  prune_vocabulary(term_count_min = 5, doc_proportion_max = 0.2)
# 'lda_c' stores the DTM in the list format used by collapsed Gibbs sampling
dtm = create_dtm(it, vocab_vectorizer(v), 'lda_c')
lda_model = LatentDirichletAllocation$new(n_topics = 10, vocabulary = v,
                                          doc_topic_prior = 0.1,
                                          topic_word_prior = 0.1)
doc_topic_distr = lda_model$fit_transform(dtm, n_iter = 20, check_convergence_every_n = 5)
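# A minimal sanity check on the fitted output (base R only; not part of the
# original example). doc_topic_distr should be an N x n_topics matrix of
# per-document topic weights; rows sum to ~1 if the model returns normalized
# distributions.
# dim(doc_topic_distr)        # expected: c(N, 10)
# head(doc_topic_distr[1, ])  # topic weights for the first document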
# run the LDAvis visualisation if needed (make sure the LDAvis package is installed)
# lda_model$plot()
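# Hedged sketch: inspect the top words per topic. get_top_words() exists on
# text2vec's newer (WarpLDA-based) LDA class; older LatentDirichletAllocation
# objects may not expose it, so treat this call as illustrative only.
# lda_model$get_top_words(n = 10, topic_number = 1L:10L, lambda = 1)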
## End(Not run)