## Not run:
# txt <- movie_review[['review']][1:1000]
# it <- itoken(txt, tolower, word_tokenizer)
# vocab <- create_vocabulary(it)
# #remove very common and uncommon words
# pruned_vocab <- prune_vocabulary(vocab, term_count_min = 10, doc_proportion_max = 0.8,
#   doc_proportion_min = 0.001, max_number_of_terms = 5000)
#
# vectorizer <- vocab_vectorizer(pruned_vocab, grow_dtm = FALSE, skip_grams_window = 5L)
# it <- itoken(txt, tolower, word_tokenizer)
# corpus <- create_corpus(it, vectorizer)
# tcm <- get_tcm(corpus)
# dim(tcm)
## End(Not run)
# Run the code above in your browser using DataLab