# Keep only the documents of the inaugural corpus with Year > 1970 and
# build the dfm that every conversion below starts from.
mycorpus <- corpus_subset(data_corpus_inaugural, Year > 1970)
quantdfm <- dfm(mycorpus, verbose = FALSE)

# austin's wfm format: compare the dimensions of the converted object
# with those of the source dfm
austin_wfm <- convert(quantdfm, to = "austin")
identical(dim(quantdfm), dim(austin_wfm))

# stm package format: convert, then inspect the structure of the result
stmdfm <- convert(quantdfm, to = "stm")
str(stmdfm)

# illustrate what happens with zero-length documents: a punctuation-only
# text named "punctOnly" is placed in front of all but the first element
# of mycorpus, so the number of texts matches the rows of docvars(mycorpus)
# NOTE(review): presumably dfm() leaves "punctOnly" with no features under
# its defaults -- confirm against the rowSums() output below
texts_with_empty <- c(punctOnly = "!!!", mycorpus[-1])
quantdfm2 <- dfm(texts_with_empty, verbose = FALSE)
rowSums(quantdfm2)

# pass the original corpus' document variables along with the conversion
stm_docvars <- docvars(mycorpus)
stmdfm2 <- convert(quantdfm2, to = "stm", docvars = stm_docvars)
str(stmdfm2)
## Not run: ------------------------------------
# # tm's DocumentTermMatrix format
# tmdfm <- convert(quantdfm, to = "tm")
# str(tmdfm)
#
# # topicmodels package format
# str(convert(quantdfm, to = "topicmodels"))
#
# # lda package format
# ldadfm <- convert(quantdfm, to = "lda")
# str(ldadfm)
## ---------------------------------------------
# Run the code above in your browser using DataLab