# NOT RUN {
# Examples: converting a quanteda dfm to the formats used by other packages.

# Build a document-feature matrix from the inaugural addresses after 1970.
# Tokenise explicitly before calling dfm(): passing a corpus or character
# vector straight to dfm() was deprecated in quanteda 3.0.
mycorpus <- corpus_subset(data_corpus_inaugural, Year > 1970)
quantdfm <- dfm(tokens(mycorpus), verbose = FALSE)

# austin's wfm format: the dimensions should be unchanged by the conversion
identical(dim(quantdfm), dim(convert(quantdfm, to = "austin")))

# stm package format
stmdfm <- convert(quantdfm, to = "stm")
str(stmdfm)

# triplet
triplet <- convert(quantdfm, to = "tripletlist")
str(triplet)

# illustrate what happens with zero-length documents
# The first text is replaced by a punctuation-only document. Punctuation must
# be removed explicitly (remove_punct defaults to FALSE), otherwise "!!!" still
# contributes features and the document is not actually empty.
# as.character() makes the corpus-to-character coercion explicit before c().
texts2 <- c(punctOnly = "!!!", as.character(mycorpus)[-1])
quantdfm2 <- dfm(tokens(texts2, remove_punct = TRUE), verbose = FALSE)
rowSums(quantdfm2)
# docvars(mycorpus) has one row per document in quantdfm2 (same document count)
stmdfm2 <- convert(quantdfm2, to = "stm", docvars = docvars(mycorpus))
str(stmdfm2)
# }
# NOT RUN {
# These conversions reuse quantdfm from the example above.
# NOTE(review): presumably not run because they need the optional tm,
# topicmodels and lda packages - confirm.

# tm's DocumentTermMatrix format
tmdfm <- convert(quantdfm, to = "tm")
str(tmdfm)

# topicmodels package format
str(convert(quantdfm, to = "topicmodels"))

# lda package format
ldadfm <- convert(quantdfm, to = "lda")
str(ldadfm)
# }
# Run the code above in your browser using DataLab