# NOT RUN {
# Build a document-feature matrix from the inaugural addresses after 1970.
# Tokenise explicitly before dfm(): passing a corpus or character vector
# straight to dfm() is deprecated in quanteda >= 3.0.
corp <- corpus_subset(data_corpus_inaugural, Year > 1970)
dfmat1 <- dfm(tokens(corp))
# austin's wfm format
identical(dim(dfmat1), dim(convert(dfmat1, to = "austin")))
# stm package format
stmmat <- convert(dfmat1, to = "stm")
str(stmmat)
# triplet
tripletmat <- convert(dfmat1, to = "tripletlist")
str(tripletmat)
# illustrate what happens with zero-length documents
# "punctOnly" replaces the first document, so the document count still
# matches the rows of docvars(corp). Punctuation has to be removed at the
# tokenising step, otherwise "!!!" keeps its tokens and no document is empty.
toks2 <- tokens(c(punctOnly = "!!!", corp[-1]), remove_punct = TRUE)
dfmat2 <- dfm(toks2)
rowSums(dfmat2)
str(convert(dfmat2, to = "stm", docvars = docvars(corp)))
# }
# NOT RUN {
# Conversions of dfmat1 into formats used by other text-analysis packages.

# DocumentTermMatrix format of the tm package
tmdfm <- convert(dfmat1, to = "tm")
str(tmdfm)

# format of the topicmodels package
dfmat1_topicmodels <- convert(dfmat1, to = "topicmodels")
str(dfmat1_topicmodels)

# format of the lda package
dfmat1_lda <- convert(dfmat1, to = "lda")
str(dfmat1_lda)
# }
# Run the code above in your browser using DataLab