mycorpus <- subset(inaugCorpus, Year > 1970)
quantdfm <- dfm(mycorpus, verbose = FALSE)
# austin's wfm format
austindfm <- as.wfm(quantdfm)
identical(austindfm, convert(quantdfm, to = "austin"))
# tm's DocumentTermMatrix format
tmdfm <- as.DocumentTermMatrix(quantdfm)
str(tmdfm)
# stm package format
stmdfm <- convert(quantdfm, to = "stm")
str(stmdfm)
# illustrate what happens with zero-length documents
quantdfm2 <- dfm(c(punctOnly = "!!!", mycorpus[-1]), verbose = FALSE)
rowSums(quantdfm2)
stmdfm2 <- convert(quantdfm2, to = "stm", docvars = docvars(mycorpus))
str(stmdfm2)
# topicmodels package format
topicmodelsdfm <- quantedaformat2dtm(quantdfm)
identical(topicmodelsdfm, convert(quantdfm, to = "topicmodels"))
# lda package format
ldadfm <- convert(quantdfm, to = "lda")
str(ldadfm)
identical(ldadfm[1], stmdfm[1])
# calling dfm2ldaformat directly
ldadfm <- dfm2ldaformat(quantdfm)
str(ldadfm)
Run the code above in your browser using DataLab