## Converting a dfm into other packages' formats
# Tokenize the inaugural addresses given after 1970, then build the
# document-feature matrix that every conversion below starts from.
toks <- tokens(corpus_subset(data_corpus_inaugural, Year > 1970))
dfmat1 <- dfm(toks)

# austin's wfm format: the converted object should report the same
# dimensions as the dfm it was made from
dfmat1 |>
  convert(to = "austin") |>
  dim() |>
  identical(dim(dfmat1))

# stm package format
stmmat <- dfmat1 |>
  convert(to = "stm")
str(stmmat)

# triplet list format
tripletmat <- dfmat1 |>
  convert(to = "tripletlist")
str(tripletmat)
# The conversions in this block are deliberately switched off by the
# constant FALSE guard, so nothing inside it is ever evaluated.
if (FALSE) {
  # tm's DocumentTermMatrix format
  tmdfm <- dfmat1 |>
    convert(to = "tm")
  str(tmdfm)

  # topicmodels package format
  dfmat1 |>
    convert(to = "topicmodels") |>
    str()

  # lda package format
  dfmat1 |>
    convert(to = "lda") |>
    str()
}
## Converting a corpus into a data.frame or JSON
# Two-document toy corpus carrying one integer and one character docvar.
corp <- corpus(
  c(d1 = "Text one.", d2 = "Text two."),
  docvars = data.frame(
    dvar1 = 1:2,
    dvar2 = c("one", "two"),
    stringsAsFactors = FALSE
  )
)

# Each result is printed at top level, one per target format.
corp |>
  convert(to = "data.frame")
corp |>
  convert(to = "json")
# Run the code above in your browser using DataLab