# Long-format input: one row per (doc_id, term) pair with its frequency.
term_freqs <- data.frame(
  doc_id = c(1, 1, 2, 3, 4),
  term = c("A", "C", "Z", "X", "G"),
  freq = c(1, 5, 7, 10, 0)
)
dtm <- document_term_matrix(term_freqs)

# Sums over the columns of the document-term matrix.
col_totals <- dtm_colsums(dtm)
col_totals

# Sums over the rows of the document-term matrix; show the first entries.
x <- dtm_rowsums(dtm)
head(x)
# Grouped column summation ----
# Each element of `groups` names a set of columns (terms) to be summed
# together under the name of that element.
x <- list(
  doc1 = c("aa", "bb", "aa", "b"),
  doc2 = c("bb", "bb", "BB")
)
dtm <- document_term_matrix(x)
dtm

# Two groups; "A" is listed although the input tokens above do not contain it.
groups_ab <- list(
  combinedB = c("b", "bb"),
  combinedA = c("aa", "A")
)
dtm_colsums(dtm, groups = groups_ab)

# A single group.
groups_a <- list(combinedA = c("aa", "A"))
dtm_colsums(dtm, groups = groups_a)

# Groups built from a case-insensitive pattern match on the column names,
# a group listing a term ("ZZZ") absent from the input, and an empty group.
b_terms <- grep(
  pattern = "b",
  x = colnames(dtm),
  ignore.case = TRUE,
  value = TRUE
)
groups_mixed <- list(
  combinedB = b_terms,
  combinedA = c("aa", "A", "ZZZ"),
  test = character()
)
dtm_colsums(dtm, groups = groups_mixed)

# No groups at all.
dtm_colsums(dtm, groups = list())
# Grouped row summation ----
# Each element of `groups` names a set of rows (documents) to be summed
# together under the name of that element.
x <- list(
  doc1 = c("aa", "bb", "aa", "b"),
  doc2 = c("bb", "bb", "BB"),
  doc3 = c("bb", "bb", "BB"),
  doc4 = c("bb", "bb", "BB", "b")
)
dtm <- document_term_matrix(x)
dtm

# doc1 on its own, the remaining documents combined.
groups_known <- list(
  doc1 = "doc1",
  combi = c("doc2", "doc3", "doc4")
)
dtm_rowsums(dtm, groups = groups_known)

# One group refers to a document name ("docUnknown") not present in the input.
groups_with_unknown <- list(
  unknown = "docUnknown",
  combi = c("doc2", "doc3", "doc4")
)
dtm_rowsums(dtm, groups = groups_with_unknown)

# No groups at all.
dtm_rowsums(dtm, groups = list())
# Run the code above in your browser using DataLab