# Make the REUTERS corpus available before any of the calls below refer to it.
# (This call was previously disabled by a stray roxygen prefix, so nothing in
# the script activated the corpus.)
use(pkg = "RcppCWB", corpus = "REUTERS")

# examples not run by default to save time on CRAN test machines
# \donttest{

# Variant 1: enriching partition_bundle explicitly.
# Build a partition_bundle split on the s-attribute "id", add counts for the
# p-attribute "word" via enrich(), then assemble the matrix from the "count"
# column.
tdm <- corpus("REUTERS") %>%
  partition_bundle(s_attribute = "id") %>%
  enrich(p_attribute = "word") %>%
  as.TermDocumentMatrix(col = "count")

# Variant 2: leave the counting to the as.TermDocumentMatrix-method.
# No enrich() step here; the p-attribute is passed to the method directly.
tdm <- partition_bundle("REUTERS", s_attribute = "id") %>%
  as.TermDocumentMatrix(p_attribute = "word", verbose = FALSE)

# Variant 3: obtain TermDocumentMatrix directly (fastest option).
# The corpus is given by its ID, with no intermediate partition_bundle.
tdm <- as.TermDocumentMatrix(
  "REUTERS",
  p_attribute = "word",
  s_attribute = "id",
  verbose = FALSE
)

# Variant 4: workflow using split().
# Same idea as variant 2, but the corpus object is split on "id" first.
dtm <- corpus("REUTERS") %>%
  split(s_attribute = "id") %>%
  as.TermDocumentMatrix(p_attribute = "word")

# }
# Run the code above in your browser using DataLab