# Examples for bundle objects, based on the REUTERS corpus.
# NOTE(review): none of the functions used here are attached in this script;
# it presumably relies on the package providing them being loaded already.

# make the REUTERS corpus of the RcppCWB package available
use("RcppCWB", "REUTERS")
# generate bundle with articles in REUTERS corpus
# (one object per value of the s-attribute "id")
b <- partition_bundle("REUTERS", s_attribute = "id")
# basic operations; each top-level call prints its result
length(b)
names(b)
get_corpus(b)
summary(b)
# enrich with count for p-attribute; the result replaces the previous `b`
b <- enrich(b, p_attribute = "word")
# Indexing and accessing bundle objects ----
# Split the REUTERS corpus by the s-attribute "id"; the result is indexed
# below by position and by name.
reu <- corpus("REUTERS") %>% split(s_attribute = "id")
reu[1:3] # positional index
reu[-1] # negative positional index
reu[-(1:10)]
reu["127"] # index by name
reu$`127` # alternative access
reu[c("127", "273")]
reu[["127"]] <- NULL # assign NULL to the element named "127"

# GERMAPARLMINI is not the corpus activated at the top of this script, so
# activate the corpora shipped with polmineR before using it; otherwise the
# call below fails when the script is run on its own.
use("polmineR")
pb <- partition_bundle("GERMAPARLMINI", s_attribute = "party")
pb$"NA" <- NULL # quotation needed if name is "NA"
# Turn bundle into data.table ----
# Marked \donttest{} in the package documentation, i.e. not tested to save
# time. One step per line: build the bundle, compute cooccurrences for the
# query "oil", then convert the result using column "ll".
dt <- partition_bundle("REUTERS", s_attribute = "id") %>%
  cooccurrences(query = "oil", cqp = FALSE) %>%
  as.data.table(col = "ll")
# Run the code above in your browser using DataLab