# Make the corpora used in these examples available (GERMAPARLMINI is
# expected to come with polmineR, REUTERS with RcppCWB).
use("polmineR")
use(pkg = "RcppCWB", corpus = "REUTERS")

# Build a partition of GERMAPARLMINI limited to interjection "speech" and to
# speakers matching the regular expression ".*Merkel", enrich it for the
# p-attribute "word", then get the cooccurrences of "Deutschland".
merkel <- enrich(
  partition(
    "GERMAPARLMINI",
    interjection = "speech",
    speaker = ".*Merkel",
    regex = TRUE
  ),
  p_attribute = "word"
)
cooc <- cooccurrences(merkel, query = "Deutschland")
# Use the subset-method to filter results step by step. Every step filters the
# result of the previous step, so the final object `e` meets all four criteria
# (the same outcome as the piped variant of this example).
a <- cooccurrences("REUTERS", query = "oil")
b <- subset(a, !is.na(ll)) # drop rows without a value for ll
c <- subset(b, !word %in% tm::stopwords("en")) # drop English stopwords
d <- subset(c, count_coi >= 5) # keep rows with count_coi of at least 5
# Filter `d` (not `c`): otherwise the count_coi threshold is silently skipped.
# NOTE(review): 10.83 is presumably the chi-squared critical value for
# p < 0.001 at one degree of freedom - confirm.
e <- subset(d, ll >= 10.83)
format(e)
# The same kind of filtering written as a single pipeline: every subset() call
# receives the result of the previous step, and format() is applied last.
cooccurrences("REUTERS", query = "oil") %>%
  subset(!is.na(ll)) %>%                        # rows with a value for ll
  subset(!word %in% tm::stopwords("en")) %>%    # no English stopwords
  subset(count_coi >= 5) %>%                    # count_coi of at least 5
  subset(ll >= 10.83) %>%                       # ll of at least 10.83
  format()
# Turn the formatted cooccurrence table into a DT htmlwidget that offers
# export buttons (Excel, PDF, CSV).
# (openxlsx::write.xlsx() is an alternative for writing an Excel file.)
# \donttest{
interactive_table <- cooccurrences("REUTERS", query = "oil") %>%
  format() %>%
  DT::datatable(
    extensions = "Buttons",
    options = list(
      dom = "Btip",
      buttons = c("excel", "pdf", "csv")
    )
  )

# Only display the widget when R is used interactively.
if (interactive()) {
  show(interactive_table)
}
# }
# Cooccurrences for a whole set of partitions at once.
# (Not run by default to save time on test machines.)
# \donttest{
pb <- partition_bundle("GERMAPARLMINI", s_attribute = "speaker")

# Names of the partitions in which "Deutschland" is counted at least 25 times.
ps <- count(pb, query = "Deutschland")[Deutschland >= 25][["partition"]]
pb_min <- pb[ps]
y <- cooccurrences(pb_min, query = "Deutschland")

# Show the first two results, but only in an interactive session.
if (interactive()) {
  y[[1]]
}
if (interactive()) {
  y[[2]]
}

# Alternative workflow: start from the corpus, subset it to two speakers,
# split the subcorpus by speaker and compute the cooccurrences per speaker.
y2 <- corpus("GERMAPARLMINI") %>%
  subset(speaker %in% c("Hubertus Heil", "Angela Dorothea Merkel")) %>%
  split(s_attribute = "speaker") %>%
  cooccurrences(query = "Deutschland")
# }
# Run the code above in your browser using DataLab