use("polmineR")
use(pkg = "RcppCWB", corpus = "REUTERS")
debates <- partition("GERMAPARLMINI", date = ".*", regex=TRUE)
count(debates, query = "Arbeit") # get frequencies for one token
count(debates, c("Arbeit", "Freizeit", "Zukunft")) # get frequencies for multiple tokens
count("GERMAPARLMINI", query = c("Migration", "Integration"), p_attribute = "word")
debates <- partition_bundle(
"GERMAPARLMINI", s_attribute = "date", values = NULL,
mc = FALSE, verbose = FALSE
)
y <- count(debates, query = "Arbeit", p_attribute = "word")
y <- count(debates, query = c("Arbeit", "Migration", "Zukunft"), p_attribute = "word")
count("GERMAPARLMINI", '"Integration.*"', breakdown = TRUE)
P <- partition("GERMAPARLMINI", date = "2009-11-11")
count(P, '"Integration.*"', breakdown = TRUE)
sc <- corpus("GERMAPARLMINI") %>% subset(party == "SPD")
phr <- cpos(sc, query = '"Deutsche.*" "Bundestag.*"', cqp = TRUE) %>%
as.phrases(corpus = "GERMAPARLMINI", enc = "latin1")
cnt <- count(sc, phrases = phr, p_attribute = "word")
# Multiple queries and overlapping query matches. The first count
# operation will issue a warning that matches overlap, see the second
# example for a solution.
corpus("REUTERS") %>%
count(query = c('".*oil"', '"turmoil"'), cqp = TRUE)
corpus("REUTERS") %>%
count(query = '"(.*oil|turmoil)"', cqp =TRUE)
Run the code above in your browser using DataLab