# Examples draw on the REUTERS corpus that ships with the RcppCWB package.
# Activate it first, then instantiate a corpus object for it.
use(pkg = "RcppCWB", corpus = "REUTERS")
r <- corpus("REUTERS")
if (interactive()) {
  show_info(r)
}

# The package includes GERMAPARLMINI as sample data; activating the
# corpora of polmineR makes it available.
use(pkg = "polmineR")
gparl <- corpus("GERMAPARLMINI")
if (interactive()) {
  show_info(gparl)
}
# Core methods, called directly on the corpus ID "REUTERS".
# Results of the count()/dispersion() calls are not assigned, so they are
# auto-printed when the script runs at top level.

# Count a single query term.
count("REUTERS", query = "oil")
# Count several query terms in one call.
count("REUTERS", query = c("oil", "barrel"))
# Query written in CQP syntax (cqp = TRUE); breakdown = TRUE presumably
# reports counts for the individual matches -- confirm in the count() docs.
count("REUTERS", query = '"Saudi" "Arab.*"', breakdown = TRUE, cqp = TRUE)
# Dispersion of "oil" across the values of the structural attribute "id".
dispersion("REUTERS", query = "oil", s_attribute = "id")
# Keyword-in-context display for "oil", kept in `k`.
k <- kwic("REUTERS", query = "oil")
# Cooccurrences of "oil", kept in `coocs`.
coocs <- cooccurrences("REUTERS", query = "oil")
# Core methods applied to partition
# Build a partition of REUTERS selected by places = "kuwait"; with
# regex = TRUE the value is presumably matched as a regular expression --
# confirm in the partition() docs.
kuwait <- partition("REUTERS", places = "kuwait", regex = TRUE)
# The same methods as above, now taking the partition object as first argument.
C <- count(kuwait, query = "oil")
D <- dispersion(kuwait, query = "oil", s_attribute = "id")
# meta = "id" requests the "id" attribute alongside the concordance lines
# (assumed from the argument name -- confirm in the kwic() docs).
K <- kwic(kuwait, query = "oil", meta = "id")
CO <- cooccurrences(kuwait, query = "oil")
# Go back to full text: isolate a single document (id 127) as a partition
p <- partition("REUTERS", id = 127)
if (interactive()) read(p)
# Render the partition as HTML and mark occurrences of "oil" in yellow
h <- html(p) %>%
  highlight(highlight = list(yellow = "oil"))
# Display the highlighted HTML. The pipe result is stored in `h`; no object
# named `h_highlighted` is ever created, so referencing that name would fail
# with "object 'h_highlighted' not found" in an interactive session.
if (interactive()) h
# Generate term document matrix (not run by default to save time)
# The lines between the two markers below correspond to a \donttest{} section
# of the package documentation.
# \donttest{ -- start
# One partition per value of the structural attribute "id".
pb <- partition_bundle("REUTERS", s_attribute = "id")
# Count the positional attribute "word" for every partition in the bundle.
cnt <- count(pb, p_attribute = "word")
# Convert the counts into a term-document matrix; col = "count" names the
# column supplying the cell values (assumed -- confirm in the package docs).
tdm <- as.TermDocumentMatrix(cnt, col = "count")
# } -- end of \donttest section
# Run the code above in your browser using DataLab