# NOT RUN {
# Activate the corpora that ship with the polmineR package (see ?use)
use("polmineR")

# Basic usage: build a kwic object for one query and inspect it
K <- kwic("GERMAPARLMINI", query = "Integration")
get_corpus(K)
length(K)

# Indexing with `[`: a single index first, then a range of indices
# (the second assignment deliberately overwrites the first)
K_min <- K[1]
K_min <- K[1:5]
# Working with the kwic_bundle class: run one kwic() call per query and
# combine the resulting list into a bundle
queries <- c("oil", "prices", "barrel")
li <- lapply(
  queries,
  function(term) kwic("REUTERS", query = term)
)
kb <- as.bundle(li)
# Apply the count-method to a kwic object
coi <- kwic("REUTERS", query = "oil") %>%
  count(p_attribute = "word")

# features- vs cooccurrences-method (identical results)
# Reference counts: the whole corpus without the query term itself ...
ref <- count("REUTERS", p_attribute = "word") %>%
  subset(word != "oil")
# ... whose size slot is reduced by the count of the query term
ref@size <- ref@size - count("REUTERS", "oil")[["count"]]

y_features <- features(coi, ref, method = "ll", included = TRUE)
y_cooc <- cooccurrences("REUTERS", query = "oil")
# extract node and left and right context as character vectors
oil <- kwic("REUTERS", query = "oil")
# fmt = NULL: presumably yields the node without any markup -- TODO confirm
as.character(oil, fmt = NULL)
as.character(oil) # node wrapped into <i> tag by default
# a custom sprintf-style format: here the node would be wrapped into <b> tags
as.character(oil, fmt = "<b>%s</b>")
# Subsetting kwic objects: the filter expression passed to subset() refers to
# `left` / `right` (context of the node), as used in the calls below
oil <- corpus("REUTERS") %>%
  kwic(query = "oil") %>%
  subset(grepl("prices", right))

saudi_arabia <- corpus("REUTERS") %>%
  kwic(query = "Arabia") %>%
  subset(grepl("Saudi", left))

# Filtering on metadata: enrich() adds the s-attribute "party" first so that
# subset() can refer to it. The argument name is spelled out in full
# (`s_attributes`, as in the other enrich() calls of this file) instead of
# relying on R's partial argument matching of `s_attribute`.
int_spd <- corpus("GERMAPARLMINI") %>%
  kwic(query = "Integration") %>%
  enrich(s_attributes = "party") %>%
  subset(grepl("SPD", party))
# Convert a kwic object into a data.frame with html tags
int <- corpus("GERMAPARLMINI") %>%
  kwic(query = "Integration")

# Without further metadata
as.data.frame(int)

# With s-attributes added by enrich() before the conversion
int %>%
  enrich(s_attributes = c("date", "speaker", "party")) %>%
  as.data.frame()
# Merge a bundle of kwic objects into one kwic object: one kwic() call per
# query, bundled and then merged
reuters <- corpus("REUTERS")
queries <- c('"Saudi" "Arabia"', "oil", '"barrel.*"')
comb <- lapply(
  queries,
  function(query_string) kwic(reuters, query = query_string)
) %>%
  as.bundle() %>%
  merge()
# Enrich a kwic object with the s-attributes "date", "speaker" and "party"
i <- corpus("GERMAPARLMINI") %>%
  kwic(query = "Integration") %>%
  enrich(s_attributes = c("date", "speaker", "party"))
# }
# Run the code above in your browser using DataLab