# Example: pointwise mutual information (PMI) with polmineR.
# Assumes the polmineR package is already attached (e.g. via library()).

# Activate the demo corpora first: every call below refers to the "REUTERS"
# corpus by name, so it has to be registered before it is queried.
use("polmineR")
use(pkg = "RcppCWB", corpus = "REUTERS")

# --- PMI for the cooccurrences of a query term -------------------------------

# Cooccurrences of the query "oil", scored with the "pmi" method.
y <- cooccurrences("REUTERS", query = "oil", method = "pmi")

# Re-derive the score by hand as log2(P(joint) / (P(node) * P(token))), with
# all counts turned into relative frequencies by dividing by N.
# NOTE(review): presumably `count_coi` is the count inside the context window,
# `count_partition` the count in the whole partition and `count(y)` the count
# of the query itself -- confirm against the polmineR documentation.
N <- size(y)[["partition"]]
I <- log2(
  (y[["count_coi"]] / N) /
    ((count(y) / N) * (y[["count_partition"]] / N))
)

# --- PMI for bigrams ----------------------------------------------------------

# Decode the "word" p-attribute of the corpus into a data.table; no
# s-attributes are requested (empty character vector).
dt <- decode(
  "REUTERS",
  p_attribute = "word",
  s_attribute = character(),
  to = "data.table",
  verbose = FALSE
)

# Build 2-grams from the decoded tokens, count single words across the corpus,
# and pass those counts to pmi() as the observed frequencies.
n <- ngrams(dt, n = 2L, p_attribute = "word")
obs <- count("REUTERS", p_attribute = "word")
phrases <- pmi(n, observed = obs)
# Run the code above in your browser using DataLab