# Activate the corpora bundled with the polmineR package; the GERMAPARLMINI
# corpus used below is expected to come from there.
use("polmineR")

# Corpus of interest: speeches (interjections excluded) by Volker Kauder,
# with counts prepared for the "word" p-attribute.
kauder <- partition(
  "GERMAPARLMINI",
  speaker = "Volker Kauder",
  interjection = "speech",
  p_attribute = "word"
)

# Reference: every speech in the corpus, prepared the same way.
all <- partition(
  "GERMAPARLMINI",
  interjection = "speech",
  p_attribute = "word"
)

# `kauder` is a subset of `all` (same filter plus a speaker restriction),
# which is why `included = TRUE` is passed.
terms_kauder <- features(x = kauder, y = all, included = TRUE)

# Keep only rows whose chi-square rank is at most 100, then preview them.
top100 <- subset(terms_kauder, rank_chisquare <= 100)
head(top100)
# Alternative route: coerce both partitions to count objects first and run
# the same comparison on those.
kauder_count <- as(kauder, "count")
all_count <- as(all, "count")

terms_kauder <- features(x = kauder_count, y = all_count, included = TRUE)

# Same filter as before: chi-square rank of at most 100, then a preview.
top100 <- subset(terms_kauder, rank_chisquare <= 100)
head(top100)
# Build a document-term matrix from feature statistics. The block is guarded
# by `if (FALSE)` so it never runs, keeping the run time of the examples short.
if (FALSE) {
  # The REUTERS corpus used here is expected to ship with RcppCWB.
  use("RcppCWB")

  # One partition per value of the "id" s-attribute, enriched with word counts.
  docs <- enrich(
    partition_bundle("REUTERS", s_attribute = "id"),
    p_attribute = "word"
  )

  # Reference counts over the whole corpus.
  all <- count(corpus("REUTERS"), p_attribute = "word")

  # Features for the first five documents only, compared against the corpus.
  docs_terms <- features(docs[1:5], all, included = TRUE, progress = FALSE)

  # Matrix cells are taken from the "chisquare" column.
  dtm <- as.DocumentTermMatrix(docs_terms, col = "chisquare", verbose = FALSE)
}
# Features for every object in a count_bundle.

# Reference: word counts over the complete GERMAPARLMINI corpus.
ref <- count(corpus("GERMAPARLMINI"), p_attribute = "word")

# Corpora of interest: restrict to two speakers, split by speaker, and count
# words for each resulting part.
cois <- corpus("GERMAPARLMINI") %>%
  subset(speaker %in% c("Angela Dorothea Merkel", "Hubertus Heil")) %>%
  split(s_attribute = "speaker") %>%
  count(p_attribute = "word")

# Compare each speaker's counts against the reference. The speakers are part
# of the reference corpus, hence `included = TRUE`.
y <- features(
  cois,
  ref,
  included = TRUE,
  method = "chisquare",
  progress = TRUE
)
# Run the code above in your browser using DataLab