# Load the annotated reviews used by both examples below
data(brussels_reviews_anno)
##
## Which nouns occur in texts containing the term 'centre'
##
# Keep the French tokens tagged "NN", retaining only document id and lemma
x <- subset(
  brussels_reviews_anno,
  language == "fr" & xpos == "NN",
  select = c("doc_id", "lemma")
)
x <- document_term_frequencies(x)
dtm <- document_term_matrix(x)
# Logical mask over the rows of dtm: TRUE where the 'centre' column is positive
has_centre <- dtm[, "centre"] > 0
relevant <- dtm_chisq(dtm, groups = has_centre)
head(relevant, 10)
##
## Which adjectives occur in texts containing the term 'hote'
##
# Keep the French tokens tagged "JJ", retaining only document id and lemma
x <- subset(
  brussels_reviews_anno,
  language == "fr" & xpos == "JJ",
  select = c("doc_id", "lemma")
)
x <- document_term_frequencies(x)
dtm <- document_term_matrix(x)
# Ids of every document (any language, any tag) with a token whose lemma is
# 'hote'; the term need not be a column of dtm, which holds "JJ" lemmas only
hote_docs <- brussels_reviews_anno$doc_id[brussels_reviews_anno$lemma %in% "hote"]
# One logical per row of dtm: TRUE when that row's name is among those ids
group <- rownames(dtm) %in% hote_docs
relevant <- dtm_chisq(dtm, groups = group)
head(relevant, 10)
if (FALSE) {
  # Not run: show the p-values without scientific notation.
  # options() returns the previous settings, which are restored afterwards so
  # the scipen change does not leak into the rest of the session.
  old_opts <- options(scipen = 100)
  # print() is explicit: only the last value of a braced block auto-prints,
  # and the last expression here is the options() reset
  print(head(relevant, 10))
  options(old_opts)
}
# Run the code above in your browser using DataLab