# NOT RUN {
# Example: listing the most / least frequent features of a document-feature
# matrix with topfeatures(). Assumes the quanteda package is attached.

# Tokenise the post-1980 inaugural addresses first, then build the dfm.
# Calling dfm() directly on a corpus (and handing it tokens() options such as
# remove_punct) is deprecated in newer quanteda releases; tokens() followed by
# dfm() gives the same matrix and works on older releases as well.
recent_toks <- tokens(
  corpus_subset(data_corpus_inaugural, Year > 1980),
  remove_punct = TRUE
)
mydfm <- dfm(recent_toks)

# Same matrix with English stopwords dropped
mydfm_nostopw <- dfm_remove(mydfm, stopwords("english"))

# most frequent features
topfeatures(mydfm)
topfeatures(mydfm_nostopw)

# least frequent features
topfeatures(mydfm_nostopw, decreasing = FALSE)

# top features of individual documents
topfeatures(mydfm_nostopw, n = 5, groups = docnames(mydfm_nostopw))

# grouping by president last name
# The grouping values are passed explicitly (one per document) instead of the
# docvar name as a string, which newer quanteda releases no longer look up.
topfeatures(
  mydfm_nostopw,
  n = 5,
  groups = docvars(mydfm_nostopw, "President")
)

# features by document frequencies
tail(topfeatures(mydfm, scheme = "docfreq", n = 200))
# }
# Run the code above in your browser using DataLab