# NOT RUN {
# Build a document-feature matrix from the inaugural addresses after 1980.
# Tokenise explicitly before calling dfm(): handing a corpus (and tokeniser
# options such as remove_punct) straight to dfm() is deprecated in newer
# quanteda releases, while tokens() %>% dfm() works on old and new versions.
dfmat1 <- corpus_subset(data_corpus_inaugural, Year > 1980) %>%
  tokens(remove_punct = TRUE) %>%
  dfm()
# same matrix with English stopwords removed
dfmat2 <- dfm_remove(dfmat1, stopwords("english"))
# most frequent features
topfeatures(dfmat1)
topfeatures(dfmat2)
# least frequent features
topfeatures(dfmat2, decreasing = FALSE)
# top features of individual documents
topfeatures(dfmat2, n = 5, groups = docnames(dfmat2))
# grouping by president last name
# (pass the grouping values themselves, one per document: a quoted docvar
# name is not accepted as `groups` by newer quanteda releases)
topfeatures(dfmat2, n = 5, groups = docvars(dfmat2, "President"))
# features by document frequencies
# (tail() shows the last entries of the 200 features ranked by docfreq)
tail(topfeatures(dfmat1, scheme = "docfreq", n = 200))
# }
# Run the code above in your browser using DataLab