# Build a document-feature matrix from the inaugural corpus, restricted to
# documents whose Year is greater than 1980; punctuation is dropped during
# tokenization.
dfmat1 <- data_corpus_inaugural |>
  corpus_subset(Year > 1980) |>
  tokens(remove_punct = TRUE) |>
  dfm()

# Variant of the matrix above with English stopwords removed.
dfmat2 <- dfmat1 |>
  dfm_remove(pattern = stopwords("en"))

# Most frequent features, with and without stopwords
dfmat1 |> topfeatures()
dfmat2 |> topfeatures()

# Least frequent features (ascending order)
dfmat2 |> topfeatures(decreasing = FALSE)

# Top five features within each individual document
dfmat2 |> topfeatures(n = 5, groups = docnames(dfmat2))

# Top five features grouped by the President document variable
dfmat2 |> topfeatures(n = 5, groups = President)

# Rank features by document frequency and show the tail end of the top 200
dfmat1 |>
  topfeatures(n = 200, scheme = "docfreq") |>
  tail()
# Run the code above in your browser using DataLab