# Restrict the inaugural address corpus to documents whose Year is after 1900.
corp <- data_corpus_inaugural |>
  corpus_subset(Year > 1900)

# Example dictionary: each key maps to one or more match patterns.
# NOTE(review): "notincorpus" looks like a deliberate non-matching value, and
# "tax*" looks like a wildcard pattern despite the key name "taxregex" --
# confirm against quanteda's pattern-matching defaults.
dict <- dictionary(
  list(
    christmas = c("Christmas", "Santa", "holiday"),
    opposition = c("Opposition", "reject", "notincorpus"),
    taxing = "taxing",
    taxation = "taxation",
    taxregex = "tax*",
    country = "america"
  )
)

# Tokenize the corpus, look the tokens up in the dictionary, and build a
# document-feature matrix from the result (auto-printed at top level).
corp |>
  tokens() |>
  tokens_lookup(dictionary = dict) |>
  dfm()
# Subsetting a dictionary ----
# by position: the first two entries
dict[seq_len(2)]
# by name: several entries at once
dict[c("christmas", "opposition")]
# a single entry, extracted with `[[`
dict[["opposition"]]

# Combining dictionaries ----
# join two single-key subsets with c()
c(
  dict["christmas"],
  dict["country"]
)
# Examples that download dictionary files from the web. The if (FALSE) guard
# means this block is never executed when the script is run.
if (FALSE) {
  dfmat <- dfm(tokens(data_corpus_inaugural))

  # import the Laver-Garry dictionary from Provalis Research
  dictfile <- tempfile()
  download.file(
    "https://provalisresearch.com/Download/LaverGarry.zip",
    dictfile,
    mode = "wb" # the download is a zip archive, so write it in binary mode
  )
  # extract into the session temp directory and read the .cat file from there
  td <- tempdir()
  unzip(dictfile, exdir = td)
  dictlg <- dictionary(file = file.path(td, "LaverGarry.cat"))
  dfm_lookup(dfmat, dictlg)

  # import a LIWC formatted dictionary from http://www.moralfoundations.org
  tf <- tempfile()
  download.file("http://bit.ly/37cV95h", tf)
  dictliwc <- dictionary(file = tf, format = "LIWC")
  dfm_lookup(dfmat, dictliwc)
}
# Run the code above in your browser using DataLab