Last chance! 50% off unlimited learning
Sale ends in
applyDictionary(x, dictionary, ...)
"applyDictionary"(x, dictionary, exclusive = TRUE, valuetype = c("glob", "regex", "fixed"), case_insensitive = TRUE, capkeys = !exclusive, verbose = TRUE, ...)
x
exclusive: if TRUE, remove all features not in dictionary;
otherwise, replace values in dictionary with keys while leaving other
features unaffected
valuetype: "glob"
for
"glob"-style wildcard expressions (the format used in Wordstat and LIWC
formatted dictionary values); "regex"
for regular expressions; or
"fixed"
for exact matching (entire words, for instance)
TRUE
capkeys: if TRUE, convert dictionary keys to
uppercase to distinguish them from other features
TRUE
# Example: apply one dictionary to a small document-feature matrix under
# each of the three value types ("glob", "regex", "fixed").

# Dictionary keys and their values. "notincorpus" does not occur in the
# example texts below; "taxglob" and "taxregex" hold a glob-style and a
# regex-style pattern respectively.
dictEntries <- list(
  christmas  = c("Christmas", "Santa", "holiday"),
  opposition = c("Opposition", "reject", "notincorpus"),
  taxglob    = "tax*",
  taxregex   = "tax.+$",
  country    = c("United_States", "Sweden")
)
myDict <- dictionary(dictEntries)

# Two short documents, turned into a dfm with English stopwords ignored.
exampleTexts <- c(
  "My Christmas was ruined by your opposition tax plan.",
  "Does the United_States or Sweden have more progressive taxation?"
)
myDfm <- dfm(
  exampleTexts,
  ignoredFeatures = stopwords("english"),
  verbose = FALSE
)
myDfm

# --- glob format ---------------------------------------------------------
# Same lookup twice, once with the default case handling and once with
# case-sensitive matching.
applyDictionary(myDfm, myDict, valuetype = "glob")
applyDictionary(myDfm, myDict, valuetype = "glob", case_insensitive = FALSE)

# --- regex v. glob format ------------------------------------------------
# Read as a regular expression, "tax*" means "ta" followed by zero or more
# "x", so it also matches "united_states" (via the "ta" in "states").
applyDictionary(myDfm, myDict, valuetype = "glob")
applyDictionary(myDfm, myDict, valuetype = "regex", case_insensitive = TRUE)

# --- fixed format --------------------------------------------------------
# No pattern matching: dictionary values are compared as literal strings.
applyDictionary(myDfm, myDict, valuetype = "fixed")
applyDictionary(myDfm, myDict, valuetype = "fixed", case_insensitive = FALSE)
Run the code above in your browser using DataLab