# NOT RUN {
toks <- tokens(data_corpus_inaugural)
dict <- dictionary(list(country = "united states",
law=c('law*', 'constitution'),
freedom=c('free*', 'libert*')))
dfm(tokens_lookup(toks, dict, valuetype='glob', verbose = TRUE))
dfm(tokens_lookup(toks, dict, valuetype='glob', verbose = TRUE, nomatch = 'NONE'))
dict_fix <- dictionary(list(country = "united states",
law = c('law', 'constitution'),
freedom = c('freedom', 'liberty')))
# dfm(applyDictionary(toks, dict_fix, valuetype='fixed'))
dfm(tokens_lookup(toks, dict_fix, valuetype='fixed'))
# hierarchical dictionary example
txt <- c(d1 = "The United States has the Atlantic Ocean and the Pacific Ocean.",
d2 = "Britain and Ireland have the Irish Sea and the English Channel.")
toks <- tokens(txt)
dict <- dictionary(list(US = list(Countries = c("States"),
oceans = c("Atlantic", "Pacific")),
Europe = list(Countries = c("Britain", "Ireland"),
oceans = list(west = "Irish Sea",
east = "English Channel"))))
tokens_lookup(toks, dict, levels = 1)
tokens_lookup(toks, dict, levels = 2)
tokens_lookup(toks, dict, levels = 1:2)
tokens_lookup(toks, dict, levels = 3)
tokens_lookup(toks, dict, levels = c(1,3))
tokens_lookup(toks, dict, levels = c(2,3))
# show unmatched tokens
tokens_lookup(toks, dict, nomatch = "_UNMATCHED")
# }
Run the code above in your browser using DataLab