# Tokenize the inaugCorpus object, requesting that punctuation be removed.
toks <- tokenize(inaugCorpus, removePunct = TRUE)

# The same two word pairs are written three ways below: as literal tokens,
# as glob patterns, and as regular expressions. Every joinTokens() call
# assigns to toks2, so only the result of the last (regex) call is what the
# kwic() calls at the bottom inspect.

# Literal tokens. They contain no wildcard characters and are passed with
# the "glob" value type here.
seqs_token <- list(
  c("foreign", "policy"),
  c("United", "States")
)
toks2 <- joinTokens(toks, seqs_token, "_", "glob")

# Glob patterns.
seqs_glob <- list(
  c("foreign", "polic*"),
  c("United", "States")
)
toks2 <- joinTokens(toks, seqs_glob, "_", "glob")

# Regular expressions.
seqs_regex <- list(
  c("^foreign", "^polic(ie|y)"),
  c("^United", "^States")
)
toks2 <- joinTokens(toks, seqs_regex, "_", "regex")

# Keyword-in-context lookups on the joined tokens, with a window of 1.
kwic(toks2, "foreign_policy", window = 1) # joined
kwic(toks2, c("foreign", "policy"), window = 1) # not joined
kwic(toks2, "United_States", window = 1) # joined
# Run the code above in your browser using DataLab