# NOT RUN {
# Example: segmenting a tokens object at sentence-ending punctuation.
# NOTE(review): tokens() and tokens_segment() are not defined here; they are
# presumably provided by the quanteda package, which must be attached first.

# Sample input: one document made of two sentences (line breaks are part of
# the string).
txts <- "Fellow citizens, I am again called upon by the voice of my country to
execute the functions of its Chief Magistrate. When the occasion proper for
it shall arrive, I shall endeavor to express the high sense I entertain of
this distinguished honor."

# Tokenize the text once; both segmentation calls below reuse the result.
toks <- tokens(x = txts)

# Segment with a regular expression: the pattern matches a token that consists
# solely of one sentence-terminating character (Unicode property Sterm), so
# this splits at sentence ends rather than at every punctuation mark.
# pattern_position = "after" states that the delimiter follows the text it
# closes; extract_pattern = TRUE asks for the matched delimiter to be
# extracted from the resulting segments.
tokens_segment(
  x = toks,
  pattern = "^\\p{Sterm}$",
  valuetype = "regex",
  extract_pattern = TRUE,
  pattern_position = "after"
)

# Same segmentation, but with an explicit list of literal delimiter tokens
# matched as fixed strings instead of a regular expression.
tokens_segment(
  x = toks,
  pattern = c(".", "?", "!"),
  valuetype = "fixed",
  extract_pattern = TRUE,
  pattern_position = "after"
)
# }
# Run the code above in your browser using DataLab