tokenize(x, ...)
## S3 method for class 'character':
tokenize(x, what = c("word", "sentence", "character",
"fastestword", "fasterword"), removeNumbers = FALSE, removePunct = FALSE,
removeSeparators = TRUE, removeTwitter = FALSE, ngrams = 1,
concatenator = "_", simplify = FALSE, verbose = FALSE, ...)
## S3 method for class 'corpus':
tokenize(x, ...)
"word"
.
Available alternatives are c("character", "word", "line_break",
"sentence")
. See stringi-search-boundaries.
2day
removePunct=FALSE
. Only applicable
for what = "character"
(when you pro
@
and #; set to
FALSE
if you wish to eliminate these.
1
(unigrams). For bigrams, for i