# NOT RUN {
## tokens_select with simple examples
toks <- tokens(c("This is a sentence.", "This is a second sentence."),
remove_punct = TRUE)
tokens_select(toks, c("is", "a", "this"), selection = "keep", padding = FALSE)
tokens_select(toks, c("is", "a", "this"), selection = "keep", padding = TRUE)
tokens_select(toks, c("is", "a", "this"), selection = "remove", padding = FALSE)
tokens_select(toks, c("is", "a", "this"), selection = "remove", padding = TRUE)
# how case_insensitive works
tokens_select(toks, c("is", "a", "this"), selection = "remove", case_insensitive = TRUE)
tokens_select(toks, c("is", "a", "this"), selection = "remove", case_insensitive = FALSE)
## tokens_remove example
txt <- c(wash1 <- "Fellow citizens, I am again called upon by the voice of my country to
execute the functions of its Chief Magistrate.",
wash2 <- "When the occasion proper for it shall arrive, I shall endeavor to express
the high sense I entertain of this distinguished honor.")
tokens_remove(tokens(txt, remove_punct = TRUE), stopwords("english"))
# }
Run the code above in your browser using DataLab