if (FALSE) {
  # Example calls only: the `if (FALSE)` guard means nothing in this block is
  # evaluated when the file is sourced. The tokenize_* functions are defined
  # outside this snippet.

  # Named character vector of sample texts. The strings cover a URL, an
  # @mention and a #hashtag, a currency amount with thousands separators,
  # embedded newlines, a lone punctuation mark, a hyphenated word followed by
  # an emoji, and French elisions.
  txt <- c(
    doc1 = "Tweet https://quanteda.io using @quantedainit and #rstats.",
    doc2 = "The £1,000,000 question.",
    doc4 = "Line 1.\nLine2\n\nLine3.",
    doc5 = "?",
    doc6 = "Self-aware machines! \U0001f600",
    doc7 = "Qu'est-ce que c'est?"
  )

  # Word tokenizer variants, first with defaults and then with explicit
  # hyphen / elision options.
  tokenize_word2(txt)
  tokenize_word2(txt, split_hyphens = FALSE)
  tokenize_word1(txt, split_hyphens = FALSE)
  tokenize_word4(txt, split_hyphens = FALSE, split_elisions = TRUE)

  # Remaining tokenizers, called with their default arguments.
  tokenize_fasterword(txt)
  tokenize_fastestword(txt)
  tokenize_sentence(txt)

  # Only the second element (doc2) is passed to the character tokenizer.
  tokenize_character(txt[2])
}
# Run the code above in your browser using DataLab