library(tm)
en <- c("Romeo loves Juliet", "Romeo loves a girl")
en.corpus <- VCorpus(VectorSource(en))
tdm <- TermDocumentMatrix(en.corpus, 
                          control=list(wordLengths=c(1,Inf), 
                                       tokenize=function(x) ngram_tokenize(x, char=TRUE, 
                                                                           ngmin=3, ngmax=3)))
inspect(tdm)
ch <- c("abab", "aabb")
ch.corpus <- VCorpus(VectorSource(ch))
tdm <- TermDocumentMatrix(ch.corpus, 
                          control=list(wordLengths=c(1,Inf), 
                                       tokenize=function(x) ngram_tokenize(x, char=TRUE, 
                                                                           ngmin=1, ngmax=2)))
inspect(tdm)
Run the code above in your browser using DataLab