# NOT RUN {
# build the path to sample_text.txt below tests/testthat in the
# directory of the koRpus package
sample.file <- file.path(
  path.package("koRpus"),
  "tests",
  "testthat",
  "sample_text.txt"
)

# tokenize the sample text
tokenized.obj <- tokenize(sample.file)

# document-term matrix with the default settings
myDTMatrix <- docTermMatrix(tokenized.obj)

# same call, but the tokens are passed through filterByClass() first
# (e.g., to drop all punctuation)
myDTMatrix <- docTermMatrix(filterByClass(tokenized.obj))

# filtered tokens again, this time requesting tf-idf values
# (tfidf=TRUE) instead of absolute frequencies
myDTMatrix <- docTermMatrix(filterByClass(tokenized.obj), tfidf=TRUE)
# }
# Run the code above in your browser using DataLab