# Option 1: invoke POS tagging through a built-in language preset.
tagged.results <- treetag(
  "~/my.data/speech.txt",
  treetagger = "manual",
  lang = "en",
  TT.options = list(
    path = "~/bin/treetagger",
    preset = "en"
  )
)

# Option 2: point treetag() at one of the batch scripts shipped with
# TreeTagger instead of using a preset.
tagged.results <- treetag(
  "~/my.data/speech.txt",
  treetagger = "~/bin/treetagger/cmd/tree-tagger-english",
  lang = "en"
)

# Option 3: register that batch script (and the language) in the koRpus
# environment first; the treetag() call then only needs the file to tag.
set.kRp.env(
  TT.cmd = "~/bin/treetagger/cmd/tree-tagger-english",
  lang = "en"
)
tagged.results <- treetag("~/my.data/speech.txt")

# Once tagged, the resulting object can be handed to other functions of
# this package:
readability(tagged.results)
lex.div(tagged.results)
## enabling stopword detection and stemming
# If the packages tm and SnowballC are also installed, some of their
# features can be used with koRpus. SnowballC is used here because the
# older Snowball package has been archived on CRAN.
set.kRp.env(
  TT.cmd = "manual",
  lang = "en",
  TT.options = list(path = "~/bin/treetagger", preset = "en")
)
# stopwords: a character vector of words to be flagged as stopwords.
# stemmer: a function mapping a character vector of tokens to their stems;
# SnowballC::wordStem defaults to the (English) Porter stemmer.
tagged.results <- treetag(
  "~/my.data/speech.txt",
  stopwords = tm::stopwords("en"),
  stemmer = SnowballC::wordStem
)
# removing all stopwords now is simple:
tagged.noStopWords <- kRp.filter.wclass(tagged.results, "stopword")
# Run the code above in your browser using DataLab