# Build a document-feature matrix from the inaugural corpus and display it.
myDfm <- dfm(inaugCorpus, verbose = FALSE)
myDfm

# Keep only words occurring >= 10 times and in >= 2 documents.
trim(
  myDfm,
  minCount = 10,
  minDoc = 2
)

# Keep only words occurring >= 10 times and in at least 0.4 of the documents
# (a fractional minDoc is a share of documents, not a raw document count).
trim(
  myDfm,
  minCount = 10,
  minDoc = 0.4
)

# Keep only words occurring at least 1 time in 100 (minCount given as a
# fraction) and in >= 2 documents.
trim(
  myDfm,
  minCount = 0.01,
  minDoc = 2
)

# Keep only words occurring at least 5 times in 1000 and in at least 0.2 of
# the documents (both thresholds given as fractions).
trim(
  myDfm,
  minCount = 0.005,
  minDoc = 0.2
)

# Sample 50 words from those occurring at least 20 times each, display the
# sampled matrix, then list its top features.
myDfmSampled <- trim(myDfm, nsample = 50, minCount = 20)
myDfmSampled
topfeatures(myDfmSampled)
## Not run:
# if (require(tm)) {
# (tmdtm <- convert(myDfm, "tm"))
# removeSparseTerms(tmdtm, 0.7)
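# # NOTE(review): the original example passed an undefined object `td` here;
# # `myDfm` is presumably what was intended - confirm.
# trim(myDfm, minDoc = 0.3)
# trim(myDfm, sparsity = 0.7)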
# }
# ## End(Not run)
# Run the code above in your browser using DataLab