# NOT RUN {
# Build a document-feature matrix from the inaugural corpus and list its
# top features on the unweighted counts as a baseline for comparison.
dtm <- dfm(data_corpus_inaugural)
topfeatures(dtm)

# Re-weight the same dfm under each named weighting type and inspect how the
# top features change.
normDtm <- dfm_weight(dtm, type = "relfreq")
topfeatures(normDtm)
maxTfDtm <- dfm_weight(dtm, type = "relmaxfreq")
topfeatures(maxTfDtm)
logTfDtm <- dfm_weight(dtm, type = "logfreq")
topfeatures(logTfDtm)
tfidfDtm <- dfm_weight(dtm, type = "tfidf")
topfeatures(tfidfDtm)

# combine these methods for more complex dfm_weightings, e.g. as in Section 6.4
# of Introduction to Information Retrieval
head(tfidf(dtm, scheme_tf = "log"))

# apply numeric weights
# (`txt` rather than `str`, so the base function str() is not masked)
txt <- c("apple is better than banana", "banana banana apple much better")
# the outer parentheses print the dfm as well as assigning it
(mydfm <- dfm(txt, remove = stopwords("english")))
# weights are given as a named numeric vector, one entry per feature name
dfm_weight(mydfm, weights = c(apple = 5, banana = 3, much = 0.5))
# }
# NOT RUN {
# smooth the dfm built above, using a smoothing value of 0.5
dfm_smooth(mydfm, smoothing = 0.5)
# }
# Run the code above in your browser using DataLab