# NOT RUN {
# Examples of weighting a document-feature matrix (dfm) with dfm_weight().
# Assumes the quanteda package is already attached (e.g. library(quanteda)).

# Tokenise explicitly before building the dfm: calling dfm() directly on a
# corpus or character vector is deprecated in quanteda >= 3.0.
dtm <- dfm(tokens(data_corpus_inaugural))
topfeatures(dtm)

# Built-in weighting schemes; see ?dfm_weight for the definition of each.
prop_dtm <- dfm_weight(dtm, scheme = "prop")
topfeatures(prop_dtm)
propmax_dtm <- dfm_weight(dtm, scheme = "propmax")
topfeatures(propmax_dtm)
logcount_dtm <- dfm_weight(dtm, scheme = "logcount")
topfeatures(logcount_dtm)
logave_dtm <- dfm_weight(dtm, scheme = "logave")
topfeatures(logave_dtm)

# combine these methods for more complex dfm weightings, e.g. as in
# Section 6.4 of Introduction to Information Retrieval
head(dfm_tfidf(dtm, scheme_tf = "logcount"))

# apply numeric weights
# (`txt` rather than `str`, so that base::str() is not masked)
txt <- c("apple is better than banana", "banana banana apple much better")
# Stopwords are dropped with dfm_remove() instead of the deprecated
# `remove =` argument of dfm(); the outer parentheses print the result.
(mydfm <- dfm_remove(dfm(tokens(txt)), pattern = stopwords("english")))
# Named weights are matched to features by name.
dfm_weight(mydfm, weights = c(apple = 5, banana = 3, much = 0.5))
# }
# NOT RUN {
# Smooth the dfm built in the preceding example by adding 0.5 to its counts.
dfm_smooth(mydfm, smoothing = 0.5)
# }
# Run the code above in your browser using DataLab