# NOT RUN {
dfmat1 <- dfm(data_corpus_inaugural)
dfmat2 <- dfm_weight(dfmat1, scheme = "prop")
topfeatures(dfmat2)
dfmat3 <- dfm_weight(dfmat1)
topfeatures(dfmat3)
dfmat4 <- dfm_weight(dfmat1, scheme = "logcount")
topfeatures(dfmat4)
dfmat5 <- dfm_weight(dfmat1, scheme = "logave")
topfeatures(dfmat5)
# combine these methods for more complex dfm_weightings, e.g. as in Section 6.4
# of Introduction to Information Retrieval
head(dfm_tfidf(dfmat1, scheme_tf = "logcount"))
# apply numeric weights
str <- c("apple is better than banana", "banana banana apple much better")
(dfmat6 <- dfm(str, remove = stopwords("english")))
dfm_weight(dfmat6, weights = c(apple = 5, banana = 3, much = 0.5))
# smooth the dfm
dfmat <- dfm(data_corpus_inaugural)
dfm_smooth(dfmat, 0.5)
# }
Run the code above in your browser using DataLab