dtm <- dfm(inaugCorpus)
x <- apply(dtm, 1, function(tf) tf/max(tf))
topfeatures(dtm)
normDtm <- weight(dtm, "relFreq")
topfeatures(normDtm)
maxTfDtm <- weight(dtm, type="relMaxFreq")
topfeatures(maxTfDtm)
logTfDtm <- weight(dtm, type="logFreq")
topfeatures(logTfDtm)
tfidfDtm <- weight(dtm, type="tfidf")
topfeatures(tfidfDtm)
# combine these methods for more complex weightings, e.g. as in Section 6.4
# of Introduction to Information Retrieval
head(logTfDtm <- weight(dtm, type="logFreq"))
head(tfidf(logTfDtm, normalize = FALSE))
# apply numeric weights
str <- c("apple is better than banana", "banana banana apple much better")
weights <- c(apple = 5, banana = 3, much = 0.5)
(mydfm <- dfm(str, ignoredFeatures = stopwords("english"), verbose = FALSE))
weight(mydfm, weights)
Run the code above in your browser using DataLab