# NOT RUN {
dfmat1 <- as.dfm(data_dfm_lbgexample)
head(dfmat1[, 5:10])
head(dfm_tfidf(dfmat1)[, 5:10])
docfreq(dfmat1)[5:15]
head(dfm_weight(dfmat1)[, 5:10])
# replication of worked example from
# https://en.wikipedia.org/wiki/Tf-idf#Example_of_tf.E2.80.93idf
dfmat2 <-
matrix(c(1,1,2,1,0,0, 1,1,0,0,2,3),
byrow = TRUE, nrow = 2,
dimnames = list(docs = c("document1", "document2"),
features = c("this", "is", "a", "sample",
"another", "example"))) %>%
as.dfm()
dfmat2
docfreq(dfmat2)
dfm_tfidf(dfmat2, scheme_tf = "prop") %>% round(digits = 2)
# }
# NOT RUN {
# comparison with tm
if (requireNamespace("tm")) {
convert(dfmat2, to = "tm") %>% tm::weightTfIdf() %>% as.matrix()
# same as:
dfm_tfidf(dfmat2, base = 2, scheme_tf = "prop")
}
# }
Run the code above in your browser using DataLab