# NOT RUN {
## Example from 13.1 of _An Introduction to Information Retrieval_
txt <- c(d1 = "Chinese Beijing Chinese",
d2 = "Chinese Chinese Shanghai",
d3 = "Chinese Macao",
d4 = "Tokyo Japan Chinese",
d5 = "Chinese Chinese Chinese Tokyo Japan")
trainingset <- dfm(txt, tolower = FALSE)
trainingclass <- factor(c("Y", "Y", "Y", "N", NA), ordered = TRUE)
## replicate IIR p261 prediction for test set (document 5)
(nb.p261 <- textmodel_NB(trainingset, trainingclass, prior = "docfreq"))
predict(nb.p261, newdata = trainingset[5, ])
# contrast with other priors
predict(textmodel_NB(trainingset, trainingclass, prior = "uniform"))
predict(textmodel_NB(trainingset, trainingclass, prior = "termfreq"))
## replicate IIR p264 Bernoulli Naive Bayes
(nb.p261.bern <- textmodel_NB(trainingset, trainingclass, distribution = "Bernoulli",
prior = "docfreq"))
predict(nb.p261.bern, newdata = trainingset[5, ])
# }
Run the code above in your browser using DataLab