# NOT RUN {
# Fit a Wordfish model to the bundled example dfm and show the fitted object.
# NOTE(review): per the quanteda docs, `dir` names the pair of documents used
# to fix the direction of the scale -- confirm against the installed version.
wf <- textmodel_wordfish(data_dfm_lbgexample, dir = c(1, 5))
wf

# Inspect the fit: a summary limited via n = 10, then the coefficients.
summary(wf, n = 10)
coef(wf)

# Predictions: default call, with se.fit = TRUE, and with
# interval = "confidence".
predict(wf)
predict(wf, se.fit = TRUE)
predict(wf, interval = "confidence")
# }
# NOT RUN {
# Build the document-feature matrix from the 2010 Irish budget corpus.
# (The object must be called `ie2010dfm`: every call below refers to it by
# that name.)
ie2010dfm <- dfm(data_corpus_irishbudget2010, verbose = FALSE)

# Baseline fit with the default dispersion settings.
(wf1 <- textmodel_wordfish(ie2010dfm, dir = c(6, 5)))

# Two quasi-Poisson fits that differ only in the dispersion floor (0 vs 0.5).
(wf2a <- textmodel_wordfish(ie2010dfm, dir = c(6, 5),
                            dispersion = "quasipoisson",
                            dispersion_floor = 0))
(wf2b <- textmodel_wordfish(ie2010dfm, dir = c(6, 5),
                            dispersion = "quasipoisson",
                            dispersion_floor = .5))

# Scatter of the per-feature phi values from the two fits.
plot(wf2a$phi, wf2b$phi,
     xlab = "Min underdispersion = 0", ylab = "Min underdispersion = .5",
     xlim = c(0, 1.0), ylim = c(0, 1.0))

# Same axes again, but empty (type = "n"), so the points can be drawn as
# text labels instead of symbols.
plot(wf2a$phi, wf2b$phi,
     xlab = "Min underdispersion = 0", ylab = "Min underdispersion = .5",
     xlim = c(0, 1.0), ylim = c(0, 1.0), type = "n")

# Pick up to five features whose phi is below 1 in the floor = 0 fit.
# Sampling positions with sample.int() avoids two pitfalls of
# sample(x, 5): an error when fewer than five features qualify, and
# sampling from 1:x when exactly one feature qualifies.
underdispersed_idx <- which(wf2a$phi < 1.0)
underdispersedTerms <- underdispersed_idx[
  sample.int(length(underdispersed_idx), min(5L, length(underdispersed_idx)))
]

# Positions of the 20 most frequent features within the dfm (printed only).
which(featnames(ie2010dfm) %in% names(topfeatures(ie2010dfm, 20)))

# Label every feature in light grey. The axis limits come from the plot()
# call above; xlim/ylim are not graphical parameters for text(), so they are
# not passed here.
text(wf2a$phi, wf2b$phi, wf2a$features, cex = .8, col = "grey90")

# Overlay the sampled features in black. Index with the variable itself, not
# the string "underdispersedTerms", which would select nothing but NA.
# text() rejects zero-length labels, hence the guard.
if (length(underdispersedTerms) > 0) {
  text(wf2a$phi[underdispersedTerms], wf2b$phi[underdispersedTerms],
       wf2a$features[underdispersedTerms],
       cex = .8, col = "black")
}

# Optional cross-check against the austin package's wordfish implementation.
# requireNamespace() tests availability without attaching the package; the
# call below is namespace-qualified, so attaching is unnecessary.
if (requireNamespace("austin", quietly = TRUE)) {
  wf_austin <- austin::wordfish(quanteda::as.wfm(ie2010dfm), dir = c(6, 5))
  cor(wf1$theta, wf_austin$theta)
}
# }
# Run the code above in your browser using DataLab