# Example script: builds sentiment measures from the `usnews` corpus and calls
# sento_model() under four control settings (AIC, iterative BIC, and
# cross-validation for a gaussian and a binomial target), followed by
# attribution, prediction and plotting calls.
#
# The `if (FALSE)` guard means nothing here runs when the file is sourced;
# execute the body manually. The unqualified calls (sento_corpus(), ctr_agg(),
# sento_model(), ...) presumably come from the sentometrics package, which
# would need to be attached first -- confirm.
if (FALSE) {
  data("usnews", package = "sentometrics")
  data("list_lexicons", package = "sentometrics")
  data("list_valence_shifters", package = "sentometrics")
  data("epu", package = "sentometrics")
  set.seed(505)

  # construct a sento_measures object to start with
  corpusAll <- sento_corpus(corpusdf = usnews)
  corpus <- quanteda::corpus_subset(corpusAll, date >= "2004-01-01")
  l <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")])
  ctr <- ctr_agg(
    howWithin = "counts", howDocs = "proportional",
    howTime = c("equal_weight", "linear"),
    by = "month", lag = 3
  )
  sento_measures <- sento_measures(corpus, l, ctr)

  # prepare y and other x variables
  y <- epu[epu$date %in% get_dates(sento_measures), "index"]
  # Enforce that the target lines up with the measures. A bare comparison
  # inside braces would have its result discarded without stopping anything.
  stopifnot(length(y) == nobs(sento_measures))
  # two other (random) x variables
  x <- data.frame(x1 = runif(length(y)), x2 = rnorm(length(y)))

  # a linear model based on the Akaike information criterion
  ctrIC <- ctr_model(
    model = "gaussian", type = "AIC", do.iter = FALSE, h = 4,
    do.difference = TRUE
  )
  out1 <- sento_model(sento_measures, y, x = x, ctr = ctrIC)

  # attribution and prediction as post-analysis
  attributions1 <- attributions(
    out1, sento_measures,
    refDates = get_dates(sento_measures)[20:25]
  )
  plot(attributions1, "features")
  nx <- nmeasures(sento_measures) + ncol(x)
  # rows 30 to 40 of the measures (first column dropped) plus x, scaled by
  # random factors, serve as new input for predict()
  newx <- runif(nx) *
    cbind(data.table::as.data.table(sento_measures)[, -1], x)[30:40, ]
  preds <- predict(out1, newx = as.matrix(newx), type = "link")

  # an iterative out-of-sample analysis, parallelized
  ctrIter <- ctr_model(
    model = "gaussian", type = "BIC", do.iter = TRUE, h = 3,
    oos = 2, alphas = c(0.25, 0.75), nSample = 75, nCore = 2
  )
  out2 <- sento_model(sento_measures, y, x = x, ctr = ctrIter)
  summary(out2)

  # plot predicted vs. realized values
  p <- plot(out2)
  p

  # a cross-validation based model, parallelized
  # The control object is built before the cluster is started so that a
  # failure in ctr_model() cannot leave worker processes behind.
  ctrCV <- ctr_model(
    model = "gaussian", type = "cv", do.iter = FALSE,
    h = 0, alphas = c(0.10, 0.50, 0.90), trainWindow = 70,
    testWindow = 10, oos = 0, do.progress = TRUE
  )
  cl <- parallel::makeCluster(2)
  doParallel::registerDoParallel(cl)
  # `finally` stops the workers and restores the sequential backend whether
  # the fit succeeds or errors; an error is still propagated to the caller.
  out3 <- tryCatch(
    sento_model(sento_measures, y, x = x, ctr = ctrCV),
    finally = {
      parallel::stopCluster(cl)
      foreach::registerDoSEQ()
    }
  )
  summary(out3)

  # a cross-validation based model for a binomial target
  yb <- epu[epu$date %in% get_dates(sento_measures), "above"]
  ctrCVb <- ctr_model(
    model = "binomial", type = "cv", do.iter = FALSE,
    h = 0, alphas = c(0.10, 0.50, 0.90), trainWindow = 70,
    testWindow = 10, oos = 0, do.progress = TRUE
  )
  out4 <- sento_model(sento_measures, yb, x = x, ctr = ctrCVb)
  summary(out4)
}
# Run the code above in your browser using DataLab