library("sentometrics")
library("data.table") # for the data.table aggregation syntax used at the end

set.seed(505)
data("usnews", package = "sentometrics")
data("list_lexicons", package = "sentometrics")
data("list_valence_shifters", package = "sentometrics")
# computation of sentiment
corpus <- sento_corpus(corpusdf = usnews)
corpusSample <- quanteda::corpus_sample(corpus, size = 500)
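
# the (sampled) corpus remains a quanteda corpus, so quanteda tools apply
# directly, e.g. (an illustrative check, not in the original example):
quanteda::ndoc(corpusSample) # 500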
l1 <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")],
                     list_valence_shifters[["en"]])
l2 <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")],
                     list_valence_shifters[["en"]][, c("x", "t")])
sent1 <- compute_sentiment(corpusSample, l1, how = "counts") # document-level
sent2 <- compute_sentiment(corpusSample, l2, do.sentence = TRUE) # sentence-level
sent3 <- compute_sentiment(as.character(corpusSample), l2, # idem, on raw texts
                           do.sentence = TRUE)
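
# each sentiment object is a data.table with document identifiers, word counts
# and one score column per lexicon (sent3, computed from a character vector,
# carries no dates); an illustrative peek:
head(sent2)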
ctr <- ctr_agg(howTime = "linear", by = "year", lag = 3)
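
# the control above asks for yearly measures with linearly decaying weights
# over a 3-year lag; other frequencies and weighting schemes follow the same
# pattern, e.g. (an illustrative alternative, not in the original example):
ctrMonthly <- ctr_agg(howTime = "equal_weight", by = "month", lag = 12)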
# aggregate into sentiment measures
sm1 <- aggregate(sent1, ctr)
sm2 <- aggregate(sent2, ctr)
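
# sento_measures objects can be inspected with the usual generics, e.g. an
# illustrative time series plot of all measures in sm1:
plot(sm1)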
# two-step aggregation (first into document-level sentiment)
sd2 <- aggregate(sent2, ctr, do.full = FALSE)
sm3 <- aggregate(sd2, ctr)
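
# the intermediate sd2 object holds document-level sentiment; the final
# measures can be pulled out as a data.table for inspection (illustrative):
as.data.table(sm3)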
# aggregation of a sentiment data.table: sum the word counts and the
# per-lexicon scores (excluding the trailing "valence" element of l2)
# across sentences, by document id
cols <- c("word_count", names(l2)[-length(l2)])
sd3 <- sent3[, lapply(.SD, sum), by = "id", .SDcols = cols]