# NOT RUN {
# Convert the `dialogue` column to a DTM, using document identifiers built by
# joining location, element id and sentence id with underscores.
with(
  partial_republican_debates_2015,
  as_dtm(
    dialogue,
    paste(location, element_id, sentence_id, sep = "_")
  )
)

# A data frame passed directly to as_dtm().
as_dtm(mtcars)

# A data frame with `docs` given as a vector of column names
# (presumably the columns that identify each document -- confirm in ?as_dtm).
as_dtm(CO2, docs = c("Plant", "Type", "Treatment"))
# }
# NOT RUN {
## termco object to DTM/TDM
library(termco)

# Default conversion of the `markers` object to a document-term matrix.
as_dtm(markers)

# Same conversion, passing tm's tf-idf weighting function as `weighting`.
as_dtm(markers, weighting = tm::weightTfIdf)

# Term-document counterpart of the first call.
as_tdm(markers)
# Pairwise cosine distance between the rows of `x`.
#
# `x` is anything slam::as.simple_triplet_matrix() accepts; `...` is accepted
# but unused. Returns a "dist" object holding 1 - cosine similarity for every
# pair of rows of `x`.
cosine_distance <- function(x, ...) {
  # Transpose so that each row of the input becomes a column.
  by_col <- t(slam::as.simple_triplet_matrix(x))

  # Squared Euclidean norm of every column (i.e. of every input row).
  sq_norms <- slam::col_sums(by_col^2)

  # Pairwise dot products, and the matching products of the norms.
  dot_products <- slam::crossprod_simple_triplet_matrix(by_col)
  norm_products <- sqrt(sq_norms %*% t(sq_norms))

  stats::as.dist(1 - dot_products / norm_products)
}
# Hierarchical clustering on the cosine distances of the markers DTM.
mod <- hclust(
  cosine_distance(
    as_dtm(markers)
  )
)

# Draw the dendrogram and outline a five-cluster cut in red.
plot(mod)
rect.hclust(mod, k = 5, border = "red")

# Cluster membership for the same five-cluster cut; printed on its own line.
clusters <- cutree(mod, k = 5)
clusters
## Parts of speech to DTM/TDM
library(tagger)
library(dplyr)
data(presidential_debates_2012_pos)

# Keep only the noun tags (NN, NNP, NNPS, NNS). The result is stored as
# `pos_text`, the name used by every conversion call below; assigning it to
# any other name leaves `pos_text` undefined and the calls fail.
pos_text <- presidential_debates_2012_pos %>%
  select_tags(c("NN", "NNP", "NNPS", "NNS"))

# Convert the tag-filtered object, with the default `pos` setting and with
# `pos = FALSE`, to both matrix orientations.
as_dtm(pos_text)
as_dtm(pos_text, pos = FALSE)
as_tdm(pos_text)
as_tdm(pos_text, pos = FALSE)

# Convert the full tagged data set after passing it through as_basic().
presidential_debates_2012_pos %>%
  as_basic() %>%
  as_dtm()
# }
# Run the code above in your browser using DataLab