# NOT RUN {
# Examples of constructing a corpus with corpus() from several input types:
# a character vector, a tm VCorpus, a data.frame, and a kwic object.

# create a corpus from texts
corpus(data_char_ukimmig2010)

# create a corpus from texts and assign meta-data and document variables
summary(
  corpus(
    data_char_ukimmig2010,
    docvars = data.frame(party = names(data_char_ukimmig2010))
  ),
  5
)

corpus(texts(data_corpus_irishbudget2010))

# import a tm VCorpus
# (tm is an optional dependency, so this section is skipped when it is absent)
if (requireNamespace("tm", quietly = TRUE)) {
  # At top level R auto-prints only the value of the last expression in a
  # braced block, so every summary in here is printed explicitly.
  data(crude, package = "tm") # load in a tm example VCorpus
  mytmCorpus <- corpus(crude)
  print(summary(mytmCorpus, showmeta = TRUE))

  data(acq, package = "tm")
  print(summary(corpus(acq), 5, showmeta = TRUE))

  tmCorp <- tm::VCorpus(tm::VectorSource(data_char_ukimmig2010))
  quantCorp <- corpus(tmCorp)
  print(summary(quantCorp))
}

# construct a corpus from a data.frame
dat <- data.frame(
  letter_factor = factor(rep(letters[1:3], each = 2)),
  some_ints = 1L:6L,
  some_text = paste0("This is text number ", 1:6, "."),
  stringsAsFactors = FALSE,
  row.names = paste0("fromDf_", 1:6)
)
dat
# text_field names the column of `dat` that holds the document text
summary(
  corpus(
    dat,
    text_field = "some_text",
    metacorpus = list(source = "From a data.frame called dat.")
  )
)

# construct a corpus from a kwic object
mykwic <- kwic(data_corpus_inaugural, "southern")
summary(corpus(mykwic))

# from a kwic, with and without split_context
kw <- kwic(data_char_sampletext, "econom*")
summary(corpus(kw))
summary(corpus(kw, split_context = FALSE))
texts(corpus(kw, split_context = FALSE))
# }
# Run the code above in your browser using DataLab