# Simple example: a two-document corpus with document-level variables,
# summarised as built and again after its units are changed to sentences.
example_texts <- c(
  textone = "This is a sentence. Another sentence. Yet another.",
  textwo = "Premiere phrase. Deuxieme phrase."
)
example_docvars <- data.frame(
  country = c("UK", "USA"),
  year = c(1990, 2000)
)
mycorpus <- corpus(
  example_texts,
  docvars = example_docvars,
  notes = "This is a simple example to show how changeunits() works."
)
summary(mycorpus)

# Reshape the same corpus to sentence units, then summarise the result.
sentence_corpus <- changeunits(mycorpus, to = "sentences")
summary(sentence_corpus, showmeta = TRUE)
# Example with inaugural corpus speeches: keep documents where Year > 2004
# and print the resulting subset.
mycorpus2 <- subset(inaugCorpus, Year > 2004)
mycorpus2

# Change the units of the subset to paragraphs, print it, and summarise it.
paragCorpus <- changeunits(mycorpus2, to = "paragraphs")
paragCorpus
summary(paragCorpus, 100, showmeta = TRUE)
# Note: Bush 2005 is recorded as a single paragraph because that text used a
# single \n to mark the end of a paragraph.
# Run the code above in your browser using DataLab