## Exploratory look at the useR_2008_abstracts data set: abstract length
## versus number of authors, and the structure of the conference sessions.
## NOTE: assumes the package providing the data set is already attached.
data("useR_2008_abstracts")

## Split every abstract into tokens on runs of whitespace.
## (A poor word tokenizer ...)
words <- with(
  useR_2008_abstracts,
  strsplit(Abstract, "[[:space:]]+")
)
## lengths() counts the elements of each list entry and always returns an
## integer vector, whereas the type returned by sapply(x, length) depends
## on the input (e.g. an empty list gives list()).
n_of_words <- lengths(words)

## Split every author entry on the literal separator " and "
## (fixed = TRUE: the pattern is matched as plain text, not as a regex).
authors <- with(
  useR_2008_abstracts,
  strsplit(Author, " and ", fixed = TRUE)
)
n_of_authors <- lengths(authors)

## Do more authors write longer abstracts?
boxplot(n_of_words ~ n_of_authors)

## Session structure:
## drop everything from the first "-<digit>" onwards in each session label.
sessions <- with(
  useR_2008_abstracts,
  sub("-[[:digit:]].*", "", Session)
)
sort(unique(sessions))

## Numbers of focus, invited and kaleidoscope presentations:
## keep only the part of each session label before the first hyphen.
table(sub("-.*", "", sessions))
## Run the code above in your browser using DataLab