# Minimal example: a two-element character vector of "name: text" strings.
split_transcript(
  c(
    "greg: Who me",
    "sarah: yes you!"
  )
)
if (FALSE) {
  ## 2015 Vice-Presidential Debates Example
  ## Guarded by `if (FALSE)` so it never runs automatically: it downloads
  ## pages over the network and may install packages.

  # requireNamespace() only checks that pacman is installed. require() would
  # also attach it to the search path, which is unnecessary here because every
  # call below is namespace-qualified.
  if (!requireNamespace("pacman", quietly = TRUE)) {
    install.packages("pacman")
  }
  pacman::p_load(rvest, magrittr, xml2)

  # Named character vector; each value is appended to the `pid=` query
  # parameter of the URL built below.
  debates <- c(
    wisconsin = "110908",
    boulder = "110906",
    california = "110756",
    ohio = "110489"
  )

  # One pipeline per page id; lapply() keeps the names of `debates` on the
  # returned list.
  lapply(debates, function(x) {
    xml2::read_html(
      paste0("http://www.presidency.ucsb.edu/ws/index.php?pid=", x)
    ) %>%
      rvest::html_nodes("p") %>%
      rvest::html_text() %>%
      # grep() gives the positions of paragraphs that start with an
      # upper-case token followed by ":"; magrittr still passes the piped
      # vector as the first argument because `.` only appears in a nested call.
      textshape::split_index(grep("^[A-Z]+:", .)) %>%
      textshape::combine() %>%
      textshape::split_transcript() %>%
      textshape::split_sentence()
  })
}
# Run the code above in your browser using DataLab