# Resolve the sample PDF bundled with the textreadr package once and reuse it.
# `mustWork = TRUE` makes a missing file (or missing package) fail right here
# with a clear error; by default system.file() returns "" in that case and the
# problem would only surface later inside read_pdf().
pdf_path <- system.file(
    "docs/rl10075oralhistoryst002.pdf",
    package = "textreadr",
    mustWork = TRUE
)

# Read the document with read_pdf()'s default arguments.
pdf_dat <- read_pdf(pdf_path)

# Read the same document again with `skip = 1`
# (presumably skips the first page -- confirm against the read_pdf() docs).
pdf_dat_b <- read_pdf(pdf_path, skip = 1)
## Not run: ------------------------------------
# library(textshape)
# system.file("docs/rl10075oralhistoryst002.pdf", package = "textreadr") %>%
#     read_pdf(1) %>%
#     `[[`('text') %>%
#     head(-1) %>%
#     textshape::combine() %>%
#     gsub("([A-Z])( )([A-Z])", "\\1_\\3", .) %>%
#     strsplit("(-| )(?=[A-Z_]+:)", perl=TRUE) %>%
#     `[[`(1) %>%
#     textshape::split_transcript()
## ---------------------------------------------
# Run the code above in your browser using DataLab