# NOT RUN {
# Read the sample PDF bundled with the textreadr package using the default
# arguments of read_pdf().
pdf_dat <- read_pdf(system.file(
  "docs/rl10075oralhistoryst002.pdf",
  package = "textreadr"
))

# Read the same file again, this time passing `skip = 1`
# (see ?read_pdf for exactly what is skipped).
pdf_dat_b <- read_pdf(
  system.file(
    "docs/rl10075oralhistoryst002.pdf",
    package = "textreadr"
  ),
  skip = 1
)
# }
# NOT RUN {
library(textshape)

# Turn the bundled sample PDF into a parsed transcript. Each step is held in a
# named intermediate; `local()` keeps those temporaries out of the workspace
# and its value is the result of the final call.
local({
  pdf_path <- system.file(
    "docs/rl10075oralhistoryst002.pdf",
    package = "textreadr"
  )

  # Keep the `text` element of what read_pdf() returns, minus its last entry.
  page_text <- head(read_pdf(pdf_path, 1)[["text"]], -1)

  # NOTE(review): presumably collapses the entries into one string -- confirm
  # against the textshape::combine() documentation.
  combined <- textshape::combine(page_text)

  # Replace the single space between two capital letters with an underscore.
  joined_caps <- gsub("([A-Z])( )([A-Z])", "\\1_\\3", combined)

  # Split at a hyphen or space that is immediately followed by a run of
  # capitals/underscores and a colon; the lookahead leaves that run in place.
  # strsplit() returns a list, so take its first (only) element.
  pieces <- strsplit(joined_caps, "(-| )(?=[A-Z_]+:)", perl = TRUE)[[1]]

  # NOTE(review): presumably separates speaker labels from their dialogue --
  # confirm against the textshape::split_transcript() documentation.
  textshape::split_transcript(pieces)
})
# }
# Run the code above in your browser using DataLab