# Locate the sample PDF shipped in the textreadr package's `docs` directory.
# `mustWork = TRUE` makes system.file() raise an error when the file cannot be
# found, instead of returning "" and handing read_pdf() an empty path.
pdf_path <- system.file(
  "docs/rl10075oralhistoryst002.pdf",
  package = "textreadr",
  mustWork = TRUE
)

# Read the PDF using read_pdf()'s default arguments.
pdf_dat <- read_pdf(pdf_path)

# Read the same file again, this time passing `skip = 1`.
pdf_dat_b <- read_pdf(pdf_path, skip = 1)
## Not run: ------------------------------------
# library(textshape)
# system.file("docs/rl10075oralhistoryst002.pdf", package = "textreadr") %>%
# read_pdf(1) %>%
# `[[`('text') %>%
# head(-1) %>%
# textshape::combine() %>%
# gsub("([A-Z])( )([A-Z])", "\\1_\\3", .) %>%
# strsplit("(-| )(?=[A-Z_]+:)", perl=TRUE) %>%
# `[[`(1) %>%
# textshape::split_transcript()
## ---------------------------------------------
# Run the code above in your browser using DataLab