# NOT RUN {
# Scrape names from web pages: pass each page's URL as the first argument
scrapenames("http://en.wikipedia.org/wiki/Araneae")
scrapenames("http://en.wikipedia.org/wiki/Animalia")
scrapenames("http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0095068")
scrapenames("http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0080498")
scrapenames("http://ucjeps.berkeley.edu/cgi-bin/get_JM_treatment.pl?CARYOPHYLLACEAE")
# Scrape names from a PDF at a URL.
# The long URL is assembled with paste0() from two single-line pieces so the
# string never contains a line break (or a stray carriage return on files
# saved with CRLF endings) that would have to be stripped out afterwards.
url <- paste0(
  "http://www.plosone.org/article/fetchObject.action?uri=",
  "info%3Adoi%2F10.1371%2Fjournal.pone.0058268&representation=PDF"
)
scrapenames(url = url)
# Same kind of URL lookups, this time with extra named arguments supplied
scrapenames(
  url = "http://www.mapress.com/zootaxa/2012/f/z03372p265f.pdf",
  unique = TRUE
)
scrapenames(
  url = "http://en.wikipedia.org/wiki/Araneae",
  data_source_ids = c(1, 169)
)
# Scrape names from a file on disk.
# First: the example species file installed with the taxize package.
species_path <- system.file("examples", "species.txt", package = "taxize")
scrapenames(file = species_path)
# Second: a temporary .txt file written from names_list("species"), with the
# entries joined by newlines before being written out.
name_lines <- paste0(names_list("species"), collapse = "\n")
tmp_path <- tempfile(fileext = ".txt")
writeLines(name_lines, tmp_path)
scrapenames(file = tmp_path)
# Scrape names directly from a character string
scrapenames(text = "A spider named Pardosa moesta Banks, 1892")
# Ask for the OCR content to be returned as well
scrapenames(
  url = "http://www.mapress.com/zootaxa/2012/f/z03372p265f.pdf",
  return_content = TRUE
)
# Use curl options.
# The request configuration is built with httr and handed to scrapenames()
# via `config`; here verbose() prints the HTTP exchange.
# NOTE(review): assumes this version of scrapenames() forwards extra
# arguments to the underlying httr request -- confirm against the installed
# taxize version.
library("httr")
scrapenames(
  text = "A spider named Pardosa moesta Banks, 1892",
  config = verbose()
)
# }
# Run the code above in your browser using DataLab