# NOT RUN {
## Extract every external link, together with its title, from a Wikipedia page.
## Load the `wiki` dataset and parse it into an HTML document.
data(wiki)
wiki.parse <- XML::htmlParse(wiki)
## row.xpath matches each <a> element whose href attribute starts with 'http';
## col.xpath is evaluated relative to each match: "." is the anchor node
## itself (title) and "./@href" is its href attribute (link).
links <- xscrape(
  wiki.parse,
  row.xpath = "//a[starts-with(./@href, 'http')]",
  col.xpath = c(title = ".", link = "./@href"),
  parallel = 1
)
# }
# NOT RUN {
## Convert the results of a search for 'R' on duckduckgo.com.
## Step 1: download and parse the search page.
duck <- XML::htmlParse("http://duckduckgo.com/html/?q=R")
## Step 2: run xscrape on the downloaded and parsed page.
## row.xpath matches each <div> whose class contains 'result__body';
## the col.xpath entries are evaluated relative to each matched <div>.
results <- xscrape(
  duck,
  row.xpath = "//div[contains(@class, 'result__body')]",
  col.xpath = c(
    title = "./h2",
    snippet = ".//*[@class='result__snippet']",
    url = ".//a[@class='result__url']/@href"
  )
)
# }
# NOT RUN {
# }
# NOT RUN {
## Convert the results of searches for 'R' and 'Julia' on duckduckgo.com.
## Here the URLs are passed to xscrape directly (as a character vector)
## instead of a document that was parsed beforehand.
results <- xscrape(
  c(
    "http://duckduckgo.com/html/?q=R",
    "http://duckduckgo.com/html/?q=julia"
  ),
  row.xpath = "//div[contains(@class, 'result__body')]",
  col.xpath = c(
    title = "./h2",
    snippet = ".//*[@class='result__snippet']",
    url = ".//a[@class='result__url']/@href"
  )
)
# }
# NOT RUN {
# }
# Run the code above in your browser using DataLab