# Reader for Reuters-21578 documents in XML form, built by handing readXML()
# a field specification (`spec`) and a prototype document (`doc`).
#
# Each `spec` entry is a two-element list: a kind tag ("node", "attribute",
# "function" or "unevaluated") followed by its payload (an XPath string, a
# function taking the parsed node, or a literal value).
# NOTE(review): how readXML() interprets each kind tag is defined outside
# this snippet -- confirm against its documentation.
readReut21578XML <- readXML(
  spec = list(
    Author = list("node", "/REUTERS/TEXT/AUTHOR"),
    DateTimeStamp = list("function", function(node) {
      # Collect the text of every /REUTERS/DATE node and parse it as GMT.
      # The format string was missing (unterminated string literal) and is
      # restored here; it presumably matches stamps such as
      # "26-FEB-1987 15:01:01" -- TODO confirm against the corpus.
      strptime(
        sapply(XML::getNodeSet(node, "/REUTERS/DATE"), XML::xmlValue),
        format = "%d-%B-%Y %H:%M:%S",
        tz = "GMT"
      )
    }),
    Description = list("unevaluated", ""),
    Heading = list("node", "/REUTERS/TEXT/TITLE"),
    ID = list("attribute", "/REUTERS/@NEWID"),
    Origin = list("unevaluated", "Reuters-21578 XML"),
    Topics = list("node", "/REUTERS/TOPICS/D")
  ),
  doc = new("Reuters21578Document")
)
# Run the code above in your browser using DataLab