# NOT RUN {
library(nc)
## The JobID column can be matched with a complicated regular
## expression, which we build up from small sub-pattern list
## variables that are each easy to understand on their own.
## Sample input: one JobID string and one Elapsed string per row.
## The outer parentheses print the data frame after assigning it.
(sacct.df <- data.frame(
  JobID = c(
    "13937810_25",
    "13937810_25.batch",
    "13937810_25.extern",
    "14022192_[1-3]",
    "14022204_[4]"),
  Elapsed = c(
    "07:04:42",
    "07:04:42",
    "07:04:49",
    "00:00:00",
    "00:00:00"),
  stringsAsFactors = FALSE))
## Match only the end of a task range: a dash followed by digits.
## int.pattern pairs a digit regex with as.integer, and is reused by
## the later patterns wherever a number must be captured.
int.pattern <- list("[0-9]+", as.integer)
end.pattern <- list("-", task.end = int.pattern)
## nomatch.error=FALSE because most JobID values contain no dash.
nc::capture_first_df(
  sacct.df,
  JobID = list(end.pattern, nomatch.error = FALSE))
## Match a complete task range enclosed in square brackets: a start
## number, then an optional dash-and-end part.
range.pattern <- list(
  "[[]",
  task.start = int.pattern,
  end.pattern, "?", # the end may be absent, as in "[4]"
  "[]]")
## nomatch.error=FALSE because only some JobID values have brackets.
nc::capture_first_df(
  sacct.df,
  JobID = list(range.pattern, nomatch.error = FALSE))
## After an underscore, match either a single task ID or a
## bracketed task range.
task.pattern <- list(
  "_",
  list(
    task.id = int.pattern,
    "|", # alternation: single task (above) or range (below)
    range.pattern))
nc::capture_first_df(sacct.df, JobID = task.pattern)
## Match the type suffix on its own: a literal dot followed by
## whatever text remains, captured as "type".
type.pattern <- list("[.]", type = ".*")
## nomatch.error=FALSE because some JobID values have no suffix.
nc::capture_first_df(
  sacct.df,
  JobID = list(type.pattern, nomatch.error = FALSE))
## Combine the task pattern with the type suffix; the trailing "?"
## makes the suffix optional.
task.type.pattern <- list(task.pattern, type.pattern, "?")
nc::capture_first_df(sacct.df, JobID = task.type.pattern)
## Parse both columns in one call: JobID is a job number followed by
## the task/type pattern, and Elapsed is three colon-separated
## integers. The outer parentheses print the result after assigning it.
(task.df <- nc::capture_first_df(
  sacct.df,
  JobID = list(job = int.pattern, task.type.pattern),
  Elapsed = list(
    hours = int.pattern, ":",
    minutes = int.pattern, ":",
    seconds = int.pattern)))
## Show the resulting column types.
str(task.df)
# }
# Run the code above in your browser using DataLab