## Subject strings for the examples below: genomic positions written
## as chrom:start, optionally followed by -end.
chr.pos.vec <- c(
  "chr10:213,054,000-213,055,000",
  "chrM:111,000",
  "chr1:110-111 chr2:220-222" # two possible matches.
)
## Extract the first match from each element of the subject character
## vector. Every named argument value creates a capture group in the
## generated regex, and the argument name is used as the column name
## in the result.
(dt.chr.cols <- nc::capture_first_vec(
  chr.pos.vec,
  chrom = "chr.*?",
  ":",
  chromStart = "[0-9,]+"
))
## The result is always a data.table, even when no type conversion
## functions are specified:
str(dt.chr.cols)
## A conversion function converts the text captured by the previously
## named group, and a pattern saved in a list can be re-used in
## several places.
keep.digits <- function(x) {
  ## Remove every non-digit character (such as the commas used as
  ## thousands separators above), then convert the rest to integer.
  digits.only <- gsub("[^0-9]", "", x)
  as.integer(digits.only)
}
int.pattern <- list("[0-9,]+", keep.digits)
range.pattern <- list(
  chrom = "chr.*?",
  ":",
  chromStart = int.pattern,
  list( # a list without a name becomes a non-capturing group.
    "-",
    chromEnd = int.pattern
  ),
  "?" # makes the preceding group, and thus chromEnd, optional.
)
## Match the same subjects using the re-usable range pattern.
(dt.int.cols <- nc::capture_first_vec(
  chr.pos.vec,
  range.pattern
))
## Conversion functions are what create the non-character columns.
str(dt.int.cols)
## NA is used in the result to indicate a subject which did not match
## or which was missing.
na.vec <- c(
  "this will not match",
  NA, # a missing subject will not match either.
  chr.pos.vec
)
nc::capture_first_vec(
  na.vec,
  range.pattern,
  nomatch.error = FALSE
)
## Another subject, from https://adventofcode.com/2024/day/14
## type.convert=TRUE means to use utils::type.convert as the default
## conversion function.
pvxy.subject <- c("p=0,4 v=3,-3","p=6,3 v=-1,-3")
nc::capture_first_vec(
  pvxy.subject,
  "p=",
  ## The "+" quantifier lets a position have more than one digit
  ## (for example "p=10,22"); a bare "[0-9]" would match only
  ## single-digit positions.
  px = "[0-9]+",
  ",",
  py = "[0-9]+",
  " v=",
  ## Velocities may be negative, so a minus sign is allowed too.
  vx = "[-0-9]+",
  ",",
  vy = "[-0-9]+",
  type.convert = TRUE
)
## To do the same as above but with less repetition:
## g creates one capture group, named by pasting prefix and suffix
## together, whose pattern is a run of digits and/or minus signs.
g <- function(prefix, suffix) {
  nc::group(
    name = paste0(prefix, suffix),
    "[-0-9]+"
  )
}
## xy creates the pattern list for one pair: the prefix, an equals
## sign, the x group, a comma, then the y group.
xy <- function(prefix) {
  x.group <- g(prefix, "x")
  y.group <- g(prefix, "y")
  list(prefix, "=", x.group, ",", y.group)
}
## The full pattern is a "p" pair, a space, then a "v" pair.
nc::capture_first_vec(
  pvxy.subject,
  xy("p"),
  " ",
  xy("v"),
  type.convert = TRUE
)
## Or use a sub-pattern list which carries its own conversion
## function, so the type.convert argument is not needed:
ipat <- list("[-0-9]+", as.integer)
nc::capture_first_vec(
  pvxy.subject,
  "p=",
  px = ipat,
  ",",
  py = ipat,
  " v=",
  vx = ipat,
  ",",
  vy = ipat
)
## Run the code above in your browser using DataLab