# NOT RUN {
# Attach textclean before first use: nothing earlier in this snippet loads it,
# and both `mgsub()` and the `DATA` example data set used below are expected to
# come from that package (NOTE(review): confirm `DATA` is exported by textclean).
library(textclean)

# Literal (fixed) multi-pattern substitution: two patterns, two replacements,
# supplied as parallel vectors.
mgsub(DATA$state, c("it's", "I'm"), c("it is", "I am"))

# Regular-expression substitution: `fixed = FALSE` treats the pattern as a
# regex, so the POSIX class `[[:punct:]]` is replaced with "PUNC".
mgsub(DATA$state, "[[:punct:]]", "PUNC", fixed = FALSE)
# }
# NOT RUN {
library(textclean)

# Build the lookup of number words for 1..1e5; these serve as the patterns.
hunthou <- replace_number(seq_len(1e5))

# Swap the number words embedded in a sentence for the matching digits.
textclean::mgsub(
  x = "'twenty thousand three hundred five' into 20305",
  pattern = hunthou,
  replacement = seq_len(1e5)
)
# Expected result: "'20305' into 20305"

# Larger example, taken from: https://stackoverflow.com/q/18332463/1000343
# Note: this is the slower approach (half a million candidate patterns).
fivehunthou <- replace_number(seq_len(5e5))

testvect <- c(
  "fifty seven",
  "four hundred fifty seven",
  "six thousand four hundred fifty seven",
  "forty six thousand four hundred fifty seven",
  "forty six thousand four hundred fifty seven",
  "three hundred forty six thousand four hundred fifty seven"
)

textclean::mgsub(
  x = testvect,
  pattern = fivehunthou,
  replacement = seq_len(5e5)
)

# Safe substitution: with `safe = TRUE` the mgsub package is used as the
# backend. The two calls below run the same overlapping regex patterns, first
# through the default path (case-insensitive) and then through the safe path.
dubious_string <- "Dopazamine is a fake chemical"
pattern <- c("dopazamin", "do.*ne")
replacement <- c("freakout", "metazamine")

mgsub(
  dubious_string,
  pattern = pattern,
  replacement = replacement,
  fixed = FALSE,
  ignore.case = TRUE
)
mgsub(
  dubious_string,
  pattern = pattern,
  replacement = replacement,
  fixed = FALSE,
  safe = TRUE
)
# }
# Run the code above in your browser using DataLab