##
## 1. Consider Names = Ruben, Avila and Jose, where
## "e" and "A" in these examples carry an accent.
## With the default values for standardCharacters and
## replacement, these might be converted to something
## like Rub_n, _vila, and Jos_, with different software
## possibly mangling the names differently. (The
## standard checks for R packages in an English locale
## complain about non-ASCII characters, because they
## are not portable.)
##
# Test input: names containing a backtick (presumably standing in
# for an accented letter that was mangled by a character-set
# mismatch), plus a comma, escaped double quotes, NA, an empty
# string, a blank string, '$' and '%'.
# NOTE: the fifth element must contain literal double quotes
# (written \" inside a double-quoted R string) so that it agrees
# with the expected value 'Robert C. "Bobby"' checked below.
nonstdNames <- c('Ra`l', 'Ra`', '`l', 'Torres, Raul',
"Robert C. \"Bobby\"", NA, '', ' ',
'$12', '12%')
# confusion in character sets can create
# names like nonstdNames[2]
Name2 <- subNonStandardCharacters(nonstdNames)
str(Name2)
# check: the expected result has each backtick replaced by '_'
# and every other element identical to the input
Name2. <- c('Ra_l', 'Ra_', '_l', nonstdNames[4],
'Robert C. "Bobby"', NA, '', ' ',
'$12', '12%')
str(Name2.)
# stopifnot() fails unless all.equal() returns TRUE
stopifnot(
all.equal(Name2, Name2.)
)
##
## 2. Example from iconv
##
# Strings containing non-ASCII letters, written with Unicode
# escapes: U+00F8 (o with stroke), U+00F6 (o with diaeresis),
# U+00DF (sharp s) and U+00FC (u with diaeresis).
icx <- c(
  "Ekstr\u{f8}m",
  "J\u{f6}reskog",
  "bi\u{df}chen Z\u{fc}rcher"
)
icx2 <- subNonStandardCharacters(icx)
# check: the result must match these plain-ASCII spellings
icx. <- c("Ekstrom", "Joreskog", "bisschen Zurcher")
stopifnot(all.equal(icx2, icx.))
## Run the code above in your browser using DataLab