rm_white
- Remove multiple white space (> 1 becomes a single white
space), white space before a comma, white space before a single or
consecutive combination of a colon, semicolon, or endmark (period, question
mark, or exclamation point), white space after a left bracket ("{", "(", "[")
or before a right bracket ("}", ")", "]"), leading or trailing white space.
rm_white_bracket
- Remove white space after a left bracket ("{", "(", "[")
or before a right bracket ("}", ")", "]").
rm_white_colon
- Remove white space before a single or consecutive
combination of a colon or semicolon.
rm_white_comma
- Remove white space before a comma.
rm_white_endmark
- Remove white space before endmark(s) (".", "?", "!").
rm_white_lead
- Remove leading white space.
rm_white_lead_trail
- Remove leading or trailing white space.
rm_white_trail
- Remove trailing white space.
rm_white_multiple
- Remove multiple white space (> 1 becomes a single
white space).
rm_white_punctuation
- Remove white space before a comma, white
space before a single or consecutive combination of a colon, semicolon, or
endmark (period, question mark, or exclamation point).
rm_white(
text.var,
trim = FALSE,
clean = FALSE,
pattern = "@rm_white",
replacement = "",
extract = FALSE,
dictionary = getOption("regex.library"),
...
)
ex_white(
text.var,
trim = FALSE,
clean = FALSE,
pattern = "@rm_white",
replacement = "",
extract = TRUE,
dictionary = getOption("regex.library"),
...
)
rm_white_bracket(
text.var,
trim = !extract,
clean = TRUE,
pattern = "@rm_white_bracket",
replacement = "",
extract = FALSE,
dictionary = getOption("regex.library"),
...
)
ex_white_bracket(
text.var,
trim = !extract,
clean = TRUE,
pattern = "@rm_white_bracket",
replacement = "",
extract = TRUE,
dictionary = getOption("regex.library"),
...
)
rm_white_colon(
text.var,
trim = !extract,
clean = TRUE,
pattern = "@rm_white_colon",
replacement = "",
extract = FALSE,
dictionary = getOption("regex.library"),
...
)
ex_white_colon(
text.var,
trim = !extract,
clean = TRUE,
pattern = "@rm_white_colon",
replacement = "",
extract = TRUE,
dictionary = getOption("regex.library"),
...
)
rm_white_comma(
text.var,
trim = !extract,
clean = TRUE,
pattern = "@rm_white_comma",
replacement = "",
extract = FALSE,
dictionary = getOption("regex.library"),
...
)
ex_white_comma(
text.var,
trim = !extract,
clean = TRUE,
pattern = "@rm_white_comma",
replacement = "",
extract = TRUE,
dictionary = getOption("regex.library"),
...
)
rm_white_endmark(
text.var,
trim = !extract,
clean = TRUE,
pattern = "@rm_white_endmark",
replacement = "",
extract = FALSE,
dictionary = getOption("regex.library"),
...
)
ex_white_endmark(
text.var,
trim = !extract,
clean = TRUE,
pattern = "@rm_white_endmark",
replacement = "",
extract = TRUE,
dictionary = getOption("regex.library"),
...
)
rm_white_lead(
text.var,
trim = FALSE,
clean = FALSE,
pattern = "@rm_white_lead",
replacement = "",
extract = FALSE,
dictionary = getOption("regex.library"),
...
)
ex_white_lead(
text.var,
trim = FALSE,
clean = FALSE,
pattern = "@rm_white_lead",
replacement = "",
extract = TRUE,
dictionary = getOption("regex.library"),
...
)
rm_white_lead_trail(
text.var,
trim = FALSE,
clean = FALSE,
pattern = "@rm_white_lead_trail",
replacement = "",
extract = FALSE,
dictionary = getOption("regex.library"),
...
)
ex_white_lead_trail(
text.var,
trim = FALSE,
clean = FALSE,
pattern = "@rm_white_lead_trail",
replacement = "",
extract = TRUE,
dictionary = getOption("regex.library"),
...
)
rm_white_trail(
text.var,
trim = FALSE,
clean = FALSE,
pattern = "@rm_white_trail",
replacement = "",
extract = FALSE,
dictionary = getOption("regex.library"),
...
)
ex_white_trail(
text.var,
trim = FALSE,
clean = FALSE,
pattern = "@rm_white_trail",
replacement = "",
extract = TRUE,
dictionary = getOption("regex.library"),
...
)
rm_white_multiple(
text.var,
trim = !extract,
clean = TRUE,
pattern = "@rm_white_multiple",
replacement = "",
extract = FALSE,
dictionary = getOption("regex.library"),
...
)
ex_white_multiple(
text.var,
trim = !extract,
clean = TRUE,
pattern = "@rm_white_multiple",
replacement = "",
extract = TRUE,
dictionary = getOption("regex.library"),
...
)
rm_white_punctuation(
text.var,
trim = !extract,
clean = TRUE,
pattern = "@rm_white_punctuation",
replacement = "",
extract = FALSE,
dictionary = getOption("regex.library"),
...
)
ex_white_punctuation(
text.var,
trim = !extract,
clean = TRUE,
pattern = "@rm_white_punctuation",
replacement = "",
extract = TRUE,
dictionary = getOption("regex.library"),
...
)
Returns a character string with extra white space removed.
The text variable.
logical. If TRUE
removes leading and trailing white
spaces.
logical. If TRUE
extra white spaces and escaped
characters will be removed.
A character string containing a regular expression (or
character string for fixed = TRUE
) to be matched in the given
character vector. Default, @rm_white
uses the
rm_white
regex from the regular expression dictionary from
the dictionary
argument.
Replacement for matched pattern
.
logical. If TRUE
the matched white space is extracted into a
list of vectors.
A dictionary of canned regular expressions to search within
if pattern
begins with "@rm_"
.
Other arguments passed to gsub
.
rm_white_endmark
/rm_white_punctuation
- stackoverflow's hwnd and Tyler Rinker <tyler.rinker@gmail.com>.
The rm_white_endmark
/rm_white_punctuation
regular expression was taken from:
https://stackoverflow.com/a/25464921/1000343
Other rm_ functions:
rm_abbreviation()
,
rm_between()
,
rm_bracket()
,
rm_caps_phrase()
,
rm_caps()
,
rm_citation_tex()
,
rm_citation()
,
rm_city_state_zip()
,
rm_city_state()
,
rm_date()
,
rm_default()
,
rm_dollar()
,
rm_email()
,
rm_emoticon()
,
rm_endmark()
,
rm_hash()
,
rm_nchar_words()
,
rm_non_ascii()
,
rm_non_words()
,
rm_number()
,
rm_percent()
,
rm_phone()
,
rm_postal_code()
,
rm_repeated_characters()
,
rm_repeated_phrases()
,
rm_repeated_words()
,
rm_tag()
,
rm_time()
,
rm_title_name()
,
rm_url()
,
rm_zip()
x <- c(" There is ( $5.50 ) for , me . ", " that's [ 45.6% ] of! the pizza !",
" 14% is { $26 } or $25.99 ?", "Oh ; here's colon : Yippee !")
rm_white(x)
rm_white_bracket(x)
rm_white_colon(x)
rm_white_comma(x)
rm_white_endmark(x)
rm_white_lead(x)
rm_white_trail(x)
rm_white_lead_trail(x)
rm_white_multiple(x)
rm_white_punctuation(x)
Run the code above in your browser using DataLab