# Build a small example data set with one numeric, one character and one
# logical column, each containing at least one NA.
# library() is used rather than require(): if data.table is not installed the
# script stops here with an error, instead of getting FALSE back and failing
# later at the data.table() call.
library(data.table)
data_set <- data.table(
  numCol = c(1, 2, 3, NA),
  charCol = c("", "a", NA, "c"),
  booleanCol = c(TRUE, NA, FALSE, NA)
)
# Default replacements: NAs become 0 in numeric columns, FALSE in logical
# columns and "" in character columns
fast_handle_na(copy(data_set))

# In a character column, to set NAs to "missing"
fast_handle_na(copy(data_set), set_char = "missing")

# In a numeric column, to set NAs to the minimum value of the column.
# Passing min directly won't work: min(c(1, NA)) is NA, so NA is put back
fast_handle_na(copy(data_set), set_num = min)
# Dropping NAs inside the function handles them
fast_handle_na(
  copy(data_set),
  set_num = function(x) min(x, na.rm = TRUE)
)
# In a numeric column, to set NAs to the share of NA values
# Share of missing values in x: the number of NA entries divided by the
# total number of entries (a zero-length x therefore yields 0 / 0).
rateNA <- function(x) {
  n_missing <- sum(is.na(x))
  n_total <- length(x)
  n_missing / n_total
}
# Pass the rateNA function itself as set_num (no parentheses: it is not
# called here); the call operates on a copy of data_set.
fast_handle_na(copy(data_set), set_num = rateNA)
# Run the code above in your browser using DataLab