# NOT RUN {
# Generate data for the example: work on a copy of `heartfailure` and inject
# missing values so that there is something to impute.
heartfailure2 <- heartfailure
# Set `platelets` to NA in 20 randomly sampled rows. seq_len() is used instead
# of seq() so the index vector is empty (not c(1, 0)) if the data has no rows.
# No seed is set here, so the affected rows differ from run to run.
heartfailure2[sample(seq_len(NROW(heartfailure2)), 20), "platelets"] <- NA
# Set `smoking` to NA in 5 rows, sampled independently of the rows above.
heartfailure2[sample(seq_len(NROW(heartfailure2)), 5), "smoking"] <- NA
# Basic usage. Each call passes the data frame, the variable to impute, an
# optional target variable, and the name of the imputation method. The results
# are not assigned to anything here.
# Replace the missing values of the platelets variable with the median.
imputate_na(heartfailure2, platelets, method = "median")
# Replace the missing values of the platelets variable using method "rpart".
# The target variable is death_event.
imputate_na(heartfailure2, platelets, death_event, method = "rpart")
# Replace the missing values of the smoking variable with the mode.
imputate_na(heartfailure2, smoking, method = "mode")
# Replace the missing values of the smoking variable using method "mice".
# The target variable is death_event.
imputate_na(heartfailure2, smoking, death_event, method = "mice")
## using dplyr -------------------------------------
library(dplyr)
# Compare, per death_event group, the mean of platelets before imputation
# (NAs dropped) with the mean after imputation using method "knn".
# NOTE(review): no_attrs = TRUE presumably makes imputate_na() return a plain
# vector that can be stored as a column -- confirm against its documentation.
heartfailure2 %>%
  mutate(
    platelets_imp = imputate_na(
      heartfailure2, platelets, death_event,
      method = "knn", no_attrs = TRUE
    )
  ) %>%
  group_by(death_event) %>%
  summarise(
    orig = mean(platelets, na.rm = TRUE),
    imputation = mean(platelets_imp)
  )
# Keep the imputation result in a variable so it can be printed and summarised.
# Case 1: the variable of interest is a numerical variable (platelets).
# The result is stored under the same name as the column it was computed from.
# NOTE(review): the second argument presumably still refers to the column of
# heartfailure2, not to this new variable -- confirm.
platelets <- imputate_na(heartfailure2, platelets, death_event, method = "rpart")
platelets
summary(platelets)
# Plotting the result is left commented out in this example.
# plot(platelets)
# Case 2: the variable of interest is a categorical variable (smoking).
# As above, the result variable shares its name with the source column.
smoking <- imputate_na(heartfailure2, smoking, death_event, method = "mice")
smoking
summary(smoking)
# Plotting the result is left commented out in this example.
# plot(smoking)
# }
# NOT RUN {
# }
# Run the code above in your browser using DataLab