library(dplyr)
# Assuming 'df' is the dataframe you want to process
df <- tibble::tibble(
Sepal_Length = c(5.2, 5, 5.7, NA, 6.2, 6.7, 5.5),
Petal_Length = c(1.5, 1.4, 4.2, 1.4, NA, 5.8, 3.7),
Petal_Width = c(NA, 0.2, 1.2, 0.2, 1.3, 1.8, NA),
Species = c("setosa", NA, "versicolor", "setosa",
NA, "virginica", "setosa")
)
# Impute using the mean method for continuous variables
result_df_mean <- fill_missing_values(df, method = "mean")
result_df_mean
# Impute using the geometric mean for continuous variables and specify
# variables `Petal_Length` and `Petal_Width`.
result_df_geomean <- fill_missing_values(df, selected_variables = c
("Petal_Length", "Petal_Width"), method = "geometric")
result_df_geomean
# Impute missing values (NAs) in a grouped data frame
# You can do that by using the following:
sample_iris <- tibble::tibble(
Sepal_Length = c(5.2, 5, 5.7, NA, 6.2, 6.7, 5.5),
Petal_Length = c(1.5, 1.4, 4.2, 1.4, NA, 5.8, 3.7),
Petal_Width = c(0.3, 0.2, 1.2, 0.2, 1.3, 1.8, NA),
Species = c("setosa", "setosa", "versicolor", "setosa",
"virginica", "virginica", "setosa")
)
sample_iris %>%
group_by(Species) %>%
group_split() %>%
map_df(fill_missing_values, method = "median")
Run the code above in your browser using DataLab