# NOT RUN {
# Build the example data: start from a copy of `heartfailure`, then set
# `sodium` to NA in 20 randomly drawn rows and `smoking` to NA in 5 randomly
# drawn rows.
heartfailure2 <- heartfailure
heartfailure2[sample.int(nrow(heartfailure2), 20), "sodium"] <- NA
heartfailure2[sample.int(nrow(heartfailure2), 5), "smoking"] <- NA
# Descriptive statistics for the numerical variables
describe(heartfailure2)
# Choose which variables to describe: listed by name, excluded with a
# leading minus, or given as a number
describe(
  heartfailure2,
  sodium, platelets,
  statistics = c("mean", "sd", "quantiles")
)
describe(heartfailure2, -sodium, -platelets)
describe(
  heartfailure2,
  5,
  statistics = c("mean", "sd", "quantiles"),
  quantiles = c(0.01, 0.1)
)
# Using a dplyr grouped data frame (grouped_df)
library(dplyr)
gdata <- group_by(
  heartfailure2,
  hblood_pressure, death_event
)
describe(gdata, "creatinine")
# Using pipes ---------------------------------
# Variables named without a sign are selected
heartfailure2 %>%
  describe(platelets, sodium, creatinine)
# Variables prefixed with a minus sign are dropped
heartfailure2 %>%
  describe(-platelets, -sodium, -creatinine)
# Using pipes & dplyr -------------------------
# Describe all numerical variables grouped by 'hblood_pressure' and
# 'death_event', then keep only the rows where 'hblood_pressure' is "Yes".
heartfailure2 %>%
  group_by(hblood_pressure, death_event) %>%
  describe() %>%
  filter(hblood_pressure == "Yes")
# Keep only the observations where 'smoking' is "Yes", then describe
# 'creatinine' grouped by 'hblood_pressure' and 'death_event'.
heartfailure2 %>%
  filter(smoking == "Yes") %>%
  group_by(hblood_pressure, death_event) %>%
  describe(creatinine)
# }
# Run the code above in your browser using DataLab