# \donttest{
# Correlation coefficients of all numerical variables
tab_corr <- correlate(heartfailure)
tab_corr
# Select the variables to compute (names passed as character strings)
correlate(heartfailure, "creatinine", "sodium")
# Non-parametric correlation coefficient by the Kendall method
correlate(heartfailure, creatinine, method = "kendall")
# Theil's U correlation coefficient (uncertainty coefficient)
tab_corr <- correlate(heartfailure, anaemia, hblood_pressure, method = "theil")
tab_corr
# Using a grouped data frame (dplyr::grouped_df)
library(dplyr)
gdata <- group_by(heartfailure, smoking, death_event)
correlate(gdata)
# Pipe-based usage ----------------------------
# Correlation coefficients of all numerical variables, with the data
# supplied through the pipe
heartfailure %>%
  correlate()
# Non-parametric correlation coefficient by the Spearman method
heartfailure %>%
  correlate(creatinine, sodium, method = "spearman")
# ---------------------------------------------
# Correlation coefficients with the redundant combinations of
# variables removed
heartfailure %>%
  correlate() %>%
  filter(as.integer(var1) > as.integer(var2))
# Using pipes & dplyr -------------------------
# Compute the correlation coefficient of the 'creatinine' variable within
# each combination of the 'smoking' and 'death_event' variables, and keep
# only the rows whose absolute correlation coefficient is at least 0.2
heartfailure %>%
  group_by(smoking, death_event) %>%
  correlate(creatinine) %>%
  filter(abs(coef_corr) >= 0.2)
# Keep only the observations whose 'smoking' level is "Yes", compute the
# correlation coefficient of the 'creatinine' variable within each
# combination of the 'hblood_pressure' and 'death_event' variables, and
# keep only the negative coefficients below -0.5
heartfailure %>%
  filter(smoking == "Yes") %>%
  group_by(hblood_pressure, death_event) %>%
  correlate(creatinine) %>%
  # negative AND absolute value above 0.5 is the same as "below -0.5"
  filter(coef_corr < -0.5)
# }
# Database-backed example. The guard below is `if (FALSE)`, so this block is
# never executed automatically; run its body by hand if you have the 'DBI'
# and 'RSQLite' packages installed.
if (FALSE) {
  library(dplyr)

  # Open a connection to an in-memory SQLite database
  con_sqlite <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")

  # Copy heartfailure into the database as a table named TB_HEARTFAILURE
  copy_to(con_sqlite, heartfailure, name = "TB_HEARTFAILURE", overwrite = TRUE)

  # Using pipes ---------------------------------
  # Correlation coefficients of all numerical variables
  con_sqlite %>%
    tbl("TB_HEARTFAILURE") %>%
    correlate()

  # Using pipes & dplyr -------------------------
  # Correlation coefficient of the 'creatinine' variable, grouped by the
  # 'hblood_pressure' and 'death_event' variables
  con_sqlite %>%
    tbl("TB_HEARTFAILURE") %>%
    group_by(hblood_pressure, death_event) %>%
    correlate(creatinine)

  # Close the database connection
  DBI::dbDisconnect(con_sqlite)
}
# Run the code above in your browser using DataLab