# NOT RUN {
# Attach dlookr explicitly: it supplies the `heartfailure` dataset and the
# correlate() verb used throughout this script. Without it the script only
# works inside the package's own example runner. It is attached before dplyr
# so that dplyr's verbs keep precedence on the search path.
library(dlookr)
library(dplyr)

# Open a throwaway in-memory SQLite database.
con_sqlite <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")

# Upload heartfailure to the DBMS as table TB_HEARTFAILURE, replacing any
# table of that name that already exists.
copy_to(
  con_sqlite,
  heartfailure,
  name = "TB_HEARTFAILURE",
  overwrite = TRUE
)
# Using pipes ---------------------------------

# No selection: correlation coefficients of all numerical variables.
tbl(con_sqlite, "TB_HEARTFAILURE") %>%
  correlate()

# Bare names select the variables to correlate.
tbl(con_sqlite, "TB_HEARTFAILURE") %>%
  correlate(platelets, sodium)

# Negated names drop variables; this call runs in in-memory mode with a
# collect size of 200.
tbl(con_sqlite, "TB_HEARTFAILURE") %>%
  correlate(-platelets, -sodium, collect_size = 200)

# A positive position selects the variable at that position.
tbl(con_sqlite, "TB_HEARTFAILURE") %>%
  correlate(1)

# Negative positions drop the variables at those positions.
tbl(con_sqlite, "TB_HEARTFAILURE") %>%
  correlate(-1, -2, -3, -5, -6)
# ---------------------------------------------
# Correlation coefficients with redundant variable combinations removed:
# keep a pair only when var1's integer code is larger than var2's, so each
# unordered pair appears once.
tbl(con_sqlite, "TB_HEARTFAILURE") %>%
  correlate() %>%
  filter(as.integer(var2) < as.integer(var1))

# Same de-duplication, restricted to the selected variables.
tbl(con_sqlite, "TB_HEARTFAILURE") %>%
  correlate(platelets, sodium) %>%
  filter(as.integer(var2) < as.integer(var1))
# Using pipes & dplyr -------------------------

# Correlation coefficients of the creatinine variable within each
# combination of 'hblood_pressure' and 'death_event', keeping only rows
# whose absolute correlation coefficient is at least 0.2.
tbl(con_sqlite, "TB_HEARTFAILURE") %>%
  group_by(hblood_pressure, death_event) %>%
  correlate(creatinine) %>%
  filter(abs(coef_corr) >= 0.2)

# Restrict to rows where 'hblood_pressure' is "Yes", then compute the
# correlation coefficients of 'creatinine' within each combination of 'sex'
# and 'death_event'. Keep only coefficients that are negative and whose
# absolute value exceeds 0.3 (i.e. smaller than -0.3).
tbl(con_sqlite, "TB_HEARTFAILURE") %>%
  filter(hblood_pressure == "Yes") %>%
  group_by(sex, death_event) %>%
  correlate(creatinine) %>%
  filter(coef_corr < 0, abs(coef_corr) > 0.3)
# Close the connection to the DBMS now that the examples are finished.
DBI::dbDisconnect(conn = con_sqlite)
# }
# Run the code above in your browser using DataLab