# NOT RUN {
# Diagnose the categorical variables of jobchange
diagnose_category(jobchange)

# Restrict the diagnosis to chosen variables (by name, negation, string, or
# position)
# diagnose_category(jobchange, education_level, company_type)
# diagnose_category(jobchange, -education_level, -company_type)
# diagnose_category(jobchange, "education_level", "company_type")
# diagnose_category(jobchange, 7)

# Using pipes ---------------------------------
library(dplyr)

# Diagnose all categorical variables
jobchange %>%
  diagnose_category()

# Bare (positive) names select the variables to diagnose
jobchange %>%
  diagnose_category(company_type, job_chnge)

# Names prefixed with a minus sign drop those variables
jobchange %>%
  diagnose_category(-company_type, -job_chnge)

# A positive position selects the variable at that position
# jobchange %>%
#   diagnose_category(7)

# A negative position drops the variable at that position
# jobchange %>%
#   diagnose_category(-7)

# Limit the output to the top levels with the top argument
jobchange %>%
  diagnose_category(top = 2)
# Using pipes & dplyr -------------------------
# Keep only the levels whose ratio is at least 60 (percent of the data)
jobchange %>%
  diagnose_category() %>%
  filter(ratio >= 60)

# enrollee_id is a unique identifier, so every one of its observations has
# rank 1. Selecting up to rank 3 therefore returns all records, which will
# probably fill your screen.

# type is left at its default ("n"), so top = 3 is applied as a row count
jobchange %>%
  diagnose_category(enrollee_id, top = 3)

# type = "rank": keep the rows whose rank is less than or equal to 3
jobchange %>%
  diagnose_category(enrollee_id, top = 3, type = "rank")

# type = "n": keep only 3 rows
jobchange %>%
  diagnose_category(enrollee_id, top = 3, type = "n")
# }
# Run the code above in your browser using DataLab