# \donttest{
# Diagnose the categorical variables of the data set
diagnose_category(jobchange)

# Restrict the diagnosis to the named variables
diagnose_category(jobchange, education_level, company_type)
# Using pipes ---------------------------------
library(dplyr)
# With no selection, all categorical variables are diagnosed
jobchange %>%
  diagnose_category()

# Positive names select the variables to diagnose
jobchange %>%
  diagnose_category(company_type, job_chnge)

# Names prefixed with a minus sign are dropped from the diagnosis
jobchange %>%
  diagnose_category(-company_type, -job_chnge)

# The top argument keeps only the top-ranked levels
jobchange %>%
  diagnose_category(top = 2)
# Using pipes & dplyr -------------------------
# Keep only the levels that account for 60% or more of the categorical data
jobchange %>%
  diagnose_category() %>%
  filter(ratio >= 60)

# enrollee_id is a unique identifier, so every observation has a rank of 1.
# Selecting by rank (type = "rank") with top = 3 therefore displays all
# records, which will probably fill your screen.

# The type argument defaults to "n", so this call extracts only 3 rows
jobchange %>%
  diagnose_category(enrollee_id, top = 3)

# Extract the rows whose rank is less than or equal to 3
jobchange %>%
  diagnose_category(enrollee_id, top = 3, type = "rank")

# Extract only 3 rows (type given explicitly)
jobchange %>%
  diagnose_category(enrollee_id, top = 3, type = "n")
# Using group_by ------------------------------
# Diagnose the 'company_type' variable within each group of 'job_chnge'
jobchange %>%
  group_by(job_chnge) %>%
  diagnose_category(company_type)
# }
# Run the code above in your browser using DataLab