# NOT RUN {
# Attach preText, which supplies the functions and example data used below.
library(preText)

# Make the UK_Manifestos example data available in the workspace.
data("UK_Manifestos")

# Preprocess the manifestos with n-grams enabled, an infrequent-term
# threshold of 0.02, and verbose output requested.
preprocessed_documents <- factorial_preprocessing(
  UK_Manifestos,
  use_ngrams = TRUE,
  infrequent_term_threshold = 0.02,
  verbose = TRUE
)

# Take the first element of the returned dfm_list and pass it to
# remove_infrequent_terms() with a proportion threshold of 0.5.
updated_dfm <- remove_infrequent_terms(
  preprocessed_documents[["dfm_list"]][[1L]],
  proportion_threshold = 0.5,
  indices = NULL,
  verbose = TRUE
)
# }
# Run the code above in your browser using DataLab