# As a simple example, to split the dataset "X" into "X_train" and "X_test"
# with 60% of the data in the training set and 40% of the data in the test
# set, we could run
if (FALSE) {
  # Hold out 40% of "X" as the test set; the rest becomes the training set.
  output <- preprocess_split(input = X, test_ratio = 0.4)
  X_train <- output$training
  X_test <- output$test
}
# By default the dataset is shuffled before it is split. Pass the
# "no_shuffle" option to keep the data from being shuffled, for
# example:
if (FALSE) {
  # Same 60/40 split as before, but with shuffling turned off via no_shuffle.
  output <- preprocess_split(
    input = X,
    test_ratio = 0.4,
    no_shuffle = TRUE
  )
  X_train <- output$training
  X_test <- output$test
}
# If we had a dataset "X" and associated labels "y", and we wanted to split
# these into "X_train", "y_train", "X_test", and "y_test", with 30% of the
# data in the test set, we could run
if (FALSE) {
  # Split the data "X" and its labels "y" together, with 30% in the test set.
  output <- preprocess_split(
    input = X,
    input_labels = y,
    test_ratio = 0.3
  )

  # Training portion: data and labels.
  X_train <- output$training
  y_train <- output$training_labels

  # Test portion: data and labels.
  X_test <- output$test
  y_test <- output$test_labels
}
# To maintain the ratio of each class in the train and test sets,
# the "stratify_data" option can be used.
if (FALSE) {
  # Request a stratified 60/40 split via stratify_data.
  # NOTE(review): stratifying by class presumably needs class labels, but this
  # call passes no `input_labels` -- confirm against the preprocess_split docs.
  output <- preprocess_split(
    input = X,
    test_ratio = 0.4,
    stratify_data = TRUE
  )
  X_train <- output$training
  X_test <- output$test
}
# Run the code above in your browser using DataLab