# Regression task: split into train and test sets of roughly equal size
# (ratio = 0.5) while stratifying on the binned response.
task = tsk("boston_housing")
split = partition(task, ratio = 0.5)
# Pick the train/test index vectors by name so the group sizes passed to
# rep() always line up with the two labels, even if partition() returns
# additional sets (e.g. a validation set) alongside them.
train_test = split[c("train", "test")]
data = data.frame(
  y = c(task$truth(train_test$train), task$truth(train_test$test)),
  split = rep(c("train", "predict"), lengths(train_test))
)
# Compare the distribution of the response between the two sets.
boxplot(y ~ split, data = data)
# Classification task: partition() with its default ratio.
task = tsk("pima")
split = partition(task)

# Relative frequency of each target label among the given rows of a task;
# rows = NULL is forwarded unchanged to task$truth().
label_shares = function(task, rows = NULL) {
  prop.table(table(task$truth(rows)))
}

# The label distribution should be roughly the same in the full task,
# the training set and the test set.
label_shares(task)
label_shares(task, split$train)
label_shares(task, split$test)
# Splitting into 3 disjoint sets, using ResamplingCV and stratification.
task = tsk("iris")
# Adding the target to the "stratum" role makes the resampling stratify on it.
task$set_col_roles(task$target_names, add_to = "stratum")
r = rsmp("cv", folds = 3)$instantiate(task)
# The test sets of a cross-validation are the folds themselves and therefore
# pairwise disjoint; the training sets overlap (each one is the union of the
# other folds), so they must not be used here.
sets = lapply(seq_len(r$iters), r$test_set)
lengths(sets)
# Label distribution within the first set.
prop.table(table(task$truth(sets[[1]])))
# Run the code above in your browser using DataLab