# NOT RUN {
# Opt in to h2o's data.table-based import path, which should be faster.
# The data.table and slam R packages need to be installed as well.
options(h2o.use.data.table = TRUE)
# }
# NOT RUN {
# Example: fit a SuperLearner ensemble whose library contains an h2o AutoML
# learner alongside the simple mean learner.
# `Y` (outcome) and `X` (predictors) are not created here; they must already
# exist in the workspace before this example is run.
library(h2o)
library(SuperLearner)

# Start an h2o server with all (physical) cores usable.
local_h2o <- h2o.init(
  nthreads = RhpcBLASctl::get_num_cores(),
  # May need to specify extra memory.
  max_mem_size = "8g"
)

# Build a customized SL.h2o_auto wrapper.
h2o_auto <- create.Learner(
  "SL.h2o_auto",
  # Increase max models and stopping rounds for better models.
  # Decrease nfolds for faster training but less certainty.
  params = list(
    max_models = 30,
    stopping_rounds = 5,
    nfolds = 10
  )
)

# Spell out TRUE rather than T: T is an ordinary variable that can be
# reassigned, which would silently change these settings.
sl <- SuperLearner(
  Y = Y,
  X = X,
  family = binomial(),
  SL.library = c("SL.mean", h2o_auto$names),
  verbose = TRUE,
  # Stratify during CV in case of rare outcome.
  cvControl = SuperLearner.CV.control(V = 10L, stratifyCV = TRUE)
)
print(sl)

# Stop the h2o server. NOTE(review): h2o.shutdown() asks for confirmation by
# default; pass prompt = FALSE if this should run unattended -- confirm
# against the installed h2o version.
h2o.shutdown()
# }
# Run the code above in your browser using DataLab