# Binary outcome example adapted from SuperLearner examples
set.seed(1)
N <- 200
X <- matrix(rnorm(N*10), N, 10)
X <- as.data.frame(X)
Y <- rbinom(N, 1, plogis(.2*X[, 1] + .1*X[, 2] - .2*X[, 3] +
                           .1*X[, 3]*X[, 4] - .2*abs(X[, 4])))
SL.library <- c("SL.glmnet", "SL.glm", "SL.knn", "SL.gam", "SL.mean")
# Fit using the non-negative least squares (NNLS) loss function
set.seed(1) # for reproducibility
fit_nnls <- SuperLearner(Y = Y, X = X, SL.library = SL.library,
                         verbose = TRUE, method = "method.NNLS", family = binomial())
fit_nnls
# Risk Coef
# SL.glmnet_All 0.2439433 0.01293059
# SL.glm_All 0.2461245 0.08408060
# SL.knn_All 0.2604000 0.09600353
# SL.gam_All 0.2471651 0.40761918
# SL.mean_All 0.2486049 0.39936611
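# The ensemble weights and cross-validated risks can also be inspected
# directly (illustrative; coef and cvRisk are standard components of the
# returned SuperLearner object):
fit_nnls$coef
fit_nnls$cvRisk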
# Recombine the fit using the negative log binomial likelihood loss function
fit_nnloglik <- recombineSL(fit_nnls, Y = Y, method = "method.NNloglik")
fit_nnloglik
# Risk Coef
# SL.glmnet_All 0.6815911 0.1577228
# SL.glm_All 0.6918926 0.0000000
# SL.knn_All Inf 0.0000000
# SL.gam_All 0.6935383 0.6292881
# SL.mean_All 0.6904050 0.2129891
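# Predictions from the recombined fit (illustrative sketch; we also pass
# the original X and Y, since some wrappers such as SL.knn may need the
# training data to generate predictions):
pred <- predict(fit_nnloglik, newdata = X, X = X, Y = Y)
head(pred$pred)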
# If we use the same seed as for the original `fit_nnls`, the
# recombineSL and SuperLearner results will be identical; however,
# the recombineSL version is much faster because it does not have
# to re-fit any of the base learners.
set.seed(1)
fit_nnloglik2 <- SuperLearner(Y = Y, X = X, SL.library = SL.library,
                              verbose = TRUE, method = "method.NNloglik", family = binomial())
fit_nnloglik2
# Risk Coef
# SL.glmnet_All 0.6815911 0.1577228
# SL.glm_All 0.6918926 0.0000000
# SL.knn_All Inf 0.0000000
# SL.gam_All 0.6935383 0.6292881
# SL.mean_All 0.6904050 0.2129891
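# Illustrative check (not part of the original example): confirm the two
# fits agree, and time both approaches to see the speed-up from skipping
# the base-learner re-fits. Timings will vary by machine.
all.equal(fit_nnloglik$coef, fit_nnloglik2$coef)
system.time(recombineSL(fit_nnls, Y = Y, method = "method.NNloglik"))
set.seed(1)
system.time(SuperLearner(Y = Y, X = X, SL.library = SL.library,
                         method = "method.NNloglik", family = binomial()))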