if(FALSE){
library(BuyseTest) ## performanceResample() is assumed to come from the BuyseTest package
#### simulate data ####
set.seed(10)
n <- 100
df.train <- data.frame(Y = rbinom(n, prob = 0.5, size = 1),
                       X1 = rnorm(n), X2 = rnorm(n), X3 = rnorm(n), X4 = rnorm(n),
                       X5 = rnorm(n), X6 = rnorm(n), X7 = rnorm(n), X8 = rnorm(n),
                       X9 = rnorm(n), X10 = rnorm(n))
df.train$Y <- rbinom(n, size = 1,
                     prob = 1/(1+exp(-df.train$X5 - df.train$X6 - df.train$X7)))
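## sanity check (added illustration, e.oracle is not part of the original example):
## the outcome only depends on X5-X7, so refitting the data-generating model
## should recover coefficients roughly close to 1 (up to sampling noise)
e.oracle <- glm(Y~X5+X6+X7, data = df.train, family = binomial(link="logit"))
coef(e.oracle)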
#### fit models ####
e.null <- glm(Y~1, data = df.train, family = binomial(link="logit"))
e.logit <- glm(Y~X1+X2, data = df.train, family = binomial(link="logit"))
e.logit2 <- glm(Y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10, data = df.train,
                family = binomial(link="logit"))
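## e.null ignores all covariates, e.logit only uses noise covariates (X1, X2),
## while e.logit2 also includes the informative covariates X5-X7 and is
## therefore expected to show better predictive performance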
#### evaluate model (same seed) ####
fold.repetition <- 5 ## 0: assess performance on the training data (internal, fast)
                     ## >0: number of repetitions of the 10-fold cross-validation (slow)
test <- performanceResample(list(e.logit,e.logit2), seed = 10,
                            fold.repetition = fold.repetition, n.resampling = 100)
test.1 <- performanceResample(list(e.logit,e.logit2), seed = 10,
                              fold.repetition = fold.repetition, n.resampling = 1:50)
test.2 <- performanceResample(list(e.logit,e.logit2), seed = 10,
                              fold.repetition = fold.repetition, n.resampling = 51:100)
rbind(test.1,test.2)
test
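## splitting n.resampling into 1:50 and 51:100 (as above) makes it possible to run
## the resampling in several R sessions and combine the pieces afterwards with rbind()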
## Note: when fitting the prediction model itself calls the RNG,
## rbind(test.1, test.2) may not reproduce test
#### evaluate model (different seed) ####
test.3 <- performanceResample(list(e.logit,e.logit2), seed = 11,
                              fold.repetition = fold.repetition, n.resampling = 1:50)
test.4 <- performanceResample(list(e.logit,e.logit2), seed = 12,
                              fold.repetition = fold.repetition, n.resampling = 51:100)
rbind(test.3, test.4, tolerance = NA) ## tolerance = NA: do not check that the point
                                      ## estimates of test.3 and test.4 are equal
test
}