# NOT RUN {
# Example: simulate a LinUCB (disjoint, optimized) policy against a
# contextual Bernoulli bandit and plot the resulting cumulative regret.
library(contextual)

# Simulation size; both values are handed positionally to Simulator$new().
horizon <- 100
sims <- 100

policy <- LinUCBDisjointOptimizedPolicy$new(alpha = 0.9)

# 3 x 3 weight matrix, written one row per line.
# NOTE(review): presumably rows are context features and columns are arms --
# confirm against the ContextualBernoulliBandit documentation.
weights <- rbind(
  c(0.4, 0.2, 0.4),
  c(0.3, 0.4, 0.3),
  c(0.1, 0.8, 0.1)
)

bandit <- ContextualBernoulliBandit$new(weights = weights)
agent <- Agent$new(policy, bandit)

# Build the simulator first, then run it; run() returns the history object
# that is plotted below.
simulator <- Simulator$new(agent, horizon, sims)
history <- simulator$run()

plot(history, type = "cumulative", regret = TRUE)
# }
# Run the code above in your browser using DataLab