# NOT RUN {
# Multi-action policy learning.
# Simulate n observations with p standard-normal covariates.
n <- 250
p <- 10
X <- matrix(rnorm(n * p), n, p)
# Assign every observation to one of three arms, sampled with replacement.
W <- as.factor(sample(c("A", "B", "C"), n, replace = TRUE))
# Outcome: baseline X[, 1]; arm "B" adds X[, 2]; arm "C" adds X[, 3];
# plus uniform noise.
Y <- X[, 1] + X[, 2] * (W == "B") + X[, 3] * (W == "C") + runif(n)
multi.forest <- grf::multi_arm_causal_forest(X, Y, W)
# Compute doubly robust reward estimates.
# Namespace-qualified (like the grf call above) so the example also runs
# when policytree has not been attached with library().
Gamma.matrix <- policytree::double_robust_scores(multi.forest)
# Fit a depth 2 tree on a random training subset.
train <- sample(seq_len(n), 200)
opt.tree <- policytree::policy_tree(
  X[train, ],
  Gamma.matrix[train, ],
  depth = 2
)
opt.tree
# Predict treatment on held out data.
predict(opt.tree, X[-train, ])
# }
# NOT RUN {
# }
# Run the code above in your browser using DataLab