# \donttest{
# Example: train an LM Forest to estimate conditional average treatment
# effects (CATEs) in a regression discontinuity design (RDD).
#
# The optional `rdd` package supplies the bandwidth and kernel-weight helpers.
# Its presence is checked with requireNamespace() and its functions are called
# as `rdd::...`, so running the example does not attach `rdd` to the search
# path. NOTE(review): `lm_forest` is not loaded here; the example presumably
# runs with the package that provides it already attached -- confirm.
if (requireNamespace("rdd", quietly = TRUE)) {
  # Simulate a simple example with a heterogeneous jump in the CEF.
  n <- 2000
  p <- 5
  X <- matrix(rnorm(n * p), n, p)
  Z <- runif(n, -4, 4) # running variable
  cutoff <- 0
  W <- as.numeric(Z >= cutoff) # 1 at or above the cutoff, 0 below
  tau <- pmax(0.5 * X[, 1], 0) # true jump; varies with the first covariate
  Y <- tau * W + 1 / (1 + exp(2 * Z)) + 0.2 * rnorm(n)

  # Compute the Imbens-Kalyanaraman MSE-optimal bandwidth for a local linear
  # regression.
  bandwidth <- rdd::IKbandwidth(Z, Y, cutoff)

  # Compute kernel weights for a triangular kernel.
  sample.weights <- rdd::kernelwts(Z, cutoff, bandwidth, "triangular")

  # Alternatively, specify bandwidth and triangular kernel weights without
  # using the `rdd` package.
  # bandwidth <- # user can hand-specify this.
  # dist <- abs((Z - cutoff) / bandwidth)
  # sample.weights <- (1 - dist) * (dist <= 1) / bandwidth

  # Estimate a local linear regression with the running variable Z conditional
  # on covariates X = x:
  # Y = c(x) + tau(x) W + b(x) Z.
  # Specify gradient.weights = c(1, 0) to target heterogeneity in the RDD
  # coefficient tau(x).
  # Also, fit forest on subset with non-zero weights for faster estimation.
  subset <- sample.weights > 0
  lmf <- lm_forest(
    # drop = FALSE keeps both inputs as matrices even if a single row is kept.
    X[subset, , drop = FALSE],
    Y[subset],
    cbind(W, Z)[subset, , drop = FALSE],
    sample.weights = sample.weights[subset],
    gradient.weights = c(1, 0)
  )

  # Keep the predictions for the first regressor, W (listed first in
  # cbind(W, Z)), i.e. the estimate of tau(x).
  tau.hat <- predict(lmf)$predictions[, 1, ]

  # Plot estimated tau(x) vs simulated ground truth.
  plot(X[subset, 1], tau.hat)
  points(X[subset, 1], tau[subset], col = "red", cex = 0.1)
}
# }
# Run the code above in your browser using DataLab