# NOT RUN {
## load 20 percent random sample
data(xdm20.train)
data(xdm20.test)
data(ydm20.train)
##test BART with token run to ensure installation works
## with current technology even a token run will violate CRAN policy
## set.seed(99)
## post <- recur.bart(x.train=xdm20.train, y.train=ydm20.train,
## nskip=1, ndpost=1, keepevery=1)
# }
# NOT RUN {
## set.seed(99)
## post <- recur.bart(x.train=xdm20.train, y.train=ydm20.train,
## keeptrainfits=TRUE)
## larger data sets can take some time so, if parallel processing
## is available, submit this statement instead
post <- mc.recur.bart(x.train=xdm20.train, y.train=ydm20.train,
keeptrainfits=TRUE, mc.cores=8, seed=99)
require(rpart)
require(rpart.plot)
post$yhat.train.mean <- apply(post$yhat.train, 2, mean)
dss <- rpart(post$yhat.train.mean~xdm20.train)
rpart.plot(dss)
## for the 20 percent sample, notice that the top splits
## involve cci_pvd and n
## for the full data set, notice that all splits
## involve ca, cci_pud, cci_pvd, ins270 and n
## (except one at the bottom involving a small group)
## compare patients treated with insulin (ins270=1) vs
## not treated with insulin (ins270=0)
N <- 50 ## 50 training patients and 50 validation patients
K <- post$K ## 798 unique time points
NK <- 50*K
## only testing set, i.e., remove training set
xdm20.test. <- xdm20.test[NK+1:NK, post$rm.const]
xdm20.test. <- rbind(xdm20.test., xdm20.test.)
xdm20.test.[ , 'ins270'] <- rep(0:1, each=NK)
## multiple threads will be utilized if available
pred <- predict(post, xdm20.test., mc.cores=8)
## create Friedman's partial dependence function for the
## relative intensity for ins270 by time
M <- nrow(pred$haz.test) ## number of MCMC samples
RI <- matrix(0, M, K)
for(j in 1:K) {
h <- seq(j, NK, by=K)
RI[ , j] <- apply(pred$haz.test[ , h+NK]/
pred$haz.test[ , h], 1, mean)
}
RI.lo <- apply(RI, 2, quantile, probs=0.025)
RI.mu <- apply(RI, 2, mean)
RI.hi <- apply(RI, 2, quantile, probs=0.975)
plot(post$times, RI.hi, type='l', lty=2, log='y',
ylim=c(min(RI.lo, 1/RI.hi), max(1/RI.lo, RI.hi)),
xlab='t', ylab='RI(t, x)',
sub='insulin(ins270=1) vs. no insulin(ins270=0)',
main='Relative intensity of hospital admissions for diabetics')
lines(post$times, RI.mu)
lines(post$times, RI.lo, lty=2)
lines(post$times, rep(1, K), col='darkgray')
## RI for insulin therapy seems fairly constant with time
mean(RI.mu)
# }
Run the code above in your browser using DataLab