library("survival")
op <- options(digits = 2)
### GBSG2 dataset
data("GBSG2", package = "TH.data")
### right-censored response
GBSG2$y <- with(GBSG2, Surv(time, cens))
### define Bernstein(log(time)) parameterisation
### of transformation function. The response
### is bounded (log(0) doesn't work, so we use log(1))
### support defines the support of the Bernstein polynomial
### and add can be used to make the grid wider (see below)
rvar <- numeric_var("y", bounds = c(0, Inf),
support = c(100, 2000))
rb <- Bernstein_basis(rvar, order = 6, ui = "increasing")
### dummy coding of menopausal status
hb <- as.basis(~ 0 + menostat, data = GBSG2)
### treatment contrast of hormonal treatment
xb <- as.basis(~ horTh, data = GBSG2, remove_intercept = TRUE)
### set-up and fit Cox model, stratified by menopausal status
m <- ctm(rb, interacting = hb, shifting = xb, todistr = "MinExtrVal")
fm <- mlt(m, data = GBSG2)
### generate grid for all three variables
### note that the response grid ranges between 1 (bounds[1])
### and 2000 (support[2])
(d <- mkgrid(m, n = 10))
### data.frame of menopausal status and treatment
nd <- do.call("expand.grid", d[-1])
### plot model on different scales, for all four combinations
### of menopausal status and hormonal treatment
typ <- c("distribution", "survivor", "density", "hazard",
"cumhazard", "odds")
layout(matrix(1:6, nrow = 2))
nl <- sapply(typ, function(tp)
### K = 500 makes densities and hazards smooth
plot(fm, newdata = nd, type = tp, col = 1:nrow(nd), K = 500))
legend("topleft", lty = 1, col = 1:nrow(nd),
legend = do.call("paste", nd), bty = "n")
### plot calls predict, which generates a grid with K = 50
### response values
### note that a K x nrow(newdata) matrix is returned
### (for reasons explained in the next example)
predict(fm, newdata = nd, type = "survivor")
### newdata can take a list, and evaluates the survivor
### function on the grid defined by newdata
### using a linear array model formulation and is
### extremely efficient (wrt computing time and memory)
### d[1] (the response grid) varies fastest
### => the first dimension of predict() is always the response,
### not the dimension of the predictor variables (like one
### might expect)
predict(fm, newdata = d, type = "survivor")
### owing to this structure, the result can be quickly stored in
### a data frame as follows
cd <- do.call("expand.grid", d)
cd$surv <- c(S <- predict(fm, newdata = d, type = "survivor"))
### works for distribution functions
all.equal(1 - S, predict(fm, newdata = d, type = "distribution"))
### cumulative hazard functions
all.equal(-log(S), predict(fm, newdata = d, type = "cumhazard"))
### log-cumulative hazard functions (= trafo, for Cox models)
all.equal(log(-log(S)), predict(fm, newdata = d, type = "logcumhazard"))
all.equal(log(-log(S)), predict(fm, newdata = d, type = "trafo"))
### densities, hazards, or odds functions
predict(fm, newdata = d, type = "density")
predict(fm, newdata = d, type = "hazard")
predict(fm, newdata = d, type = "odds")
### and quantiles (10 and 20%)
predict(fm, newdata = d[-1], type = "quantile", prob = 1:2 / 10)
### note that some quantiles are only defined as intervals
### (> 2000, in this case). Intervals are returned as an "response"
### object, see ?R. Unfortunately, these can't be stored as array, so
### a data.frame is returned where the quantile varies first
p <- c(list(prob = 1:9/10), d[-1])
np <- do.call("expand.grid", p)
(Q <- predict(fm, newdata = d[-1], type = "quantile", prob = 1:9 / 10))
np$Q <- Q
np
### simulating from the model works by inverting the distribution
### function; some obs are right-censored at 2000
(s <- simulate(fm, newdata = nd, nsim = 3))
### convert to Surv
sapply(s, as.Surv)
### generate 3 parametric bootstrap samples from the model
tmp <- GBSG2[, c("menostat", "horTh")]
s <- simulate(fm, newdata = tmp, nsim = 3)
### refit the model using the simulated response
lapply(s, function(y) {
tmp$y <- y
coef(mlt(m, data = tmp))
})
options(op)
Run the code above in your browser using DataLab