library(riskRegression) # provides sampleData(), ate(), CSC(), Hist(), wglm()
library(survival)
library(rms)
library(prodlim)
library(data.table)
set.seed(10)
#### Survival settings ####
#### ATE with Cox model ####

## simulate right-censored survival data for n subjects;
## times rounded to one decimal to create ties
n <- 100
dtS <- sampleData(n, outcome = "survival")
dtS$time <- round(dtS$time, digits = 1)
## three-level treatment variable labelled T0/T1/T2
dtS$X1 <- factor(rbinom(n, size = 2, prob = c(0.3, 0.4)),
                 labels = paste0("T", 0:2))

## fit the Cox model (rms::cph); design matrix and response are stored
## (x = TRUE, y = TRUE) as required by ate()
fit <- cph(Surv(time, event) ~ X1 + X2, data = dtS, x = TRUE, y = TRUE)

## average treatment effect of X1 at times 5, 6, 7, and 8
## standard error computed from the influence function;
## confidence intervals / p-values based on asymptotic results
ateFit1a <- ate(fit, treatment = "X1", data = dtS, times = 5:8)
summary(ateFit1a)
summary(ateFit1a, type = "meanRisk", short = TRUE)
summary(ateFit1a, type = "diffRisk", short = TRUE)
summary(ateFit1a, type = "ratioRisk", short = TRUE)
if (FALSE) {
## same as before but additionally requesting confidence bands and
## adjusted p-values (argument band = TRUE)
ateFit1b <- ate(fit, data = dtS, treatment = "X1", times = 5:8,
band = TRUE)
summary(ateFit1b)
## by default bands/adjusted p-values are computed separately for each treatment modality
summary(ateFit1b, band = 1,
se = FALSE, type = "diffRisk", short = TRUE, quantile = TRUE)
## adjustment over treatment and time using the band argument of confint
summary(ateFit1b, band = 2,
se = FALSE, type = "diffRisk", short = TRUE, quantile = TRUE)
## confidence intervals / p-values computed using bootstrap samples
## (arguments se = TRUE and B = 50 below; handler selects the
## parallelization backend)
ateFit1c <- ate(fit, data = dtS, treatment = "X1",
times = 5:8, se = TRUE, B = 50, handler = "mclapply")
## NOTE: for real applications 50 bootstrap samples is not enough
## same but using 2 cpus for generating and analyzing the bootstrap samples
## (parallel computation, argument mc.cores = 2)
ateFit1d <- ate(fit, data = dtS, treatment = "X1",
times = 5:8, se = TRUE, B = 50, mc.cores = 2)
## manually defining the cluster to be used
## useful when specific packages need to be loaded in each cluster,
## here rms because the refitted model uses rms::rcs splines
fit <- cph(formula = Surv(time,event)~ X1+X2+rcs(X6),data=dtS,y=TRUE,x=TRUE)
cl <- parallel::makeCluster(2)
parallel::clusterEvalQ(cl, library(rms))
ateFit1e <- ate(fit, data = dtS, treatment = "X1",
times = 5:8, se = TRUE, B = 50,
handler = "foreach", cl = cl)
}
#### Survival settings without censoring ####
#### ATE with glm ####

## simulate binary-outcome data; X2 recoded from factor to numeric
## so it enters the model as a single linear term
n <- 100
dtB <- sampleData(n, outcome = "binary")
dtB[, X2 := as.numeric(X2)]

## logistic regression model for the outcome
fit <- glm(Y ~ X1 + X2, family = "binomial", data = dtB)

## average treatment effect of X1; point estimates only (se = FALSE)
ateFit1a <- ate(fit, treatment = "X1", data = dtB, se = FALSE)
print(ateFit1a)
if (FALSE) {
## with confidence intervals
ateFit1b <- ate(fit, data = dtB, treatment = "X1",
times = 5) ## times only labels the output here; not used in the computations
summary(ateFit1b, short = TRUE)
## cross-check using the lava package: predict the risk for everybody
## under X1 = 1 and under X1 = 0, then average over the sample
## (coefficient names "(Intercept)", "X11", "X2" match coef(fit))
library(lava)
ateLava <- estimate(fit, function(p, data){
a <- p["(Intercept)"] ; b <- p["X11"] ; c <- p["X2"] ;
R.X11 <- expit(a + b + c * data[["X2"]])
R.X10 <- expit(a + c * data[["X2"]])
list(risk0=R.X10,risk1=R.X11,riskdiff=R.X11-R.X10)},
average=TRUE)
ateLava
}
## see wglm for handling right-censoring with glm
#### Competing risks settings ####
#### ATE with cause specific Cox regression ####
## generate data with competing causes of failure
n <- 500
set.seed(10)
dt <- sampleData(n, outcome="competing.risks")
## three-level treatment variable labelled T0/T1/T2
dt$X1 <- factor(rbinom(n, prob = c(0.2,0.3) , size = 2), labels = paste0("T",0:2))
## estimate cause specific Cox models (CSC fits one Cox model per cause)
fitCR <- CSC(Hist(time,event)~ X1+X8,data=dt,cause=1)
## compute the ATE at times 1, 5, 10 using X1 as the treatment variable,
## with influence-function standard errors and simultaneous confidence bands
ateFit2a <- ate(fitCR, data = dt, treatment = "X1", times = c(1,5,10),
cause = 1, se = TRUE, band = TRUE)
summary(ateFit2a)
as.data.table(ateFit2a)
#### Double robust estimator ####
## uses three working models: outcome (event), censoring, and treatment
if (FALSE) {
## generate competing-risks data; times rounded to one decimal
n <- 500
set.seed(10)
dt <- sampleData(n, outcome="competing.risks")
dt$time <- round(dt$time,1)
## binary treatment variable labelled T0/T1
dt$X1 <- factor(rbinom(n, prob = c(0.4) , size = 1), labels = paste0("T",0:1))
## working models: cause-specific Cox for the event, Cox for censoring
## (event==0), and logistic regression for treatment assignment
m.event <- CSC(Hist(time,event)~ X1+X2+X3+X5+X8,data=dt)
m.censor <- coxph(Surv(time,event==0)~ X1+X2+X3+X5+X8,data=dt, x = TRUE, y = TRUE)
m.treatment <- glm(X1~X2+X3+X5+X8,data=dt,family=binomial(link="logit"))
## prediction + average
ateRobust <- ate(event = m.event,
treatment = m.treatment,
censor = m.censor,
data = dt, times = 5:10,
cause = 1, band = TRUE)
## compare the g-formula, IPTW, and AIPTW estimators side by side
ateRobust3 <- ate(event = m.event,
treatment = m.treatment,
censor = m.censor,
estimator = c("GFORMULA","IPTW","AIPTW"),
data = dt, times = c(5:10),
cause = 1, se = TRUE)
print(setkeyv(as.data.table(ateRobust3, type = "meanRisk"),"time"))
print(setkeyv(as.data.table(ateRobust3, type = "diffRisk"),"time"))
}
#### time-dependent covariates ####
if (FALSE) {
library(survival)
## time-fixed Cox model on the veteran data, for reference
fit <- coxph(Surv(time, status) ~ celltype+karno + age + trt, veteran)
## split the follow-up at 60 and 120 days into (tstart, time] episodes
vet2 <- survSplit(Surv(time, status) ~., veteran,
cut=c(60, 120), episode ="timegroup")
## Cox model on the counting-process (start-stop) representation
fitTD <- coxph(Surv(tstart, time, status) ~ celltype +karno + age + trt,
data= vet2,x=1)
set.seed(16)
## ATE of celltype at times = 5 with landmark times 0, 30, 60, 90
## (see ?ate for the landmark semantics); bootstrap standard errors
## with B = 50 — too small for real applications
resVet <- ate(fitTD,formula=Hist(entry=tstart,time=time,event=status)~1,
data = vet2, treatment = "celltype",
times=5,verbose=1,
landmark = c(0,30,60,90), cause = 1, B = 50, se = 1,
band = FALSE, mc.cores=1)
summary(resVet)
}
if (FALSE) {
set.seed(137)
## simulate data with delayed entry (start/stop format)
d=sampleDataTD(127)
library(survival)
## binary status indicator: cause-1 event vs anything else
d[,status:=1*(event==1)]
d[,X3:=as.factor(X3)]
## ignore competing risks: single Cox model on counting-process data
cox1TD <- coxph(Surv(start,time, status,type="counting") ~ X3+X5+X6+X8,
data=d, x = TRUE)
## landmark ATE of X3; bootstrap SE with B = 20 — too small for real use
resTD1 <- ate(cox1TD,formula=Hist(entry=start,time=time,event=status)~1,
data = d, treatment = "X3", contrasts = NULL,
times=.5,verbose=1,
landmark = c(0,0.5,1), B = 20, se = 1,
band = FALSE, mc.cores=1)
resTD1
## account for competing risks via cause-specific Cox models
cscTD <- CSC(Hist(time=time, event=event,entry=start) ~ X3+X5+X6+X8, data=d)
set.seed(16)
resTD <- ate(cscTD,formula=Hist(entry=start,time=time,event=event)~1,
data = d, treatment = "X3", contrasts = NULL,
times=.5,verbose=1,
landmark = c(0,0.5,1), cause = 1, B = 20, se = 1,
band = FALSE, mc.cores=1)
resTD
}
## (website footer, not R code): Run the code above in your browser using DataLab