# \donttest{
#####
# Fit model with lung data set from survival
# Warning: long-ish computation time
library(dynamichazard)
.lung <- lung[!is.na(lung$ph.ecog), ]
# standardize
.lung$age <- scale(.lung$age)
# fit
set.seed(43588155)
pf_fit <- PF_EM(
Surv(time, status == 2) ~ ddFixed(ph.ecog) + age,
data = .lung, by = 50, id = 1:nrow(.lung),
Q_0 = diag(1, 2), Q = diag(.5^2, 2),
max_T = 800,
control = PF_control(
# these number should be larger! Small for CRAN checks
N_fw_n_bw = 100L, N_first = 250L, N_smooth = 100L,
n_max = 50, eps = .001, Q_tilde = diag(.2^2, 2), est_a_0 = FALSE,
n_threads = 2))
# Plot state vector estimates
plot(pf_fit, cov_index = 1)
plot(pf_fit, cov_index = 2)
# Plot log-likelihood
plot(pf_fit$log_likes)
# }
# \donttest{
######
# example with fixed intercept
# prepare data
temp <- subset(pbc, id <= 312, select=c(id, sex, time, status, edema, age))
pbc2 <- tmerge(temp, temp, id=id, death = event(time, status))
pbc2 <- tmerge(pbc2, pbcseq, id=id, albumin = tdc(day, albumin),
protime = tdc(day, protime), bili = tdc(day, bili))
pbc2 <- pbc2[, c("id", "tstart", "tstop", "death", "sex", "edema",
"age", "albumin", "protime", "bili")]
pbc2 <- within(pbc2, {
log_albumin <- log(albumin)
log_protime <- log(protime)
log_bili <- log(bili)
})
# standardize
for(c. in c("age", "log_albumin", "log_protime", "log_bili"))
pbc2[[c.]] <- drop(scale(pbc2[[c.]]))
# fit model with extended Kalman filter
ddfit <- ddhazard(
Surv(tstart, tstop, death == 2) ~ ddFixed_intercept() + ddFixed(age) +
ddFixed(edema) + ddFixed(log_albumin) + ddFixed(log_protime) + log_bili,
pbc2, Q_0 = 100, Q = 1e-2, by = 100, id = pbc2$id,
model = "exponential", max_T = 3600,
control = ddhazard_control(eps = 1e-5, NR_eps = 1e-4, n_max = 1e4))
summary(ddfit)
# fit model with particle filter
set.seed(88235076)
pf_fit <- PF_EM(
Surv(tstart, tstop, death == 2) ~ ddFixed_intercept() + ddFixed(age) +
ddFixed(edema) + ddFixed(log_albumin) + ddFixed(log_protime) + log_bili,
pbc2, Q_0 = 2^2, Q = ddfit$Q * 100, # use estimate from before
by = 100, id = pbc2$id,
model = "exponential", max_T = 3600,
control = PF_control(
# these number should be larger! Small for CRAN checks
N_fw_n_bw = 100, N_smooth = 250, N_first = 100, eps = 1e-3,
method = "AUX_normal_approx_w_cloud_mean", est_a_0 = FALSE,
Q_tilde = as.matrix(.1^2),
n_max = 25, # just take a few iterations as an example
n_threads = 2))
# compare results
plot(ddfit)
plot(pf_fit)
sqrt(ddfit$Q * 100)
sqrt(pf_fit$Q)
rbind(ddfit$fixed_effects, pf_fit$fixed_effects)
# }
# \donttest{
#####
# simulation example with `random` and `fixed` argument and a restricted
# model
# g groups with k individuals in each
g <- 3L
k <- 400L
# matrices for state equation
p <- g + 1L
G <- matrix(0., p^2, 2L)
for(i in 1:p)
G[i + (i - 1L) * p, 1L + (i == p)] <- 1L
theta <- c(.9, .8)
# coefficients in transition density
(F. <- matrix(as.vector(G %*% theta), 4L, 4L))
J <- matrix(0., ncol = 2L, nrow = p)
J[-p, 1L] <- J[p, 2L] <- 1
psi <- c(log(c(.3, .1)))
K <- matrix(0., p * (p - 1L) / 2L, 2L)
j <- 0L
for(i in (p - 1L):1L){
j <- j + i
K[j, 2L] <- 1
}
K[K[, 2L] < 1, 1L] <- 1
phi <- log(-(c(.8, .3) + 1) / (c(.8, .3) - 1))
V <- diag(exp(drop(J %*% psi)))
C <- diag(1, ncol(V))
C[lower.tri(C)] <- 2/(1 + exp(-drop(K %*% phi))) - 1
C[upper.tri(C)] <- t(C)[upper.tri(C)]
(Q <- V %*% C %*% V) # covariance matrix in transition density
cov2cor(Q)
Q_0 <- get_Q_0(Q, F.) # time-invariant covariance matrix
beta <- c(rep(-6, g), 0) # all groups have the same long run mean intercept
# simulate state variables
set.seed(56219373)
n_periods <- 300L
alphas <- matrix(nrow = n_periods + 1L, ncol = p)
alphas[1L, ] <- rnorm(p) %*% chol(Q_0)
for(i in 1:n_periods + 1L)
alphas[i, ] <- F. %*% alphas[i - 1L, ] + drop(rnorm(p) %*% chol(Q))
alphas <- t(t(alphas) + beta)
# plot state variables
matplot(alphas, type = "l", lty = 1)
# simulate individuals' outcome
n_obs <- g * k
df <- lapply(1:n_obs, function(i){
# find the group
grp <- (i - 1L) %/% (n_obs / g) + 1L
# left-censoring
tstart <- max(0L, sample.int((n_periods - 1L) * 2L, 1) - n_periods + 1L)
# covariates
x <- c(1, rnorm(1))
# outcome (stop time and event indicator)
osa <- NULL
oso <- NULL
osx <- NULL
y <- FALSE
for(tstop in (tstart + 1L):n_periods){
sigmoid <- 1 / (1 + exp(- drop(x %*% alphas[tstop + 1L, c(grp, p)])))
if(sigmoid > runif(1)){
y <- TRUE
break
}
if(.01 > runif(1L) && tstop < n_periods){
# sample new covariate
osa <- c(osa, tstart)
tstart <- tstop
oso <- c(oso, tstop)
osx <- c(osx, x[2])
x[2] <- rnorm(1)
}
}
cbind(
tstart = c(osa, tstart), tstop = c(oso, tstop),
x = c(osx, x[2]), y = c(rep(FALSE, length(osa)), y), grp = grp,
id = i)
})
df <- data.frame(do.call(rbind, df))
df$grp <- factor(df$grp)
# fit model. Start with "cheap" iterations
fit <- PF_EM(
fixed = Surv(tstart, tstop, y) ~ x, random = ~ grp + x - 1,
data = df, model = "logit", by = 1L, max_T = max(df$tstop),
Q_0 = diag(1.5^2, p), id = df$id, type = "VAR",
G = G, theta = c(.5, .5), J = J, psi = log(c(.1, .1)),
K = K, phi = log(-(c(.4, 0) + 1) / (c(.4, 0) - 1)),
control = PF_control(
N_fw_n_bw = 100L, N_smooth = 100L, N_first = 500L,
method = "AUX_normal_approx_w_cloud_mean",
nu = 5L, # sample from multivariate t-distribution
n_max = 60L, averaging_start = 50L,
smoother = "Fearnhead_O_N", eps = 1e-4, covar_fac = 1.2,
n_threads = 2L # depends on your cpu(s)
),
trace = 1L)
plot(fit$log_likes) # log-likelihood approximation at each iterations
# you can take more iterations by uncommenting the following
# cl <- fit$call
# ctrl <- cl[["control"]]
# ctrl[c("N_fw_n_bw", "N_smooth", "N_first", "n_max",
# "averaging_start")] <- list(500L, 2000L, 5000L, 200L, 30L)
# cl[["control"]] <- ctrl
# cl[c("phi", "psi", "theta")] <- list(fit$phi, fit$psi, fit$theta)
# fit_extra <- eval(cl)
plot(fit$log_likes) # log-likelihood approximation at each iteration
# check estimates
sqrt(diag(fit$Q))
sqrt(diag(Q))
cov2cor(fit$Q)
cov2cor(Q)
fit$F
F.
# plot predicted state variables
for(i in 1:p){
plot(fit, cov_index = i)
abline(h = 0, lty = 2)
lines(1:nrow(alphas) - 1, alphas[, i] - beta[i], lty = 3)
}
# }
Run the code above in your browser using DataLab