if (FALSE) {
## Built-in dataset with a subset of cols
mydat <- ex0.dag.data[, c("b1", "b2", "b3", "g1", "b4", "p2", "p4")]
## setup distribution list for each node
mydists <- list(b1 = "binomial",
b2 = "binomial",
b3 = "binomial",
g1 = "gaussian",
b4 = "binomial",
p2 = "poisson",
p4 = "poisson")
## Null model - all independent variables
mydag_empty <- matrix(0, nrow = 7, ncol = 7)
colnames(mydag_empty) <- rownames(mydag_empty) <- names(mydat)
## Now fit the model to calculate its goodness-of-fit
myres <- fitAbn(dag = mydag_empty,
data.df = mydat,
data.dists = mydists)
## Log-marginal likelihood goodness-of-fit for complete DAG
print(myres$mlik)
## fitAbn accepts also the formula statement
myres <- fitAbn(dag = ~ b1 | b2 + b2 | p4:g1 + g1 | p2 + b3 | g1 + b4 | b1 + p4 | g1,
data.df = mydat,
data.dists = mydists)
print(myres$mlik) # a much weaker fit than full independence DAG
# Plot the DAG via Rgraphviz
plot(myres)
## Or equivalently using the formula statement, with plotting
## Now repeat but include some dependencies first
mydag <- mydag_empty
mydag["b1", "b2"] <- 1 # b1<-b2 and so on
mydag["b2", "p4"] <- mydag["b2", "g1"] <- mydag["g1", "p2"] <- 1
mydag["b3", "g1"] <- mydag["b4", "b1"] <- mydag["p4", "g1"] <- 1
myres_alt <- fitAbn(dag = mydag,
data.df = mydat,
data.dists = mydists)
plot(myres_alt)
## -----------------------------------------------------------------------------
## This function contains an MLE implementation accessible through a method
## parameter use built-in simulated data set
## -----------------------------------------------------------------------------
myres_mle <- fitAbn(dag = ~ b1 | b2 + b2 | p4 + g1 + g1 | p2 + b3 | g1 + b4 | b1 + p4 | g1,
data.df = mydat,
data.dists = mydists,
method = "mle")
## Print the output for mle first then for Bayes:
print(myres_mle)
plot(myres_mle)
print(myres)
plot(myres)
## This is a basic plot of some posterior densities. The algorithm used for
## selecting density points is quite straightforward, but it might result
## in a sparse distribution. Therefore, we also recompute the density over
## an evenly spaced grid of 50 points between the two endpoints that had
## a minimum PDF at f = min.pdf.
## Setting max.mode.error = 0 forces the use of the internal C code.
myres_c <- fitAbn(dag = mydag,
data.df = mydat,
data.dists = mydists,
compute.fixed = TRUE,
control = list(max.mode.error = 0))
print(names(myres_c$marginals)) # gives all the different parameter names
## Repeat but use INLA for the numerics using max.mode.error = 100
## as using internal code is the default here rather than INLA
myres_inla <- fitAbn(dag = mydag,
data.df = mydat,
data.dists = mydists,
compute.fixed = TRUE,
control = list(max.mode.error = 100))
## Plot posterior densities
default_par <- par(no.readonly = TRUE) # save default par settings
par(mfrow = c(2, 2), mai = c(.7, .7, .2, .1))
plot(myres_c$marginals$b1[["b1 | (Intercept)"]], type = "l", xlab = "b1 | (Intercept)")
lines(myres_inla$marginals$b1[["b1 | (Intercept)"]], col = "blue")
plot(myres_c$marginals$b2[["b2 | p4"]], type = "l", xlab = "b2 | p4")
lines(myres_inla$marginals$b2[["b2 | p4"]], col = "blue")
plot(myres_c$marginals$g1[["g1 | precision"]], type = "l", xlab = "g1 | precision")
lines(myres_inla$marginals$g1[["g1 | precision"]], col = "blue")
plot(myres_c$marginals$b4[["b4 | b1"]], type = "l", xlab = "b4 | b1")
lines(myres_inla$marginals$b4[["b4 | b1"]], col = "blue")
par(default_par) # reset par settings
## An elementary mixed model example using built-in data specify DAG,
## only two variables using a subset of variables from ex3.dag.data
## both variables are assumed to need (separate) adjustment for the
## group variable, i.e., a binomial GLMM at each node
mydists <- list(b1 = "binomial",
b2 = "binomial")
## Compute marginal likelihood - use internal code via max.mode.error=0
## as using INLA is the default here.
## Model where b1 <- b2
myres_c <- fitAbn(dag = ~b1 | b2,
data.df = ex3.dag.data[, c(1, 2, 14)],
data.dists = mydists,
group.var = "group",
cor.vars = c("b1", "b2"),
control = list(max.mode.error = 0))
print(myres_c) # show all the output
## compare mode for node b1 with glmer(), lme4::glmer is automatically attached.
## Now for marginals - INLA is strongly preferable for estimating marginals for
## nodes with random effects as it is far faster, but may not be reliable
## see https://r-bayesian-networks.org/
## INLA's estimates of the marginals, using high n.grid = 500
## as this makes the plots smoother - see below.
myres_inla <- fitAbn(dag = ~b1 | b2,
data.df = ex3.dag.data[, c(1, 2, 14)],
data.dists = mydists,
group.var = "group",
cor.vars = c("b1", "b2"),
compute.fixed = TRUE,
n.grid = 500,
control = list(max.mode.error = 100,
max.hessian.error = 10E-02))
## this is NOT recommended - marginal density estimation using fitAbn in
## mixed models is really just for diagnostic purposes, better to use
## fitAbn.inla() here; but here goes... be patient
myres_c <- fitAbn(dag = ~b1 | b2,
data.df = ex3.dag.data[, c(1, 2, 14)],
data.dists = mydists,
group.var = "group",
cor.vars = c("b1", "b2"),
compute.fixed = TRUE,
control = list(max.mode.error = 0,
max.hessian.error = 10E-02))
## compare marginals between internal and INLA.
default_par <- par(no.readonly = TRUE) # save default par settings
par(mfrow = c(2, 3))
# 5 parameters - two intercepts, one slope, two group level precisions
plot(myres_inla$marginals$b1[[1]], type = "l", col = "blue")
lines(myres_c$marginals$b1[[1]], col = "brown", lwd = 2)
plot(myres_inla$marginals$b1[[2]], type = "l", col = "blue")
lines(myres_c$marginals$b1[[2]], col = "brown", lwd = 2)
# the precision of group-level random effects
plot(myres_inla$marginals$b1[[3]], type = "l", col = "blue", xlim = c(0, 2))
lines(myres_c$marginals$b1[[3]], col = "brown", lwd = 2)
plot(myres_inla$marginals$b2[[1]], type = "l", col = "blue")
lines(myres_c$marginals$b2[[1]], col = "brown", lwd = 2)
plot(myres_inla$marginals$b2[[1]], type = "l", col = "blue")
lines(myres_c$marginals$b2[[1]], col = "brown", lwd = 2)
# the precision of group-level random effects
plot(myres_inla$marginals$b2[[2]], type = "l", col = "blue", xlim = c(0, 2))
lines(myres_c$marginals$b2[[2]], col = "brown", lwd = 2)
par(default_par) # reset par settings
### these are very similar although not exactly identical
## use internal code but only to compute a single parameter over a specified
## grid.
## This can be necessary if the simple auto grid finding functions does
## a poor job.
myres_c <- fitAbn(dag = ~b1 | b2,
data.df = ex3.dag.data[, c(1, 2, 14)],
data.dists = mydists,
group.var = "group",
cor.vars = c("b1", "b2"),
centre = FALSE,
compute.fixed = TRUE,
control = list(marginal.node = 1,
marginal.param = 3, # precision term in node 1
variate.vec = seq(0.05, 1.5, len = 25),
max.hessian.error = 10E-02))
default_par <- par(no.readonly = TRUE) # save default par settings
par(mfrow = c(1, 2))
plot(myres_c$marginals$b1[[1]], type = "l", col = "blue") # still fairly sparse
# An easy way is to use spline to fill in the density without recomputing other
# points provided the original grid is not too sparse.
plot(spline(myres_c$marginals$b1[[1]], n = 100), type = "b", col = "brown")
par(default_par) # reset par settings
}
Run the code above in your browser using DataLab