## Not run:
# # This example illustrates the usefulness of the preprocess function.
#
# # first network: nodes a to j present
# mat1 <- rbinom(100, 1, 0.1)
# mat1 <- matrix(mat1, nrow = 10) # has 10 nodes
# rownames(mat1) <- letters[1:10]
# colnames(mat1) <- letters[1:10]
#
# # second network: nodes c to n present
# mat2 <- rbinom(144, 1, 0.1)
# mat2 <- matrix(mat2, nrow = 12) # has 12 nodes
# rownames(mat2) <- letters[3:14]
# colnames(mat2) <- letters[3:14]
#
# # third network: nodes a and d to k present
# mat3 <- rbinom(81, 1, 0.1)
# mat3 <- matrix(mat3, nrow = 9) # has 9 nodes
# rownames(mat3) <- letters[c(1, 4:11)]
# colnames(mat3) <- letters[c(1, 4:11)]
#
# # fourth network: same as second matrix
# mat4 <- mat2
#
# networks <- list(mat1, mat2, mat3, mat4)
#
# # btergm without cross-temporal dependencies:
# model.1 <- btergm(networks ~ edges + mutual)
# summary(model.1)
#
# # When cross-temporal dependencies are specified, the dimensions
# # of the matrices do not match and need to be adjusted by btergm:
#
# # btergm(networks[2:4] ~ edges + mutual + edgecov(networks[1:3]))
#
# # This is because the first network in the list of dependent networks
# # and the first network in the list of lagged covariates are expected
# # to have the same dimensions (and likewise at the second and third
# # time step, of course).
#
# # Therefore, missing nodes in the covariate (here: {k, l, m, n} at t=1,
# # {a} at t=2, and {c, l, m, n} at t=3) must be removed from the
# # dependent network at t=2, t=3 and t=4 as well:
#
# dep <- preprocess(networks, lag = TRUE, covariate = FALSE)
#
# # This reduces the size of dep from 12 to 8 at t=2, from 9 to 8 at
# # t=3, and from 12 to 8 at t=4, and it removes the first network from
# # the list. Moreover, some nodes are present in the lagged covariate
# # but not in the dependent network (that is, at the next time step).
# # Therefore, node sets {a, b}, {c, l, m, n}, and {a} must be removed
# # from the lagged covariate at t=1, t=2, and t=3, respectively, to make
# # the dimensions compatible. While this is done automatically by btergm,
# # it can also be done manually using the preprocess function:
#
# lag <- preprocess(networks, lag = TRUE, covariate = TRUE)
#
# # To compare the dimensions of the original versus preprocessed
# # dependent networks and covariates, try the following code:
#
# cbind(
# "original_dep" = lapply(networks[2:4], nrow),
# "original_lag" = lapply(networks[1:3], nrow),
# "new_dep" = lapply(dep, nrow),
# "new_lag" = lapply(lag, nrow)
# )
#
# # The dependent networks were reduced from 12, 9 and 12 to 8, 8 and
# # 8 nodes, and the lagged networks were reduced from 10, 12 and 9 to
# # 8, 8 and 8 nodes, respectively. The lagged node sets are now
# # compatible. To see this:
#
# cbind(rownames(dep[[1]]), rownames(lag[[1]]))
# cbind(rownames(dep[[2]]), rownames(lag[[2]]))
# cbind(rownames(dep[[3]]), rownames(lag[[3]]))
#
# # Note, however, that the composition still changes within each list
# # across some of the time steps:
#
# cbind(rownames(dep[[1]]), rownames(dep[[2]]), rownames(dep[[3]]))
# cbind(rownames(lag[[1]]), rownames(lag[[2]]), rownames(lag[[3]]))
#
# # We can now use the btergm function on the preprocessed lists:
#
# model.2 <- btergm(dep ~ edges + mutual + edgecov(lag))
# summary(model.2)
#
# # The model can now be estimated because the current and lagged networks
# # have the same node sets at each time step. The disadvantage of this
# # approach is that some observations are lost. The advantage, however,
# # is that cross-temporal theories can be tested.
#
# # However, since the node sets still differ across time steps, ROC and
# # PR curves cannot be estimated. This is because a simulation from
# # nodes {c ... j} cannot be compared to a target network with nodes
# # {d ... k}. Therefore, the following command would compare the wrong
# # sets of nodes when estimating prediction performance:
#
# # gof.2 <- gof(model.2, classicgof = FALSE, rocprgof = TRUE) # PROBLEM!
#
# # To solve this problem, the most obvious approach is to estimate the
# # model at earlier time steps and compute the out-of-sample predictive
# # performance only for the last network:
#
# model.3 <- btergm(dep[1:2] ~ edges + mutual + edgecov(lag[1:2]))
# gof.3 <- gof(model.3, target = dep[[3]], formula = dep[[3]] ~ edges +
# mutual + edgecov(lag[[3]]), classicgof = FALSE, rocprgof = TRUE)
#
# # This models time steps 2 and 3 as a function of the lagged networks
# # at time steps 1 and 2, uses the resulting coefficients to predict
# # the network at time step 4, and compares network 4 to simulations
# # based on the coefficients from the previous time steps and the
# # lagged network at the third time step. As dep[[3]] and lag[[3]]
# # have identical node sets, predictive performance can be computed.
# # The resulting ROC and PR curves can be plotted as follows:
#
# plot(gof.3, boxplot = FALSE, pr = FALSE, roc.random = TRUE,
# ylab = "TPR/PPV", xlab = "FPR/TPR", roc.main = "ROC and PR")
# plot(gof.3, boxplot = FALSE, roc = FALSE, pr.random = TRUE,
# rocpr.add = TRUE)
# legend("right", legend = c("ROC", "ROC random graph", "PR",
# "PR random graph"), col = c("#bd0017", "#bd001744", "#5886be",
# "#5886be44"), lty = 1, lwd = 3)
#
# # For another example with real-world data, see vignette("knecht")
# ## End(Not run)
# Run the code above in your browser using DataLab