-
Notifications
You must be signed in to change notification settings - Fork 4
Code specific to tergmLite within EpiModelHIV
Reading and writing network class objects was the primary computational bottleneck in simulation side of our network modeling. tergmLite converts the network class object into a list class object that is used by ergm's initial dynamic resimulation code written in C, then updates the relevant attribute values and other nodal and graph-level covariates that may change at each time step as the population size and composition evolves. The network is then represented as a two-column edgelist matrix, a set of individual-level attributes (some of which are components of the different ERGM formulas), and a separate evolving ERGM-based list structure that contains a subset of those changing attributes and other ergm-related information.
This page outlines the changes made to the earlier EpiModelHIV codebase, which evolved from Mardham, to handle the tergmLite simulation framework. We go through module-by-module the changes made and the basic reasons why.
- Create
dat$el
anddat$p
, usestergm_prep
andergm_prep
functions within tergmLite.dat$el
is the matrix class edgelist anddat$p
is the ergm-based list structure of network-related parameters like nodal and graph-level covariates.
### Lines 45-60
dat$el <- list()
dat$p <- list()
for (i in 1:2) {
dat$el[[i]] <- as.edgelist(nw[[i]])
attributes(dat$el[[i]])$vnames <- NULL
p <- tergmLite::stergm_prep(nw[[i]], x[[i]]$formation, x[[i]]$coef.diss$dissolution,
x[[i]]$coef.form, x[[i]]$coef.diss$coef.adj, x[[i]]$constraints)
p$model.form$formula <- NULL
p$model.diss$formula <- NULL
dat$p[[i]] <- p
}
dat$el[[3]] <- as.edgelist(nw[[3]])
attributes(dat$el[[3]])$vnames <- NULL
p <- tergmLite::ergm_prep(nw[[3]], x[[3]]$formation, x[[3]]$coef.form, x[[3]]$constraints)
p$model.form$formula <- NULL
dat$p[[3]] <- p
- Reinitialization for restarting simulations. This requires resetting the necessary components from a completed simulation back onto dat. These two components must be written out to the burn-in sim object.
## Lines 742-743
dat$el <- x$el[[s]]
dat$p <- x$p[[s]]
Creating edgelist object with references to dat$el
instead of pulling from dat$nw
.
## Lines 36-60 (Lines 43, 51, 59)
# Parameters
ai.scale <- dat$param$ai.scale
if (type == "main") {
base.ai.BB.rate <- dat$param$base.ai.main.BB.rate
base.ai.BW.rate <- dat$param$base.ai.main.BW.rate
base.ai.WW.rate <- dat$param$base.ai.main.WW.rate
fixed <- FALSE
ptype <- 1
el <- dat$el[[1]]
}
if (type == "pers") {
base.ai.BB.rate <- dat$param$base.ai.pers.BB.rate
base.ai.BW.rate <- dat$param$base.ai.pers.BW.rate
base.ai.WW.rate <- dat$param$base.ai.pers.WW.rate
fixed <- FALSE
ptype <- 2
el <- dat$el[[2]]
}
if (type == "inst") {
base.ai.BB.rate <- 1
base.ai.BW.rate <- 1
base.ai.WW.rate <- 1
fixed <- ifelse(ai.scale != 1, FALSE, TRUE)
ptype <- 3
el <- dat$el[[3]]
}
- Updating
dat$el
usingadd_vertices
. This function only adjusts then
attribute ondat$el
to account for population growth.
## Lines 55-60 (Line 58)
# Update Networks
if (nBirths > 0) {
for (i in 1:3) {
dat$el[[i]] <- tergmLite::add_vertices(dat$el[[i]], nBirths)
}
}
-
stopifnot
check fordat$el
length
## Lines 212-214 (Line 212)
if (unique(sapply(dat$attr, length)) != attributes(dat$el)$n) {
stop("mismatch between el and attr length in births mod")
}
- Updating
dat$el
for usingdelete_vertices
instead ofnetwork::delete.vertices
. This function "removes" nodes by permuting the edgelist downward when a specific nodal ID is counted as removed.
## Lines 54-62 (Lines 57, 60)
if (length(dth.all) > 0) {
dat$attr$active[dth.all] <- 0
for (i in 1:3) {
dat$el[[i]] <- tergmLite::delete_vertices(dat$el[[i]], dth.all)
}
dat$attr <- deleteAttr(dat$attr, dth.all)
if (unique(sapply(dat$attr, length)) != attributes(dat$el[[1]])$n) {
stop("mismatch between el and attr length in death mod")
}
- Creating edgelist object with references to
dat$el
## Lines 36-61 (Lines 44, 54, 60)
# Parameters and network
if (type == "main") {
disc.outset.B.prob <- dat$param$disc.outset.main.B.prob
disc.at.diag.B.prob <- dat$param$disc.at.diag.main.B.prob
disc.post.diag.B.prob <- dat$param$disc.post.diag.main.B.prob
disc.outset.W.prob <- dat$param$disc.outset.main.W.prob
disc.at.diag.W.prob <- dat$param$disc.at.diag.main.W.prob
disc.post.diag.W.prob <- dat$param$disc.post.diag.main.W.prob
el <- dat$el[[1]]
}
if (type == "pers") {
disc.outset.B.prob <- dat$param$disc.outset.pers.B.prob
disc.at.diag.B.prob <- dat$param$disc.at.diag.pers.B.prob
disc.post.diag.B.prob <- dat$param$disc.post.diag.pers.B.prob
disc.outset.W.prob <- dat$param$disc.outset.pers.W.prob
disc.at.diag.W.prob <- dat$param$disc.at.diag.pers.W.prob
disc.post.diag.W.prob <- dat$param$disc.post.diag.pers.W.prob
el <- dat$el[[2]]
}
if (type == "inst") {
disc.inst.B.prob <- dat$param$disc.inst.B.prob
disc.inst.W.prob <- dat$param$disc.inst.W.prob
el <- dat$el[[3]]
}
- Creating master edgelist using
dat$el
## Lines 136-144 (Line 138)
if (at > 2) {
discl.list <- dat$temp$discl.list
master.el <- rbind(dat$el[[1]], dat$el[[2]], dat$el[[3]])
m <- which(match(discl.list[, 1] * 1e7 + discl.list[, 2],
uid[master.el[, 1]] * 1e7 + uid[master.el[, 2]]) |
match(discl.list[, 2] * 1e7 + discl.list[, 1],
uid[master.el[, 1]] * 1e7 + uid[master.el[, 2]]))
dat$temp$discl.list <- discl.list[m, ]
}
- Get degree distribution values from
dat$el
usingget_degree
. This is a relatively new function within EpiModel, and is much faster than usingsummary(nw ~ sociality(base = 0))
to extract these data.
## Lines 46-48
main.deg <- get_degree(dat$el[[1]])
casl.deg <- get_degree(dat$el[[2]])
inst.deg <- get_degree(dat$el[[3]])
- Get degree, update terms in edgelist, and create new edges
## Lines 23-91
## Main network
nwparam.m <- EpiModel::get_nwparam(dat, network = 1)
if (dat$param$method == 1) {
dat$attr$deg.pers <- get_degree(dat$el[[2]])
} else {
dat$attr$deg.pers <- paste0(dat$attr$race, get_degree(dat$el[[2]]))
}
dat <- tergmLite::updateModelTermInputs(dat, network = 1)
dat$el[[1]] <- tergmLite::simulate_network(p = dat$p[[1]],
el = dat$el[[1]],
coef.form = nwparam.m$coef.form,
coef.diss = nwparam.m$coef.diss$coef.adj,
save.changes = TRUE)
dat$temp$new.edges <- NULL
if (at == 2) {
new.edges.m <- matrix(dat$el[[1]], ncol = 2)
} else {
new.edges.m <- attributes(dat$el[[1]])$changes
new.edges.m <- new.edges.m[new.edges.m[, "to"] == 1, 1:2, drop = FALSE]
}
dat$temp$new.edges <- matrix(dat$attr$uid[new.edges.m], ncol = 2)
## Casual network
nwparam.p <- EpiModel::get_nwparam(dat, network = 2)
if (dat$param$method == 1) {
dat$attr$deg.main <- get_degree(dat$el[[1]])
} else {
dat$attr$deg.main <- paste0(dat$attr$race, get_degree(dat$el[[1]]))
}
dat <- tergmLite::updateModelTermInputs(dat, network = 2)
dat$el[[2]] <- tergmLite::simulate_network(p = dat$p[[2]],
el = dat$el[[2]],
coef.form = nwparam.p$coef.form,
coef.diss = nwparam.p$coef.diss$coef.adj,
save.changes = TRUE)
if (at == 2) {
new.edges.p <- matrix(dat$el[[2]], ncol = 2)
} else {
new.edges.p <- attributes(dat$el[[2]])$changes
new.edges.p <- new.edges.p[new.edges.p[, "to"] == 1, 1:2, drop = FALSE]
}
dat$temp$new.edges <- rbind(dat$temp$new.edges,
matrix(dat$attr$uid[new.edges.p], ncol = 2))
## One-off network
nwparam.i <- EpiModel::get_nwparam(dat, network = 3)
if (dat$param$method == 1) {
dat$attr$deg.pers <- get_degree(dat$el[[2]])
} else {
dat$attr$deg.pers <- paste0(dat$attr$race, get_degree(dat$el[[2]]))
}
dat <- tergmLite::updateModelTermInputs(dat, network = 3)
dat$el[[3]] <- tergmLite::simulate_ergm(p = dat$p[[3]],
el = dat$el[[3]],
coef = nwparam.i$coef.form)
if (dat$control$save.nwstats == TRUE) {
dat <- calc_resim_nwstats(dat, at)
}
return(dat)
}
- Calculate resimulated network statistics. This could be expanded upon to calculate any number of other summary statistics.
## Lines 95-110 (Lines 98, 99, 101)
calc_resim_nwstats <- function(dat, at) {
for (nw in 1:3) {
n <- attr(dat$el[[nw]], "n")
edges <- nrow(dat$el[[nw]])
meandeg <- round(edges / n, 3)
concurrent <- round(mean(get_degree(dat$el[[nw]]) > 1), 3)
mat <- matrix(c(edges, meandeg, concurrent), ncol = 3, nrow = 1)
if (at == 2) {
dat$stats$nwstats[[nw]] <- mat
colnames(dat$stats$nwstats[[nw]]) <- c("edges", "meand", "conc")
}
if (at > 2) {
dat$stats$nwstats[[nw]] <- rbind(dat$stats$nwstats[[nw]], mat)
}
}
Extract dat$el
for transmission instead of referencing dat$nw
.
## Lines 371-375
if (is.null(dat$el)) {
el <- get.dyads.active(dat$nw, at = at)
} else {
el <- dat$el
}