EpiModel · clchand23 · Nov 11, 2024 · Nov 11, 2024 · Nov 12, 2024 · Dec 7, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -25,7 +25,6 @@ RoxygenNote: 7.3.2
 Encoding: UTF-8
 Remotes:
    github::EpiModel/ARTnetData@main,
-   github::EpiModel/EpiModel@main,
-   github::EpiModel/EpiModelHIV-p@main
+   github::EpiModel/EpiModelHIV-p
 Roxygen: list(markdown = TRUE)
 Config/testthat/edition: 3
diff --git a/R/EpiStats.R b/R/EpiStats.R
@@ -71,6 +71,9 @@
 #'   natural mortality.
 #' * `time.unit`: a number between 1 and 30 that specifies time units for ARTnet statistics. Set to
 #'   `7` by default.
+#' * `race.level`: a list of race and ethnicity categories from ARTnet that can be used for race stratification
+#'    in EpiModel. Values must match those in ARTnet, so options include "black", "hispanic", "white", "other",
+#'    "asian", "ai/an", "mult", "nh/pi". Race categories may be combined (for example, c("white", "other")).
 #'
 #' @examples
 #' # Age and geographic stratification, for the Atlanta metropolitan statistical area
@@ -108,6 +111,7 @@
 build_epistats <- function(geog.lvl = NULL,
                            geog.cat = NULL,
                            race = TRUE,
+                           race.level = list("black", "hispanic", c("white", "other")),
                            age.limits = c(15, 65),
                            age.breaks = c(25, 35, 45, 55),
                            age.sexual.cessation = NULL,
@@ -253,45 +257,61 @@ build_epistats <- function(geog.lvl = NULL,
   l$comb.age <- l$age + l$p_age_imp
   l$diff.age <- abs(l$age - l$p_age_imp)
 
-
   ## Race ethnicity ##
+
   if (race == TRUE) {
-    d$race.cat3 <- rep(NA, nrow(d))
-    d$race.cat3[d$race.cat == "black"] <- 1
-    d$race.cat3[d$race.cat == "hispanic"] <- 2
-    d$race.cat3[d$race.cat %in% c("white", "other")] <- 3
+    mult_race_cat <- c("asian", "ai/an", "mult", "nh/pi")
+    flat_race.level <- unlist(race.level)
+
+    # Determine which variables to use in ARTnet
+    if (any(flat_race.level %in% mult_race_cat)) {
 
-    l$race.cat3 <- rep(NA, nrow(l))
-    l$race.cat3[l$race.cat == "black"] <- 1
-    l$race.cat3[l$race.cat == "hispanic"] <- 2
-    l$race.cat3[l$race.cat %in% c("white", "other")] <- 3
+      d$race.eth <- ifelse(d$hispan == 1, "hispanic", d$race)
+      l <- merge(l, d[, c("AMIS_ID", "race.eth")], by = "AMIS_ID", all.x = TRUE)
+      l$p_race.eth <- ifelse(l$p_hispan == 1, "hispanic", l$p_race2)
 
-    l$p_race.cat3 <- rep(NA, nrow(l))
-    l$p_race.cat3[l$p_race.cat == "black"] <- 1
-    l$p_race.cat3[l$p_race.cat == "hispanic"] <- 2
-    l$p_race.cat3[l$p_race.cat %in% c("white", "other")] <- 3
+      p_race_var <- "p_race.eth"
+      race_var <- "race.eth"
+    } else {
+      p_race_var <- "p_race.cat"
+      race_var <- "race.cat"
+    }
 
-    # redistribute NAs in proportion to non-missing partner races
-    probs <- prop.table(table(l$race.cat3, l$p_race.cat3), 1)
+    # Assign race categories based on race.level
+    race.categories <- seq_along(race.level)
 
-    imp_black <- which(is.na(l$p_race.cat3) & l$race.cat3 == 1)
-    l$p_race.cat3[imp_black] <- sample(1:3, length(imp_black), TRUE, probs[1, ])
+    d$race.cat.num <- rep(NA, nrow(d))
+    l$race.cat.num <- rep(NA, nrow(l))
+    l$p_race.cat.num <- rep(NA, nrow(l))
 
-    imp_hisp <- which(is.na(l$p_race.cat3) & l$race.cat3 == 2)
-    l$p_race.cat3[imp_hisp] <- sample(1:3, length(imp_hisp), TRUE, probs[2, ])
+    for (i in seq_along(race.level)) {
+      d$race.cat.num[d[[race_var]] %in% race.level[[i]]] <- race.categories[i]
+      l$race.cat.num[l[[race_var]] %in% race.level[[i]]] <- race.categories[i]
+      l$p_race.cat.num[l[[p_race_var]] %in% race.level[[i]]] <- race.categories[i]
+    }
 
-    imp_white <- which(is.na(l$p_race.cat3) & l$race.cat3 == 3)
-    l$p_race.cat3[imp_white] <- sample(1:3, length(imp_white), TRUE, probs[3, ])
+    # Redistribute NAs in proportion to non-missing partner races
+    probs <- prop.table(table(l$race.cat.num, l$p_race.cat.num), 1)
 
+    for (i in race.categories) {
+      imp_indices <- which(is.na(l$p_race.cat.num) & l$race.cat.num == i)
+      if (length(imp_indices) > 0) {
+        l$p_race.cat.num[imp_indices] <- sample(race.categories, length(imp_indices), TRUE, probs[i, ])
+      }
+    }
+
+    # Initialize race.combo and assign combinations dynamically
     l$race.combo <- rep(NA, nrow(l))
-    l$race.combo[l$race.cat3 == 1 & l$p_race.cat3 == 1] <- 1
-    l$race.combo[l$race.cat3 == 1 & l$p_race.cat3 %in% 2:3] <- 2
-    l$race.combo[l$race.cat3 == 2 & l$p_race.cat3 %in% c(1, 3)] <- 3
-    l$race.combo[l$race.cat3 == 2 & l$p_race.cat3 == 2] <- 4
-    l$race.combo[l$race.cat3 == 3 & l$p_race.cat3 %in% 1:2] <- 5
-    l$race.combo[l$race.cat3 == 3 & l$p_race.cat3 == 3] <- 6
-
-    l <- select(l, -c(race.cat3, p_race.cat3))
+    combo_index <- 1
+    for (i in race.categories) {
+      # Case 1: Same race as one combination
+      l$race.combo[l$race.cat.num == i & l$p_race.cat.num == i] <- combo_index
+      combo_index <- combo_index + 1
+
+      # Case 2: Race compared with all other race groups
+      l$race.combo[l$race.cat.num == i & l$p_race.cat.num %in% setdiff(race.categories, i)] <- combo_index
+      combo_index <- combo_index + 1
+    }
   }
 
   ## HIV diagnosed status of index and partners ##
@@ -495,13 +515,13 @@ build_epistats <- function(geog.lvl = NULL,
   if (is.null(init.hiv.prev)) {
     if (race == TRUE) {
       if (is.null(geog.lvl)) {
-        d1 <- select(d, race.cat3, age, hiv2)
+        d1 <- select(d, race.cat.num, age, hiv2)
 
-        hiv.mod <- glm(hiv2 ~ age + as.factor(race.cat3),
+        hiv.mod <- glm(hiv2 ~ age + as.factor(race.cat.num),
                        data = d1, family = binomial())
       } else {
-        d1 <- select(d, race.cat3, geogYN, age, hiv2)
-        hiv.mod <- glm(hiv2 ~ age + geogYN + as.factor(race.cat3) + geogYN * as.factor(race.cat3),
+        d1 <- select(d, race.cat.num, geogYN, age, hiv2)
+        hiv.mod <- glm(hiv2 ~ age + geogYN + as.factor(race.cat.num) + geogYN * as.factor(race.cat.num),
                        data = d1, family = binomial())
       }
     } else {
@@ -520,9 +540,9 @@ build_epistats <- function(geog.lvl = NULL,
     # Output
     out$hiv.mod <- hiv.mod
   } else {
-    if (length(init.hiv.prev) != 3) {
-      stop("Input parameter init.prev.hiv must be a vector of size three")
-    }
+    #if (length(init.hiv.prev) != 3) {
+    #  stop("Input parameter init.prev.hiv must be a vector of size three")
+    #}
     if (prod(init.hiv.prev < 1) == 0  || prod(init.hiv.prev > 0) == 0) {
       stop("All elements of init.hiv.prev must be between 0 and 1 non-inclusive")
     }
@@ -538,6 +558,7 @@ build_epistats <- function(geog.lvl = NULL,
 
   out$geog.lvl <- geog.lvl
   out$race <- race
+  out$race.level <- race.level
   out$acts.mod <- acts.mod
   out$cond.mc.mod <- cond.mc.mod
   out$cond.oo.mod <- cond.oo.mod

diff --git a/R/NetParams.R b/R/NetParams.R
@@ -58,6 +58,7 @@ build_netparams <- function(epistats,
   ## Inputs ##
   geog.lvl <- epistats$geog.lvl
   race <- epistats$race
+  race.level <- epistats$race.level
   age.limits <- epistats$age.limits
   age.breaks <- epistats$age.breaks
   age.sexual.cessation <- epistats$age.sexual.cessation
@@ -151,33 +152,56 @@ build_netparams <- function(epistats,
   d$count.oo.part.trunc <- ifelse(d$count.oo.part > 100, 100, d$count.oo.part)
 
 
+  ## Race ethnicity ##
   if (race == TRUE) {
-    # Race Ethnicity
-    d$race.cat3 <- rep(NA, nrow(d))
-    d$race.cat3[d$race.cat == "black"] <- 1
-    d$race.cat3[d$race.cat == "hispanic"] <- 2
-    d$race.cat3[d$race.cat %in% c("white", "other")] <- 3
+    mult_race_cat <- c("asian", "ai/an", "mult", "nh/pi")
+    flat_race.level <- unlist(race.level)
+
+    # Determine which variables to use in ARTnet
+    if (any(flat_race.level %in% mult_race_cat)) {
+      l <- merge(l, d[, c("AMIS_ID", "race")], by = "AMIS_ID", all.x = TRUE)
+      p_race_var <- "p_race2"
+      race_var <- "race"
+    } else {
+      p_race_var <- "p_race.cat"
+      race_var <- "race.cat"
+    }
 
-    l$race.cat3[l$race.cat == "black"] <- 1
-    l$race.cat3[l$race.cat == "hispanic"] <- 2
-    l$race.cat3[l$race.cat %in% c("white", "other")] <- 3
+    # Assign race categories based on race.level
+    race.categories <- seq_along(race.level)
 
-    l$p_race.cat3 <- rep(NA, nrow(l))
-    l$p_race.cat3[l$p_race.cat == "black"] <- 1
-    l$p_race.cat3[l$p_race.cat == "hispanic"] <- 2
-    l$p_race.cat3[l$p_race.cat %in% c("white", "other")] <- 3
+    d$race.cat.num <- rep(NA, nrow(d))
+    l$race.cat.num <- rep(NA, nrow(l))
+    l$p_race.cat.num <- rep(NA, nrow(l))
 
-    # redistribute NAs in proportion to non-missing partner races
-    probs <- prop.table(table(l$race.cat3, l$p_race.cat3), 1)
+    for (i in seq_along(race.level)) {
+      d$race.cat.num[d[[race_var]] %in% race.level[[i]]] <- race.categories[i]
+      l$race.cat.num[l[[race_var]] %in% race.level[[i]]] <- race.categories[i]
+      l$p_race.cat.num[l[[p_race_var]] %in% race.level[[i]]] <- race.categories[i]
+    }
 
-    imp_black <- which(is.na(l$p_race.cat3) & l$race.cat3 == 1)
-    l$p_race.cat3[imp_black] <- sample(1:3, length(imp_black), TRUE, probs[1, ])
+    # Redistribute NAs in proportion to non-missing partner races
+    probs <- prop.table(table(l$race.cat.num, l$p_race.cat.num), 1)
 
-    imp_hisp <- which(is.na(l$p_race.cat3) & l$race.cat3 == 2)
-    l$p_race.cat3[imp_hisp] <- sample(1:3, length(imp_hisp), TRUE, probs[2, ])
+    for (i in race.categories) {
+      imp_indices <- which(is.na(l$p_race.cat.num) & l$race.cat.num == i)
+      if (length(imp_indices) > 0) {
+        l$p_race.cat.num[imp_indices] <- sample(race.categories, length(imp_indices), TRUE, probs[i, ])
+      }
+    }
 
-    imp_white <- which(is.na(l$p_race.cat3) & l$race.cat3 == 3)
-    l$p_race.cat3[imp_white] <- sample(1:3, length(imp_white), TRUE, probs[3, ])
+    # Initialize race.combo and assign combinations dynamically
+    l$race.combo <- rep(NA, nrow(l))
+    combo_index <- 1
+    for (i in race.categories) {
+      # Case 1: Same race as one combination
+      l$race.combo[l$race.cat.num == i & l$p_race.cat.num == i] <- combo_index
+      combo_index <- combo_index + 1
+
+      # Case 2: Race compared with all other race groups
+      l$race.combo[l$race.cat.num == i & l$p_race.cat.num %in% setdiff(race.categories, i)] <- combo_index
+      combo_index <- combo_index + 1
+    }
 
   }
 
@@ -318,21 +342,21 @@ build_netparams <- function(epistats,
   ## nodematch("race", diff = TRUE) ----
 
   if (race == TRUE) {
-    lmain$same.race <- ifelse(lmain$race.cat3 == lmain$p_race.cat3, 1, 0)
+    lmain$same.race <- ifelse(lmain$race.cat.num == lmain$p_race.cat.num, 1, 0)
 
     if (is.null(geog.lvl)) {
-      mod <- glm(same.race ~ as.factor(race.cat3),
+      mod <- glm(same.race ~ as.factor(race.cat.num),
                  data = lmain, family = binomial())
 
-      dat <- data.frame(race.cat3 = 1:3)
+      dat <- data.frame(race.cat.num = race.categories)
       pred <- predict(mod, newdata = dat, type = "response")
 
       out$main$nm.race <- as.numeric(pred)
     } else {
-      mod <- glm(same.race ~ geogYN + as.factor(race.cat3),
+      mod <- glm(same.race ~ geogYN + as.factor(race.cat.num),
                  data = lmain, family = binomial())
 
-      dat <- data.frame(geogYN = 1, race.cat3 = 1:3)
+      dat <- data.frame(geogYN = 1, race.cat.num = race.categories)
       pred <- predict(mod, newdata = dat, type = "response")
 
       out$main$nm.race <- as.numeric(pred)
@@ -358,18 +382,18 @@ build_netparams <- function(epistats,
     }
 
     if (is.null(geog.lvl)) {
-      mod <- glm(deg.main ~ as.factor(race.cat3),
+      mod <- glm(deg.main ~ as.factor(race.cat.num),
                  data = d, family = poisson())
 
-      dat <- data.frame(race.cat3 = 1:3)
+      dat <- data.frame(race.cat.num = race.categories)
       pred <- predict(mod, newdata = dat, type = "response")
 
       out$main$nf.race <- as.numeric(pred)
     } else {
-      mod <- glm(deg.main ~ geogYN + as.factor(race.cat3),
+      mod <- glm(deg.main ~ geogYN + as.factor(race.cat.num),
                  data = d, family = poisson())
 
-      dat <- data.frame(geogYN = 1, race.cat3 = 1:3)
+      dat <- data.frame(geogYN = 1, race.cat.num = race.categories)
       pred <- predict(mod, newdata = dat, type = "response")
 
       out$main$nf.race <- as.numeric(pred)
@@ -642,21 +666,21 @@ build_netparams <- function(epistats,
 
     ## nodematch("race") ----
 
-    lcasl$same.race <- ifelse(lcasl$race.cat3 == lcasl$p_race.cat3, 1, 0)
+    lcasl$same.race <- ifelse(lcasl$race.cat.num == lcasl$p_race.cat.num, 1, 0)
 
     if (is.null(geog.lvl)) {
-      mod <- glm(same.race ~ as.factor(race.cat3),
+      mod <- glm(same.race ~ as.factor(race.cat.num),
                  data = lcasl, family = binomial())
 
-      dat <- data.frame(race.cat3 = 1:3)
+      dat <- data.frame(race.cat.num = race.categories)
       pred <- predict(mod, newdata = dat, type = "response")
 
       out$casl$nm.race <- as.numeric(pred)
     } else {
-      mod <- glm(same.race ~ geogYN + as.factor(race.cat3),
+      mod <- glm(same.race ~ geogYN + as.factor(race.cat.num),
                  data = lcasl, family = binomial())
 
-      dat <- data.frame(geogYN = 1, race.cat3 = 1:3)
+      dat <- data.frame(geogYN = 1, race.cat.num = race.categories)
       pred <- predict(mod, newdata = dat, type = "response")
 
       out$casl$nm.race <- as.numeric(pred)
@@ -686,18 +710,18 @@ build_netparams <- function(epistats,
 
     if (is.null(geog.lvl)) {
 
-      mod <- glm(deg.casl ~ as.factor(race.cat3),
+      mod <- glm(deg.casl ~ as.factor(race.cat.num),
                  data = d, family = poisson())
 
-      dat <- data.frame(race.cat3 = 1:3)
+      dat <- data.frame(race.cat.num = race.categories)
       pred <- predict(mod, newdata = dat, type = "response")
 
       out$casl$nf.race <- as.numeric(pred)
     } else {
-      mod <- glm(deg.casl ~ geogYN + as.factor(race.cat3),
+      mod <- glm(deg.casl ~ geogYN + as.factor(race.cat.num),
                  data = d, family = poisson())
 
-      dat <- data.frame(geogYN = 1, race.cat3 = 1:3)
+      dat <- data.frame(geogYN = 1, race.cat.num = race.categories)
       pred <- predict(mod, newdata = dat, type = "response")
 
       out$casl$nf.race <- as.numeric(pred)
@@ -971,21 +995,21 @@ build_netparams <- function(epistats,
 
     ## nodematch("race", diff = TRUE) ----
 
-    linst$same.race <- ifelse(linst$race.cat3 == linst$p_race.cat3, 1, 0)
+    linst$same.race <- ifelse(linst$race.cat.num == linst$p_race.cat.num, 1, 0)
 
     if (is.null(geog.lvl)) {
-      mod <- glm(same.race ~ as.factor(race.cat3),
+      mod <- glm(same.race ~ as.factor(race.cat.num),
                  data = linst, family = binomial())
 
-      dat <- data.frame(race.cat3 = 1:3)
+      dat <- data.frame(race.cat.num = race.categories)
       pred <- predict(mod, newdata = dat, type = "response")
 
       out$inst$nm.race <- as.numeric(pred)
     } else {
-      mod <- glm(same.race ~ geogYN + as.factor(race.cat3),
+      mod <- glm(same.race ~ geogYN + as.factor(race.cat.num),
                  data = linst, family = binomial())
 
-      dat <- data.frame(geogYN = 1, race.cat3 = 1:3)
+      dat <- data.frame(geogYN = 1, race.cat.num = race.categories)
       pred <- predict(mod, newdata = dat, type = "response")
 
       out$inst$nm.race <- as.numeric(pred)
@@ -1015,18 +1039,18 @@ build_netparams <- function(epistats,
     ## nodefactor("race") ----
 
     if (is.null(geog.lvl)) {
-      mod <- glm(count.oo.part ~ as.factor(race.cat3),
+      mod <- glm(count.oo.part ~ as.factor(race.cat.num),
                  data = d, family = poisson())
 
-      dat <- data.frame(race.cat3 = 1:3)
+      dat <- data.frame(race.cat.num = race.categories)
       pred <- predict(mod, newdata = dat, type = "response") / (364 / time.unit)
 
       out$inst$nf.race <- as.numeric(pred)
     } else {
-      mod <- glm(count.oo.part ~ geogYN + as.factor(race.cat3),
+      mod <- glm(count.oo.part ~ geogYN + as.factor(race.cat.num),
                  data = d, family = poisson())
 
-      dat <- data.frame(geogYN = 1, race.cat3 = 1:3)
+      dat <- data.frame(geogYN = 1, race.cat.num = race.categories)
       pred <- predict(mod, newdata = dat, type = "response") / (364 / time.unit)
 
       out$inst$nf.race <- as.numeric(pred)