Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Additional race stratification #57

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
3 changes: 1 addition & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ RoxygenNote: 7.3.2
Encoding: UTF-8
Remotes:
github::EpiModel/ARTnetData@main,
github::EpiModel/EpiModel@main,
github::EpiModel/EpiModelHIV-p@main
github::EpiModel/EpiModelHIV-p
Roxygen: list(markdown = TRUE)
Config/testthat/edition: 3
93 changes: 57 additions & 36 deletions R/EpiStats.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@
#' natural mortality.
#' * `time.unit`: a number between 1 and 30 that specifies time units for ARTnet statistics. Set to
#' `7` by default.
#' * `race.level`: a list of race and ethnicity categories from ARTnet that can be used for race stratification
#' in EpiModel. Values must match those in ARTnet, so options include "black", "hispanic", "white", "other",
#' "asian", "ai/an", "mult", "nh/pi". Race categories may be combined (for example, c("white", "other")).
#'
#' @examples
#' # Age and geographic stratification, for the Atlanta metropolitan statistical area
Expand Down Expand Up @@ -108,6 +111,7 @@
build_epistats <- function(geog.lvl = NULL,
geog.cat = NULL,
race = TRUE,
race.level = list("black", "hispanic", c("white", "other")),
age.limits = c(15, 65),
age.breaks = c(25, 35, 45, 55),
age.sexual.cessation = NULL,
Expand Down Expand Up @@ -253,45 +257,61 @@ build_epistats <- function(geog.lvl = NULL,
l$comb.age <- l$age + l$p_age_imp
l$diff.age <- abs(l$age - l$p_age_imp)


## Race ethnicity ##

if (race == TRUE) {
d$race.cat3 <- rep(NA, nrow(d))
d$race.cat3[d$race.cat == "black"] <- 1
d$race.cat3[d$race.cat == "hispanic"] <- 2
d$race.cat3[d$race.cat %in% c("white", "other")] <- 3
mult_race_cat <- c("asian", "ai/an", "mult", "nh/pi")
flat_race.level <- unlist(race.level)

# Determine which variables to use in ARTnet
if (any(flat_race.level %in% mult_race_cat)) {

l$race.cat3 <- rep(NA, nrow(l))
l$race.cat3[l$race.cat == "black"] <- 1
l$race.cat3[l$race.cat == "hispanic"] <- 2
l$race.cat3[l$race.cat %in% c("white", "other")] <- 3
d$race.eth <- ifelse(d$hispan == 1, "hispanic", d$race)
l <- merge(l, d[, c("AMIS_ID", "race.eth")], by = "AMIS_ID", all.x = TRUE)
l$p_race.eth <- ifelse(l$p_hispan == 1, "hispanic", l$p_race2)

l$p_race.cat3 <- rep(NA, nrow(l))
l$p_race.cat3[l$p_race.cat == "black"] <- 1
l$p_race.cat3[l$p_race.cat == "hispanic"] <- 2
l$p_race.cat3[l$p_race.cat %in% c("white", "other")] <- 3
p_race_var <- "p_race.eth"
race_var <- "race.eth"
} else {
p_race_var <- "p_race.cat"
race_var <- "race.cat"
}

# redistribute NAs in proportion to non-missing partner races
probs <- prop.table(table(l$race.cat3, l$p_race.cat3), 1)
# Assign race categories based on race.level
race.categories <- seq_along(race.level)

imp_black <- which(is.na(l$p_race.cat3) & l$race.cat3 == 1)
l$p_race.cat3[imp_black] <- sample(1:3, length(imp_black), TRUE, probs[1, ])
d$race.cat.num <- rep(NA, nrow(d))
l$race.cat.num <- rep(NA, nrow(l))
l$p_race.cat.num <- rep(NA, nrow(l))

imp_hisp <- which(is.na(l$p_race.cat3) & l$race.cat3 == 2)
l$p_race.cat3[imp_hisp] <- sample(1:3, length(imp_hisp), TRUE, probs[2, ])
for (i in seq_along(race.level)) {
d$race.cat.num[d[[race_var]] %in% race.level[[i]]] <- race.categories[i]
l$race.cat.num[l[[race_var]] %in% race.level[[i]]] <- race.categories[i]
l$p_race.cat.num[l[[p_race_var]] %in% race.level[[i]]] <- race.categories[i]
}

imp_white <- which(is.na(l$p_race.cat3) & l$race.cat3 == 3)
l$p_race.cat3[imp_white] <- sample(1:3, length(imp_white), TRUE, probs[3, ])
# Redistribute NAs in proportion to non-missing partner races
probs <- prop.table(table(l$race.cat.num, l$p_race.cat.num), 1)

for (i in race.categories) {
imp_indices <- which(is.na(l$p_race.cat.num) & l$race.cat.num == i)
if (length(imp_indices) > 0) {
l$p_race.cat.num[imp_indices] <- sample(race.categories, length(imp_indices), TRUE, probs[i, ])
}
}

# Initialize race.combo and assign combinations dynamically
l$race.combo <- rep(NA, nrow(l))
l$race.combo[l$race.cat3 == 1 & l$p_race.cat3 == 1] <- 1
l$race.combo[l$race.cat3 == 1 & l$p_race.cat3 %in% 2:3] <- 2
l$race.combo[l$race.cat3 == 2 & l$p_race.cat3 %in% c(1, 3)] <- 3
l$race.combo[l$race.cat3 == 2 & l$p_race.cat3 == 2] <- 4
l$race.combo[l$race.cat3 == 3 & l$p_race.cat3 %in% 1:2] <- 5
l$race.combo[l$race.cat3 == 3 & l$p_race.cat3 == 3] <- 6

l <- select(l, -c(race.cat3, p_race.cat3))
combo_index <- 1
for (i in race.categories) {
# Case 1: Same race as one combination
l$race.combo[l$race.cat.num == i & l$p_race.cat.num == i] <- combo_index
combo_index <- combo_index + 1

# Case 2: Race compared with all other race groups
l$race.combo[l$race.cat.num == i & l$p_race.cat.num %in% setdiff(race.categories, i)] <- combo_index
combo_index <- combo_index + 1
}
}

## HIV diagnosed status of index and partners ##
Expand Down Expand Up @@ -495,13 +515,13 @@ build_epistats <- function(geog.lvl = NULL,
if (is.null(init.hiv.prev)) {
if (race == TRUE) {
if (is.null(geog.lvl)) {
d1 <- select(d, race.cat3, age, hiv2)
d1 <- select(d, race.cat.num, age, hiv2)

hiv.mod <- glm(hiv2 ~ age + as.factor(race.cat3),
hiv.mod <- glm(hiv2 ~ age + as.factor(race.cat.num),
data = d1, family = binomial())
} else {
d1 <- select(d, race.cat3, geogYN, age, hiv2)
hiv.mod <- glm(hiv2 ~ age + geogYN + as.factor(race.cat3) + geogYN * as.factor(race.cat3),
d1 <- select(d, race.cat.num, geogYN, age, hiv2)
hiv.mod <- glm(hiv2 ~ age + geogYN + as.factor(race.cat.num) + geogYN * as.factor(race.cat.num),
data = d1, family = binomial())
}
} else {
Expand All @@ -520,9 +540,9 @@ build_epistats <- function(geog.lvl = NULL,
# Output
out$hiv.mod <- hiv.mod
} else {
if (length(init.hiv.prev) != 3) {
stop("Input parameter init.prev.hiv must be a vector of size three")
}
#if (length(init.hiv.prev) != 3) {
# stop("Input parameter init.prev.hiv must be a vector of size three")
#}
if (prod(init.hiv.prev < 1) == 0 || prod(init.hiv.prev > 0) == 0) {
stop("All elements of init.hiv.prev must be between 0 and 1 non-inclusive")
}
Expand All @@ -538,6 +558,7 @@ build_epistats <- function(geog.lvl = NULL,

out$geog.lvl <- geog.lvl
out$race <- race
out$race.level <- race.level
out$acts.mod <- acts.mod
out$cond.mc.mod <- cond.mc.mod
out$cond.oo.mod <- cond.oo.mod
Expand Down
118 changes: 71 additions & 47 deletions R/NetParams.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ build_netparams <- function(epistats,
## Inputs ##
geog.lvl <- epistats$geog.lvl
race <- epistats$race
race.level <- epistats$race.level
age.limits <- epistats$age.limits
age.breaks <- epistats$age.breaks
age.sexual.cessation <- epistats$age.sexual.cessation
Expand Down Expand Up @@ -151,33 +152,56 @@ build_netparams <- function(epistats,
d$count.oo.part.trunc <- ifelse(d$count.oo.part > 100, 100, d$count.oo.part)


## Race ethnicity ##
if (race == TRUE) {
# Race Ethnicity
d$race.cat3 <- rep(NA, nrow(d))
d$race.cat3[d$race.cat == "black"] <- 1
d$race.cat3[d$race.cat == "hispanic"] <- 2
d$race.cat3[d$race.cat %in% c("white", "other")] <- 3
mult_race_cat <- c("asian", "ai/an", "mult", "nh/pi")
flat_race.level <- unlist(race.level)

# Determine which variables to use in ARTnet
if (any(flat_race.level %in% mult_race_cat)) {
l <- merge(l, d[, c("AMIS_ID", "race")], by = "AMIS_ID", all.x = TRUE)
p_race_var <- "p_race2"
race_var <- "race"
} else {
p_race_var <- "p_race.cat"
race_var <- "race.cat"
}

l$race.cat3[l$race.cat == "black"] <- 1
l$race.cat3[l$race.cat == "hispanic"] <- 2
l$race.cat3[l$race.cat %in% c("white", "other")] <- 3
# Assign race categories based on race.level
race.categories <- seq_along(race.level)

l$p_race.cat3 <- rep(NA, nrow(l))
l$p_race.cat3[l$p_race.cat == "black"] <- 1
l$p_race.cat3[l$p_race.cat == "hispanic"] <- 2
l$p_race.cat3[l$p_race.cat %in% c("white", "other")] <- 3
d$race.cat.num <- rep(NA, nrow(d))
l$race.cat.num <- rep(NA, nrow(l))
l$p_race.cat.num <- rep(NA, nrow(l))

# redistribute NAs in proportion to non-missing partner races
probs <- prop.table(table(l$race.cat3, l$p_race.cat3), 1)
for (i in seq_along(race.level)) {
d$race.cat.num[d[[race_var]] %in% race.level[[i]]] <- race.categories[i]
l$race.cat.num[l[[race_var]] %in% race.level[[i]]] <- race.categories[i]
l$p_race.cat.num[l[[p_race_var]] %in% race.level[[i]]] <- race.categories[i]
}

imp_black <- which(is.na(l$p_race.cat3) & l$race.cat3 == 1)
l$p_race.cat3[imp_black] <- sample(1:3, length(imp_black), TRUE, probs[1, ])
# Redistribute NAs in proportion to non-missing partner races
probs <- prop.table(table(l$race.cat.num, l$p_race.cat.num), 1)

imp_hisp <- which(is.na(l$p_race.cat3) & l$race.cat3 == 2)
l$p_race.cat3[imp_hisp] <- sample(1:3, length(imp_hisp), TRUE, probs[2, ])
for (i in race.categories) {
imp_indices <- which(is.na(l$p_race.cat.num) & l$race.cat.num == i)
if (length(imp_indices) > 0) {
l$p_race.cat.num[imp_indices] <- sample(race.categories, length(imp_indices), TRUE, probs[i, ])
}
}

imp_white <- which(is.na(l$p_race.cat3) & l$race.cat3 == 3)
l$p_race.cat3[imp_white] <- sample(1:3, length(imp_white), TRUE, probs[3, ])
# Initialize race.combo and assign combinations dynamically
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this section that create the race.combos could be made into an exported function. This way it could be re-used directly on EpiModel HIV.

#' for a race `n` the combo for `race1 == race2 ==n` the combo is is `2n - 1`  
#' for `race1 == n && race2 != n` the combo is `2n`
#' that's the same as the loop below
#' @param race1 a vector of race index for the index
#' @param race2 a vector of race index for the partners
#' @return a vector of the same size with the race combos
make_race_combo <- function(race1, race2) {
  # check length of both vect
  # check no NA
  race_combo <- ifelse(race1 == race2, 2 * race1 - 1, 2 * race1)
  return(race_combo)
}

l$race.combo <- rep(NA, nrow(l))
combo_index <- 1
for (i in race.categories) {
# Case 1: Same race as one combination
l$race.combo[l$race.cat.num == i & l$p_race.cat.num == i] <- combo_index
combo_index <- combo_index + 1

# Case 2: Race compared with all other race groups
l$race.combo[l$race.cat.num == i & l$p_race.cat.num %in% setdiff(race.categories, i)] <- combo_index
combo_index <- combo_index + 1
}

}

Expand Down Expand Up @@ -318,21 +342,21 @@ build_netparams <- function(epistats,
## nodematch("race", diff = TRUE) ----

if (race == TRUE) {
lmain$same.race <- ifelse(lmain$race.cat3 == lmain$p_race.cat3, 1, 0)
lmain$same.race <- ifelse(lmain$race.cat.num == lmain$p_race.cat.num, 1, 0)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as.integer(lmain$race.cat.num == lmain$p_race.cat.num) is a bit cleaner


if (is.null(geog.lvl)) {
mod <- glm(same.race ~ as.factor(race.cat3),
mod <- glm(same.race ~ as.factor(race.cat.num),
data = lmain, family = binomial())

dat <- data.frame(race.cat3 = 1:3)
dat <- data.frame(race.cat.num = race.categories)
pred <- predict(mod, newdata = dat, type = "response")

out$main$nm.race <- as.numeric(pred)
} else {
mod <- glm(same.race ~ geogYN + as.factor(race.cat3),
mod <- glm(same.race ~ geogYN + as.factor(race.cat.num),
data = lmain, family = binomial())

dat <- data.frame(geogYN = 1, race.cat3 = 1:3)
dat <- data.frame(geogYN = 1, race.cat.num = race.categories)
pred <- predict(mod, newdata = dat, type = "response")

out$main$nm.race <- as.numeric(pred)
Expand All @@ -358,18 +382,18 @@ build_netparams <- function(epistats,
}

if (is.null(geog.lvl)) {
mod <- glm(deg.main ~ as.factor(race.cat3),
mod <- glm(deg.main ~ as.factor(race.cat.num),
data = d, family = poisson())

dat <- data.frame(race.cat3 = 1:3)
dat <- data.frame(race.cat.num = race.categories)
pred <- predict(mod, newdata = dat, type = "response")

out$main$nf.race <- as.numeric(pred)
} else {
mod <- glm(deg.main ~ geogYN + as.factor(race.cat3),
mod <- glm(deg.main ~ geogYN + as.factor(race.cat.num),
data = d, family = poisson())

dat <- data.frame(geogYN = 1, race.cat3 = 1:3)
dat <- data.frame(geogYN = 1, race.cat.num = race.categories)
pred <- predict(mod, newdata = dat, type = "response")

out$main$nf.race <- as.numeric(pred)
Expand Down Expand Up @@ -642,21 +666,21 @@ build_netparams <- function(epistats,

## nodematch("race") ----

lcasl$same.race <- ifelse(lcasl$race.cat3 == lcasl$p_race.cat3, 1, 0)
lcasl$same.race <- ifelse(lcasl$race.cat.num == lcasl$p_race.cat.num, 1, 0)

if (is.null(geog.lvl)) {
mod <- glm(same.race ~ as.factor(race.cat3),
mod <- glm(same.race ~ as.factor(race.cat.num),
data = lcasl, family = binomial())

dat <- data.frame(race.cat3 = 1:3)
dat <- data.frame(race.cat.num = race.categories)
pred <- predict(mod, newdata = dat, type = "response")

out$casl$nm.race <- as.numeric(pred)
} else {
mod <- glm(same.race ~ geogYN + as.factor(race.cat3),
mod <- glm(same.race ~ geogYN + as.factor(race.cat.num),
data = lcasl, family = binomial())

dat <- data.frame(geogYN = 1, race.cat3 = 1:3)
dat <- data.frame(geogYN = 1, race.cat.num = race.categories)
pred <- predict(mod, newdata = dat, type = "response")

out$casl$nm.race <- as.numeric(pred)
Expand Down Expand Up @@ -686,18 +710,18 @@ build_netparams <- function(epistats,

if (is.null(geog.lvl)) {

mod <- glm(deg.casl ~ as.factor(race.cat3),
mod <- glm(deg.casl ~ as.factor(race.cat.num),
data = d, family = poisson())

dat <- data.frame(race.cat3 = 1:3)
dat <- data.frame(race.cat.num = race.categories)
pred <- predict(mod, newdata = dat, type = "response")

out$casl$nf.race <- as.numeric(pred)
} else {
mod <- glm(deg.casl ~ geogYN + as.factor(race.cat3),
mod <- glm(deg.casl ~ geogYN + as.factor(race.cat.num),
data = d, family = poisson())

dat <- data.frame(geogYN = 1, race.cat3 = 1:3)
dat <- data.frame(geogYN = 1, race.cat.num = race.categories)
pred <- predict(mod, newdata = dat, type = "response")

out$casl$nf.race <- as.numeric(pred)
Expand Down Expand Up @@ -971,21 +995,21 @@ build_netparams <- function(epistats,

## nodematch("race", diff = TRUE) ----

linst$same.race <- ifelse(linst$race.cat3 == linst$p_race.cat3, 1, 0)
linst$same.race <- ifelse(linst$race.cat.num == linst$p_race.cat.num, 1, 0)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same as.integer(inst$race.cat.num == linst$p_race.cat.num)


if (is.null(geog.lvl)) {
mod <- glm(same.race ~ as.factor(race.cat3),
mod <- glm(same.race ~ as.factor(race.cat.num),
data = linst, family = binomial())

dat <- data.frame(race.cat3 = 1:3)
dat <- data.frame(race.cat.num = race.categories)
pred <- predict(mod, newdata = dat, type = "response")

out$inst$nm.race <- as.numeric(pred)
} else {
mod <- glm(same.race ~ geogYN + as.factor(race.cat3),
mod <- glm(same.race ~ geogYN + as.factor(race.cat.num),
data = linst, family = binomial())

dat <- data.frame(geogYN = 1, race.cat3 = 1:3)
dat <- data.frame(geogYN = 1, race.cat.num = race.categories)
pred <- predict(mod, newdata = dat, type = "response")

out$inst$nm.race <- as.numeric(pred)
Expand Down Expand Up @@ -1015,18 +1039,18 @@ build_netparams <- function(epistats,
## nodefactor("race") ----

if (is.null(geog.lvl)) {
mod <- glm(count.oo.part ~ as.factor(race.cat3),
mod <- glm(count.oo.part ~ as.factor(race.cat.num),
data = d, family = poisson())

dat <- data.frame(race.cat3 = 1:3)
dat <- data.frame(race.cat.num = race.categories)
pred <- predict(mod, newdata = dat, type = "response") / (364 / time.unit)

out$inst$nf.race <- as.numeric(pred)
} else {
mod <- glm(count.oo.part ~ geogYN + as.factor(race.cat3),
mod <- glm(count.oo.part ~ geogYN + as.factor(race.cat.num),
data = d, family = poisson())

dat <- data.frame(geogYN = 1, race.cat3 = 1:3)
dat <- data.frame(geogYN = 1, race.cat.num = race.categories)
pred <- predict(mod, newdata = dat, type = "response") / (364 / time.unit)

out$inst$nf.race <- as.numeric(pred)
Expand Down
Loading