From b2b305ada314c7696027f363aa65ee337bf02202 Mon Sep 17 00:00:00 2001 From: clchand23 Date: Mon, 11 Nov 2024 16:02:28 -0500 Subject: [PATCH 01/23] Update EpiStats.R Add section that stratifies race into 4 categories --- R/EpiStats.R | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/R/EpiStats.R b/R/EpiStats.R index ef4a7bb..03cc1e6 100644 --- a/R/EpiStats.R +++ b/R/EpiStats.R @@ -71,6 +71,9 @@ #' natural mortality. #' * `time.unit`: a number between 1 and 30 that specifies time units for ARTnet statistics. Set to #' `7` by default. +#' * `race.level`: the number of racial and ethnic categories that the model stratifies by. Default is +#' `3` for Black, Hispanic, and White/Other. `4` stratifies racial and ethnic group for Black, Hispanic, +#' White, and Other. #' #' @examples #' # Age and geographic stratification, for the Atlanta metropolitan statistical area @@ -108,6 +111,7 @@ build_epistats <- function(geog.lvl = NULL, geog.cat = NULL, race = TRUE, + race.level = 4, age.limits = c(15, 65), age.breaks = c(25, 35, 45, 55), age.sexual.cessation = NULL, @@ -255,7 +259,7 @@ build_epistats <- function(geog.lvl = NULL, ## Race ethnicity ## - if (race == TRUE) { + if (race == TRUE & race.level == 3) { d$race.cat3 <- rep(NA, nrow(d)) d$race.cat3[d$race.cat == "black"] <- 1 d$race.cat3[d$race.cat == "hispanic"] <- 2 @@ -294,6 +298,53 @@ build_epistats <- function(geog.lvl = NULL, l <- select(l, -c(race.cat3, p_race.cat3)) } + if (race == TRUE & race.level == 4) { + d$race.cat3 <- rep(NA, nrow(d)) + d$race.cat3[d$race.cat == "black"] <- 1 + d$race.cat3[d$race.cat == "hispanic"] <- 2 + d$race.cat3[d$race.cat == "white"] <- 3 + d$race.cat3[d$race.cat == "other"] <- 4 + + l$race.cat3 <- rep(NA, nrow(l)) + l$race.cat3[l$race.cat == "black"] <- 1 + l$race.cat3[l$race.cat == "hispanic"] <- 2 + l$race.cat3[l$race.cat == "white"] <- 3 + l$race.cat3[l$race.cat == "other"] <- 4 + + l$p_race.cat3 <- rep(NA, nrow(l)) + l$p_race.cat3[l$p_race.cat == "black"] <- 1 + l$p_race.cat3[l$p_race.cat == "hispanic"] <- 2 + l$p_race.cat3[l$p_race.cat == "white"] <- 3 + l$p_race.cat3[l$p_race.cat == "other"] <- 4 + + # redistribute NAs in proportion to non-missing partner races + probs <- prop.table(table(l$race.cat3, l$p_race.cat3), 1) + + imp_black <- which(is.na(l$p_race.cat3) & l$race.cat3 == 1) + l$p_race.cat3[imp_black] <- sample(1:4, length(imp_black), TRUE, probs[1, ]) + + imp_hisp <- which(is.na(l$p_race.cat3) & l$race.cat3 == 2) + l$p_race.cat3[imp_hisp] <- sample(1:4, length(imp_hisp), TRUE, probs[2, ]) + + imp_white <- which(is.na(l$p_race.cat3) & l$race.cat3 == 3) + l$p_race.cat3[imp_white] <- sample(1:4, length(imp_white), TRUE, probs[3, ]) + + imp_other <- which(is.na(l$p_race.cat3) & l$race.cat3 == 4) + l$p_race.cat3[imp_other] <- sample(1:4, length(imp_white), TRUE, probs[4, ]) + + l$race.combo <- rep(NA, nrow(l)) + l$race.combo[l$race.cat3 == 1 & l$p_race.cat3 == 1] <- 1 # Black-Black + l$race.combo[l$race.cat3 == 1 & l$p_race.cat3 %in% 2:4] <- 2 # Black-Hispanic/White/Other + l$race.combo[l$race.cat3 == 2 & l$p_race.cat3 %in% c(1, 3:4)] <- 3 # Hispanic-Black/White/Other + l$race.combo[l$race.cat3 == 2 & l$p_race.cat3 == 2] <- 4 # Hispanic-Hispanic + l$race.combo[l$race.cat3 == 3 & l$p_race.cat3 %in% c(1:2, 4)] <- 5 # White-Black/Hispanic/Other + l$race.combo[l$race.cat3 == 3 & l$p_race.cat3 == 3] <- 6 # White-White + l$race.combo[l$race.cat3 == 4 & l$p_race.cat3 == 4] <- 7 # Other-Other + l$race.combo[l$race.cat3 == 4 & l$p_race.cat3 %in% 1:3] <- 8 # Other-Black/Hispanic/White + + l <- select(l, -c(race.cat3, p_race.cat3)) + } + ## HIV diagnosed status of index and partners ## l$p_hiv2 <- ifelse(l$p_hiv == 1, 1, 0) From 47e43f8c80075e41e3071c1fc52d4684f922fedd Mon Sep 17 00:00:00 2001 From: clchand23 Date: Mon, 11 Nov 2024 16:06:54 -0500 Subject: [PATCH 02/23] Update NetParams.R Add new race stratification code --- R/NetParams.R | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/R/NetParams.R b/R/NetParams.R index a55b0ec..5ebcf49 100644 --- a/R/NetParams.R +++ b/R/NetParams.R @@ -151,7 +151,7 @@ build_netparams <- function(epistats, d$count.oo.part.trunc <- ifelse(d$count.oo.part > 100, 100, d$count.oo.part) - if (race == TRUE) { + if (race == TRUE & race.level == 3) { # Race Ethnicity d$race.cat3 <- rep(NA, nrow(d)) d$race.cat3[d$race.cat == "black"] <- 1 @@ -181,6 +181,42 @@ build_netparams <- function(epistats, } + if (race == TRUE & race.level == 4) { + # Race Ethnicity + d$race.cat3 <- rep(NA, nrow(d)) + d$race.cat3[d$race.cat == "black"] <- 1 + d$race.cat3[d$race.cat == "hispanic"] <- 2 + d$race.cat3[d$race.cat == "white"] <- 3 + d$race.cat3[d$race.cat == "other"] <- 4 + + l$race.cat3[l$race.cat == "black"] <- 1 + l$race.cat3[l$race.cat == "hispanic"] <- 2 + l$race.cat3[l$race.cat == "white"] <- 3 + l$race.cat3[l$race.cat == "other"] <- 4 + + l$p_race.cat3 <- rep(NA, nrow(l)) + l$p_race.cat3[l$p_race.cat == "black"] <- 1 + l$p_race.cat3[l$p_race.cat == "hispanic"] <- 2 + l$p_race.cat3[l$p_race.cat == "white"] <- 3 + l$p_race.cat3[l$p_race.cat == "other"] <- 4 + + # redistribute NAs in proportion to non-missing partner races + probs <- prop.table(table(l$race.cat3, l$p_race.cat3), 1) + + imp_black <- which(is.na(l$p_race.cat3) & l$race.cat3 == 1) + l$p_race.cat3[imp_black] <- sample(1:3, length(imp_black), TRUE, probs[1, ]) + + imp_hisp <- which(is.na(l$p_race.cat3) & l$race.cat3 == 2) + l$p_race.cat3[imp_hisp] <- sample(1:3, length(imp_hisp), TRUE, probs[2, ]) + + imp_white <- which(is.na(l$p_race.cat3) & l$race.cat3 == 3) + l$p_race.cat3[imp_white] <- sample(1:3, length(imp_white), TRUE, probs[3, ]) + + imp_other <- which(is.na(l$p_race.cat3) & l$race.cat3 == 4) + l$p_race.cat3[imp_other] <- sample(1:4, length(imp_white), TRUE, probs[4, ]) + + } + ## HIV status l$p_hiv2 <- ifelse(l$p_hiv == 1, 1, 0) From 2bf6aa0920c3cfb7ce874095179a6af0e5843234 Mon Sep 17 00:00:00 2001 From: clchand23 Date: Tue, 12 Nov 2024 13:06:04 -0500 Subject: [PATCH 03/23] Generalize race.cat variable Change from race.cat3 to race.cat throughout --- R/EpiStats.R | 132 +++++++++++++++++++++++------------------------ R/NetParams.R | 140 +++++++++++++++++++++++++------------------------- R/NetStats.R | 2 +- R/globals.R | 4 +- 4 files changed, 138 insertions(+), 140 deletions(-) diff --git a/R/EpiStats.R b/R/EpiStats.R index 03cc1e6..c54f316 100644 --- a/R/EpiStats.R +++ b/R/EpiStats.R @@ -260,89 +260,87 @@ build_epistats <- function(geog.lvl = NULL, ## Race ethnicity ## if (race == TRUE & race.level == 3) { - d$race.cat3 <- rep(NA, nrow(d)) - d$race.cat3[d$race.cat == "black"] <- 1 - d$race.cat3[d$race.cat == "hispanic"] <- 2 - d$race.cat3[d$race.cat %in% c("white", "other")] <- 3 + d$race.cat <- rep(NA, nrow(d)) + d$race.cat[d$race.cat == "black"] <- 1 + d$race.cat[d$race.cat == "hispanic"] <- 2 + d$race.cat[d$race.cat %in% c("white", "other")] <- 3 - l$race.cat3 <- rep(NA, nrow(l)) - l$race.cat3[l$race.cat == "black"] <- 1 - l$race.cat3[l$race.cat == "hispanic"] <- 2 - l$race.cat3[l$race.cat %in% c("white", "other")] <- 3 + l$race.cat <- rep(NA, nrow(l)) + l$race.cat[l$race.cat == "black"] <- 1 + l$race.cat[l$race.cat == "hispanic"] <- 2 + l$race.cat[l$race.cat %in% c("white", "other")] <- 3 - l$p_race.cat3 <- rep(NA, nrow(l)) - l$p_race.cat3[l$p_race.cat == "black"] <- 1 - l$p_race.cat3[l$p_race.cat == "hispanic"] <- 2 - l$p_race.cat3[l$p_race.cat %in% c("white", "other")] <- 3 + l$p_race.cat <- rep(NA, nrow(l)) + l$p_race.cat[l$p_race.cat == "black"] <- 1 + l$p_race.cat[l$p_race.cat == "hispanic"] <- 2 + l$p_race.cat[l$p_race.cat %in% c("white", "other")] <- 3 # redistribute NAs in proportion to non-missing partner races - probs <- prop.table(table(l$race.cat3, l$p_race.cat3), 1) + probs <- prop.table(table(l$race.cat, l$p_race.cat), 1) - imp_black <- which(is.na(l$p_race.cat3) & l$race.cat3 == 1) - l$p_race.cat3[imp_black] <- sample(1:3, length(imp_black), TRUE, probs[1, ]) + imp_black <- which(is.na(l$p_race.cat) & l$race.cat == 1) + l$p_race.cat[imp_black] <- sample(1:3, length(imp_black), TRUE, probs[1, ]) - imp_hisp <- which(is.na(l$p_race.cat3) & l$race.cat3 == 2) - l$p_race.cat3[imp_hisp] <- sample(1:3, length(imp_hisp), TRUE, probs[2, ]) + imp_hisp <- which(is.na(l$p_race.cat) & l$race.cat == 2) + l$p_race.cat[imp_hisp] <- sample(1:3, length(imp_hisp), TRUE, probs[2, ]) - imp_white <- which(is.na(l$p_race.cat3) & l$race.cat3 == 3) - l$p_race.cat3[imp_white] <- sample(1:3, length(imp_white), TRUE, probs[3, ]) + imp_white <- which(is.na(l$p_race.cat) & l$race.cat == 3) + l$p_race.cat[imp_white] <- sample(1:3, length(imp_white), TRUE, probs[3, ]) l$race.combo <- rep(NA, nrow(l)) - l$race.combo[l$race.cat3 == 1 & l$p_race.cat3 == 1] <- 1 - l$race.combo[l$race.cat3 == 1 & l$p_race.cat3 %in% 2:3] <- 2 - l$race.combo[l$race.cat3 == 2 & l$p_race.cat3 %in% c(1, 3)] <- 3 - l$race.combo[l$race.cat3 == 2 & l$p_race.cat3 == 2] <- 4 - l$race.combo[l$race.cat3 == 3 & l$p_race.cat3 %in% 1:2] <- 5 - l$race.combo[l$race.cat3 == 3 & l$p_race.cat3 == 3] <- 6 - - l <- select(l, -c(race.cat3, p_race.cat3)) + l$race.combo[l$race.cat == 1 & l$p_race.cat == 1] <- 1 + l$race.combo[l$race.cat == 1 & l$p_race.cat %in% 2:3] <- 2 + l$race.combo[l$race.cat == 2 & l$p_race.cat %in% c(1, 3)] <- 3 + l$race.combo[l$race.cat == 2 & l$p_race.cat == 2] <- 4 + l$race.combo[l$race.cat == 3 & l$p_race.cat %in% 1:2] <- 5 + l$race.combo[l$race.cat == 3 & l$p_race.cat == 3] <- 6 + + l <- select(l, -c(race.cat, p_race.cat)) } if (race == TRUE & race.level == 4) { - d$race.cat3 <- rep(NA, nrow(d)) - d$race.cat3[d$race.cat == "black"] <- 1 - d$race.cat3[d$race.cat == "hispanic"] <- 2 - d$race.cat3[d$race.cat == "white"] <- 3 - d$race.cat3[d$race.cat == "other"] <- 4 - - l$race.cat3 <- rep(NA, nrow(l)) - l$race.cat3[l$race.cat == "black"] <- 1 - l$race.cat3[l$race.cat == "hispanic"] <- 2 - l$race.cat3[l$race.cat == "white"] <- 3 - l$race.cat3[l$race.cat == "other"] <- 4 - - l$p_race.cat3 <- rep(NA, nrow(l)) - l$p_race.cat3[l$p_race.cat == "black"] <- 1 - l$p_race.cat3[l$p_race.cat == "hispanic"] <- 2 - l$p_race.cat3[l$p_race.cat == "white"] <- 3 - l$p_race.cat3[l$p_race.cat == "other"] <- 4 + d$race.cat <- rep(NA, nrow(d)) + d$race.cat[d$race.cat == "black"] <- 1 + d$race.cat[d$race.cat == "hispanic"] <- 2 + d$race.cat[d$race.cat == "white"] <- 3 + d$race.cat[d$race.cat == "other"] <- 4 + + l$race.cat <- rep(NA, nrow(l)) + l$race.cat[l$race.cat == "black"] <- 1 + l$race.cat[l$race.cat == "hispanic"] <- 2 + l$race.cat[l$race.cat == "white"] <- 3 + l$race.cat[l$race.cat == "other"] <- 4 + + l$p_race.cat <- rep(NA, nrow(l)) + l$p_race.cat[l$p_race.cat == "black"] <- 1 + l$p_race.cat[l$p_race.cat == "hispanic"] <- 2 + l$p_race.cat[l$p_race.cat == "white"] <- 3 + l$p_race.cat[l$p_race.cat == "other"] <- 4 # redistribute NAs in proportion to non-missing partner races - probs <- prop.table(table(l$race.cat3, l$p_race.cat3), 1) + probs <- prop.table(table(l$race.cat, l$p_race.cat), 1) - imp_black <- which(is.na(l$p_race.cat3) & l$race.cat3 == 1) - l$p_race.cat3[imp_black] <- sample(1:4, length(imp_black), TRUE, probs[1, ]) + imp_black <- which(is.na(l$p_race.cat) & l$race.cat == 1) + l$p_race.cat[imp_black] <- sample(1:4, length(imp_black), TRUE, probs[1, ]) - imp_hisp <- which(is.na(l$p_race.cat3) & l$race.cat3 == 2) - l$p_race.cat3[imp_hisp] <- sample(1:4, length(imp_hisp), TRUE, probs[2, ]) + imp_hisp <- which(is.na(l$p_race.cat) & l$race.cat == 2) + l$p_race.cat[imp_hisp] <- sample(1:4, length(imp_hisp), TRUE, probs[2, ]) - imp_white <- which(is.na(l$p_race.cat3) & l$race.cat3 == 3) - l$p_race.cat3[imp_white] <- sample(1:4, length(imp_white), TRUE, probs[3, ]) + imp_white <- which(is.na(l$p_race.cat) & l$race.cat == 3) + l$p_race.cat[imp_white] <- sample(1:4, length(imp_white), TRUE, probs[3, ]) - imp_other <- which(is.na(l$p_race.cat3) & l$race.cat3 == 4) - l$p_race.cat3[imp_other] <- sample(1:4, length(imp_white), TRUE, probs[4, ]) + imp_other <- which(is.na(l$p_race.cat) & l$race.cat == 4) + l$p_race.cat[imp_other] <- sample(1:4, length(imp_white), TRUE, probs[4, ]) l$race.combo <- rep(NA, nrow(l)) - l$race.combo[l$race.cat3 == 1 & l$p_race.cat3 == 1] <- 1 # Black-Black - l$race.combo[l$race.cat3 == 1 & l$p_race.cat3 %in% 2:4] <- 2 # Black-Hispanic/White/Other - l$race.combo[l$race.cat3 == 2 & l$p_race.cat3 %in% c(1, 3:4)] <- 3 # Hispanic-Black/White/Other - l$race.combo[l$race.cat3 == 2 & l$p_race.cat3 == 2] <- 4 # Hispanic-Hispanic - l$race.combo[l$race.cat3 == 3 & l$p_race.cat3 %in% c(1:2, 4)] <- 5 # White-Black/Hispanic/Other - l$race.combo[l$race.cat3 == 3 & l$p_race.cat3 == 3] <- 6 # White-White - l$race.combo[l$race.cat3 == 4 & l$p_race.cat3 == 4] <- 7 # Other-Other - l$race.combo[l$race.cat3 == 4 & l$p_race.cat3 %in% 1:3] <- 8 # Other-Black/Hispanic/White - - l <- select(l, -c(race.cat3, p_race.cat3)) + l$race.combo[l$race.cat == 1 & l$p_race.cat == 1] <- 1 # Black-Black + l$race.combo[l$race.cat == 1 & l$p_race.cat %in% 2:4] <- 2 # Black-Hispanic/White/Other + l$race.combo[l$race.cat == 2 & l$p_race.cat %in% c(1, 3:4)] <- 3 # Hispanic-Black/White/Other + l$race.combo[l$race.cat == 2 & l$p_race.cat == 2] <- 4 # Hispanic-Hispanic + l$race.combo[l$race.cat == 3 & l$p_race.cat %in% c(1:2, 4)] <- 5 # White-Black/Hispanic/Other + l$race.combo[l$race.cat == 3 & l$p_race.cat == 3] <- 6 # White-White + + l <- select(l, -c(race.cat, p_race.cat)) } ## HIV diagnosed status of index and partners ## @@ -546,13 +544,13 @@ build_epistats <- function(geog.lvl = NULL, if (is.null(init.hiv.prev)) { if (race == TRUE) { if (is.null(geog.lvl)) { - d1 <- select(d, race.cat3, age, hiv2) + d1 <- select(d, race.cat, age, hiv2) - hiv.mod <- glm(hiv2 ~ age + as.factor(race.cat3), + hiv.mod <- glm(hiv2 ~ age + as.factor(race.cat), data = d1, family = binomial()) } else { - d1 <- select(d, race.cat3, geogYN, age, hiv2) - hiv.mod <- glm(hiv2 ~ age + geogYN + as.factor(race.cat3) + geogYN * as.factor(race.cat3), + d1 <- select(d, race.cat, geogYN, age, hiv2) + hiv.mod <- glm(hiv2 ~ age + geogYN + as.factor(race.cat) + geogYN * as.factor(race.cat), data = d1, family = binomial()) } } else { diff --git a/R/NetParams.R b/R/NetParams.R index 5ebcf49..6bbf8f6 100644 --- a/R/NetParams.R +++ b/R/NetParams.R @@ -153,67 +153,67 @@ build_netparams <- function(epistats, if (race == TRUE & race.level == 3) { # Race Ethnicity - d$race.cat3 <- rep(NA, nrow(d)) - d$race.cat3[d$race.cat == "black"] <- 1 - d$race.cat3[d$race.cat == "hispanic"] <- 2 - d$race.cat3[d$race.cat %in% c("white", "other")] <- 3 + d$race.cat <- rep(NA, nrow(d)) + d$race.cat[d$race.cat == "black"] <- 1 + d$race.cat[d$race.cat == "hispanic"] <- 2 + d$race.cat[d$race.cat %in% c("white", "other")] <- 3 - l$race.cat3[l$race.cat == "black"] <- 1 - l$race.cat3[l$race.cat == "hispanic"] <- 2 - l$race.cat3[l$race.cat %in% c("white", "other")] <- 3 + l$race.cat[l$race.cat == "black"] <- 1 + l$race.cat[l$race.cat == "hispanic"] <- 2 + l$race.cat[l$race.cat %in% c("white", "other")] <- 3 - l$p_race.cat3 <- rep(NA, nrow(l)) - l$p_race.cat3[l$p_race.cat == "black"] <- 1 - l$p_race.cat3[l$p_race.cat == "hispanic"] <- 2 - l$p_race.cat3[l$p_race.cat %in% c("white", "other")] <- 3 + l$p_race.cat <- rep(NA, nrow(l)) + l$p_race.cat[l$p_race.cat == "black"] <- 1 + l$p_race.cat[l$p_race.cat == "hispanic"] <- 2 + l$p_race.cat[l$p_race.cat %in% c("white", "other")] <- 3 # redistribute NAs in proportion to non-missing partner races - probs <- prop.table(table(l$race.cat3, l$p_race.cat3), 1) + probs <- prop.table(table(l$race.cat, l$p_race.cat), 1) - imp_black <- which(is.na(l$p_race.cat3) & l$race.cat3 == 1) - l$p_race.cat3[imp_black] <- sample(1:3, length(imp_black), TRUE, probs[1, ]) + imp_black <- which(is.na(l$p_race.cat) & l$race.cat == 1) + l$p_race.cat[imp_black] <- sample(1:3, length(imp_black), TRUE, probs[1, ]) - imp_hisp <- which(is.na(l$p_race.cat3) & l$race.cat3 == 2) - l$p_race.cat3[imp_hisp] <- sample(1:3, length(imp_hisp), TRUE, probs[2, ]) + imp_hisp <- which(is.na(l$p_race.cat) & l$race.cat == 2) + l$p_race.cat[imp_hisp] <- sample(1:3, length(imp_hisp), TRUE, probs[2, ]) - imp_white <- which(is.na(l$p_race.cat3) & l$race.cat3 == 3) - l$p_race.cat3[imp_white] <- sample(1:3, length(imp_white), TRUE, probs[3, ]) + imp_white <- which(is.na(l$p_race.cat) & l$race.cat == 3) + l$p_race.cat[imp_white] <- sample(1:3, length(imp_white), TRUE, probs[3, ]) } if (race == TRUE & race.level == 4) { # Race Ethnicity - d$race.cat3 <- rep(NA, nrow(d)) - d$race.cat3[d$race.cat == "black"] <- 1 - d$race.cat3[d$race.cat == "hispanic"] <- 2 - d$race.cat3[d$race.cat == "white"] <- 3 - d$race.cat3[d$race.cat == "other"] <- 4 - - l$race.cat3[l$race.cat == "black"] <- 1 - l$race.cat3[l$race.cat == "hispanic"] <- 2 - l$race.cat3[l$race.cat == "white"] <- 3 - l$race.cat3[l$race.cat == "other"] <- 4 - - l$p_race.cat3 <- rep(NA, nrow(l)) - l$p_race.cat3[l$p_race.cat == "black"] <- 1 - l$p_race.cat3[l$p_race.cat == "hispanic"] <- 2 - l$p_race.cat3[l$p_race.cat == "white"] <- 3 - l$p_race.cat3[l$p_race.cat == "other"] <- 4 + d$race.cat <- rep(NA, nrow(d)) + d$race.cat[d$race.cat == "black"] <- 1 + d$race.cat[d$race.cat == "hispanic"] <- 2 + d$race.cat[d$race.cat == "white"] <- 3 + d$race.cat[d$race.cat == "other"] <- 4 + + l$race.cat[l$race.cat == "black"] <- 1 + l$race.cat[l$race.cat == "hispanic"] <- 2 + l$race.cat[l$race.cat == "white"] <- 3 + l$race.cat[l$race.cat == "other"] <- 4 + + l$p_race.cat <- rep(NA, nrow(l)) + l$p_race.cat[l$p_race.cat == "black"] <- 1 + l$p_race.cat[l$p_race.cat == "hispanic"] <- 2 + l$p_race.cat[l$p_race.cat == "white"] <- 3 + l$p_race.cat[l$p_race.cat == "other"] <- 4 # redistribute NAs in proportion to non-missing partner races - probs <- prop.table(table(l$race.cat3, l$p_race.cat3), 1) + probs <- prop.table(table(l$race.cat, l$p_race.cat), 1) - imp_black <- which(is.na(l$p_race.cat3) & l$race.cat3 == 1) - l$p_race.cat3[imp_black] <- sample(1:3, length(imp_black), TRUE, probs[1, ]) + imp_black <- which(is.na(l$p_race.cat) & l$race.cat == 1) + l$p_race.cat[imp_black] <- sample(1:3, length(imp_black), TRUE, probs[1, ]) - imp_hisp <- which(is.na(l$p_race.cat3) & l$race.cat3 == 2) - l$p_race.cat3[imp_hisp] <- sample(1:3, length(imp_hisp), TRUE, probs[2, ]) + imp_hisp <- which(is.na(l$p_race.cat) & l$race.cat == 2) + l$p_race.cat[imp_hisp] <- sample(1:3, length(imp_hisp), TRUE, probs[2, ]) - imp_white <- which(is.na(l$p_race.cat3) & l$race.cat3 == 3) - l$p_race.cat3[imp_white] <- sample(1:3, length(imp_white), TRUE, probs[3, ]) + imp_white <- which(is.na(l$p_race.cat) & l$race.cat == 3) + l$p_race.cat[imp_white] <- sample(1:3, length(imp_white), TRUE, probs[3, ]) - imp_other <- which(is.na(l$p_race.cat3) & l$race.cat3 == 4) - l$p_race.cat3[imp_other] <- sample(1:4, length(imp_white), TRUE, probs[4, ]) + imp_other <- which(is.na(l$p_race.cat) & l$race.cat == 4) + l$p_race.cat[imp_other] <- sample(1:4, length(imp_white), TRUE, probs[4, ]) } @@ -354,21 +354,21 @@ build_netparams <- function(epistats, ## nodematch("race", diff = TRUE) ---- if (race == TRUE) { - lmain$same.race <- ifelse(lmain$race.cat3 == lmain$p_race.cat3, 1, 0) + lmain$same.race <- ifelse(lmain$race.cat == lmain$p_race.cat, 1, 0) if (is.null(geog.lvl)) { - mod <- glm(same.race ~ as.factor(race.cat3), + mod <- glm(same.race ~ as.factor(race.cat), data = lmain, family = binomial()) - dat <- data.frame(race.cat3 = 1:3) + dat <- data.frame(race.cat = 1:length(unique(race.cat))) ## generalize these pred <- predict(mod, newdata = dat, type = "response") out$main$nm.race <- as.numeric(pred) } else { - mod <- glm(same.race ~ geogYN + as.factor(race.cat3), + mod <- glm(same.race ~ geogYN + as.factor(race.cat), data = lmain, family = binomial()) - dat <- data.frame(geogYN = 1, race.cat3 = 1:3) + dat <- data.frame(geogYN = 1, race.cat = 1:3) pred <- predict(mod, newdata = dat, type = "response") out$main$nm.race <- as.numeric(pred) @@ -394,18 +394,18 @@ build_netparams <- function(epistats, } if (is.null(geog.lvl)) { - mod <- glm(deg.main ~ as.factor(race.cat3), + mod <- glm(deg.main ~ as.factor(race.cat), data = d, family = poisson()) - dat <- data.frame(race.cat3 = 1:3) + dat <- data.frame(race.cat = 1:3) pred <- predict(mod, newdata = dat, type = "response") out$main$nf.race <- as.numeric(pred) } else { - mod <- glm(deg.main ~ geogYN + as.factor(race.cat3), + mod <- glm(deg.main ~ geogYN + as.factor(race.cat), data = d, family = poisson()) - dat <- data.frame(geogYN = 1, race.cat3 = 1:3) + dat <- data.frame(geogYN = 1, race.cat = 1:3) pred <- predict(mod, newdata = dat, type = "response") out$main$nf.race <- as.numeric(pred) @@ -678,21 +678,21 @@ build_netparams <- function(epistats, ## nodematch("race") ---- - lcasl$same.race <- ifelse(lcasl$race.cat3 == lcasl$p_race.cat3, 1, 0) + lcasl$same.race <- ifelse(lcasl$race.cat == lcasl$p_race.cat, 1, 0) if (is.null(geog.lvl)) { - mod <- glm(same.race ~ as.factor(race.cat3), + mod <- glm(same.race ~ as.factor(race.cat), data = lcasl, family = binomial()) - dat <- data.frame(race.cat3 = 1:3) + dat <- data.frame(race.cat = 1:3) pred <- predict(mod, newdata = dat, type = "response") out$casl$nm.race <- as.numeric(pred) } else { - mod <- glm(same.race ~ geogYN + as.factor(race.cat3), + mod <- glm(same.race ~ geogYN + as.factor(race.cat), data = lcasl, family = binomial()) - dat <- data.frame(geogYN = 1, race.cat3 = 1:3) + dat <- data.frame(geogYN = 1, race.cat = 1:3) pred <- predict(mod, newdata = dat, type = "response") out$casl$nm.race <- as.numeric(pred) @@ -722,18 +722,18 @@ build_netparams <- function(epistats, if (is.null(geog.lvl)) { - mod <- glm(deg.casl ~ as.factor(race.cat3), + mod <- glm(deg.casl ~ as.factor(race.cat), data = d, family = poisson()) - dat <- data.frame(race.cat3 = 1:3) + dat <- data.frame(race.cat = 1:3) pred <- predict(mod, newdata = dat, type = "response") out$casl$nf.race <- as.numeric(pred) } else { - mod <- glm(deg.casl ~ geogYN + as.factor(race.cat3), + mod <- glm(deg.casl ~ geogYN + as.factor(race.cat), data = d, family = poisson()) - dat <- data.frame(geogYN = 1, race.cat3 = 1:3) + dat <- data.frame(geogYN = 1, race.cat = 1:3) pred <- predict(mod, newdata = dat, type = "response") out$casl$nf.race <- as.numeric(pred) @@ -1007,21 +1007,21 @@ build_netparams <- function(epistats, ## nodematch("race", diff = TRUE) ---- - linst$same.race <- ifelse(linst$race.cat3 == linst$p_race.cat3, 1, 0) + linst$same.race <- ifelse(linst$race.cat == linst$p_race.cat, 1, 0) if (is.null(geog.lvl)) { - mod <- glm(same.race ~ as.factor(race.cat3), + mod <- glm(same.race ~ as.factor(race.cat), data = linst, family = binomial()) - dat <- data.frame(race.cat3 = 1:3) + dat <- data.frame(race.cat = 1:3) pred <- predict(mod, newdata = dat, type = "response") out$inst$nm.race <- as.numeric(pred) } else { - mod <- glm(same.race ~ geogYN + as.factor(race.cat3), + mod <- glm(same.race ~ geogYN + as.factor(race.cat), data = linst, family = binomial()) - dat <- data.frame(geogYN = 1, race.cat3 = 1:3) + dat <- data.frame(geogYN = 1, race.cat = 1:3) pred <- predict(mod, newdata = dat, type = "response") out$inst$nm.race <- as.numeric(pred) @@ -1051,18 +1051,18 @@ build_netparams <- function(epistats, ## nodefactor("race") ---- if (is.null(geog.lvl)) { - mod <- glm(count.oo.part ~ as.factor(race.cat3), + mod <- glm(count.oo.part ~ as.factor(race.cat), data = d, family = poisson()) - dat <- data.frame(race.cat3 = 1:3) + dat <- data.frame(race.cat = 1:3) pred <- predict(mod, newdata = dat, type = "response") / (364 / time.unit) out$inst$nf.race <- as.numeric(pred) } else { - mod <- glm(count.oo.part ~ geogYN + as.factor(race.cat3), + mod <- glm(count.oo.part ~ geogYN + as.factor(race.cat), data = d, family = poisson()) - dat <- data.frame(geogYN = 1, race.cat3 = 1:3) + dat <- data.frame(geogYN = 1, race.cat = 1:3) pred <- predict(mod, newdata = dat, type = "response") / (364 / time.unit) out$inst$nf.race <- as.numeric(pred) diff --git a/R/NetStats.R b/R/NetStats.R index 00b2776..ff86bbc 100644 --- a/R/NetStats.R +++ b/R/NetStats.R @@ -301,7 +301,7 @@ build_netstats <- function(epistats, netparams, # diag status if (is.null(epistats$init.hiv.prev)) { if (race == TRUE) { - xs <- data.frame(age = attr_age, race.cat3 = attr_race, geogYN = 1) + xs <- data.frame(age = attr_age, race.cat = attr_race, geogYN = 1) preds <- predict(epistats$hiv.mod, newdata = xs, type = "response") attr_diag.status <- rbinom(num, 1, preds) out$attr$diag.status <- attr_diag.status diff --git a/R/globals.R b/R/globals.R index 23470da..7994245 100644 --- a/R/globals.R +++ b/R/globals.R @@ -1,8 +1,8 @@ #' @import utils utils::globalVariables(c( "age", - "race.cat3", - "p_race.cat3", + "race.cat", + "p_race.cat", "AMIS_ID", "survey_year", "prep", From 173d264e1337cf8dba2bf768952d43e51fcf89a0 Mon Sep 17 00:00:00 2001 From: clchand23 Date: Sat, 7 Dec 2024 00:03:33 -0500 Subject: [PATCH 04/23] Update EpiStats.R Update EpiStats to generalize race categorization for ARTnet network estimation by allowing user to set `race.level` to a list or vector of race and ethnicity categories as described in ARTnet data (i.e., black, hispanic, white, other, ai/an, asian, mult, nh/pi) --- R/EpiStats.R | 131 +++++++++++++++++++-------------------------------- 1 file changed, 48 insertions(+), 83 deletions(-) diff --git a/R/EpiStats.R b/R/EpiStats.R index c54f316..8a52bd6 100644 --- a/R/EpiStats.R +++ b/R/EpiStats.R @@ -71,9 +71,6 @@ #' natural mortality. #' * `time.unit`: a number between 1 and 30 that specifies time units for ARTnet statistics. Set to #' `7` by default. -#' * `race.level`: the number of racial and ethnic categories that the model stratifies by. Default is -#' `3` for Black, Hispanic, and White/Other. `4` stratifies racial and ethnic group for Black, Hispanic, -#' White, and Other. #' #' @examples #' # Age and geographic stratification, for the Atlanta metropolitan statistical area @@ -111,7 +108,7 @@ build_epistats <- function(geog.lvl = NULL, geog.cat = NULL, race = TRUE, - race.level = 4, + race.level = list("black", "hispanic", c("white", "other")), age.limits = c(15, 65), age.breaks = c(25, 35, 45, 55), age.sexual.cessation = NULL, @@ -257,90 +254,57 @@ build_epistats <- function(geog.lvl = NULL, l$comb.age <- l$age + l$p_age_imp l$diff.age <- abs(l$age - l$p_age_imp) - ## Race ethnicity ## - if (race == TRUE & race.level == 3) { - d$race.cat <- rep(NA, nrow(d)) - d$race.cat[d$race.cat == "black"] <- 1 - d$race.cat[d$race.cat == "hispanic"] <- 2 - d$race.cat[d$race.cat %in% c("white", "other")] <- 3 - - l$race.cat <- rep(NA, nrow(l)) - l$race.cat[l$race.cat == "black"] <- 1 - l$race.cat[l$race.cat == "hispanic"] <- 2 - l$race.cat[l$race.cat %in% c("white", "other")] <- 3 - - l$p_race.cat <- rep(NA, nrow(l)) - l$p_race.cat[l$p_race.cat == "black"] <- 1 - l$p_race.cat[l$p_race.cat == "hispanic"] <- 2 - l$p_race.cat[l$p_race.cat %in% c("white", "other")] <- 3 - - # redistribute NAs in proportion to non-missing partner races - probs <- prop.table(table(l$race.cat, l$p_race.cat), 1) - - imp_black <- which(is.na(l$p_race.cat) & l$race.cat == 1) - l$p_race.cat[imp_black] <- sample(1:3, length(imp_black), TRUE, probs[1, ]) - - imp_hisp <- which(is.na(l$p_race.cat) & l$race.cat == 2) - l$p_race.cat[imp_hisp] <- sample(1:3, length(imp_hisp), TRUE, probs[2, ]) - - imp_white <- which(is.na(l$p_race.cat) & l$race.cat == 3) - l$p_race.cat[imp_white] <- sample(1:3, length(imp_white), TRUE, probs[3, ]) - - l$race.combo <- rep(NA, nrow(l)) - l$race.combo[l$race.cat == 1 & l$p_race.cat == 1] <- 1 - l$race.combo[l$race.cat == 1 & l$p_race.cat %in% 2:3] <- 2 - l$race.combo[l$race.cat == 2 & l$p_race.cat %in% c(1, 3)] <- 3 - l$race.combo[l$race.cat == 2 & l$p_race.cat == 2] <- 4 - l$race.combo[l$race.cat == 3 & l$p_race.cat %in% 1:2] <- 5 - l$race.combo[l$race.cat == 3 & l$p_race.cat == 3] <- 6 - - l <- select(l, -c(race.cat, p_race.cat)) - } - if (race == TRUE & race.level == 4) { - d$race.cat <- rep(NA, nrow(d)) - d$race.cat[d$race.cat == "black"] <- 1 - d$race.cat[d$race.cat == "hispanic"] <- 2 - d$race.cat[d$race.cat == "white"] <- 3 - d$race.cat[d$race.cat == "other"] <- 4 - - l$race.cat <- rep(NA, nrow(l)) - l$race.cat[l$race.cat == "black"] <- 1 - l$race.cat[l$race.cat == "hispanic"] <- 2 - l$race.cat[l$race.cat == "white"] <- 3 - l$race.cat[l$race.cat == "other"] <- 4 - - l$p_race.cat <- rep(NA, nrow(l)) - l$p_race.cat[l$p_race.cat == "black"] <- 1 - l$p_race.cat[l$p_race.cat == "hispanic"] <- 2 - l$p_race.cat[l$p_race.cat == "white"] <- 3 - l$p_race.cat[l$p_race.cat == "other"] <- 4 + if (race == TRUE) { + mult_race_cat <- c("asian", "ai/an", "mult", "nh/pi") + flat_race.level <- unlist(race.level) + + # Determine which variables to use in ARTnet + if (any(flat_race.level %in% mult_race_cat)) { + l <- merge(l, d[, c("AMIS_ID", "race")], by = "AMIS_ID", all.x = TRUE) + p_race_var <- "p_race2" + race_var <- "race" + } else { + p_race_var <- "p_race.cat" + race_var <- "race.cat" + } - # redistribute NAs in proportion to non-missing partner races - probs <- prop.table(table(l$race.cat, l$p_race.cat), 1) + # Assign race categories based on race.level + race.categories <- seq_along(race.level) - imp_black <- which(is.na(l$p_race.cat) & l$race.cat == 1) - l$p_race.cat[imp_black] <- sample(1:4, length(imp_black), TRUE, probs[1, ]) + d$race.cat.num <- rep(NA, nrow(d)) + l$race.cat.num <- rep(NA, nrow(l)) + l$p_race.cat.num <- rep(NA, nrow(l)) - imp_hisp <- which(is.na(l$p_race.cat) & l$race.cat == 2) - l$p_race.cat[imp_hisp] <- sample(1:4, length(imp_hisp), TRUE, probs[2, ]) + for (i in seq_along(race.level)) { + d$race.cat.num[d[[race_var]] %in% race.level[[i]]] <- race.categories[i] + l$race.cat.num[l[[race_var]] %in% race.level[[i]]] <- race.categories[i] + l$p_race.cat.num[l[[p_race_var]] %in% race.level[[i]]] <- race.categories[i] + } - imp_white <- which(is.na(l$p_race.cat) & l$race.cat == 3) - l$p_race.cat[imp_white] <- sample(1:4, length(imp_white), TRUE, probs[3, ]) + # Redistribute NAs in proportion to non-missing partner races + probs <- prop.table(table(l$race.cat.num, l$p_race.cat.num), 1) - imp_other <- which(is.na(l$p_race.cat) & l$race.cat == 4) - l$p_race.cat[imp_other] <- sample(1:4, length(imp_white), TRUE, probs[4, ]) + for (i in race.categories) { + imp_indices <- which(is.na(l$p_race.cat.num) & l$race.cat.num == i) + if (length(imp_indices) > 0) { + l$p_race.cat.num[imp_indices] <- sample(race.categories, length(imp_indices), TRUE, probs[i, ]) + } + } + # Initialize race.combo and assign combinations dynamically l$race.combo <- rep(NA, nrow(l)) - l$race.combo[l$race.cat == 1 & l$p_race.cat == 1] <- 1 # Black-Black - l$race.combo[l$race.cat == 1 & l$p_race.cat %in% 2:4] <- 2 # Black-Hispanic/White/Other - l$race.combo[l$race.cat == 2 & l$p_race.cat %in% c(1, 3:4)] <- 3 # Hispanic-Black/White/Other - l$race.combo[l$race.cat == 2 & l$p_race.cat == 2] <- 4 # Hispanic-Hispanic - l$race.combo[l$race.cat == 3 & l$p_race.cat %in% c(1:2, 4)] <- 5 # White-Black/Hispanic/Other - l$race.combo[l$race.cat == 3 & l$p_race.cat == 3] <- 6 # White-White - - l <- select(l, -c(race.cat, p_race.cat)) + combo_index <- 1 + for (i in race.categories) { + # Case 1: Same race as one combination + l$race.combo[l$race.cat.num == i & l$p_race.cat.num == i] <- combo_index + combo_index <- combo_index + 1 + + # Case 2: Race compared with all other race groups + l$race.combo[l$race.cat.num == i & l$p_race.cat.num %in% setdiff(race.categories, i)] <- combo_index + combo_index <- combo_index + 1 + } } ## HIV diagnosed status of index and partners ## @@ -544,13 +508,13 @@ build_epistats <- function(geog.lvl = NULL, if (is.null(init.hiv.prev)) { if (race == TRUE) { if (is.null(geog.lvl)) { - d1 <- select(d, race.cat, age, hiv2) + d1 <- select(d, race.cat.num, age, hiv2) - hiv.mod <- glm(hiv2 ~ age + as.factor(race.cat), + hiv.mod <- glm(hiv2 ~ age + as.factor(race.cat.num), data = d1, family = binomial()) } else { - d1 <- select(d, race.cat, geogYN, age, hiv2) - hiv.mod <- glm(hiv2 ~ age + geogYN + as.factor(race.cat) + geogYN * as.factor(race.cat), + d1 <- select(d, race.cat.num, geogYN, age, hiv2) + hiv.mod <- glm(hiv2 ~ age + geogYN + as.factor(race.cat.num) + geogYN * as.factor(race.cat.num), data = d1, family = binomial()) } } else { @@ -587,6 +551,7 @@ build_epistats <- function(geog.lvl = NULL, out$geog.lvl <- geog.lvl out$race <- race + out$race.level <- race.level out$acts.mod <- acts.mod out$cond.mc.mod <- cond.mc.mod out$cond.oo.mod <- cond.oo.mod From 714f529d3b1a0f13028cdf022b28a6b79def4b99 Mon Sep 17 00:00:00 2001 From: clchand23 Date: Sat, 7 Dec 2024 00:03:44 -0500 Subject: [PATCH 05/23] Update NetParams.R Update NetParams to generalize race categorization for ARTnet network estimation by allowing user to set `race.level` to a list or vector of race and ethnicity categories as described in ARTnet data (i.e., black, hispanic, white, other, ai/an, asian, mult, nh/pi) --- R/NetParams.R | 156 +++++++++++++++++++++++--------------------------- 1 file changed, 72 insertions(+), 84 deletions(-) diff --git a/R/NetParams.R b/R/NetParams.R index 6bbf8f6..59916a7 100644 --- a/R/NetParams.R +++ b/R/NetParams.R @@ -58,6 +58,7 @@ build_netparams <- function(epistats, ## Inputs ## geog.lvl <- epistats$geog.lvl race <- epistats$race + race.level <- epistats$race.level age.limits <- epistats$age.limits age.breaks <- epistats$age.breaks age.sexual.cessation <- epistats$age.sexual.cessation @@ -151,69 +152,56 @@ build_netparams <- function(epistats, d$count.oo.part.trunc <- ifelse(d$count.oo.part > 100, 100, d$count.oo.part) - if (race == TRUE & race.level == 3) { - # Race Ethnicity - d$race.cat <- rep(NA, nrow(d)) - d$race.cat[d$race.cat == "black"] <- 1 - d$race.cat[d$race.cat == "hispanic"] <- 2 - d$race.cat[d$race.cat %in% c("white", "other")] <- 3 - - l$race.cat[l$race.cat == "black"] <- 1 - l$race.cat[l$race.cat == "hispanic"] <- 2 - l$race.cat[l$race.cat %in% c("white", "other")] <- 3 - - l$p_race.cat <- rep(NA, nrow(l)) - l$p_race.cat[l$p_race.cat == "black"] <- 1 - l$p_race.cat[l$p_race.cat == "hispanic"] <- 2 - l$p_race.cat[l$p_race.cat %in% c("white", "other")] <- 3 - - # redistribute NAs in proportion to non-missing partner races - probs <- prop.table(table(l$race.cat, l$p_race.cat), 1) - - imp_black <- which(is.na(l$p_race.cat) & l$race.cat == 1) - l$p_race.cat[imp_black] <- sample(1:3, length(imp_black), TRUE, probs[1, ]) - - imp_hisp <- which(is.na(l$p_race.cat) & l$race.cat == 2) - l$p_race.cat[imp_hisp] <- sample(1:3, length(imp_hisp), TRUE, probs[2, ]) - - imp_white <- which(is.na(l$p_race.cat) & l$race.cat == 3) - l$p_race.cat[imp_white] <- sample(1:3, length(imp_white), TRUE, probs[3, ]) - - } - - if (race == TRUE & race.level == 4) { - # Race Ethnicity - d$race.cat <- rep(NA, nrow(d)) - d$race.cat[d$race.cat == "black"] <- 1 - d$race.cat[d$race.cat == "hispanic"] <- 2 - d$race.cat[d$race.cat == "white"] <- 3 - d$race.cat[d$race.cat == "other"] <- 4 - - l$race.cat[l$race.cat == "black"] <- 1 - l$race.cat[l$race.cat == "hispanic"] <- 2 - l$race.cat[l$race.cat == "white"] <- 3 - l$race.cat[l$race.cat == "other"] <- 4 + ## Race ethnicity ## + if (race == TRUE) { + mult_race_cat <- c("asian", "ai/an", "mult", "nh/pi") + flat_race.level <- unlist(race.level) + + # Determine which variables to use in ARTnet + if (any(flat_race.level %in% mult_race_cat)) { + l <- merge(l, d[, c("AMIS_ID", "race")], by = "AMIS_ID", all.x = TRUE) + p_race_var <- "p_race2" + race_var <- "race" + } else { + p_race_var <- "p_race.cat" + race_var <- "race.cat" + } - l$p_race.cat <- rep(NA, nrow(l)) - l$p_race.cat[l$p_race.cat == "black"] <- 1 - l$p_race.cat[l$p_race.cat == "hispanic"] <- 2 - l$p_race.cat[l$p_race.cat == "white"] <- 3 - l$p_race.cat[l$p_race.cat == "other"] <- 4 + # Assign race categories based on race.level + race.categories <- seq_along(race.level) - # redistribute NAs in proportion to non-missing partner races - probs <- prop.table(table(l$race.cat, l$p_race.cat), 1) + d$race.cat.num <- rep(NA, nrow(d)) + l$race.cat.num <- rep(NA, nrow(l)) + l$p_race.cat.num <- rep(NA, nrow(l)) - imp_black <- which(is.na(l$p_race.cat) & l$race.cat == 1) - l$p_race.cat[imp_black] <- sample(1:3, length(imp_black), TRUE, probs[1, ]) + for (i in seq_along(race.level)) { + d$race.cat.num[d[[race_var]] %in% race.level[[i]]] <- race.categories[i] + l$race.cat.num[l[[race_var]] %in% race.level[[i]]] <- race.categories[i] + l$p_race.cat.num[l[[p_race_var]] %in% race.level[[i]]] <- race.categories[i] + } - imp_hisp <- which(is.na(l$p_race.cat) & l$race.cat == 2) - l$p_race.cat[imp_hisp] <- sample(1:3, length(imp_hisp), TRUE, probs[2, ]) + # Redistribute NAs in proportion to non-missing partner races + probs <- prop.table(table(l$race.cat.num, l$p_race.cat.num), 1) - imp_white <- which(is.na(l$p_race.cat) & l$race.cat == 3) - l$p_race.cat[imp_white] <- sample(1:3, length(imp_white), TRUE, probs[3, ]) + for (i in race.categories) { + imp_indices <- which(is.na(l$p_race.cat.num) & l$race.cat.num == i) + if (length(imp_indices) > 0) { + l$p_race.cat.num[imp_indices] <- sample(race.categories, length(imp_indices), TRUE, probs[i, ]) + } + } - imp_other <- which(is.na(l$p_race.cat) & l$race.cat == 4) - l$p_race.cat[imp_other] <- sample(1:4, length(imp_white), TRUE, probs[4, ]) + # Initialize race.combo and assign combinations dynamically + l$race.combo <- rep(NA, nrow(l)) + combo_index <- 1 + for (i in race.categories) { + # Case 1: Same race as one combination + l$race.combo[l$race.cat.num == i & l$p_race.cat.num == i] <- combo_index + combo_index <- combo_index + 1 + + # Case 2: Race compared with all other race groups + l$race.combo[l$race.cat.num == i & l$p_race.cat.num %in% setdiff(race.categories, i)] <- combo_index + combo_index <- combo_index + 1 + } } @@ -354,21 +342,21 @@ build_netparams <- function(epistats, ## nodematch("race", diff = TRUE) ---- if (race == TRUE) { - lmain$same.race <- ifelse(lmain$race.cat == lmain$p_race.cat, 1, 0) + lmain$same.race <- ifelse(lmain$race.cat.num == lmain$p_race.cat.num, 1, 0) if (is.null(geog.lvl)) { - mod <- glm(same.race ~ as.factor(race.cat), + mod <- glm(same.race ~ as.factor(race.cat.num), data = lmain, family = binomial()) - dat <- data.frame(race.cat = 1:length(unique(race.cat))) ## generalize these + dat <- data.frame(race.cat.num = race.categories) pred <- predict(mod, newdata = dat, type = "response") out$main$nm.race <- as.numeric(pred) } else { - mod <- glm(same.race ~ geogYN + as.factor(race.cat), + mod <- glm(same.race ~ geogYN + as.factor(race.cat.num), data = lmain, family = binomial()) - dat <- data.frame(geogYN = 1, race.cat = 1:3) + dat <- data.frame(geogYN = 1, race.cat.num = race.categories) pred <- predict(mod, newdata = dat, type = "response") out$main$nm.race <- as.numeric(pred) @@ -394,18 +382,18 @@ build_netparams <- function(epistats, } if (is.null(geog.lvl)) { - mod <- glm(deg.main ~ as.factor(race.cat), + mod <- glm(deg.main ~ as.factor(race.cat.num), data = d, family = poisson()) - dat <- data.frame(race.cat = 1:3) + dat <- data.frame(race.cat.num = race.categories) pred <- predict(mod, newdata = dat, type = "response") out$main$nf.race <- as.numeric(pred) } else { - mod <- glm(deg.main ~ geogYN + as.factor(race.cat), + mod <- glm(deg.main ~ geogYN + as.factor(race.cat.num), data = d, family = poisson()) - dat <- data.frame(geogYN = 1, race.cat = 1:3) + dat <- data.frame(geogYN = 1, race.cat.num = race.categories) pred <- predict(mod, newdata = dat, type = "response") out$main$nf.race <- as.numeric(pred) @@ -678,21 +666,21 @@ build_netparams <- function(epistats, ## nodematch("race") ---- - lcasl$same.race <- ifelse(lcasl$race.cat == lcasl$p_race.cat, 1, 0) + lcasl$same.race <- ifelse(lcasl$race.cat.num == lcasl$p_race.cat.num, 1, 0) if (is.null(geog.lvl)) { - mod <- glm(same.race ~ as.factor(race.cat), + mod <- glm(same.race ~ as.factor(race.cat.num), data = lcasl, family = binomial()) - dat <- data.frame(race.cat = 1:3) + dat <- data.frame(race.cat.num = race.categories) pred <- predict(mod, newdata = dat, type = "response") out$casl$nm.race <- as.numeric(pred) } else { - mod <- glm(same.race ~ geogYN + as.factor(race.cat), + mod <- glm(same.race ~ geogYN + as.factor(race.cat.num), data = lcasl, family = binomial()) - dat <- data.frame(geogYN = 1, race.cat = 1:3) + dat <- data.frame(geogYN = 1, race.cat.num = race.categories) pred <- predict(mod, newdata = dat, type = "response") out$casl$nm.race <- as.numeric(pred) @@ -722,18 +710,18 @@ build_netparams <- function(epistats, if (is.null(geog.lvl)) { - mod <- glm(deg.casl ~ as.factor(race.cat), + mod <- glm(deg.casl ~ as.factor(race.cat.num), data = d, family = poisson()) - dat <- data.frame(race.cat = 1:3) + dat <- data.frame(race.cat.num = race.categories) pred <- predict(mod, newdata = dat, type = "response") out$casl$nf.race <- as.numeric(pred) } else { - mod <- glm(deg.casl ~ geogYN + as.factor(race.cat), + mod <- glm(deg.casl ~ geogYN + as.factor(race.cat.num), data = d, family = poisson()) - dat <- data.frame(geogYN = 1, race.cat = 1:3) + dat <- data.frame(geogYN = 1, race.cat.num = race.categories) pred <- predict(mod, newdata = dat, type = "response") out$casl$nf.race <- as.numeric(pred) @@ -1007,21 +995,21 @@ build_netparams <- function(epistats, ## nodematch("race", diff = TRUE) ---- - linst$same.race <- ifelse(linst$race.cat == linst$p_race.cat, 1, 0) + linst$same.race <- ifelse(linst$race.cat.num == linst$p_race.cat.num, 1, 0) if (is.null(geog.lvl)) { - mod <- glm(same.race ~ as.factor(race.cat), + mod <- glm(same.race ~ as.factor(race.cat.num), data = linst, family = binomial()) - dat <- data.frame(race.cat = 1:3) + dat <- data.frame(race.cat.num = race.categories) pred <- predict(mod, newdata = dat, type = "response") out$inst$nm.race <- as.numeric(pred) } else { - mod <- glm(same.race ~ geogYN + as.factor(race.cat), + mod <- glm(same.race ~ geogYN + as.factor(race.cat.num), data = linst, family = binomial()) - dat <- data.frame(geogYN = 1, race.cat = 1:3) + dat <- data.frame(geogYN = 1, race.cat.num = race.categories) pred <- predict(mod, newdata = dat, type = "response") out$inst$nm.race <- as.numeric(pred) @@ -1051,18 +1039,18 @@ build_netparams <- function(epistats, ## nodefactor("race") ---- if (is.null(geog.lvl)) { - mod <- glm(count.oo.part ~ as.factor(race.cat), + mod <- glm(count.oo.part ~ as.factor(race.cat.num), data = d, family = poisson()) - dat <- data.frame(race.cat = 1:3) + dat <- data.frame(race.cat.num = race.categories) pred <- predict(mod, newdata = dat, type = "response") / (364 / time.unit) out$inst$nf.race <- as.numeric(pred) } else { - mod <- glm(count.oo.part ~ geogYN + as.factor(race.cat), + mod <- glm(count.oo.part ~ geogYN + as.factor(race.cat.num), data = d, family = poisson()) - dat <- data.frame(geogYN = 1, race.cat = 1:3) + dat <- data.frame(geogYN = 1, race.cat.num = race.categories) pred <- predict(mod, newdata = dat, type = "response") / (364 / time.unit) out$inst$nf.race <- as.numeric(pred) From b9c3041873f734de265f5a0fcf669933c9814216 Mon Sep 17 00:00:00 2001 From: clchand23 Date: Mon, 9 Dec 2024 10:12:25 -0500 Subject: [PATCH 06/23] Update NetStats.R Generalize code that assigns numbered race levels Generalize code that assigns initial HIV status --- R/NetStats.R | 58 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/R/NetStats.R b/R/NetStats.R index ff86bbc..e0055d3 100644 --- a/R/NetStats.R +++ b/R/NetStats.R @@ -98,6 +98,7 @@ build_netstats <- function(epistats, netparams, geog.lvl <- epistats$geog.lvl sex.cess.mod <- epistats$sex.cess.mod race <- epistats$race + race.level <- epistats$race.level age.limits <- epistats$age.limits age.sexual.cessation <- epistats$age.sexual.cessation @@ -118,11 +119,12 @@ build_netstats <- function(epistats, netparams, # Population size by race group # race.dist.3cat + + if (!is.null(race.prop)) { - # reorder for consistency with the else case - race.prop <- race.prop[c(3, 1, 2)] + flattened_race_level <- sapply(race.level, function(x) paste(x, collapse = ".")) props <- as.data.frame(t(race.prop)) - colnames(props) <- c("White.Other", "Black", "Hispanic") + colnames(props) <- flattened_race_level } else { if (!is.null(geog.lvl) && geog.lvl != "county" && length(geog.cat) == 1) { props <- race.dist[[geog.lvl]][which(race.dist[[geog.lvl]]$Geog == geog.cat), -c(1, 2)] / 100 @@ -131,9 +133,27 @@ build_netstats <- function(epistats, netparams, } } out$demog$props <- props - num.B <- out$demog$num.B <- round(num * props$Black) - num.H <- out$demog$num.H <- round(num * props$Hispanic) - num.W <- out$demog$num.W <- num - num.B - num.H + race.num.vars <- list() + total_remaining <- num + for (i in seq_len(length(flattened_race_level) - 1)) { + race_name <- flattened_race_level[i] + race_num_var <- paste0("num.", toupper(substr(race_name, 1, 1))) + race_num_value <- round(num * props[[race_name]]) + out$demog[[race_num_var]] <- race_num_value + race.num.vars[[race_name]] <- race_num_value + total_remaining <- total_remaining - race_num_value + } + + # Assign the residual race group + residual_race <- flattened_race_level[length(flattened_race_level)] + residual_num_var <- paste0("num.", toupper(substr(residual_race, 1, 1))) + out$demog[[residual_num_var]] <- total_remaining + race.num.vars[[residual_race]] <- total_remaining + + for (race_name in names(race.num.vars)) { + race_num_var <- paste0("num.", toupper(substr(race_name, 1, 1))) + assign(race_num_var, race.num.vars[[race_name]]) + } ## Age-sex-specific mortality rates (B, H, W) # in 1-year age decrements starting with age 1 @@ -265,7 +285,14 @@ build_netstats <- function(epistats, netparams, out$attr$active.sex <- attr_active.sex # race attribute - attr_race <- apportion_lr(num, 1:3, c(num.B / num, num.H / num, num.W / num), shuffled = TRUE) + race_numbers <- sapply(flattened_race_level, function(race) { + race_num_var <- paste0("num.", toupper(substr(race, 1, 1))) + out$demog[[race_num_var]] + }) + + race_proportions <- race_numbers / num + group_ids <- seq_along(flattened_race_level) + attr_race <- apportion_lr(num, group_ids, race_proportions, shuffled = TRUE) out$attr$race <- attr_race # deg.casl attribute @@ -301,7 +328,7 @@ build_netstats <- function(epistats, netparams, # diag status if (is.null(epistats$init.hiv.prev)) { if (race == TRUE) { - xs <- data.frame(age = attr_age, race.cat = attr_race, geogYN = 1) + xs <- data.frame(age = attr_age, race.cat.num = attr_race, geogYN = 1) preds <- predict(epistats$hiv.mod, newdata = xs, type = "response") attr_diag.status <- rbinom(num, 1, preds) out$attr$diag.status <- attr_diag.status @@ -314,18 +341,13 @@ build_netstats <- function(epistats, netparams, } else { if (race == TRUE) { init.hiv.prev <- epistats$init.hiv.prev - samp.B <- which(attr_race == 1) - exp.B <- ceiling(length(samp.B) * init.hiv.prev[1]) - samp.H <- which(attr_race == 2) - exp.H <- ceiling(length(samp.H) * init.hiv.prev[2]) - samp.W <- which(attr_race == 3) - exp.W <- ceiling(length(samp.W) * init.hiv.prev[3]) - attr_diag.status <- rep(0, network.size) - attr_diag.status[sample(samp.B, exp.B)] <- 1 - attr_diag.status[sample(samp.H, exp.H)] <- 1 - attr_diag.status[sample(samp.W, exp.W)] <- 1 + for (i in seq_along(flattened_race_level)) { + samp <- which(attr_race == i) + exp <- ceiling(length(samp) * init.hiv.prev[i]) + attr_diag.status[sample(samp, exp)] <- 1 + } out$attr$diag.status <- attr_diag.status From 5e519df7db7baefc55a4126ba3de6b44936a5489 Mon Sep 17 00:00:00 2001 From: clchand23 Date: Mon, 9 Dec 2024 11:57:20 -0500 Subject: [PATCH 07/23] Update NetStats.R Update capitalization of race labels to work with current code --- R/NetStats.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/NetStats.R b/R/NetStats.R index e0055d3..b7d938b 100644 --- a/R/NetStats.R +++ b/R/NetStats.R @@ -122,7 +122,10 @@ build_netstats <- function(epistats, netparams, if (!is.null(race.prop)) { - flattened_race_level <- sapply(race.level, function(x) paste(x, collapse = ".")) + flattened_race_level <- sapply(race.level, function(x) { + x_cap <- sapply(x, function(y) paste0(toupper(substr(y, 1, 1)), tolower(substr(y, 2, nchar(y))))) + paste(x_cap, collapse = ".") + }) props <- as.data.frame(t(race.prop)) colnames(props) <- flattened_race_level } else { @@ -137,7 +140,7 @@ build_netstats <- function(epistats, netparams, total_remaining <- num for (i in seq_len(length(flattened_race_level) - 1)) { race_name <- flattened_race_level[i] - race_num_var <- paste0("num.", toupper(substr(race_name, 1, 1))) + race_num_var <- paste0("num.", substr(race_name, 1, 1)) race_num_value <- round(num * props[[race_name]]) out$demog[[race_num_var]] <- race_num_value race.num.vars[[race_name]] <- race_num_value @@ -146,12 +149,12 @@ build_netstats <- function(epistats, netparams, # Assign the residual race group residual_race <- flattened_race_level[length(flattened_race_level)] - residual_num_var <- paste0("num.", toupper(substr(residual_race, 1, 1))) + residual_num_var <- paste0("num.", substr(residual_race, 1, 1)) out$demog[[residual_num_var]] <- total_remaining race.num.vars[[residual_race]] <- total_remaining for (race_name in names(race.num.vars)) { - race_num_var <- paste0("num.", toupper(substr(race_name, 1, 1))) + race_num_var <- paste0("num.", substr(race_name, 1, 1)) assign(race_num_var, race.num.vars[[race_name]]) } From 1bdb1241a9cfbe69ccfe4f56fbe471bdaaa23654 Mon Sep 17 00:00:00 2001 From: clchand23 Date: Mon, 9 Dec 2024 13:17:13 -0500 Subject: [PATCH 08/23] Update NetStats.R Correct code so that age is not also updated to be 1 when setting max age for asmr --- R/NetStats.R | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/R/NetStats.R b/R/NetStats.R index b7d938b..1fd3c20 100644 --- a/R/NetStats.R +++ b/R/NetStats.R @@ -117,9 +117,6 @@ build_netstats <- function(epistats, netparams, num <- out$demog$num <- network.size # Population size by race group - # race.dist.3cat - - if (!is.null(race.prop)) { flattened_race_level <- sapply(race.level, function(x) { @@ -225,7 +222,7 @@ build_netstats <- function(epistats, netparams, # Setting deterministic mortality prob = 1 at upper age limit max.age <- age.limits[2] - asmr[asmr$age >= max.age, ] <- 1 + asmr[asmr$age >= max.age, -1] <- 1 out$demog$asmr <- asmr From 9ea391ce4a90ffcaac17c703f4f543434537698a Mon Sep 17 00:00:00 2001 From: clchand23 Date: Mon, 9 Dec 2024 22:38:05 -0500 Subject: [PATCH 09/23] Update EpiStats.R Comment out language that stops function when we input multiple init.hiv.prev values --- R/EpiStats.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/EpiStats.R b/R/EpiStats.R index 8a52bd6..3b21140 100644 --- a/R/EpiStats.R +++ b/R/EpiStats.R @@ -533,9 +533,9 @@ build_epistats <- function(geog.lvl = NULL, # Output out$hiv.mod <- hiv.mod } else { - if (length(init.hiv.prev) != 3) { - stop("Input parameter init.prev.hiv must be a vector of size three") - } + #if (length(init.hiv.prev) != 3) { + # stop("Input parameter init.prev.hiv must be a vector of size three") + #} if (prod(init.hiv.prev < 1) == 0 || prod(init.hiv.prev > 0) == 0) { stop("All elements of init.hiv.prev must be between 0 and 1 non-inclusive") } From 67642c9d0fc72af68508432a4a301701a4bbd855 Mon Sep 17 00:00:00 2001 From: clchand23 Date: Tue, 10 Dec 2024 10:49:04 -0500 Subject: [PATCH 10/23] Update NetStats.R Generalize update_asmr() function --- R/NetStats.R | 55 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/R/NetStats.R b/R/NetStats.R index 1fd3c20..78ddc99 100644 --- a/R/NetStats.R +++ b/R/NetStats.R @@ -596,45 +596,58 @@ build_netstats <- function(epistats, netparams, #' netstats <- update_asmr(netstats, asmr_df) #' update_asmr <- function(netstats, asmr_df) { - - #Check that all input mortality rates are reasonable values + # Check that all input mortality rates are reasonable values if (sum(is.na(asmr_df$DeathRate)) > 0 || !is.numeric(asmr_df$DeathRate) || min(asmr_df$DeathRate) < 0 || max(asmr_df$DeathRate) > 1) { stop("Ensure all mortality rates are non-NA, numeric, and between 0 and 1.") } - asmr.B <- asmr_df[asmr_df$Race == "Black", 2:3] - asmr.H <- asmr_df[asmr_df$Race == "Hispanic", 2:3] - asmr.W <- asmr_df[asmr_df$Race == "White", 2:3] + # Get unique races + unique_races <- unique(asmr_df$Race) - #Check for appropriate number of input mortality rates - if (min(asmr.B$Age) != 1 || min(asmr.H$Age) != 1 || min(asmr.W$Age) != 1 || - max(asmr.B$Age) != 100 || max(asmr.H$Age) != 100 || - max(asmr.W$Age) != 100 || nrow(asmr.B) != 100 || nrow(asmr.H) != 100 || - nrow(asmr.W) != 100) { - stop("Provide mortality rates by race for 1-yr age groups (1 - 100).") - } + # Initialize a list to store mortality rates by race + asmr_list <- list() - if (max(asmr.B$DeathRate) != 1 || max(asmr.H$DeathRate) != 1 || - max(asmr.W$DeathRate) != 1) { - stop("The mortality rate for at least one age group must be total (1).") + # Validate mortality rates for each race + for (race in unique_races) { + race_data <- asmr_df[asmr_df$Race == race, 2:3] + if (min(race_data$Age) != 1 || max(race_data$Age) != 100 || nrow(race_data) != 100) { + stop(paste0("Provide mortality rates by race for 1-yr age groups (1 - 100) for race: ", race)) + } + if (max(race_data$DeathRate) != 1) { + stop(paste0("The mortality rate for at least one age group must be total (1) for race: ", race)) + } + asmr_list[[race]] <- race_data$DeathRate } + # If race-specific mortality rates should be included if (netstats$race == TRUE) { - asmr <- data.frame(age = 1:100, asmr.B$DeathRate, - asmr.H$DeathRate, asmr.W$DeathRate) + # Combine mortality rates for all races into a data frame + asmr <- data.frame( + age = 1:100, + setNames(do.call(cbind, asmr_list), paste0("asmr.", unique_races)) + ) } else { + # Calculate combined mortality rates using demographic proportions props <- netstats$demog$props - asmr.O <- props$Black * asmr.B$DeathRate + - props$Hispanic * asmr.H$DeathRate + - props$White.Other * asmr.W$DeathRate - asmr <- data.frame(age = 1:100, asmr.O, asmr.O, asmr.O) + asmr_combined <- rep(0, 100) + for (race in unique_races) { + if (!race %in% names(props)) { + stop(paste0("Demographic proportions for race: ", race, " are missing in netstats$demog$props.")) + } + asmr_combined <- asmr_combined + props[[race]] * asmr_list[[race]] + } + # Create a combined mortality data frame + asmr <- data.frame(age = 1:100, asmr_combined, asmr_combined, asmr_combined) + colnames(asmr)[-1] <- c("asmr.O1", "asmr.O2", "asmr.O3") } + # Display time unit consistency message message(paste0("The time unit used in build_netstats() was ", netstats$time.unit, ". Please ensure that that is consistent with your inputs.")) + # Assign the resulting asmr to netstats netstats$demog$asmr <- asmr return(netstats) } From d3b4c67c9487928b8b491968cbbfbd37727003d1 Mon Sep 17 00:00:00 2001 From: clchand23 Date: Fri, 20 Dec 2024 10:29:33 -0500 Subject: [PATCH 11/23] Update EpiStats.R Data manipulation that adds new variables to allow for selecting "hispanic" and "other" sub-categories from ARTnet --- R/EpiStats.R | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/R/EpiStats.R b/R/EpiStats.R index 3b21140..ea9ec8c 100644 --- a/R/EpiStats.R +++ b/R/EpiStats.R @@ -71,6 +71,7 @@ #' natural mortality. #' * `time.unit`: a number between 1 and 30 that specifies time units for ARTnet statistics. Set to #' `7` by default. +#' * `race.level`: #' #' @examples #' # Age and geographic stratification, for the Atlanta metropolitan statistical area @@ -262,9 +263,13 @@ build_epistats <- function(geog.lvl = NULL, # Determine which variables to use in ARTnet if (any(flat_race.level %in% mult_race_cat)) { - l <- merge(l, d[, c("AMIS_ID", "race")], by = "AMIS_ID", all.x = TRUE) - p_race_var <- "p_race2" - race_var <- "race" + + d$race.eth <- ifelse(d$hispan == 1, "hispanic", d$race) + l <- merge(l, d[, c("AMIS_ID", "race.eth")], by = "AMIS_ID", all.x = TRUE) + l$p_race.eth <- ifelse(l$p_hispan == 1, "hispanic", l$p_race2) + + p_race_var <- "p_race.eth" + race_var <- "race.eth" } else { p_race_var <- "p_race.cat" race_var <- "race.cat" From 4aca5a7cfb8c39431479c33bad984b42a9c72e9e Mon Sep 17 00:00:00 2001 From: clchand23 Date: Fri, 20 Dec 2024 10:33:46 -0500 Subject: [PATCH 12/23] Update EpiStats.R Update documentation --- R/EpiStats.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/EpiStats.R b/R/EpiStats.R index ea9ec8c..026f8d2 100644 --- a/R/EpiStats.R +++ b/R/EpiStats.R @@ -71,7 +71,9 @@ #' natural mortality. #' * `time.unit`: a number between 1 and 30 that specifies time units for ARTnet statistics. Set to #' `7` by default. -#' * `race.level`: +#' * `race.level`: a list of race and ethnicity categories from ARTnet that can be used for race stratification +#' in EpiModel. Values must match those in ARTnet, so options include "black", "hispanic", "white", "other", +#' "asian", "ai/an", "mult", "nh/pi". Race categories may be combined (for example, c("white", "other")). #' #' @examples #' # Age and geographic stratification, for the Atlanta metropolitan statistical area From fad1eb93ef131c5c88ee65297dd3edbe9f0f099a Mon Sep 17 00:00:00 2001 From: clchand23 Date: Fri, 20 Dec 2024 12:25:54 -0500 Subject: [PATCH 13/23] Update DESCRIPTION To match DESCRIPTION on main branch --- DESCRIPTION | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 78f54ca..67175e3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,7 +25,6 @@ RoxygenNote: 7.3.2 Encoding: UTF-8 Remotes: github::EpiModel/ARTnetData@main, - github::EpiModel/EpiModel@main, - github::EpiModel/EpiModelHIV-p@main + github::EpiModel/EpiModelHIV-p Roxygen: list(markdown = TRUE) Config/testthat/edition: 3 From 3c49e3c85d3e95b312957a779f98d6ef69550a2d Mon Sep 17 00:00:00 2001 From: clchand23 Date: Fri, 20 Dec 2024 12:31:04 -0500 Subject: [PATCH 14/23] Update build_epistats.Rd Build documentation --- man/build_epistats.Rd | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/man/build_epistats.Rd b/man/build_epistats.Rd index 2b5e79a..1224949 100644 --- a/man/build_epistats.Rd +++ b/man/build_epistats.Rd @@ -8,6 +8,7 @@ build_epistats( geog.lvl = NULL, geog.cat = NULL, race = TRUE, + race.level = list("black", "hispanic", c("white", "other")), age.limits = c(15, 65), age.breaks = c(25, 35, 45, 55), age.sexual.cessation = NULL, @@ -98,6 +99,9 @@ process stops at a certain age but aging and other demographic features should c natural mortality. \item \code{time.unit}: a number between 1 and 30 that specifies time units for ARTnet statistics. Set to \code{7} by default. +\item \code{race.level}: a list of race and ethnicity categories from ARTnet that can be used for race stratification +in EpiModel. Values must match those in ARTnet, so options include "black", "hispanic", "white", "other", +"asian", "ai/an", "mult", "nh/pi". Race categories may be combined (for example, c("white", "other")). } } } From 095b8f8973ee02cb47b6d0588ce56ba4928ca1e7 Mon Sep 17 00:00:00 2001 From: clchand23 Date: Fri, 27 Dec 2024 16:04:05 -0500 Subject: [PATCH 15/23] Update EpiStats.R Create exported make_race_combo() function that can also be used in EpiModel --- R/EpiStats.R | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/R/EpiStats.R b/R/EpiStats.R index 026f8d2..ff68237 100644 --- a/R/EpiStats.R +++ b/R/EpiStats.R @@ -300,18 +300,9 @@ build_epistats <- function(geog.lvl = NULL, } } - # Initialize race.combo and assign combinations dynamically - l$race.combo <- rep(NA, nrow(l)) - combo_index <- 1 - for (i in race.categories) { - # Case 1: Same race as one combination - l$race.combo[l$race.cat.num == i & l$p_race.cat.num == i] <- combo_index - combo_index <- combo_index + 1 + # Initialize race.combo + l$race.combo <- make_race_combo(l$race.cat.num, l$p_race.cat.num) - # Case 2: Race compared with all other race groups - l$race.combo[l$race.cat.num == i & l$p_race.cat.num %in% setdiff(race.categories, i)] <- combo_index - combo_index <- combo_index + 1 - } } ## HIV diagnosed status of index and partners ## @@ -574,6 +565,16 @@ build_epistats <- function(geog.lvl = NULL, return(out) } +#' for a race `n` the combo for `race1 == race2 ==n` the combo is is `2n - 1` +#' for `race1 == n && race2 != n` the combo is `2n` +#' @param race1 a vector of race index for the index +#' @param race2 a vector of race index for the partners +#' @return a vector of the same size with the race combos +make_race_combo <- function(race1, race2) { + race_combo <- ifelse(race1 == race2, 2 * race1 - 1, 2 * race1) + return(race_combo) +} + # strip a `glm` object from all its components, leaving only what is required # for predicting from new data strip_glm <- function(cm) { From b0beb855b58fb3d563085a6fe0d49518e6761423 Mon Sep 17 00:00:00 2001 From: clchand23 Date: Fri, 27 Dec 2024 16:06:41 -0500 Subject: [PATCH 16/23] Update NetParams.R Use new make_race_combo() function in NetParams --- R/EpiStats.R | 2 +- R/NetParams.R | 14 ++------------ 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/R/EpiStats.R b/R/EpiStats.R index ff68237..9824921 100644 --- a/R/EpiStats.R +++ b/R/EpiStats.R @@ -300,7 +300,7 @@ build_epistats <- function(geog.lvl = NULL, } } - # Initialize race.combo + # Assign race.combo l$race.combo <- make_race_combo(l$race.cat.num, l$p_race.cat.num) } diff --git a/R/NetParams.R b/R/NetParams.R index 59916a7..fd895df 100644 --- a/R/NetParams.R +++ b/R/NetParams.R @@ -190,18 +190,8 @@ build_netparams <- function(epistats, } } - # Initialize race.combo and assign combinations dynamically - l$race.combo <- rep(NA, nrow(l)) - combo_index <- 1 - for (i in race.categories) { - # Case 1: Same race as one combination - l$race.combo[l$race.cat.num == i & l$p_race.cat.num == i] <- combo_index - combo_index <- combo_index + 1 - - # Case 2: Race compared with all other race groups - l$race.combo[l$race.cat.num == i & l$p_race.cat.num %in% setdiff(race.categories, i)] <- combo_index - combo_index <- combo_index + 1 - } + # Assign race.combo + l$race.combo <- make_race_combo(l$race.cat.num, l$p_race.cat.num) } From 20a249dcdc81600a96a90fdbcb668cc3ec12f518 Mon Sep 17 00:00:00 2001 From: clchand23 Date: Fri, 27 Dec 2024 16:09:15 -0500 Subject: [PATCH 17/23] Update NetParams.R replace ifelse() with as.integer() for cleaner statement --- R/NetParams.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/NetParams.R b/R/NetParams.R index fd895df..7fd90a3 100644 --- a/R/NetParams.R +++ b/R/NetParams.R @@ -332,7 +332,7 @@ build_netparams <- function(epistats, ## nodematch("race", diff = TRUE) ---- if (race == TRUE) { - lmain$same.race <- ifelse(lmain$race.cat.num == lmain$p_race.cat.num, 1, 0) + lmain$same.race <- as.integer(lmain$race.cat.num == lmain$p_race.cat.num) if (is.null(geog.lvl)) { mod <- glm(same.race ~ as.factor(race.cat.num), From 29bad587ec8c3829334a88bddeee28a0be8dca8d Mon Sep 17 00:00:00 2001 From: clchand23 Date: Fri, 27 Dec 2024 16:11:56 -0500 Subject: [PATCH 18/23] Update NetParams.R Replace ifelse() with as.integer() where appropriate for cleaner code --- R/NetParams.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/NetParams.R b/R/NetParams.R index 7fd90a3..98ffc1c 100644 --- a/R/NetParams.R +++ b/R/NetParams.R @@ -197,7 +197,7 @@ build_netparams <- function(epistats, ## HIV status - l$p_hiv2 <- ifelse(l$p_hiv == 1, 1, 0) + l$p_hiv2 <- as.integer(l$p_hiv == 1) table(l$p_hiv, l$p_hiv2, useNA = "always") hiv.combo <- rep(NA, nrow(l)) @@ -208,7 +208,7 @@ build_netparams <- function(epistats, hiv.combo[l$hiv2 == 0 & l$p_hiv == 2] <- 4 hiv.combo[l$hiv2 == 1 & l$p_hiv == 2] <- 5 - l$hiv.concord.pos <- ifelse(hiv.combo == 2, 1, 0) + l$hiv.concord.pos <- as.integer(hiv.combo == 2) ## Setup output list ## @@ -245,7 +245,7 @@ build_netparams <- function(epistats, lmain$part.age.grp <- cut(as.numeric(lmain$p_age_imp), age.breaks, labels = FALSE, right = FALSE, include.lowest = FALSE) - lmain$same.age.grp <- ifelse(lmain$index.age.grp == lmain$part.age.grp, 1, 0) + lmain$same.age.grp <- as.integer(lmain$index.age.grp == lmain$part.age.grp) if (is.null(geog.lvl)) { mod <- glm(same.age.grp ~ index.age.grp, @@ -568,7 +568,7 @@ build_netparams <- function(epistats, lcasl$part.age.grp <- cut(as.numeric(lcasl$p_age_imp), age.breaks, right = FALSE, labels = FALSE, include.lowest = FALSE) - lcasl$same.age.grp <- ifelse(lcasl$index.age.grp == lcasl$part.age.grp, 1, 0) + lcasl$same.age.grp <- as.integer(lcasl$index.age.grp == lcasl$part.age.grp) if (is.null(geog.lvl)) { mod <- glm(same.age.grp ~ index.age.grp, @@ -656,7 +656,7 @@ build_netparams <- function(epistats, ## nodematch("race") ---- - lcasl$same.race <- ifelse(lcasl$race.cat.num == lcasl$p_race.cat.num, 1, 0) + lcasl$same.race <- as.integer(lcasl$race.cat.num == lcasl$p_race.cat.num) if (is.null(geog.lvl)) { mod <- glm(same.race ~ as.factor(race.cat.num), @@ -897,7 +897,7 @@ build_netparams <- function(epistats, linst$part.age.grp <- cut(as.numeric(linst$p_age_imp), age.breaks, labels = FALSE, right = FALSE, include.lowest = FALSE) - linst$same.age.grp <- ifelse(linst$index.age.grp == linst$part.age.grp, 1, 0) + linst$same.age.grp <- as.integer(linst$index.age.grp == linst$part.age.grp) if (is.null(geog.lvl)) { mod <- glm(same.age.grp ~ index.age.grp, @@ -985,7 +985,7 @@ build_netparams <- function(epistats, ## nodematch("race", diff = TRUE) ---- - linst$same.race <- ifelse(linst$race.cat.num == linst$p_race.cat.num, 1, 0) + linst$same.race <- as.integer(linst$race.cat.num == linst$p_race.cat.num) if (is.null(geog.lvl)) { mod <- glm(same.race ~ as.factor(race.cat.num), From 2f25be2f199e2c57d7dc6eed5be4091b65da6e54 Mon Sep 17 00:00:00 2001 From: clchand23 Date: Fri, 27 Dec 2024 16:19:27 -0500 Subject: [PATCH 19/23] Update NetStats.R Replace sapply() with vapply() in section renaming race levels --- R/NetStats.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/NetStats.R b/R/NetStats.R index 78ddc99..ebab8b4 100644 --- a/R/NetStats.R +++ b/R/NetStats.R @@ -119,10 +119,13 @@ build_netstats <- function(epistats, netparams, # Population size by race group if (!is.null(race.prop)) { - flattened_race_level <- sapply(race.level, function(x) { - x_cap <- sapply(x, function(y) paste0(toupper(substr(y, 1, 1)), tolower(substr(y, 2, nchar(y))))) - paste(x_cap, collapse = ".") - }) + # Capitalize each race and join multiple with a `.` + # c("white", "other") => "White.Other" + flattened_race_level <- vapply( + race.level, + \(x) gsub("\\b(\\w)", "\\U\\1", paste0(x, collapse = "."), perl = TRUE), + character(1) + ) props <- as.data.frame(t(race.prop)) colnames(props) <- flattened_race_level } else { From 9b2d69a29196c17d0f2c3178a6866b8f555084b3 Mon Sep 17 00:00:00 2001 From: clchand23 Date: Fri, 27 Dec 2024 16:29:32 -0500 Subject: [PATCH 20/23] Update NetStats.R Rename output due to same first letter in some race groups --- R/NetStats.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/NetStats.R b/R/NetStats.R index ebab8b4..35ac2c4 100644 --- a/R/NetStats.R +++ b/R/NetStats.R @@ -140,7 +140,7 @@ build_netstats <- function(epistats, netparams, total_remaining <- num for (i in seq_len(length(flattened_race_level) - 1)) { race_name <- flattened_race_level[i] - race_num_var <- paste0("num.", substr(race_name, 1, 1)) + race_num_var <- paste0("num.", race_name) race_num_value <- round(num * props[[race_name]]) out$demog[[race_num_var]] <- race_num_value race.num.vars[[race_name]] <- race_num_value @@ -149,7 +149,7 @@ build_netstats <- function(epistats, netparams, # Assign the residual race group residual_race <- flattened_race_level[length(flattened_race_level)] - residual_num_var <- paste0("num.", substr(residual_race, 1, 1)) + residual_num_var <- paste0("num.", residual_race) out$demog[[residual_num_var]] <- total_remaining race.num.vars[[residual_race]] <- total_remaining From 1ac795e3d6411bd1af7f2bd99cae26a8144b72fc Mon Sep 17 00:00:00 2001 From: clchand23 Date: Fri, 27 Dec 2024 16:58:54 -0500 Subject: [PATCH 21/23] Update NetStats.R Replace sapply with vapply and update based on new names for numbers of agents per race group --- R/NetStats.R | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/R/NetStats.R b/R/NetStats.R index 35ac2c4..ff5886c 100644 --- a/R/NetStats.R +++ b/R/NetStats.R @@ -288,10 +288,14 @@ build_netstats <- function(epistats, netparams, out$attr$active.sex <- attr_active.sex # race attribute - race_numbers <- sapply(flattened_race_level, function(race) { - race_num_var <- paste0("num.", toupper(substr(race, 1, 1))) - out$demog[[race_num_var]] - }) + race_numbers <- vapply( + flattened_race_level, + function(race) { + race_num_var <- paste0("num.", race) + out$demog[[race_num_var]] + }, + numeric(1) + ) race_proportions <- race_numbers / num group_ids <- seq_along(flattened_race_level) From 1ef9ccdf1fc982bcdd4592a81bca853d0fbea7f9 Mon Sep 17 00:00:00 2001 From: clchand23 Date: Fri, 3 Jan 2025 12:32:19 -0500 Subject: [PATCH 22/23] Update NetStats.R Add comment for what for loop does in assigning HIV status Update asmr function to ensure column names include asmr --- R/NetStats.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/NetStats.R b/R/NetStats.R index ff5886c..01ba56c 100644 --- a/R/NetStats.R +++ b/R/NetStats.R @@ -350,6 +350,7 @@ build_netstats <- function(epistats, netparams, init.hiv.prev <- epistats$init.hiv.prev attr_diag.status <- rep(0, network.size) + # Randomly applies HIV diagnosis based on race for (i in seq_along(flattened_race_level)) { samp <- which(attr_race == i) exp <- ceiling(length(samp) * init.hiv.prev[i]) @@ -632,8 +633,11 @@ update_asmr <- function(netstats, asmr_df) { # Combine mortality rates for all races into a data frame asmr <- data.frame( age = 1:100, - setNames(do.call(cbind, asmr_list), paste0("asmr.", unique_races)) + do.call(cbind, asmr_list) ) + + colnames(asmr)[-1] <- paste0("asmr.", names(asmr_list)) + } else { # Calculate combined mortality rates using demographic proportions props <- netstats$demog$props From 5dbc72bd447ad29cc6c53fcf0049840cdd10933b Mon Sep 17 00:00:00 2001 From: clchand23 Date: Mon, 6 Jan 2025 10:59:29 -0500 Subject: [PATCH 23/23] Update make_race_combo() function Add export flag and build documentation --- NAMESPACE | 1 + R/EpiStats.R | 3 +++ man/make_race_combo.Rd | 21 +++++++++++++++++++++ 3 files changed, 25 insertions(+) create mode 100644 man/make_race_combo.Rd diff --git a/NAMESPACE b/NAMESPACE index 561c03e..f513744 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export(build_epistats) export(build_netparams) export(build_netstats) +export(make_race_combo) export(reweight_age_pyr) export(trim_epistats) export(trim_netstats) diff --git a/R/EpiStats.R b/R/EpiStats.R index 9824921..1cfd53d 100644 --- a/R/EpiStats.R +++ b/R/EpiStats.R @@ -570,6 +570,9 @@ build_epistats <- function(geog.lvl = NULL, #' @param race1 a vector of race index for the index #' @param race2 a vector of race index for the partners #' @return a vector of the same size with the race combos +#' +#' @export +#' make_race_combo <- function(race1, race2) { race_combo <- ifelse(race1 == race2, 2 * race1 - 1, 2 * race1) return(race_combo) diff --git a/man/make_race_combo.Rd b/man/make_race_combo.Rd new file mode 100644 index 0000000..8389cda --- /dev/null +++ b/man/make_race_combo.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/EpiStats.R +\name{make_race_combo} +\alias{make_race_combo} +\title{for a race \code{n} the combo for \verb{race1 == race2 ==n} the combo is is \verb{2n - 1} +for \code{race1 == n && race2 != n} the combo is \verb{2n}} +\usage{ +make_race_combo(race1, race2) +} +\arguments{ +\item{race1}{a vector of race index for the index} + +\item{race2}{a vector of race index for the partners} +} +\value{ +a vector of the same size with the race combos +} +\description{ +for a race \code{n} the combo for \verb{race1 == race2 ==n} the combo is is \verb{2n - 1} +for \code{race1 == n && race2 != n} the combo is \verb{2n} +}