From c41afbdef9c64a96565f5c9403a577e1ceef7349 Mon Sep 17 00:00:00 2001
From: Danny Colombara <dcolombara@kingcounty.gov>
Date: Fri, 7 Mar 2025 14:47:54 -0800
Subject: [PATCH 01/11] Re-engineered chi_count_by_age

 - new validations
 - built in more tweaks for race3 headaches
 - dropped dependency on tidyr::crossing
 - progress meter
 - better documentation / comments
---
 NAMESPACE                  |   4 +
 R/globals.R                |   5 +-
 R/proto_chi_count_by_age.R | 440 +++++++++++++++++++++++++++----------
 man/chi_count_by_age.Rd    |  40 +++-
 4 files changed, 367 insertions(+), 122 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index e0ff381..698bbab 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -26,6 +26,7 @@ importFrom(data.table,"%between%")
 importFrom(data.table,":=")
 importFrom(data.table,.GRP)
 importFrom(data.table,.SD)
+importFrom(data.table,CJ)
 importFrom(data.table,`:=`)
 importFrom(data.table,between)
 importFrom(data.table,copy)
@@ -47,6 +48,9 @@ importFrom(future.apply,future_lapply)
 importFrom(glue,glue)
 importFrom(glue,glue_sql)
 importFrom(odbc,odbc)
+importFrom(progressr,handlers)
+importFrom(progressr,progressor)
+importFrom(progressr,with_progress)
 importFrom(rads,calc)
 importFrom(rads,round2)
 importFrom(rads,string_clean)
diff --git a/R/globals.R b/R/globals.R
index d646335..81642ca 100644
--- a/R/globals.R
+++ b/R/globals.R
@@ -17,6 +17,7 @@ utils::globalVariables(c(
   "chi_age",
   "chi_geo_kc",
   "chi_year",
+  "combo_idx",
   "comparison_with_kc_sig",
   "count",
   "crosstabs",
@@ -50,6 +51,7 @@ utils::globalVariables(c(
   "lower_bound",
   "notable",
   "numerator",
+  "overall",
   "pattern",
   "pop",
   "pov200grp",
@@ -91,5 +93,6 @@ utils::globalVariables(c(
   "_kingcounty.x",
   "_kingcounty.y",
   "_wastate.x",
-  "_wastate.y"
+  "_wastate.y",
+  ".I"
 ))
diff --git a/R/proto_chi_count_by_age.R b/R/proto_chi_count_by_age.R
index 67c1bcb..377a668 100644
--- a/R/proto_chi_count_by_age.R
+++ b/R/proto_chi_count_by_age.R
@@ -1,132 +1,344 @@
-#' Generate Age-Specific Counts for CHI Data
+#' Generate Age-Specific Counts for Community Health Indicators
 #'
 #' @description
-#' Creates a detailed breakdown of counts by age for CHI data analysis, most
-#' often for age standardization.Processes data according to provided
-#' instructions and handles demographic groupings.
+#' Creates a detailed breakdown of counts by age for CHI data analysis. Primarily
+#' used for age standardization and rate calculations when combined with
+#' population estimates. Processes data according to provided instructions and
+#' handles demographic groupings with special treatment for race and ethnicity
+#' variables.
 #'
 #' @param ph.data Input data frame or data table containing CHI data
-#' @param ph.instructions Data frame or data table containing calculation instructions
-#' @param source_date Date of data source
+#' @param ph.instructions Data frame or data table containing calculation specifications with columns:
+#'  \itemize{
+#'    \item indicator_key: Name of the health metric to calculate
+#'    \item tab: Visualization tab type (kingcounty, demgroups, crosstabs)
+#'    \item cat1_varname, cat2_varname: Variable names for stratification
+#'    \item cat1, cat2: Human-friendly labels for these variables
+#'    \item start, end: Year range for the calculation
+#'  }
+#' @param source_date Date of data source, added to output metadata
 #'
-#' @return A data.table containing age-specific counts with standard CHI groupings
-#' @importFrom data.table setDT rbindlist setnames := setorder data.table
+#' @return A data.table containing age-specific counts with standard CHI groupings:
+#'  \itemize{
+#'    \item indicator_key: Health metric identifier
+#'    \item year: Year range of data (e.g., "2019-2021" or single year)
+#'    \item tab: Visualization tab type
+#'    \item cat1, cat1_varname, cat1_group: Primary stratification variable details
+#'    \item cat2, cat2_varname, cat2_group: Secondary stratification variable details (if applicable)
+#'    \item chi_age: Age value (0-100)
+#'    \item count: Number of cases in that demographic-age group
+#'    \item source_date: Date of data source (if provided)
+#'  }
+#'
+#' @seealso
+#' \code{\link{chi_generate_tro_shell}} which creates ph.instructions used by
+#' \code{chi_count_by_age}
+#'
+#' \code{\link{chi_generate_instructions_pop}} which uses the output of the output
+#' of \code{chi_count_by_age}
+#'
+#' @importFrom data.table setDT rbindlist setnames := setorder data.table CJ
 #' @importFrom rads calc
 #' @importFrom future.apply future_lapply
-#' @importFrom tidyr crossing
+#' @importFrom progressr handlers progressor with_progress
 #' @export
 #'
 chi_count_by_age <- function(ph.data = NULL,
                              ph.instructions = NULL,
-                             source_date = NULL){
-  # Create 'Overall' if needed for crosstabs ----
-  if(!'overall' %in% names(ph.data)){
-    ph.data$overall <- with(ph.data, ifelse(chi_geo_kc == 'King County', 'Overall', NA_character_))
-  }
-
-  # Check to make sure all variables needed exist in the data ----
-  neededbyvars <- setdiff(unique(c(ph.instructions$cat1_varname, ph.instructions$cat2_varname)), c(NA))
-  if("race3" %in% neededbyvars & !"race3_hispanic" %in% neededbyvars){neededbyvars <- c(neededbyvars, 'race3_hispanic')} # By definition, Hispanic cannot be contained within race3
-
-  neededvars <- setdiff(unique(c(ph.instructions$indicator_key, neededbyvars)), c(NA))
-
-  missingvars <- setdiff(neededvars, names(ph.data))
-  if(length(missingvars) > 0 ){
-    stop(paste0("\n\U2620 ph.data is missing the following columns that are specified in ph.instructions: ", paste0(missingvars, collapse = ', '), ". ",
-                "\nIf `race3_hispanic` is listed, that is because, by definition, `race3` cannot have a Hispanic ethnicity in the same variable. So, two ",
-                "\nvariables (`race3` & `race3_hispanic`) will be processed and in the output, it will be called `race3`"))
-  } else{message("\U0001f642 All specified variables exist in ph.data")}
-
-  # Check to make sure all byvariables have the CHI specified encoding ----
-  stdbyvars <- rads.data::misc_chi_byvars[varname %in% setdiff(unique(c(ph.instructions$cat1_varname, ph.instructions$cat2_varname)), c(NA))][, list(varname, group, keepme, reference = 1)]
-  stdbyvars[group %in% c("Hispanic", 'Non-Hispanic') & varname == 'race3', varname := 'race3_hispanic'] # necessary because race3 & Hispanic must be two distinct variables in raw data
-  phbyvars <- rbindlist(lapply(
-    X=as.list(neededbyvars),
-    FUN = function(X){data.table(varname = X, group = setdiff(unique(ph.data[[X]]), NA), ph.data = 1)}))
-  compbyvars <- merge(stdbyvars, phbyvars, by = c('varname', 'group'), all = T)
-  if(nrow(compbyvars[is.na(reference)| is.na(ph.data)]) > 0){
-    print(compbyvars[is.na(reference)| is.na(ph.data)])
-    stop("\n\U2620 the table above shows the varname/group combinations that do not align between the reference table and your ph.data.")
-  } else {message("\U0001f642 All specified cat1_group and cat2_group values align with the reference standard.")}
-
-  # Cycle through a function that generates counts by age ----
-  message("\U023F3 Be patient! The function is generating counts for each row of ph.instructions.")
-  tempCHIcount <- rbindlist(future_lapply(
-    X = as.list(seq(1, nrow(ph.instructions), 1)),
-    FUN = function(X){
-      message(paste0("Calculating estimates for ph.instructions row ", X, " of ", nrow(ph.instructions), "..."))
-
-      # create constants for calc----
-      tempbv1 <- setdiff(ph.instructions[X][['cat1_varname']], c())
-      tempbv2 <- setdiff(ph.instructions[X][['cat2_varname']], c())
-      if(length(tempbv2) == 0){tempbv2 = NA}
-      tempbv <- setdiff(c(tempbv1, tempbv2), c(NA))
-      tempbv <- c(tempbv, "chi_age")
-
-      # create variables of interest used in calc function below
-      tempbv <- tempbv
-      tempend <- ph.instructions[X][['end']]
-      tempstart <- ph.instructions[X][['start']]
-
-      # use calc----
-      if(any(grepl('wastate', tempbv))){
-        tempcount <- rads::calc(ph.data = ph.data,
-                          what = ph.instructions[X][['indicator_key']],
-                          where = chi_year >= tempstart & chi_year <= tempend,
-                          by = tempbv,
-                          metrics = c('numerator'))
+                             source_date = NULL) {
+
+  # Basic validation of inputs ----
+    if (is.null(ph.data) || is.null(ph.instructions)) {
+      stop("Both ph.data and ph.instructions parameters are required")
+    }
+
+    if (!is.data.frame(ph.data)) stop("\n\U1F6D1 ph.data must be a data.frame or data.table")
+    if (!is.data.frame(ph.instructions)) stop("\n\U1F6D1 ph.instructions must be a data.frame or data.table")
+    if (!is.null(source_date) & !inherits(source_date, "Date")) stop("\n\U1F6D1 source_date must be of type Date, if it is provided")
+
+    # Convert inputs to data.table if they're not already
+    ph.data <- setDT(copy(ph.data))
+    ph.instructions <- setDT(copy(ph.instructions))
+
+  # Create 'Overall' category if needed for crosstabs ----
+    if (!"overall" %in% names(ph.data)) {
+      ph.data[, overall := ifelse(chi_geo_kc == "King County", "Overall", NA_character_)]
+    }
+
+  # Check for required variables in the data ----
+    # Extract all variable names needed from instructions
+      needed_byvars <- setdiff(
+        unique(c(ph.instructions$cat1_varname, ph.instructions$cat2_varname)),
+        c(NA)
+      )
+
+    # Special case: race3 requires race3_hispanic by definition
+      # race3 in the Tableau viz is presented as a single variables, but in reality
+      # it is made up of a race indicator and a hispanic ethnicity indicator. So,
+      # when instructions request race3, they need race3_hispanic as well to calculate
+      # the final race3 values (which is race, with Hispanic as ethnicity)
+      if ("race3" %in% needed_byvars & !"race3_hispanic" %in% needed_byvars) {
+        needed_byvars <- c(needed_byvars, "race3_hispanic")
+      }
+
+    # Combine byvar names with indicator keys to get all required variables
+      needed_vars <- setdiff(
+        unique(c(ph.instructions$indicator_key, needed_byvars)),
+        c(NA)
+      )
+
+    # Check if any required variables are missing from the data
+      missing_vars <- setdiff(needed_vars, names(ph.data))
+      if (length(missing_vars) > 0) {
+        stop(paste0(
+          "\n\U2620 ph.data is missing the following columns that are specified in ph.instructions: ",
+          paste0(missing_vars, collapse = ", "), ". ",
+          "\nIf `race3_hispanic` is listed, that is because, by definition, `race3` cannot have a Hispanic ethnicity in the same variable. ",
+          "\nTwo variables (`race3` & `race3_hispanic`) will be processed and in the output, it will be called `race3`"
+        ))
       } else {
-        tempcount <- rads::calc(ph.data = ph.data,
-                          what = ph.instructions[X][['indicator_key']],
-                          where = chi_year >= tempstart & chi_year <= tempend & chi_geo_kc == 'King County',
-                          by = tempbv,
-                          metrics = c('numerator'))
+        message("\U0001f642 All specified variables exist in ph.data")
       }
 
-      # tidy----
-      tempcount[, cat1 := ph.instructions[X][['cat1']]]
-      setnames(tempcount, ph.instructions[X][['cat1_varname']], 'cat1_group')
-      tempcount[, cat1_varname := ph.instructions[X][['cat1_varname']]]
-      tempcount[, cat2 := ph.instructions[X][['cat2']]]
-      if(!is.na(tempbv2) & tempbv1 != tempbv2){
-        setnames(tempcount, ph.instructions[X][['cat2_varname']], 'cat2_group')} else{
-          tempcount[, cat2_group := NA] }
-      tempcount[, cat2_varname := ph.instructions[X][['cat2_varname']]]
-
-      tempcount <- tempcount[!is.na(cat1_group)]
-      tempcount <- tempcount[!(is.na(cat2_group) & !is.na(cat2))]
-      tempcount <- tempcount[!is.na(chi_age)]
-      tempcount <- tempcount[, list(cat1, cat1_varname, cat1_group, cat2, cat2_varname, cat2_group, chi_age, count = numerator)]
-
-      # create reference table with every combination of cat1 x cat2 and age----
-      cat1table <- data.table(cat1 = ph.instructions[X][['cat1']],
-                              cat1_varname = tempbv1,
-                              cat1_group = sort(setdiff(as.character(unique(ph.data[[tempbv1]])), NA)) )
-      cat2table <- suppressWarnings(data.table(cat2 = ph.instructions[X][['cat2']],
-                                               cat2_varname = tempbv2,
-                                               cat2_group = sort(setdiff(as.character(unique(ph.data[[tempbv2]])), NA))) )
-      cattable <- tidyr::crossing(cat1table, cat2table)
-      cattable <- setDT(tidyr::crossing(cattable, data.table(chi_age = 0:100)))
-
-      # merge the counts onto the reference table to get every combo of age x cat1 x cat2----
-      tempcount <- merge(cattable, tempcount, by = c('cat1', 'cat1_varname', 'cat1_group', 'cat2', 'cat2_varname', 'cat2_group', 'chi_age'), all = T)
-      tempcount[is.na(count), count := 0] # when count is NA, it is actually zero and zero is needed for calculating age adjusted rates
-
-      # add on remaining essential identifiers----
-      tempcount[, indicator_key := ph.instructions[X][['indicator_key']]]
-      tempcount[, tab := ph.instructions[X][['tab']]]
-      tempcount[ph.instructions[X][['end']] != ph.instructions[X][['start']],
-                year := paste0(ph.instructions[X][['start']], "-", ph.instructions[X][['end']])]
-      tempcount[ph.instructions[X][['end']] == ph.instructions[X][['start']],
-                year := ph.instructions[X][['end']]]
-
-      # order output----
-      tempcount <- tempcount[, list(indicator_key, year, tab, cat1, cat1_varname, cat1_group, cat2, cat2_varname, cat2_group, chi_age, count)]
-      setorder(tempcount, cat1_group, cat2_group, chi_age)
+  # Verify that categorical variables follow CHI encoding standards ----
+    # Get standardized byvar values from reference data
+      std_byvars <- rads.data::misc_chi_byvars
+      std_byvars <- std_byvars[varname %in% setdiff(unique(c(ph.instructions$cat1_varname, ph.instructions$cat2_varname)), c(NA))]
+      std_byvars <- std_byvars[, list(varname, group, keepme, reference = 1)]
+
+    # Handle race3 and Hispanic special case
+      std_byvars[group %in% c("Hispanic", "Non-Hispanic") & varname == "race3",
+                 varname := "race3_hispanic"]
+
+    # Extract unique values for each byvar in the actual data
+      data_byvars <- rbindlist(lapply(
+        X = as.list(needed_byvars),
+        FUN = function(x) {
+          data.table(
+            varname = x,
+            group = setdiff(unique(ph.data[[x]]), NA),
+            ph.data = 1
+          )
+        }
+      ))
+
+    # Merge actual values to reference standards
+      byvar_comparison <- merge(std_byvars,
+                                data_byvars,
+                                by = c("varname", "group"),
+                                all = TRUE)
+
+    # Check for any mismatches between data and reference standards
+      if (nrow(byvar_comparison[is.na(reference) | is.na(ph.data)]) > 0) {
+        print(byvar_comparison[is.na(reference) | is.na(ph.data)])
+        stop("\n\U2620 The table above shows the varname/group combinations that do not align between the reference table and your ph.data.")
+      } else {
+        message("\U0001f642 All specified cat1_group and cat2_group values align with the reference standard.")
+      }
+
+  # Generate counts for each row in instructions ----
+    message("\U023F3 Be patient! The function is generating counts for each row of ph.instructions.")
+
+      progressr::handlers(handler_progress())
+      with_progress({
+        p <- progressor(nrow(ph.instructions))
+        count_results <- rbindlist(future_lapply(
+          X = as.list(seq_len(nrow(ph.instructions))),
+          FUN = function(row_idx) {
+            p(paste0("Processing row ", row_idx, " of ", nrow(ph.instructions) ))
+            # Set up calculations ----
+              # Extract parameters for this calculation
+              current_instruction <- ph.instructions[row_idx]
+              primary_byvar <- current_instruction[["cat1_varname"]]
+              secondary_byvar <- current_instruction[["cat2_varname"]]
+              if (is.null(secondary_byvar) || length(secondary_byvar) == 0) {
+                secondary_byvar <- NA
+              }
 
+              # Combine all byvars including age
+              all_byvars <- setdiff(c(primary_byvar, secondary_byvar), c(NA))
+              all_byvars <- unique(c(all_byvars, "chi_age"))
+
+            # Calculate counts using rads::calc ----
+              if (any(grepl("wastate", all_byvars))) {
+                # Washington state calculations
+                age_counts <- rads::calc(
+                  ph.data = ph.data[chi_year >= current_instruction[["start"]] & chi_year <= current_instruction[["end"]]],
+                  what = current_instruction[["indicator_key"]],
+                  by = all_byvars,
+                  metrics = c("numerator")
+                )
+              } else {
+                # King County calculations
+                age_counts <- rads::calc(
+                  ph.data = ph.data[chi_year >= current_instruction[["start"]] & chi_year <= current_instruction[["end"]] & chi_geo_kc == "King County"],
+                  what = current_instruction[["indicator_key"]],
+                  by = all_byvars,
+                  metrics = c("numerator")
+                )
+              }
+
+            # Add cat1/cat2 information to results ----
+              # Add cat1# info
+              age_counts[, cat1 := current_instruction[["cat1"]]]
+              setnames(age_counts, primary_byvar, "cat1_group")
+              age_counts[, cat1_varname := primary_byvar]
+
+              # Add cat2# info
+              age_counts[, cat2 := current_instruction[["cat2"]]]
+              if (!is.na(secondary_byvar) & primary_byvar != secondary_byvar) {
+                setnames(age_counts, secondary_byvar, "cat2_group")
+              } else {
+                age_counts[, cat2_group := NA]
+              }
+              age_counts[, cat2_varname := secondary_byvar]
+
+              # Filter out invalid combinations
+              age_counts <- age_counts[!is.na(cat1_group)]
+              age_counts <- age_counts[!(is.na(cat2_group) & !is.na(cat2))]
+              age_counts <- age_counts[!is.na(chi_age)]
+
+              # Keep only necessary columns
+              age_counts <- age_counts[, list(
+                cat1, cat1_varname, cat1_group,
+                cat2, cat2_varname, cat2_group,
+                chi_age, count = numerator
+              )]
+
+            # Create complete reference table for all combinations ----
+              # When there is no data for a specific combination (e.g., NHPI 85+ in
+              # particular geography), the row will be missing. We want the row to be
+              # noted but with the count of 0, not NA.
+
+              # cat1 summary table
+              cat1_table <- data.table(
+                cat1 = current_instruction[["cat1"]],
+                cat1_varname = primary_byvar,
+                cat1_group = sort(setdiff(as.character(unique(ph.data[[primary_byvar]])), NA))
+              )
+
+              # cat2 summary table
+              if (!is.na(secondary_byvar)) {
+                cat2_table <- data.table(
+                  cat2 = current_instruction[["cat2"]],
+                  cat2_varname = secondary_byvar,
+                  cat2_group = sort(setdiff(as.character(unique(ph.data[[secondary_byvar]])), NA))
+                )
+              } else {
+                cat2_table <- data.table(
+                  cat2 = current_instruction[["cat2"]],
+                  cat2_varname = secondary_byvar,
+                  cat2_group = NA_character_
+                )
+              }
+
+              # Create all logical combinations of cat1_table rows & cat2_table rows
+                # This approach preserves the correct relationship between variable
+                # names and their values. For example, if cat1_varname is 'chi_sex' then
+                # cat1_group must only contain values like 'Male' or 'Female', not values
+                # from other variables like 'Asian' or 'NHPI'.
+                #
+                # A simpler approach (like CJ with all columns directly) would create
+                # invalid combinations such as cat1_varname='chi_sex' with cat1_group='Hispanic',
+                # which is nonsensical.
+                #
+                # The nested lapply approach ensures that each category variable is only
+                # paired with its own valid values while still creating all valid combinations
+                # needed for the complete age-specific count table.
+
+              all_combinations <- NULL
+              if (nrow(cat2_table) > 0) {
+                # For each row in cat1_table, combine with each row in cat2_table
+                all_combinations <- rbindlist(lapply(1:nrow(cat1_table), function(i) {
+                  rbindlist(lapply(1:nrow(cat2_table), function(j) {
+                    data.table(
+                      cat1 = cat1_table[i]$cat1,
+                      cat1_varname = cat1_table[i]$cat1_varname,
+                      cat1_group = cat1_table[i]$cat1_group,
+                      cat2 = cat2_table[j]$cat2,
+                      cat2_varname = cat2_table[j]$cat2_varname,
+                      cat2_group = cat2_table[j]$cat2_group
+                    )
+                  }))
+                }))
+              } else {
+                all_combinations <- cat1_table
+                all_combinations[, `:=`(
+                  cat2 = current_instruction[["cat2"]],
+                  cat2_varname = secondary_byvar,
+                  cat2_group = NA_character_
+                )]
+              }
+
+              # Now add all ages to create the final cartesian product
+              all_combinations_with_age <- CJ(
+                combo_idx  = 1:nrow(all_combinations),
+                chi_age = 0:100,
+                unique = TRUE
+              )
+
+              all_combinations_with_age <- merge(
+                all_combinations_with_age,
+                all_combinations[, combo_idx  := .I],
+                by = "combo_idx"
+              )
+
+              all_combinations_with_age[, combo_idx  := NULL]
+
+
+            # Merge counts onto the reference table with all possible combinations ----
+              complete_counts <- merge(
+                all_combinations_with_age,
+                age_counts,
+                by = c("cat1", "cat1_varname", "cat1_group", "cat2", "cat2_varname", "cat2_group", "chi_age"),
+                all.x = TRUE
+              )
+
+              # Replace NA counts with 0 (needed for age-adjusted rates)
+              complete_counts[is.na(count), count := 0]
+
+              # Add remaining identifiers
+              complete_counts[, indicator_key := current_instruction[["indicator_key"]]]
+              complete_counts[, tab := current_instruction[["tab"]]]
+
+              # Format year range appropriately
+              if (current_instruction[["end"]] != current_instruction[["start"]]) {
+                complete_counts[, year := paste0(current_instruction[["start"]], "-", current_instruction[["end"]])]
+              } else {
+                complete_counts[, year := as.character(current_instruction[["start"]])]
+              }
+
+              # Order and select final output columns
+              complete_counts <- complete_counts[, list(
+                indicator_key, year, tab,
+                cat1, cat1_varname, cat1_group,
+                cat2, cat2_varname, cat2_group,
+                chi_age, count
+              )]
+
+              setorder(complete_counts, cat1_group, cat2_group, chi_age)
+              return(complete_counts)
+            } # close function within future_lapply
+          ), # close future_lapply
+          use.names = TRUE) # close rbindlist
+      }) # close with_progress
+
+    # Add source_date if provided
+    if (!is.null(source_date)) {
+      count_results[, source_date := source_date]
+    } else {
+      count_results[, source_date := as.Date(NA_character_)]
     }
-  ), use.names = TRUE)
 
-  # Return ----
-  return(tempCHIcount)
+  # Modify race3 / race3_hispanic ----
+    # as noted above, these are two distinct variables that are presented as a single
+    # variable in Tableau viz so the variable names need to be harmonized
+      count_results[cat1_varname == 'race3_hispanic', cat1_varname := 'race3']
+      count_results[cat2_varname == 'race3_hispanic', cat2_varname := 'race3']
+      drop_race3_groups <- rads.data::misc_chi_byvars[varname == 'race3' & keepme == 'No']$group
+      count_results <- count_results[!(cat1_varname == 'race3' & cat1_group %in% drop_race3_groups)]
+      count_results <- count_results[is.na(cat2) | !(cat2_varname == 'race3' & cat2_group %in% drop_race3_groups)]
+
+  # Return the final results ----
+  return(count_results)
 }
diff --git a/man/chi_count_by_age.Rd b/man/chi_count_by_age.Rd
index d5af4ac..d5645c5 100644
--- a/man/chi_count_by_age.Rd
+++ b/man/chi_count_by_age.Rd
@@ -2,22 +2,48 @@
 % Please edit documentation in R/proto_chi_count_by_age.R
 \name{chi_count_by_age}
 \alias{chi_count_by_age}
-\title{Generate Age-Specific Counts for CHI Data}
+\title{Generate Age-Specific Counts for Community Health Indicators}
 \usage{
 chi_count_by_age(ph.data = NULL, ph.instructions = NULL, source_date = NULL)
 }
 \arguments{
 \item{ph.data}{Input data frame or data table containing CHI data}
 
-\item{ph.instructions}{Data frame or data table containing calculation instructions}
+\item{ph.instructions}{Data frame or data table containing calculation specifications with columns:
+\itemize{
+  \item indicator_key: Name of the health metric to calculate
+  \item tab: Visualization tab type (kingcounty, demgroups, crosstabs)
+  \item cat1_varname, cat2_varname: Variable names for stratification
+  \item cat1, cat2: Human-friendly labels for these variables
+  \item start, end: Year range for the calculation
+}}
 
-\item{source_date}{Date of data source}
+\item{source_date}{Date of data source, added to output metadata}
 }
 \value{
-A data.table containing age-specific counts with standard CHI groupings
+A data.table containing age-specific counts with standard CHI groupings:
+ \itemize{
+   \item indicator_key: Health metric identifier
+   \item year: Year range of data (e.g., "2019-2021" or single year)
+   \item tab: Visualization tab type
+   \item cat1, cat1_varname, cat1_group: Primary stratification variable details
+   \item cat2, cat2_varname, cat2_group: Secondary stratification variable details (if applicable)
+   \item chi_age: Age value (0-100)
+   \item count: Number of cases in that demographic-age group
+   \item source_date: Date of data source (if provided)
+ }
 }
 \description{
-Creates a detailed breakdown of counts by age for CHI data analysis, most
-often for age standardization.Processes data according to provided
-instructions and handles demographic groupings.
+Creates a detailed breakdown of counts by age for CHI data analysis. Primarily
+used for age standardization and rate calculations when combined with
+population estimates. Processes data according to provided instructions and
+handles demographic groupings with special treatment for race and ethnicity
+variables.
+}
+\seealso{
+\code{\link{chi_generate_tro_shell}} which creates ph.instructions used by
+\code{chi_count_by_age}
+
+\code{\link{chi_generate_instructions_pop}} which uses the output of the output
+of \code{chi_count_by_age}
 }

From 53b4c5640fd39f06e34b4434a1fc28db386f763a Mon Sep 17 00:00:00 2001
From: Danny Colombara <dcolombara@kingcounty.gov>
Date: Fri, 7 Mar 2025 14:48:49 -0800
Subject: [PATCH 02/11] Renamed proto_chi_count_by_age > chi_count_by_age

---
 R/{proto_chi_count_by_age.R => chi_count_by_age.R} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename R/{proto_chi_count_by_age.R => chi_count_by_age.R} (100%)

diff --git a/R/proto_chi_count_by_age.R b/R/chi_count_by_age.R
similarity index 100%
rename from R/proto_chi_count_by_age.R
rename to R/chi_count_by_age.R

From da04629d4657a3948ef1a2be4cb96c7e8b3b723b Mon Sep 17 00:00:00 2001
From: Danny Colombara <dcolombara@kingcounty.gov>
Date: Fri, 7 Mar 2025 16:10:26 -0800
Subject: [PATCH 03/11] improved chi_generate_instructions_pop

 - improved roxygen2 header
 - simplified / tidier code, but same fundamental logic / structure
---
 R/proto_chi_generate_instructions_pop.R | 191 +++++++++++++++++-------
 1 file changed, 135 insertions(+), 56 deletions(-)

diff --git a/R/proto_chi_generate_instructions_pop.R b/R/proto_chi_generate_instructions_pop.R
index e679bc7..4883416 100644
--- a/R/proto_chi_generate_instructions_pop.R
+++ b/R/proto_chi_generate_instructions_pop.R
@@ -1,67 +1,146 @@
 #' Generate Population Instructions for CHI Analysis
 #'
 #' @description
-#' Creates a instructions for rads::get_population() based on count data
-#' specifications. Handles various geographic types and demographic groupings.
+#' Creates instructions for \link{\code{CHI_get_proper_pop}} based on a table of count
+#' data. These instructions configure appropriate demographic groupings,
+#' geographic types, and time periods for retrieving population denominators used
+#' in CHI rate calculations.
 #'
-#' @param mycount.data Input data.table containing count data specifications
-#' @param povgeo Geographic level for poverty analysis (NA or 'zip')
+#' @param mycount.data Input data.table produced by \link{\code{chi_count_by_age}},
+#' containing the following columns:
+#'  \itemize{
+#'    \item indicator_key: indicator_key used by CHI
+#'    \item year: Year range (e.g., "2019-2021" or single year)
+#'    \item tab: Visualization tab type
+#'    \item cat1, cat1_varname, cat1_group: Primary stratification variables
+#'    \item cat2, cat2_varname, cat2_group: Secondary stratification variables
+#'    \item chi_age: Single year age
+#'    \item count: Count of events (births, death, hospitalizations, etc. )
+#'  }
+#' @param Geographic level for poverty analysis ('blk' or 'zip')
+#'
+#' @return A data.table containing population processing instructions with columns:
+#'  \itemize{
+#'    \item year: Original year range from input
+#'    \item cat1, cat1_varname: Primary stratification details
+#'    \item cat2, cat2_varname: Secondary stratification details
+#'    \item tab: Visualization tab type
+#'    \item start, stop: Start and end years parsed from the year range
+#'    \item race_type: Race categorization type ('race', 'race_eth', or 'race_aic')
+#'    \item geo_type: Geographic level for analysis ('kc', 'hra', 'region', 'blk', 'zip', 'wa')
+#'    \item group_by1, group_by2: Demographic grouping specifications
+#'  }
+#'
+#' @seealso
+#' \code{\link{chi_count_by_age}} which generates the count data used as input
+#' to this function
+#'
+#' \code{\link{chi_get_proper_pop}} which uses the output of this function
 #'
-#' @return A data.table containing population processing instructions
 #' @importFrom data.table copy `:=` setorder tstrsplit
 #' @importFrom tools toTitleCase
 #' @export
 #'
+chi_generate_instructions_pop <- function(mycount.data,
+                                          povgeo = c('blk', 'zip')) {
+
+  # Validation of inputs ----
+    if (is.null(mycount.data)) {
+      stop("\n\U1F6D1 mycount.data parameter is required")
+    }
+
+    if (!is.data.frame(mycount.data)) {
+      stop("\n\U1F6D1 mycount.data must be a data.frame or data.table")
+    } else {mycount.data <- setDT(copy(mycount.data))}
+
+    povgeo <- match.arg(povgeo)
+
+  # Initial data preparation ----
+    # Create a template with only the necessary columns to avoid duplicates
+    pop.template <- copy(mycount.data)
+    pop.template <- unique(copy(pop.template)[, list(year, cat1, cat1_varname, cat2, cat2_varname, tab)])
+
+  # Process year ranges ----
+    # Split year ranges (e.g., "2019-2021") into start and stop years
+    pop.template[, c("start", "stop") := tstrsplit(year, split = '-')]
+    # For single years, set the stop year equal to the start year
+    pop.template[is.na(stop), stop := start]
+
+  # Set default demographic settings ----
+    # Default race type includes ethnicity (Hispanic as race)
+    pop.template[, race_type := 'race_eth']
+
+  # Handle maternal data prefixes ----
+    # Remove "Birthing person's" prefix to standardize maternal data categories
+    pop.template[grepl("birthing person", cat1, ignore.case = TRUE),
+                 cat1 := tools::toTitleCase(gsub("Birthing person's ", "", cat1))]
+    pop.template[grepl("birthing person", cat2, ignore.case = TRUE),
+                 cat2 := tools::toTitleCase(gsub("Birthing person's ", "", cat2))]
+
+  # Process geographic types and demographic groupings ----
+    # Define OMB AIC (alone or in combination)
+    omb_aic <- c("chi_race_aic_aian", "chi_race_aic_asian", "chi_race_aic_black",
+                 "chi_race_aic_his", "chi_race_aic_nhpi", "chi_race_aic_wht")
+
+    # Process both primary (cat1) and secondary (cat2) stratification variables
+    for(catnum in c("1", "2")) {
+      temp.cat <- paste0("cat", catnum)
+      temp.varname <- paste0(temp.cat, "_varname")
+      temp.groupby <- paste0("group_by", catnum)
+
+      # Set geographic type based on category
+      pop.template[get(temp.cat) == "Cities/neighborhoods", geo_type := "hra"]
+
+      # Set race_type and group_by based on race/ethnicity variable
+      pop.template[get(temp.varname) == "race3", c("race_type", temp.groupby) := 'race']
+      pop.template[get(temp.varname) == "race4", c("race_type", temp.groupby) := 'race_eth']
+      pop.template[get(temp.varname) %in% omb_aic, c("race_type", temp.groupby) := 'race_aic']
+
+      # Filter out non-standard AIC race/ethnicity categories that don't have population data
+      pop.template <- pop.template[!(grepl('_aic_', get(temp.varname)) &
+                                       !get(temp.varname) %in% omb_aic)]
+
+      # Set demographic grouping based on category label
+      pop.template[get(temp.cat) == "Ethnicity", c("race_type", temp.groupby) := 'race_eth']
+      pop.template[get(temp.cat) == "Gender", (temp.groupby) := 'genders']
+      pop.template[get(temp.cat) %in% c("Race", "Race/ethnicity") & get(temp.varname) == 'race4',
+                   (temp.groupby) := 'race_eth']
+      pop.template[(get(temp.cat) == "Race" & get(temp.varname) == 'race3'),
+                   (temp.groupby) := 'race']
+
+      # Set geographic type based on regions
+      pop.template[get(temp.cat) == "Regions" & (is.na(geo_type) | geo_type != 'hra'),
+                   geo_type := "region"]
+      pop.template[get(temp.cat) == "Big cities", geo_type := "hra"]
+      pop.template[get(temp.cat) == "Washington State", geo_type := "wa"]
+    }
+
+  # Handle special geographic cases ----
+    # Set geographic type for poverty analysis to block group level by default
+    pop.template[grepl("poverty$", cat1, ignore.case = TRUE) |
+                   grepl("poverty$", cat2, ignore.case = TRUE),
+                 geo_type := "blk"]
+
+    # Override with zip code level for poverty analysis if specified in povgeo parameter
+    if(!is.na(povgeo) && povgeo == 'zip') {
+      pop.template[grepl("poverty$", cat1, ignore.case = TRUE) |
+                     grepl("poverty$", cat2, ignore.case = TRUE),
+                   geo_type := "zip"]
+    }
+
+    # Special case for combined regions and cities/neighborhoods analysis
+    pop.template[(cat1 == "Regions" & cat2 == "Cities/neighborhoods") |
+                   (cat2 == "Regions" & cat1 == "Cities/neighborhoods"),
+                 geo_type := "blk"]
+
+    # Set missing or default values ----
+    # Replace NA cat2 with "NA" string for consistent processing
+    pop.template[is.na(cat2), cat2 := "NA"]
+
+    # Set default geographic type as King County when not specified
+    pop.template[is.na(geo_type), geo_type := 'kc']
 
-chi_generate_instructions_pop <- function(mycount.data, povgeo = NA){
-  pop.template <- copy(mycount.data)
-  pop.template <- unique(copy(pop.template)[, list(year, cat1, cat1_varname, cat2, cat2_varname, tab)])
-  pop.template[, c("start", "stop") := tstrsplit(year, split = '-') ]
-  pop.template[is.na(stop), stop := start] # need to have an end date even if it is just one year
-
-  pop.template[, race_type := 'race_eth'] # by default has race and OMB 97 with Hispanic as race
-
-  # Drop prefix when using maternal data because do not want to create multiple alternative codings below ----
-  pop.template[grepl("birthing person", cat1, ignore.case = T), cat1 := tools::toTitleCase(gsub("Birthing person's ", "", cat1))]
-  pop.template[grepl("birthing person", cat2, ignore.case = T), cat2 := tools::toTitleCase(gsub("Birthing person's ", "", cat2))]
-
-  # Create geo_type & group_by arguments ----
-  omb_aic <- c("chi_race_aic_aian", "chi_race_aic_asian", "chi_race_aic_black", "chi_race_aic_his", "chi_race_aic_nhpi", "chi_race_aic_wht")
-
-  for(catnum in c("1", "2")){
-    temp.cat <- paste0("cat", catnum)
-    pop.template[get(temp.cat) == "Cities/neighborhoods", geo_type := "hra"]
-    pop.template[get(paste0(temp.cat, "_varname")) == "race3", c("race_type", paste0("group_by", catnum)) := 'race']
-    pop.template[get(paste0(temp.cat, "_varname")) == "race4", c("race_type", paste0("group_by", catnum)) := 'race_eth']
-    pop.template[get(paste0(temp.cat, "_varname")) %in% omb_aic, c("race_type", paste0("group_by", catnum)) := 'race_aic']
-
-    # the only AIC race/eth with pop data are the standard OMB categories
-    pop.template <- pop.template[!(grepl('_aic_', get(paste0(temp.cat, "_varname"))) & !get(paste0(temp.cat, "_varname")) %in% omb_aic)]
-
-    pop.template[get(temp.cat) == "Ethnicity", c("race_type", paste0("group_by", catnum)) := 'race_eth']
-    pop.template[get(temp.cat) == "Gender",  paste0("group_by", catnum) := 'genders']
-    pop.template[get(temp.cat) %in% c("Race", "Race/ethnicity") & get(paste0(temp.cat, "_varname")) == 'race4',
-                 paste0("group_by", catnum) := 'race_eth']
-    pop.template[(get(temp.cat) == "Race" & get(paste0(temp.cat, "_varname")) == 'race3') ,
-                 paste0("group_by", catnum) := 'race']
-    pop.template[get(temp.cat) == "Regions" & (is.na(geo_type) | geo_type != 'hra'), `:=` (geo_type = "region")]
-    pop.template[get(temp.cat) == "Big cities", `:=` (geo_type = "hra")]
-    pop.template[get(temp.cat) == "Washington State", `:=` (geo_type = "wa")]
-  }
-
-  pop.template[grepl("poverty$", cat1, ignore.case = T) | grepl("poverty$", cat2, ignore.case = T), geo_type := "blk"]
-  if(povgeo == 'zip'){
-    pop.template[grepl("poverty$", cat1, ignore.case = T) | grepl("poverty$", cat2, ignore.case = T), geo_type := "zip"]
-  }
-  pop.template[(cat1 == "Regions" & cat2 == "Cities/neighborhoods") |
-                 (cat2 == "Regions" & cat1 == "Cities/neighborhoods"),
-               geo_type := "blk"]
-
-  # the only AIC race/eth with population data are the OMB standard categories
-
-  pop.template[is.na(cat2), cat2 := "NA"] # temporarily set NA to "NA" to facilitate processing with function
-
-  pop.template[is.na(geo_type), geo_type := 'kc'] # when not specified, it is for KC
-
-  pop.template <- unique(pop.template) # because want to minimize the calls to get_popualation to improve speed
+  # Return final results ----
+    # Remove duplicate rows to minimize calls to get_population for efficiency
+    return(unique(pop.template))
 }

From 34d7401bf6f8dafd755a935b6fce3e0239bd4506 Mon Sep 17 00:00:00 2001
From: Danny Colombara <dcolombara@kingcounty.gov>
Date: Fri, 7 Mar 2025 16:11:27 -0800
Subject: [PATCH 04/11] rename proto_chi_generate_instructions_pop

 - file is now called chi_generate_instructions_pop
---
 ...enerate_instructions_pop.R => chi_generate_instructions_pop.R} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename R/{proto_chi_generate_instructions_pop.R => chi_generate_instructions_pop.R} (100%)

diff --git a/R/proto_chi_generate_instructions_pop.R b/R/chi_generate_instructions_pop.R
similarity index 100%
rename from R/proto_chi_generate_instructions_pop.R
rename to R/chi_generate_instructions_pop.R

From 78691b4fc4f87ba2b55069a2d9bd338c24da66e8 Mon Sep 17 00:00:00 2001
From: Danny Colombara <dcolombara@kingcounty.gov>
Date: Mon, 10 Mar 2025 17:40:56 -0700
Subject: [PATCH 05/11] update / revise chi_get_proper_pop

* Improved documentation with detailed Roxygen comments
* Added input validation with helpful error messages
* Clearer section comments
* Replaced tidyr::crossing with data.table
* progressr progress tracking
* Standardized variable naming
* Improved work with categorical variables and crosswalks
---
 NAMESPACE                            |   1 +
 R/chi_generate_instructions_pop.R    |   6 +-
 R/globals.R                          |   1 +
 R/proto_chi_get_proper_pop.R         | 788 ++++++++++++++++++---------
 man/chi_count_by_age.Rd              |   2 +-
 man/chi_generate_instructions_pop.Rd |  42 +-
 man/chi_get_proper_pop.Rd            |  56 ++
 7 files changed, 623 insertions(+), 273 deletions(-)
 create mode 100644 man/chi_get_proper_pop.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 698bbab..de2cc8a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -9,6 +9,7 @@ export(chi_generate_instructions_pop)
 export(chi_generate_metadata)
 export(chi_generate_tro_shell)
 export(chi_get_cols)
+export(chi_get_proper_pop)
 export(chi_get_yaml)
 export(chi_qa_tro)
 export(chi_update_sql)
diff --git a/R/chi_generate_instructions_pop.R b/R/chi_generate_instructions_pop.R
index 4883416..92d74db 100644
--- a/R/chi_generate_instructions_pop.R
+++ b/R/chi_generate_instructions_pop.R
@@ -1,12 +1,12 @@
 #' Generate Population Instructions for CHI Analysis
 #'
 #' @description
-#' Creates instructions for \link{\code{CHI_get_proper_pop}} based on a table of count
+#' Creates instructions for \code{\link{chi_get_proper_pop}} based on a table of count
 #' data. These instructions configure appropriate demographic groupings,
 #' geographic types, and time periods for retrieving population denominators used
 #' in CHI rate calculations.
 #'
-#' @param mycount.data Input data.table produced by \link{\code{chi_count_by_age}},
+#' @param mycount.data Input data.table produced by \code{\link{chi_count_by_age}},
 #' containing the following columns:
 #'  \itemize{
 #'    \item indicator_key: indicator_key used by CHI
@@ -17,7 +17,7 @@
 #'    \item chi_age: Single year age
 #'    \item count: Count of events (births, death, hospitalizations, etc. )
 #'  }
-#' @param Geographic level for poverty analysis ('blk' or 'zip')
+#' @param povgeo Geographic level for poverty analysis ('blk' or 'zip')
 #'
 #' @return A data.table containing population processing instructions with columns:
 #'  \itemize{
diff --git a/R/globals.R b/R/globals.R
index 81642ca..fb2743b 100644
--- a/R/globals.R
+++ b/R/globals.R
@@ -49,6 +49,7 @@ utils::globalVariables(c(
   "latest_yearx",
   "level",
   "lower_bound",
+  "mykey",
   "notable",
   "numerator",
   "overall",
diff --git a/R/proto_chi_get_proper_pop.R b/R/proto_chi_get_proper_pop.R
index dff8d76..2d1fa91 100644
--- a/R/proto_chi_get_proper_pop.R
+++ b/R/proto_chi_get_proper_pop.R
@@ -1,277 +1,541 @@
-# CHI_get_proper_pop() - function to get population for a single row specified by the output of CHI_generate_instructions_pop() ----
-chi_get_proper_pop <- function(pop.template = NULL, pop.genders = NULL, pop.ages = NULL){
-  # check for valid values of pop.genders ----
-  if(is.null(pop.genders)){gendery = c("f", "m")
-  }else{if(!tolower(pop.genders) %in% c('f', 'female', 'm', 'male')){
-    stop("\n\U0001f47f if pop.genders is specified it must have one of the following values: 'F', 'f', 'Female', 'female', 'M', 'm', 'Male', or 'male'")
-  } else {gendery = pop.genders}}
-
-  # check for valid values of pop.ages ----
-  if(is.null(pop.ages)){agesy = c(0:100)
-  }else{if(!is.integer(pop.ages)){
-    stop("\n\U0001f47f if pop.ages is specified it must be vector of integers, e.g., c(0:65)")
-  } else {agesy = pop.ages}}
-
-  # create function to generate the population table corresponding to each row of the pop.template----
-  CHI_get_proper_pop_engine <- function(X, pop.template = NULL){
-    # Status updates ----
-    print(paste0("Process ID ", Sys.getpid(), ": Getting population ", X, " out of ", nrow(pop.template)))
-
-    # Drop prefix when using maternal data because do not want to create multiple alternative codings below ----
-    pop.template[grepl("birthing person", cat1, ignore.case = T), cat1 := tools::toTitleCase(gsub("Birthing person's ", "", cat1))]
-    pop.template[grepl("birthing person", cat2, ignore.case = T), cat2 := tools::toTitleCase(gsub("Birthing person's ", "", cat2))]
-
-    # create the group_by argument ----
-    groupy <- unique(c(c("ages", "geo_id"), setdiff(c(pop.template[X, group_by1], pop.template[X, group_by2]), c(NA))))
-
-    # use rads::get_population ----
-    if(is.na(pop.template[X, geo_type])){
-      tempy <- rads::get_population(group_by = groupy,
-                              race_type = pop.template[X, race_type],
-                              years = pop.template[X, start]:pop.template[X, stop],
-                              genders = gendery,
-                              ages = agesy,
-                              round = F)
-    }
-    if(!is.na(pop.template[X, geo_type])){
-      tempy <- rads::get_population(group_by = groupy,
-                              geo_type = pop.template[X, geo_type],
-                              race_type = pop.template[X, race_type],
-                              years = pop.template[X, start]:pop.template[X, stop],
-                              genders = gendery,
-                              ages = agesy,
-                              round = F)
-    }
-
-    # tidy the population data ----
-    for(catnum in c("cat1", "cat2")){
-      # misc ----
-      tempy[, paste0(catnum) := pop.template[X, get(catnum)]]
-      tempy[, paste0(catnum, "_varname") := pop.template[X, get(paste0(catnum, "_varname"))]]
-
-      tempy[get(catnum) == "King County", paste0(catnum, "_group") := "King County"]
-
-      tempy[get(catnum) == "Washington State", paste0(catnum, "_group") := "Washington State"]
-
-      suppressWarnings(tempy[get(catnum) == "NA" | is.na(get(catnum)),
-                             c(catnum, paste0(catnum, "_group"), paste0(catnum, "_varname")) := "NA"]) # just a random fill value for NA, which will be changed to true NA later
-
-      tempy[get(catnum) %in% c("Cities/neighborhoods", "Regions") &  pop.template[X, geo_type] != 'blk',
-            paste0(catnum, "_group") := geo_id]
-
-      tempy[get(catnum) %in% c("Gender"), paste0(catnum, "_group") := gender]
-
-      tempy[get(catnum) %in% c("Overall"), paste0(catnum, "_group") := "Overall"]
-
-
-      # race/eth ----
-      tempy[get(catnum) == "Ethnicity" | get(paste0(catnum, "_varname")) %in% c('race4'), paste0(catnum, "_group") := race_eth]
-      tempy[get(catnum) == 'Race' & get(paste0(catnum, "_varname")) %in% c('race3'), paste0(catnum, "_group") := race]
-      tempy[get(paste0(catnum, "_group")) == "Multiple race", paste0(catnum, "_group") := "Multiple"]
-      tempy <- tempy[get(catnum) != "Ethnicity" | (get(catnum) == "Ethnicity" & get(paste0(catnum, "_group")) == 'Hispanic'), ]
-
-      # race_aic ----
-      if(pop.template[X, race_type] == 'race_aic'){
-        tempy <- tempy[!(grepl('_aic_', get(paste0(catnum, "_varname"))) &
-                           !((get(paste0(catnum, "_varname")) == 'chi_race_aic_aian' & race_aic == 'AIAN') |
-                               (get(paste0(catnum, "_varname")) == 'chi_race_aic_asian' & race_aic == 'Asian') |
-                               (get(paste0(catnum, "_varname")) == 'chi_race_aic_black' & race_aic == 'Black')|
-                               (get(paste0(catnum, "_varname")) == 'chi_race_aic_his' & race_aic == 'Hispanic') |
-                               (get(paste0(catnum, "_varname")) == 'chi_race_aic_nhpi' & race_aic == 'NHPI') |
-                               (get(paste0(catnum, "_varname")) == 'chi_race_aic_wht' & race_aic == 'White'))
-        )]
-        tempy[grep('_aic', get(paste0(catnum, "_varname"))), paste0(catnum, "_group") := race_aic]
-      }
-
-      # HRAS ----
-      if(tempy[1, geo_type] == 'blk' & tempy[1, get(catnum)] == 'Cities/neighborhoods'){
-        temp.xwalk <- rads.data::spatial_block20_to_hra20_to_region20[, list(geo_id = GEOID20, hra20_name)]
-        tempy <- merge(tempy, temp.xwalk, by = "geo_id", all.x = T, all.y = F)
-        tempy[, paste0(catnum, "_group") := hra20_name]
+#' Get Population Denominators for CHI Analysis
+#'
+#' @description
+#' Retrieves population estimates based on instructions generated by
+#' \code{\link{chi_generate_instructions_pop}}. This function processes demographic
+#' categories, geographic levels, and time periods to create appropriate
+#' population denominators for use in CHI rate calculations.
+#'
+#' @param pop.template A data.table produced by \code{\link{chi_generate_instructions_pop}},
+#' containing instructions for population data retrieval with the following columns:
+#'  \itemize{
+#'    \item year: Year range (e.g., "2019-2021" or single year)
+#'    \item cat1, cat1_varname: Primary stratification variables
+#'    \item cat2, cat2_varname: Secondary stratification variables
+#'    \item tab: Visualization tab type
+#'    \item start, stop: Start and end years parsed from the year range
+#'    \item race_type: Race categorization type ('race', 'race_eth', or 'race_aic')
+#'    \item geo_type: Geographic level for analysis ('kc', 'hra', 'region', 'blk', 'zip', 'wa')
+#'    \item group_by1, group_by2: Race/eth grouping specifications ('race_eth', 'race')
+#'  }
+#' @param pop.genders Optional character vector specifying which genders to include.
+#' Valid values are 'f', 'female', 'm', 'male'. If NULL (default), both genders are included.
+#' @param pop.ages Optional integer vector specifying which ages to include.
+#' If NULL (default), ages 0-100 are included.
+#'
+#' @return A data.table containing population counts with columns:
+#'  \itemize{
+#'    \item chi_age: Single year age
+#'    \item year: Year
+#'    \item cat1, cat1_varname, cat1_group: Primary stratification variables
+#'    \item cat2, cat2_varname, cat2_group: Secondary stratification variables
+#'    \item tab: Visualization tab type
+#'    \item pop: Population count
+#'  }
+#'
+#' @details
+#' The function performs multiple steps to generate proper population denominators:
+#' 1. Validates input parameters
+#' 2. Creates population tables for each row of the template using rads::get_population
+#' 3. Handles special cases for various geographic aggregations and crosswalks
+#' 4. Returns a comprehensive, tidy dataset with population counts
+#'
+#' @seealso
+#' \code{\link{chi_generate_instructions_pop}} which generates the instructions used as input
+#' to this function
+#'
+#' @importFrom data.table setDT rbindlist `:=`
+#' @importFrom future.apply future_lapply
+#' @importFrom tools toTitleCase
+#' @importFrom progressr handlers progressor with_progress
+#' @export
+#'
+chi_get_proper_pop <- function(pop.template = NULL, pop.genders = NULL, pop.ages = NULL) {
+  # Validation of arguments ----
+    # pop.template
+      if (is.null(pop.template)) {
+        stop("\n\U1F6D1 pop.template parameter is required")
       }
 
-      # Regions ----
-      if(tempy[1, geo_type] == 'blk' & tempy[1, get(catnum)] == 'Regions'){
-        temp.xwalk <- rads.data::spatial_block20_to_hra20_to_region20[, list(geo_id = GEOID20, region_name)]
-        tempy <- merge(tempy, temp.xwalk, by = 'geo_id', all.x = T, all.y = F)
-
-        tempy[, paste0(catnum, "_group") := region_name]
+      if (!is.data.frame(pop.template)) {
+        stop("\n\U1F6D1 pop.template must be a data.frame or data.table")
+      } else {
+        pop.template <- setDT(copy(pop.template))
       }
 
-      if(tempy[1, geo_type] == 'hra' & tempy[1, get(catnum)] == 'Regions'){
-        temp.xwalk <- rads.data::spatial_hra20_to_region20[, list(geo_id = hra20_name, region_name)]
-        tempy <- merge(tempy, temp.xwalk, by = 'geo_id', all.x = T, all.y = F)
-
-        tempy[, paste0(catnum, "_group") := region_name]
-      }
+      # Check for required columns
+      required_columns <- c("year", "cat1", "cat1_varname", "cat2", "cat2_varname",
+                            "start", "stop", "race_type", "geo_type", "tab")
+      missing_columns <- required_columns[!required_columns %in% names(pop.template)]
 
-      if(tempy[1, geo_type] == 'zip' & tempy[1, get(catnum)] == 'Regions'){
-        zip_2_region <- rads.data::spatial_zip_to_hra20_pop
-        zip_2_region <- merge(zip_2_region,
-                              rads.data::spatial_hra20_to_region20[, list(hra20_name, region = region_name)],
-                              by = 'hra20_name',
-                              all = T)
-        zip_2_region <- zip_2_region[, list(s2t_fraction = sum(s2t_fraction)), # collapse fractions down to region level
-                                     list(geo_id = as.character(source_id), region)]
-
-        tempy <- merge(tempy, zip_2_region, by = "geo_id", all.x = T, all.y = F, allow.cartesian = T)
-        tempy[, pop := pop * s2t_fraction] # calculate weighted pop
-        tempy[, paste0(catnum, "_group") := region]
+      if (length(missing_columns) > 0) {
+        stop("\n\U1F6D1 pop.template is missing required columns: ",
+             paste(missing_columns, collapse = ", "))
       }
 
-      # Big Cities ----
-      if(tempy[1, get(catnum)] == 'Big cities'){
-        if(tempy[1, geo_type] == 'blk'){
-          blk20_hra20 <- rads.data::spatial_block20_to_hra20_to_region20[, list(geo_id = GEOID20, hra20_name)]
-          tempy <- merge(tempy, blk20_hra20, by = "geo_id", all.x = T, all.y = F)
-          hra20_bigcity <- rads.data::spatial_hra20_to_bigcities[, list(hra20_name, bigcity)]
-          tempy <- merge(tempy, hra20_bigcity, by = 'hra20_name', all.x = T, all.y = F)
+    # pop.genders
+      if (is.null(pop.genders)) {
+        gender_values <- c("f", "m")
+      } else {
+        if (!tolower(pop.genders) %in% c('f', 'female', 'm', 'male')) {
+          stop("\n\U0001f47f if pop.genders is specified, it is limited to one of the following values: 'F', 'f', 'Female', 'female', 'M', 'm', 'Male', or 'male'")
+        } else {
+          gender_values <- pop.genders
         }
-        if(tempy[1, geo_type] == 'hra'){
-          hra20_bigcity <- rads.data::spatial_hra20_to_bigcities[, list(hra20_name, bigcity)]
-          tempy <- merge(tempy, hra20_bigcity, by.x = 'geo_id', by.y = 'hra20_name', all.x = T, all.y = F)
-        }
-        tempy[, paste0(catnum, "_group") := bigcity]
-      }
-
-      # age6 ----
-      tempy[get(paste0(catnum, "_varname")) == "age6" & age %in% 0:17, paste0(catnum, "_group") := "<18"]
-      tempy[get(paste0(catnum, "_varname")) == "age6" & age %in% 18:24, paste0(catnum, "_group") := "18-24"]
-      tempy[get(paste0(catnum, "_varname")) == "age6" & age %in% 25:44, paste0(catnum, "_group") := "25-44"]
-      tempy[get(paste0(catnum, "_varname")) == "age6" & age %in% 45:64, paste0(catnum, "_group") := "45-64"]
-      tempy[get(paste0(catnum, "_varname")) == "age6" & age %in% 65:74, paste0(catnum, "_group") := "65-74"]
-      tempy[get(paste0(catnum, "_varname")) == "age6" & age >= 75, paste0(catnum, "_group") := "75+"]
-
-      # mage5 ----
-      tempy[get(paste0(catnum, "_varname")) == "mage5" & age %in% 10:17, paste0(catnum, "_group") := "10-17"]
-      tempy[get(paste0(catnum, "_varname")) == "mage5" & age %in% 18:24, paste0(catnum, "_group") := "18-24"]
-      tempy[get(paste0(catnum, "_varname")) == "mage5" & age %in% 25:34, paste0(catnum, "_group") := "25-34"]
-      tempy[get(paste0(catnum, "_varname")) == "mage5" & age %in% 35:44, paste0(catnum, "_group") := "35-44"]
-      tempy[get(paste0(catnum, "_varname")) == "mage5" & age >=45, paste0(catnum, "_group") := "45+"]
-
-      # yage4 ----
-      tempy[get(paste0(catnum, "_varname")) == "yage4" & age %in% 0:4, paste0(catnum, "_group") := "0-4"]
-      tempy[get(paste0(catnum, "_varname")) == "yage4" & age %in% 5:9, paste0(catnum, "_group") := "5-9"]
-      tempy[get(paste0(catnum, "_varname")) == "yage4" & age %in% 10:14, paste0(catnum, "_group") := "10-14"]
-      tempy[get(paste0(catnum, "_varname")) == "yage4" & age %in% 15:17, paste0(catnum, "_group") := "15-17"]
-
-      # pov200grp ----
-      if(tempy[1, geo_type] == 'blk' & grepl("poverty$", tempy[1, get(catnum)], ignore.case = T)){
-        tempy[, geo_tract2020 := substr(geo_id, 1, 11)] # have blocks (15 char), so keep first 11 for tracts
-        tempy <- merge(tempy,
-                       rads.data::misc_poverty_groups[geo_type=='Tract'][, list(geo_tract2020 = geo_id, pov200grp)],
-                       by = "geo_tract2020",
-                       all.x = T,
-                       all.y = F)
-        tempy[, paste0(catnum, "_group") := pov200grp]
       }
-      if( tempy[1, geo_type] == 'zip' & grepl("poverty$", tempy[1, get(catnum)], ignore.case = T)){
-        tempy <- merge(tempy,
-                       rads.data::misc_poverty_groups[geo_type=='ZCTA'][, list(geo_id, pov200grp)],
-                       by = 'geo_id',
-                       all.x = T,
-                       all.y = F)
-        tempy[, paste0(catnum, "_group") := pov200grp]
+    # pop.ages
+      if (is.null(pop.ages)) {
+        age_values <- c(0:100)
+      } else {
+        if (!is.integer(pop.ages)) {
+          stop("\n\U0001f47f if pop.ages is specified it must be vector of integers, e.g., c(0:65)")
+        } else {
+          age_values <- pop.ages
+        }
       }
 
-    }
-
-    # Drop if is.na(cat1_group)
-    tempy <- tempy[!is.na(cat1_group)]
-
-    # drop if is.na(cat2_group)
-    tempy <- tempy[!(cat2 != 'NA' & (cat2_group == 'NA') | is.na(cat2_group))] # did not yet switch back to true NA at this point
-
-    # collapse to one row per demographic combination and keep minimum needed columns ----
-    tempy <- tempy[, list(pop = sum(pop)), list(chi_age = age, year, cat1, cat1_varname, cat1_group, cat2, cat2_varname, cat2_group)]
-
-    # ensure each demographic has rows for all relevant ages & only relevant ages ----
-    if(tempy[1]$cat1 == "Age"){
-      if(tempy[1]$cat1_varname == 'age6'){
-        tempage <- data.table(cat1 = "Age", cat1_varname = "age6", chi_age = 0:100)
-        tempage[, cat1_group := cut(chi_age,
-                                    breaks = c(-1, 17, 24, 44, 64, 74, 120),
-                                    labels = c("<18", "18-24", "25-44", "45-64", "65-74", "75+"))]}
-
-      if(tempy[1]$cat1_varname == 'mage5'){
-        tempage <- data.table(cat1 = "Age", cat1_varname = "mage5", chi_age = 10:100)
-        tempage[, cat1_group := cut(chi_age,
-                                    breaks = c(9, 17, 24, 34, 44, 120),
-                                    labels = c('10-17', '18-24', '25-34', '35-44', '45+'))]}
-
-      if(tempy[1]$cat1_varname == 'yage4'){
-        tempage <- data.table(cat1 = "Age", cat1_varname = "yage4", chi_age = 0:17)
-        tempage[, cat1_group := cut(chi_age,
-                                    breaks = c(-1, 4, 9, 14, 17),
-                                    labels = c('0-4', '5-9', '10-14', '15-17'))]}
-
-      temp.demog <- setDT(tidyr::crossing(unique(tempy[, list(year = as.character(year), cat2, cat2_varname, cat2_group)]),
-                                          tempage))
-    }
-
-    if(tempy[1]$cat2 == "Age"){
-      if(tempy[1]$cat2_varname == 'age6'){
-        tempage <- data.table(cat2 = "Age", cat2_varname = "age6", chi_age = 0:100)
-        tempage[, cat2_group := cut(chi_age,
-                                    breaks = c(-1, 17, 24, 44, 64, 74, 120),
-                                    labels = c("<18", "18-24", "25-44", "45-64", "65-74", "75+"))]}
-
-      if(tempy[1]$cat2_varname == 'mage5'){
-        tempage <- data.table(cat2 = "Age", cat2_varname = "mage5", chi_age = 10:100)
-        tempage[, cat2_group := cut(chi_age,
-                                    breaks = c(9, 17, 24, 34, 44, 120),
-                                    labels = c('10-17', '18-24', '25-34', '35-44', '45+'))]}
-
-      if(tempy[1]$cat2_varname == 'yage4'){
-        tempage <- data.table(cat2 = "Age", cat2_varname = "yage4", chi_age = 0:17)
-        tempage[, cat2_group := cut(chi_age,
-                                    breaks = c(-1, 4, 9, 14, 17),
-                                    labels = c('0-4', '5-9', '10-14', '15-17'))]}
-
-      temp.demog <- setDT(tidyr::crossing(unique(tempy[, list(year = as.character(year), cat1, cat1_varname, cat1_group)]),
-                                          tempage))
-    }
+  # Define core population extraction function ----
+    get_population_for_template_row <- function(row_index, pop.template = NULL) {
+      # Status update with progress indicator ----
+        print(paste0("Process ID ", Sys.getpid(), ": Getting population ", row_index, " out of ", nrow(pop.template)))
+
+      # Extract current template row ----
+        current_row <- pop.template[row_index, ]
+
+      # Standardize category names ----
+        # Remove birthing person prefixes to standardize maternal data categories
+          pop.template[grepl("birthing person", cat1, ignore.case = TRUE),
+                       cat1 := tools::toTitleCase(gsub("Birthing person's ", "", cat1))]
+          pop.template[grepl("birthing person", cat2, ignore.case = TRUE),
+                       cat2 := tools::toTitleCase(gsub("Birthing person's ", "", cat2))]
+
+      # Build grouping parameters for pop query ----
+          grouping_vars <- unique(c(
+            c("ages", "geo_id"),
+            setdiff(c(current_row$group_by1, current_row$group_by2), c(NA))
+          ))
+
+      # Use rads::get_population() ----
+        if (is.na(current_row$geo_type)) {
+          population_data <- rads::get_population(
+            group_by = grouping_vars,
+            race_type = current_row$race_type,
+            years = current_row$start:current_row$stop,
+            genders = gender_values,
+            ages = age_values,
+            round = FALSE
+          )
+        } else {
+          population_data <- rads::get_population(
+            group_by = grouping_vars,
+            geo_type = current_row$geo_type,
+            race_type = current_row$race_type,
+            years = current_row$start:current_row$stop,
+            genders = gender_values,
+            ages = age_values,
+            round = FALSE
+          )
+        }
 
-    if(!"Age" %in% unique(c(tempy$cat1, tempy$cat2))){
-      # all combinations of cat1 x cat2
-      temp.demog <- setDT(tidyr::crossing(
-        unique(tempy[, list(cat1, cat1_varname, cat1_group)]),
-        unique(tempy[, list(cat2, cat2_varname, cat2_group)])
-      ))
-      # all combination of cat table with year and age
-      temp.demog <- setDT(tidyr::crossing(
-        temp.demog,
-        data.table(year = as.character(pop.template[X, ]$year), chi_age = 0:100)
-      ))
+      # Process demographic categories ----
+      # Apply category grouping logic for both primary (cat1) and secondary (cat2) categories
+        for (catnum in c("1", "2")) {
+          ## Add category information from template to result ----
+            temp.cat <- paste0("cat", catnum)
+            temp.varname <- paste0("cat", catnum, '_varname')
+            temp.group <- paste0("cat", catnum, '_group')
+            temp.groupby <- paste0("group_by", catnum)
+
+            # had to use set function because regular := syntax caused errors b/c used temp.cat differently on both sides of :=
+            data.table::set(population_data,
+                            j = temp.cat,
+                            value = current_row[[temp.cat]])
+
+            data.table::set(population_data,
+                            j = temp.varname,
+                            value = current_row[[temp.varname]])
+
+            data.table::set(population_data,
+                            j = temp.group,
+                            value = current_row[[temp.groupby]])
+
+          ## Process geographic categories ----
+            # King County
+            population_data[get(temp.cat) == "King County",
+                            c(temp.group) := "King County"]
+
+            # Washington State
+            population_data[get(temp.cat) == "Washington State",
+                            c(temp.group) := "Washington State"]
+
+            # Handle NA values
+            suppressWarnings(
+              population_data[get(temp.cat) == "NA" | is.na(get(temp.cat)),
+                              c(temp.cat, temp.group, temp.varname) := "NA"]
+            )
+
+            # Cities/neighborhoods and Regions
+            population_data[get(temp.cat) %in% c("Cities/neighborhoods", "Regions") &
+                              current_row$geo_type != 'blk',
+                            c(temp.group) := geo_id]
+
+          ## Process gender ----
+            population_data[get(temp.cat) %in% c("Gender"), c(temp.group) := gender]
+
+          ## Process 'Overall' ----
+            population_data[get(temp.cat) %in% c("Overall"), c(temp.group) := "Overall"]
+
+          ## Process race/ethnicity categories ----
+              population_data[get(temp.cat) == "Ethnicity" | get(temp.varname) %in% c('race4'),
+                              c(temp.group) := race_eth]
+
+              population_data[get(temp.cat) == 'Race' & get(temp.varname) %in% c('race3'),
+                              c(temp.group) := race]
+
+              population_data <- population_data[get(temp.cat) != "Ethnicity" | (get(temp.cat) == "Ethnicity" & get(temp.group) == 'Hispanic'), ]
+
+              population_data[get(temp.group) == "Multiple race",
+                              c(temp.group) := "Multiple"]
+
+          ## Process race_aic (alone or in combination) categories ----
+            if (current_row$race_type == 'race_aic') {
+              # Filter to keep only relevant race_aic combinations
+              population_data <- population_data[
+                !(grepl('_aic_', get(temp.varname)) &
+                    !((get(temp.varname) == 'chi_race_aic_aian' & race_aic == 'AIAN') |
+                        (get(temp.varname) == 'chi_race_aic_asian' & race_aic == 'Asian') |
+                        (get(temp.varname) == 'chi_race_aic_black' & race_aic == 'Black') |
+                        (get(temp.varname) == 'chi_race_aic_his' & race_aic == 'Hispanic') |
+                        (get(temp.varname) == 'chi_race_aic_nhpi' & race_aic == 'NHPI') |
+                        (get(temp.varname) == 'chi_race_aic_wht' & race_aic == 'White'))
+                )
+              ]
+
+              # Assign race_aic value to group
+              population_data[grep('_aic', get(temp.varname)),
+                              c(temp.group) := race_aic]
+            }
+
+          ## Process HRAs ----
+            if (population_data[1, geo_type] == 'blk' & population_data[1, get(temp.cat)] == 'Cities/neighborhoods') {
+
+              hra_crosswalk <- rads.data::spatial_block20_to_hra20_to_region20[, list(geo_id = GEOID20, hra20_name)]
+
+              population_data <- merge(population_data,
+                                       hra_crosswalk,
+                                       by = "geo_id",
+                                       all.x = TRUE,
+                                       all.y = FALSE)
+
+              population_data[, c(temp.group) := hra20_name]
+            }
+
+          ## Process Regions ----
+            # Block to Region crosswalk
+              if (population_data[1, geo_type] == 'blk' & population_data[1, get(temp.cat)] == 'Regions') {
+
+                  region_crosswalk <- rads.data::spatial_block20_to_hra20_to_region20[, list(geo_id = GEOID20, region_name)]
+
+                  population_data <- merge(population_data,
+                                           region_crosswalk,
+                                           by = 'geo_id',
+                                           all.x = TRUE,
+                                           all.y = FALSE)
+
+                  population_data[, c(temp.group) := region_name]
+              }
+
+            # HRA to Region crosswalk
+              if (population_data[1, geo_type] == 'hra' & population_data[1, get(temp.cat)] == 'Regions') {
+
+                  region_crosswalk <- rads.data::spatial_hra20_to_region20[, list(geo_id = hra20_name, region_name)]
+
+                  population_data <- merge(population_data,
+                                           region_crosswalk,
+                                           by = 'geo_id',
+                                           all.x = TRUE,
+                                           all.y = FALSE)
+
+                  population_data[, c(temp.group) := region_name]
+              }
+
+            # ZIP to Region crosswalk with population weighting
+              if (population_data[1, geo_type] == 'zip' & population_data[1, get(temp.cat)] == 'Regions') {
+
+                  # Create ZIP to region crosswalk with population weights
+                  zip_region_crosswalk <- rads.data::spatial_zip_to_hra20_pop
+
+                  zip_region_crosswalk <- merge(zip_region_crosswalk,
+                                                rads.data::spatial_hra20_to_region20[, list(hra20_name, region = region_name)],
+                                                by = 'hra20_name',
+                                                all = TRUE)
+
+                  # Aggregate fractional populations to region level
+                  zip_region_crosswalk <- zip_region_crosswalk[,list(s2t_fraction = sum(s2t_fraction)),
+                                                               list(geo_id = as.character(source_id), region)]
+
+                  # Apply population weighting by region
+                  population_data <- merge(population_data,
+                                           zip_region_crosswalk,
+                                           by = "geo_id",
+                                           all.x = TRUE,
+                                           all.y = FALSE,
+                                           allow.cartesian = TRUE)
+                  population_data[, pop := pop * s2t_fraction] # Apply weight to population
+                  population_data[, c(temp.group) := region]
+              }
+
+          ## Process Big Cities ----
+            if (population_data[1, get(temp.cat)] == 'Big cities') {
+              # Block to big city crosswalk
+              if (population_data[1, geo_type] == 'blk') {
+
+                # Two-step crosswalk: block to HRA to big city
+                block_to_hra <- rads.data::spatial_block20_to_hra20_to_region20[, list(geo_id = GEOID20, hra20_name)]
+                hra_to_bigcity <- rads.data::spatial_hra20_to_bigcities[, list(hra20_name, bigcity)]
+
+                block_to_bigcity <- merge(hra_to_bigcity,
+                                          block_to_hra,
+                                          by = 'hra20_name',
+                                          all.x = T,
+                                          all.y = F)[, hra20_name := NULL]
+
+                population_data <- merge(population_data,
+                                         block_to_bigcity,
+                                         by = "geo_id",
+                                         all.x = TRUE,
+                                         all.y = FALSE)
+              }
+
+              # HRA to big city crosswalk
+              if (population_data[1, geo_type] == 'hra') {
+                hra_to_bigcity <- rads.data::spatial_hra20_to_bigcities[, list(hra20_name, bigcity)]
+                population_data <- merge(population_data,
+                                         hra_to_bigcity,
+                                         by.x = 'geo_id',
+                                         by.y = 'hra20_name',
+                                         all.x = TRUE,
+                                         all.y = FALSE)
+              }
+
+              # Assign big city name to group
+              population_data[, c(temp.group) := bigcity]
+            }
+
+          ## Process age groupings ----
+            # Age 6 groups: <18, 18-24, 25-44, 45-64, 65-74, 75+
+              population_data[get(temp.varname) == "age6" & age %in% 0:17,
+                              c(temp.group) := "<18"]
+              population_data[get(temp.varname) == "age6" & age %in% 18:24,
+                              c(temp.group) := "18-24"]
+              population_data[get(temp.varname) == "age6" & age %in% 25:44,
+                              c(temp.group) := "25-44"]
+              population_data[get(temp.varname) == "age6" & age %in% 45:64,
+                              c(temp.group) := "45-64"]
+              population_data[get(temp.varname) == "age6" & age %in% 65:74,
+                              c(temp.group) := "65-74"]
+              population_data[get(temp.varname) == "age6" & age >= 75,
+                              c(temp.group) := "75+"]
+
+            # Maternal age 5 groups: 10-17, 18-24, 25-34, 35-44, 45+
+              population_data[get(temp.varname) == "mage5" & age %in% 10:17,
+                              c(temp.group) := "10-17"]
+              population_data[get(temp.varname) == "mage5" & age %in% 18:24,
+                              c(temp.group) := "18-24"]
+              population_data[get(temp.varname) == "mage5" & age %in% 25:34,
+                              c(temp.group) := "25-34"]
+              population_data[get(temp.varname) == "mage5" & age %in% 35:44,
+                              c(temp.group) := "35-44"]
+              population_data[get(temp.varname) == "mage5" & age >= 45,
+                              c(temp.group) := "45+"]
+
+            # Youth age 4 groups: 0-4, 5-9, 10-14, 15-17
+              population_data[get(temp.varname) == "yage4" & age %in% 0:4,
+                              c(temp.group) := "0-4"]
+              population_data[get(temp.varname) == "yage4" & age %in% 5:9,
+                              c(temp.group) := "5-9"]
+              population_data[get(temp.varname) == "yage4" & age %in% 10:14,
+                              c(temp.group) := "10-14"]
+              population_data[get(temp.varname) == "yage4" & age %in% 15:17,
+                              c(temp.group) := "15-17"]
+
+          ## Process poverty groupings ----
+            # Block level poverty
+              if (population_data[1, geo_type] == 'blk' &
+                  grepl("poverty$", population_data[1, get(temp.cat)], ignore.case = TRUE)) {
+                # Extract tract ID from block ID (first 11 characters)
+                population_data[, geo_tract2020 := substr(geo_id, 1, 11)]
+
+                # Join poverty group data
+                population_data <- merge(
+                  population_data,
+                  rads.data::misc_poverty_groups[geo_type == 'Tract'][, list(geo_tract2020 = geo_id, pov200grp)],
+                  by = "geo_tract2020",
+                  all.x = TRUE,
+                  all.y = FALSE
+                )
+
+                # Assign poverty group
+                population_data[, c(temp.group) := pov200grp]
+              }
+
+            # ZIP level poverty
+              if (population_data[1, geo_type] == 'zip' &
+                  grepl("poverty$", population_data[1, get(temp.cat)], ignore.case = TRUE)) {
+                # Join poverty group data
+                population_data <- merge(
+                  population_data,
+                  rads.data::misc_poverty_groups[geo_type == 'ZCTA'][, list(geo_id, pov200grp)],
+                  by = 'geo_id',
+                  all.x = TRUE,
+                  all.y = FALSE
+                )
+
+                # Assign poverty group
+                population_data[, c(temp.group) := pov200grp]
+              }
+
+          } # close looping over cat1/cat2
+
+      # Filter and clean results ----
+        # Remove rows with missing primary category group
+          population_data <- population_data[!is.na(cat1_group)]
+
+        # Remove rows with missing secondary category group (when category exists)
+          population_data <- population_data[!is.na(cat2) | cat2 == 'NA' | (!is.na(cat2_group) & cat2_group != 'NA')]
+
+      # Aggregate population data ----
+        # Collapse to one row per demographic combination with sum of population
+          population_data <- population_data[, list(pop = sum(pop)),
+                                             list(chi_age = age,
+                                                  year,
+                                                  cat1, cat1_varname, cat1_group,
+                                                  cat2, cat2_varname, cat2_group)]
+
+      # Generate complete demographic combinations ----
+        # Function to handle age group processing and demographic merging
+          process_age_category <- function(population_data, cat_num) {
+            # Define prefix and complementary prefix
+            cat_prefix <- paste0("cat", cat_num)
+            other_cat_prefix <- if(cat_num == 1) "cat2" else "cat1" # need to select the cat that does not have the age var
+
+            # Get the variable name for this category
+            cat_varname <- population_data[1][[paste0(cat_prefix, "_varname")]]
+
+            # Create appropriate age groups based on cat_varname
+            if (cat_varname == 'age6') {
+              age_groups <- data.table(
+                chi_age = 0:100
+              )
+              age_groups[, paste0(cat_prefix, "_group") := cut(
+                chi_age,
+                breaks = c(-1, 17, 24, 44, 64, 74, 120),
+                labels = c("<18", "18-24", "25-44", "45-64", "65-74", "75+")
+              )]
+            } else if (cat_varname == 'mage5') {
+              age_groups <- data.table(
+                chi_age = 10:100
+              )
+              age_groups[, paste0(cat_prefix, "_group") := cut(
+                chi_age,
+                breaks = c(9, 17, 24, 34, 44, 120),
+                labels = c('10-17', '18-24', '25-34', '35-44', '45+')
+              )]
+            } else if (cat_varname == 'yage4') {
+              age_groups <- data.table(
+                chi_age = 0:17
+              )
+              age_groups[, paste0(cat_prefix, "_group") := cut(
+                chi_age,
+                breaks = c(-1, 4, 9, 14, 17),
+                labels = c('0-4', '5-9', '10-14', '15-17')
+              )]
+            }
+
+            # Add category info
+            age_groups[, (cat_prefix) := "Age"]
+            age_groups[, paste0(cat_prefix, "_varname") := cat_varname]
+
+            # Combine demographic dimensions with age groups
+            cols_to_select <- c("year", other_cat_prefix,
+                                paste0(other_cat_prefix, "_varname"),
+                                paste0(other_cat_prefix, "_group"))
+
+            unique_pop_data <- unique(population_data[, cols_to_select, with = FALSE][, key := 1])
+
+            complete_demographics <- unique_pop_data[age_groups[, key := 1], on = "key", allow.cartesian = TRUE]
+            complete_demographics[, key := NULL]
+
+            return(complete_demographics)
+          }
+
+        # Use function to handle age group processing
+          if (population_data[1]$cat1 == "Age") {
+            complete_demographics <- process_age_category(population_data, 1)
+          }
+
+          if (population_data[1]$cat2 == "Age") {
+            complete_demographics <- process_age_category(population_data, 2)
+          }
+
+          if (population_data[1]$cat1 != "Age" & population_data[1]$cat2 != "Age"){
+              # Get unique cat1 groups
+              cat1_groups <- unique(population_data[, list(cat1, cat1_varname, cat1_group, mykey = 1)])
+
+              # Get unique cat2 groups
+              cat2_groups <- unique(population_data[, list(cat2, cat2_varname, cat2_group, mykey = 1)])
+
+              # All combos of cat1 and cat2 groups
+              complete_demographics <- cat1_groups[cat2_groups, on = "mykey", allow.cartesian = TRUE]
+
+              # Create year and age combos
+              year_age <- data.table(year = as.character(current_row$year), chi_age = 0:100, mykey = 1)
+
+              # Get combos for each year/age cat1/cat2 combo
+              complete_demographics <- complete_demographics[year_age, on = "mykey", allow.cartesian = TRUE]
+
+              # Drop key
+              complete_demographics[, mykey := NULL]
+          }
+
+
+
+      # Merge population data with complete demographics grid ----
+        population_data <- suppressWarnings(merge(population_data, complete_demographics, all = TRUE))
+
+        population_data[is.na(pop), pop := 0] # Fill missing population values with zero
+
+      # Add tab column and finalize ----
+        population_data[, tab := current_row$tab]
+
+      # Convert placeholder "NA" strings back to true NA values ----
+        population_data[cat2 == "NA", c("cat2", "cat2_varname", "cat2_group") := NA]
+
+      # Return completed population dataset ----
+        return(population_data)
     }
 
-    tempy <- suppressWarnings(merge(tempy, temp.demog, all = T))
-    tempy[is.na(pop), pop := 0]
-
-    # create combinations of cat1_group and cat2_group that have no population and set == 0 ----
-    # no need with current get_population function, but keep as a placeholder / reminder
-
-    # add tab column ----
-    tempy[, tab := pop.template[X, tab]]
-
-    # tidy when is.na(cat2) ----
-    tempy[cat2 == "NA", c("cat2", "cat2_varname", "cat2_group") := NA]
-
-    # return object ----
-    return(tempy)
-  }
-
-  # use lapply to cycle over each rows and create one big final dataset ----
-  tempy.allpop <- rbindlist(
-    future_lapply(X = as.list(seq(1, nrow(pop.template))),
-                  FUN = function(X){
-                    set.seed(98104) # another attempt to set a seed
-                    CHI_get_proper_pop_engine(X, pop.template = pop.template)},
-                  future.seed = 98104)
-  )
-
-  tempy.allpop <- unique(tempy.allpop)
-
-  # return object ----
-  return(tempy.allpop)
+  # Process all template rows in parallel ----
+    # Combine results from all template rows using future_lapply for parallel processing
+      progressr::handlers(handler_progress())
+      progressr::with_progress({
+        p <- progressr::progressor(nrow(pop.template))
+          all_population_data <- rbindlist(
+            future_lapply(
+              X = as.list(seq(1, nrow(pop.template))),
+              FUN = function(row_index) {
+                p(paste0("Processing row ", row_index, " of ", nrow(pop.template) ))
+                set.seed(98104) # Set consistent seed for reproducibility
+                get_population_for_template_row(row_index, pop.template = pop.template)
+              },
+              future.seed = 98104 # Set seed for parallel processes
+            )
+          )
+      }) # closet progressr::with_progress()
+
+    # Remove duplicate rows in final dataset
+      all_population_data <- unique(all_population_data)
+
+  # Return final population dataset ----
+  return(all_population_data)
 }
diff --git a/man/chi_count_by_age.Rd b/man/chi_count_by_age.Rd
index d5645c5..109c7a5 100644
--- a/man/chi_count_by_age.Rd
+++ b/man/chi_count_by_age.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/proto_chi_count_by_age.R
+% Please edit documentation in R/chi_count_by_age.R
 \name{chi_count_by_age}
 \alias{chi_count_by_age}
 \title{Generate Age-Specific Counts for Community Health Indicators}
diff --git a/man/chi_generate_instructions_pop.Rd b/man/chi_generate_instructions_pop.Rd
index 90c6afa..1485bf6 100644
--- a/man/chi_generate_instructions_pop.Rd
+++ b/man/chi_generate_instructions_pop.Rd
@@ -1,20 +1,48 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/proto_chi_generate_instructions_pop.R
+% Please edit documentation in R/chi_generate_instructions_pop.R
 \name{chi_generate_instructions_pop}
 \alias{chi_generate_instructions_pop}
 \title{Generate Population Instructions for CHI Analysis}
 \usage{
-chi_generate_instructions_pop(mycount.data, povgeo = NA)
+chi_generate_instructions_pop(mycount.data, povgeo = c("blk", "zip"))
 }
 \arguments{
-\item{mycount.data}{Input data.table containing count data specifications}
+\item{mycount.data}{Input data.table produced by \code{\link{chi_count_by_age}},
+containing the following columns:
+ \itemize{
+   \item indicator_key: indicator_key used by CHI
+   \item year: Year range (e.g., "2019-2021" or single year)
+   \item tab: Visualization tab type
+   \item cat1, cat1_varname, cat1_group: Primary stratification variables
+   \item cat2, cat2_varname, cat2_group: Secondary stratification variables
+   \item chi_age: Single year age
+   \item count: Count of events (births, death, hospitalizations, etc. )
+ }}
 
-\item{povgeo}{Geographic level for poverty analysis (NA or 'zip')}
+\item{povgeo}{Geographic level for poverty analysis ('blk' or 'zip')}
 }
 \value{
-A data.table containing population processing instructions
+A data.table containing population processing instructions with columns:
+ \itemize{
+   \item year: Original year range from input
+   \item cat1, cat1_varname: Primary stratification details
+   \item cat2, cat2_varname: Secondary stratification details
+   \item tab: Visualization tab type
+   \item start, stop: Start and end years parsed from the year range
+   \item race_type: Race categorization type ('race', 'race_eth', or 'race_aic')
+   \item geo_type: Geographic level for analysis ('kc', 'hra', 'region', 'blk', 'zip', 'wa')
+   \item group_by1, group_by2: Demographic grouping specifications
+ }
 }
 \description{
-Creates a instructions for rads::get_population() based on count data
-specifications. Handles various geographic types and demographic groupings.
+Creates instructions for \code{\link{chi_get_proper_pop}} based on a table of count
+data. These instructions configure appropriate demographic groupings,
+geographic types, and time periods for retrieving population denominators used
+in CHI rate calculations.
+}
+\seealso{
+\code{\link{chi_count_by_age}} which generates the count data used as input
+to this function
+
+\code{\link{chi_get_proper_pop}} which uses the output of this function
 }
diff --git a/man/chi_get_proper_pop.Rd b/man/chi_get_proper_pop.Rd
new file mode 100644
index 0000000..1a7a292
--- /dev/null
+++ b/man/chi_get_proper_pop.Rd
@@ -0,0 +1,56 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/proto_chi_get_proper_pop.R
+\name{chi_get_proper_pop}
+\alias{chi_get_proper_pop}
+\title{Get Population Denominators for CHI Analysis}
+\usage{
+chi_get_proper_pop(pop.template = NULL, pop.genders = NULL, pop.ages = NULL)
+}
+\arguments{
+\item{pop.template}{A data.table produced by \code{\link{chi_generate_instructions_pop}},
+containing instructions for population data retrieval with the following columns:
+ \itemize{
+   \item year: Year range (e.g., "2019-2021" or single year)
+   \item cat1, cat1_varname: Primary stratification variables
+   \item cat2, cat2_varname: Secondary stratification variables
+   \item tab: Visualization tab type
+   \item start, stop: Start and end years parsed from the year range
+   \item race_type: Race categorization type ('race', 'race_eth', or 'race_aic')
+   \item geo_type: Geographic level for analysis ('kc', 'hra', 'region', 'blk', 'zip', 'wa')
+   \item group_by1, group_by2: Race/eth grouping specifications ('race_eth', 'race')
+ }}
+
+\item{pop.genders}{Optional character vector specifying which genders to include.
+Valid values are 'f', 'female', 'm', 'male'. If NULL (default), both genders are included.}
+
+\item{pop.ages}{Optional integer vector specifying which ages to include.
+If NULL (default), ages 0-100 are included.}
+}
+\value{
+A data.table containing population counts with columns:
+ \itemize{
+   \item chi_age: Single year age
+   \item year: Year
+   \item cat1, cat1_varname, cat1_group: Primary stratification variables
+   \item cat2, cat2_varname, cat2_group: Secondary stratification variables
+   \item tab: Visualization tab type
+   \item pop: Population count
+ }
+}
+\description{
+Retrieves population estimates based on instructions generated by
+\code{\link{chi_generate_instructions_pop}}. This function processes demographic
+categories, geographic levels, and time periods to create appropriate
+population denominators for use in CHI rate calculations.
+}
+\details{
+The function performs multiple steps to generate proper population denominators:
+1. Validates input parameters
+2. Creates population tables for each row of the template using rads::get_population
+3. Handles special cases for various geographic aggregations and crosswalks
+4. Returns a comprehensive, tidy dataset with population counts
+}
+\seealso{
+\code{\link{chi_generate_instructions_pop}} which generates the instructions used as input
+to this function
+}

From 02db414e6b81e86e7fa6a7220af68a20d94363a1 Mon Sep 17 00:00:00 2001
From: Danny Colombara <dcolombara@kingcounty.gov>
Date: Tue, 11 Mar 2025 09:31:07 -0700
Subject: [PATCH 06/11] FIXED error for dropping missing cat2_group

 - previous logic for dropping when cat2_group
   was NA / 'NA' and cat1 group NOT NA / 'NA'
   was flawed
---
 R/proto_chi_get_proper_pop.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/proto_chi_get_proper_pop.R b/R/proto_chi_get_proper_pop.R
index 2d1fa91..ad94e13 100644
--- a/R/proto_chi_get_proper_pop.R
+++ b/R/proto_chi_get_proper_pop.R
@@ -402,7 +402,8 @@ chi_get_proper_pop <- function(pop.template = NULL, pop.genders = NULL, pop.ages
           population_data <- population_data[!is.na(cat1_group)]
 
         # Remove rows with missing secondary category group (when category exists)
-          population_data <- population_data[!is.na(cat2) | cat2 == 'NA' | (!is.na(cat2_group) & cat2_group != 'NA')]
+          population_data <- population_data[is.na(cat2) | cat2 == 'NA' |
+                                               (!is.na(cat2) & cat2 != 'NA' & !is.na(cat2_group) & cat2_group != 'NA'),]
 
       # Aggregate population data ----
         # Collapse to one row per demographic combination with sum of population

From f395df3b775b8744f32949469e536508bf235361 Mon Sep 17 00:00:00 2001
From: Danny Colombara <dcolombara@kingcounty.gov>
Date: Tue, 11 Mar 2025 09:32:26 -0700
Subject: [PATCH 07/11] proto_chi_get_proper_pop > chi_get_proper_pop

 - just rename because now fully functional
---
 R/{proto_chi_get_proper_pop.R => chi_get_proper_pop.R} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename R/{proto_chi_get_proper_pop.R => chi_get_proper_pop.R} (100%)

diff --git a/R/proto_chi_get_proper_pop.R b/R/chi_get_proper_pop.R
similarity index 100%
rename from R/proto_chi_get_proper_pop.R
rename to R/chi_get_proper_pop.R

From 39aafe138b0f5fa264c92fb710f3b0b35b3476c8 Mon Sep 17 00:00:00 2001
From: Danny Colombara <dcolombara@kingcounty.gov>
Date: Tue, 11 Mar 2025 14:05:28 -0700
Subject: [PATCH 08/11] Improved proto_chi_drop_illogical_ages header

---
 R/globals.R                       |   2 +
 R/proto_chi_drop_illogical_ages.R | 111 +++++++++++++++++++++++++-----
 man/chi_drop_illogical_ages.Rd    |  47 ++++++++++---
 3 files changed, 136 insertions(+), 24 deletions(-)

diff --git a/R/globals.R b/R/globals.R
index fb2743b..72a30e3 100644
--- a/R/globals.R
+++ b/R/globals.R
@@ -49,6 +49,8 @@ utils::globalVariables(c(
   "latest_yearx",
   "level",
   "lower_bound",
+  "min_age",
+  "max_age",
   "mykey",
   "notable",
   "numerator",
diff --git a/R/proto_chi_drop_illogical_ages.R b/R/proto_chi_drop_illogical_ages.R
index 84a10cf..fdaa403 100644
--- a/R/proto_chi_drop_illogical_ages.R
+++ b/R/proto_chi_drop_illogical_ages.R
@@ -1,27 +1,106 @@
-#' Drop Illogical Age Combinations from CHI Data
+#' Filter Out Age Values That Don't Match Their Corresponding Age Group Categories
 #'
 #' @description
-#' Removes age combinations that don't make logical sense based on category
-#' groupings. For example, removes cases where the age falls outside the range
-#' specified by age group categories.
+#' This function filters a data.table to remove rows with inconsistent age values.
+#' It compares the single year age value (specified by \code{agevar}) against age
+#' ranges defined in \code{cat1_group} and \code{cat2_group} columns, keeping only
+#' those rows where:
+#' \itemize{
+#'   \item The categorical variable is not age-related, OR
+#'   \item The age value falls within the range specified by the corresponding age group
+#' }
 #'
-#' @param ph.data Input data.table to process
-#' @param agevar Name of the age variable (defaults to 'chi_age')
+#' Age groups are expected to be in formats like "10-17", "<5", or "45+", which
+#' the function automatically parses into numeric ranges.
+#'
+#' @param ph.data A data.table or data.frame containing category and age data to be filtered.
+#' @param agevar Character string specifying the name of the age variable column.
+#'
+#' Default: \code{agevar = 'chi_age'}
+#'
+#' @return A filtered data.table with only logically consistent age values
+#'
+#' @details
+#' The function interprets special formats in age group strings:
+#' \itemize{
+#'   \item "<1" is treated as age "0-0" (age zero)
+#'   \item "<N" is converted to "0-N" (ages 0 through N)
+#'   \item "N+" is converted to "N-120" (ages N through 120)
+#' }
+#'
+#' @seealso
+#' \code{\link{chi_count_by_age}} and \code{\link{chi_get_proper_pop}} - This function
+#' is typically applied to tables resulting from merging the output of these two functions.
+#'
+#' @examples
+#' \dontrun{
+#' # Assuming infmort_combo is a data.frame or data.table with age, cat1,
+#' # cat1_group, cat2, cat2_group columns:
+#'
+#' filtered_data <- chi_drop_illogical_ages(ph.data = infmort_combo, agevar = 'age')
+#' }
 #'
-#' @return A data.table with illogical age combinations removed
 #' @importFrom data.table copy := fcase between
 #' @export
 #'
+chi_drop_illogical_ages <- function(ph.data, agevar = 'chi_age') {
+  # Validate ph.data
+  if (!is.data.frame(ph.data)) {
+    stop("'ph.data' must be a data.frame or data.table")
+  }
+
+  # Convert to data.table if needed
+    setDT(ph.data)
+
+  # Check if required columns exist
+  critical_cols <- c(agevar, "cat1", "cat1_group", "cat2", "cat2_group")
+  missing_cols <- setdiff(critical_cols, names(ph.data))
+
+  if (length(missing_cols) > 0) {
+    stop(paste("The following required columns are missing:",
+               paste(missing_cols, collapse = ", ")))
+  }
 
-chi_drop_illogical_ages <- function(ph.data, agevar = 'chi_age'){
-  ph.data = copy(ph.data)
-  for(CatNum in c("cat1", "cat2")){
-    ph.data[, paste0(CatNum, '_group_temp') := fcase(get(paste0(CatNum, "_group")) == '<1', '0-0', # <1 is special!
-                                                 get(CatNum) %in% c("Age", "Birthing person's age"), gsub("<", "0-", gsub("\\+", "-120", get(paste0(CatNum, '_group')))))]
-    ph.data[, AgeMin := gsub("-.*", "", get(paste0(CatNum, '_group_temp')))]
-    ph.data[, AgeMax := gsub(".*-", "", get(paste0(CatNum, '_group_temp')))]
-    ph.data <- ph.data[!get(CatNum) %in% c("Age", "Birthing person's age")  | between(get(agevar), AgeMin, AgeMax)]
-    ph.data[, c("AgeMin", paste0(CatNum, '_group_temp'), "AgeMax") := NULL]
+  # Ensure agevar column contains numeric values
+  if (!is.numeric(ph.data[[agevar]])) {
+    warning(paste("'", agevar, "' column is not numeric. Attempting to convert...", sep = ""))
+    tryCatch({
+      ph.data[, (agevar) := as.numeric(get(agevar))]
+    }, error = function(e) {
+      stop(paste("'", agevar, "' could not be converted to numeric. Error: ", e$message, sep = ""))
+    })
   }
+
+  # Loop for cat1 and cat2
+  for (catnum in c("cat1", "cat2")) {
+    # Get column names for this category
+    catnum_group <- paste0(catnum, "_group")
+    temp_catnum_group <- paste0(catnum_group, "_temp")
+
+    # Create a standardized version of the age group
+    ph.data[, (temp_catnum_group) := data.table::fcase(
+      get(catnum_group) == '<1', '0-0',
+
+      grepl("<", get(catnum_group)), gsub("<", "0-", get(catnum_group)),
+
+      grepl("\\+", get(catnum_group)), gsub("\\+", "-120", get(catnum_group)),
+
+      grepl('-', get(catnum_group)), get(catnum_group)
+    )]
+
+    # Extract min and max age
+    ph.data[, "min_age" := as.numeric(gsub("-.*", "", get(temp_catnum_group)))]
+    ph.data[, "max_age" := as.numeric(gsub(".*-", "", get(temp_catnum_group)))]
+
+    # Keep rows where either:
+    # 1. The cat is not age-related, OR
+    # 2. The cat is age-related AND age value is within the min-max range
+    ph.data <- ph.data[!grepl(' age$|^age$', get(catnum), ignore.case = T) |
+                         data.table::between(get(agevar), min_age, max_age)]
+
+    # Clean up temporary columns
+    ph.data[, c(temp_catnum_group, "min_age", "max_age") := NULL]
+  }
+
   return(ph.data)
 }
diff --git a/man/chi_drop_illogical_ages.Rd b/man/chi_drop_illogical_ages.Rd
index 4d83d44..f4b906d 100644
--- a/man/chi_drop_illogical_ages.Rd
+++ b/man/chi_drop_illogical_ages.Rd
@@ -1,21 +1,52 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/proto_chi_drop_illogical_ages.R
+% Please edit documentation in R/chi_drop_illogical_ages.R
 \name{chi_drop_illogical_ages}
 \alias{chi_drop_illogical_ages}
-\title{Drop Illogical Age Combinations from CHI Data}
+\title{Filter Out Age Values That Don't Match Their Corresponding Age Group Categories}
 \usage{
 chi_drop_illogical_ages(ph.data, agevar = "chi_age")
 }
 \arguments{
-\item{ph.data}{Input data.table to process}
+\item{ph.data}{A data.table or data.frame containing category and age data to be filtered.}
 
-\item{agevar}{Name of the age variable (defaults to 'chi_age')}
+\item{agevar}{Character string specifying the name of the age variable column.
+
+Default: \code{agevar = 'chi_age'}}
 }
 \value{
-A data.table with illogical age combinations removed
+A filtered data.table with only logically consistent age values
 }
 \description{
-Removes age combinations that don't make logical sense based on category
-groupings. For example, removes cases where the age falls outside the range
-specified by age group categories.
+This function filters a data.table to remove rows with inconsistent age values.
+It compares the single year age value (specified by \code{agevar}) against age
+ranges defined in \code{cat1_group} and \code{cat2_group} columns, keeping only
+those rows where:
+\itemize{
+  \item The categorical variable is not age-related, OR
+  \item The age value falls within the range specified by the corresponding age group
+}
+
+Age groups are expected to be in formats like "10-17", "<5", or "45+", which
+the function automatically parses into numeric ranges.
+}
+\details{
+The function interprets special formats in age group strings:
+\itemize{
+  \item "<1" is treated as age "0-0" (age zero)
+  \item "<N" is converted to "0-N" (ages 0 through N)
+  \item "N+" is converted to "N-120" (ages N through 120)
+}
+}
+\examples{
+\dontrun{
+# Assuming infmort_combo is a data.frame or data.table with age, cat1,
+# cat1_group, cat2, cat2_group columns:
+
+filtered_data <- chi_drop_illogical_ages(ph.data = infmort_combo, agevar = 'age')
+}
+
+}
+\seealso{
+\code{\link{chi_count_by_age}} and \code{\link{chi_get_proper_pop}} - This function
+is typically applied to tables resulting from merging the output of these two functions.
 }

From 2ce28ace9c477c7a644b8ee96be358744966acc9 Mon Sep 17 00:00:00 2001
From: Danny Colombara <dcolombara@kingcounty.gov>
Date: Tue, 11 Mar 2025 14:07:16 -0700
Subject: [PATCH 09/11] fix chi_get_proper_pop

 - changed references to 'key' column to 'mykey'
   because `key` is reserved (or maybe should
   be reserved) by data.table
---
 R/chi_get_proper_pop.R    | 6 +++---
 man/chi_get_proper_pop.Rd | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/chi_get_proper_pop.R b/R/chi_get_proper_pop.R
index ad94e13..0fb0e15 100644
--- a/R/chi_get_proper_pop.R
+++ b/R/chi_get_proper_pop.R
@@ -462,10 +462,10 @@ chi_get_proper_pop <- function(pop.template = NULL, pop.genders = NULL, pop.ages
                                 paste0(other_cat_prefix, "_varname"),
                                 paste0(other_cat_prefix, "_group"))
 
-            unique_pop_data <- unique(population_data[, cols_to_select, with = FALSE][, key := 1])
+            unique_pop_data <- unique(population_data[, cols_to_select, with = FALSE][, mykey := 1])
 
-            complete_demographics <- unique_pop_data[age_groups[, key := 1], on = "key", allow.cartesian = TRUE]
-            complete_demographics[, key := NULL]
+            complete_demographics <- unique_pop_data[age_groups[, mykey := 1], on = "mykey", allow.cartesian = TRUE]
+            complete_demographics[, mykey := NULL]
 
             return(complete_demographics)
           }
diff --git a/man/chi_get_proper_pop.Rd b/man/chi_get_proper_pop.Rd
index 1a7a292..ab46819 100644
--- a/man/chi_get_proper_pop.Rd
+++ b/man/chi_get_proper_pop.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/proto_chi_get_proper_pop.R
+% Please edit documentation in R/chi_get_proper_pop.R
 \name{chi_get_proper_pop}
 \alias{chi_get_proper_pop}
 \title{Get Population Denominators for CHI Analysis}

From 2b6133e69e85d533a204aae6d19950888aa8cbd1 Mon Sep 17 00:00:00 2001
From: Danny Colombara <dcolombara@kingcounty.gov>
Date: Tue, 11 Mar 2025 14:08:46 -0700
Subject: [PATCH 10/11] rename proto_chi_drop_illogical_ages.R

 - drop proto prefix because fully documented & updated
---
 R/{proto_chi_drop_illogical_ages.R => chi_drop_illogical_ages.R} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename R/{proto_chi_drop_illogical_ages.R => chi_drop_illogical_ages.R} (100%)

diff --git a/R/proto_chi_drop_illogical_ages.R b/R/chi_drop_illogical_ages.R
similarity index 100%
rename from R/proto_chi_drop_illogical_ages.R
rename to R/chi_drop_illogical_ages.R

From 06092232c1cb9da5ce4069198307e0d93b13fcd5 Mon Sep 17 00:00:00 2001
From: Danny Colombara <dcolombara@kingcounty.gov>
Date: Fri, 14 Mar 2025 12:36:25 -0700
Subject: [PATCH 11/11] harmonization of cat1/2 loops across code

 - now use `catnum`, `catgroup`, and `catvarname` in a standard way across functions
---
 R/chi_drop_illogical_ages.R       |  20 ++--
 R/chi_generate_instructions_pop.R |  33 +++---
 R/chi_get_proper_pop.R            | 161 +++++++++++++++---------------
 3 files changed, 106 insertions(+), 108 deletions(-)

diff --git a/R/chi_drop_illogical_ages.R b/R/chi_drop_illogical_ages.R
index fdaa403..8452f1a 100644
--- a/R/chi_drop_illogical_ages.R
+++ b/R/chi_drop_illogical_ages.R
@@ -74,23 +74,23 @@ chi_drop_illogical_ages <- function(ph.data, agevar = 'chi_age') {
   # Loop for cat1 and cat2
   for (catnum in c("cat1", "cat2")) {
     # Get column names for this category
-    catnum_group <- paste0(catnum, "_group")
-    temp_catnum_group <- paste0(catnum_group, "_temp")
+    catgroup <- paste0(catnum, "_group")
+    temp_catgroup <- paste0(catgroup, "_temp")
 
     # Create a standardized version of the age group
-    ph.data[, (temp_catnum_group) := data.table::fcase(
-      get(catnum_group) == '<1', '0-0',
+    ph.data[, (temp_catgroup) := data.table::fcase(
+      get(catgroup) == '<1', '0-0',
 
-      grepl("<", get(catnum_group)), gsub("<", "0-", get(catnum_group)),
+      grepl("<", get(catgroup)), gsub("<", "0-", get(catgroup)),
 
-      grepl("\\+", get(catnum_group)), gsub("\\+", "-120", get(catnum_group)),
+      grepl("\\+", get(catgroup)), gsub("\\+", "-120", get(catgroup)),
 
-      grepl('-', get(catnum_group)), get(catnum_group)
+      grepl('-', get(catgroup)), get(catgroup)
     )]
 
     # Extract min and max age
-    ph.data[, "min_age" := as.numeric(gsub("-.*", "", get(temp_catnum_group)))]
-    ph.data[, "max_age" := as.numeric(gsub(".*-", "", get(temp_catnum_group)))]
+    ph.data[, "min_age" := as.numeric(gsub("-.*", "", get(temp_catgroup)))]
+    ph.data[, "max_age" := as.numeric(gsub(".*-", "", get(temp_catgroup)))]
 
     # Keep rows where either:
     # 1. The cat is not age-related, OR
@@ -99,7 +99,7 @@ chi_drop_illogical_ages <- function(ph.data, agevar = 'chi_age') {
                          data.table::between(get(agevar), min_age, max_age)]
 
     # Clean up temporary columns
-    ph.data[, c(temp_catnum_group, "min_age", "max_age") := NULL]
+    ph.data[, c(temp_catgroup, "min_age", "max_age") := NULL]
   }
 
   return(ph.data)
diff --git a/R/chi_generate_instructions_pop.R b/R/chi_generate_instructions_pop.R
index 92d74db..53d4914 100644
--- a/R/chi_generate_instructions_pop.R
+++ b/R/chi_generate_instructions_pop.R
@@ -83,36 +83,35 @@ chi_generate_instructions_pop <- function(mycount.data,
                  "chi_race_aic_his", "chi_race_aic_nhpi", "chi_race_aic_wht")
 
     # Process both primary (cat1) and secondary (cat2) stratification variables
-    for(catnum in c("1", "2")) {
-      temp.cat <- paste0("cat", catnum)
-      temp.varname <- paste0(temp.cat, "_varname")
-      temp.groupby <- paste0("group_by", catnum)
+    for(catnum in c("cat1", "cat2")) {
+      catvarname <- paste0(catnum, "_varname")
+      temp.groupby <- paste0("group_by", gsub('cat', '', catnum))
 
       # Set geographic type based on category
-      pop.template[get(temp.cat) == "Cities/neighborhoods", geo_type := "hra"]
+      pop.template[get(catnum) == "Cities/neighborhoods", geo_type := "hra"]
 
       # Set race_type and group_by based on race/ethnicity variable
-      pop.template[get(temp.varname) == "race3", c("race_type", temp.groupby) := 'race']
-      pop.template[get(temp.varname) == "race4", c("race_type", temp.groupby) := 'race_eth']
-      pop.template[get(temp.varname) %in% omb_aic, c("race_type", temp.groupby) := 'race_aic']
+      pop.template[get(catvarname) == "race3", c("race_type", temp.groupby) := 'race']
+      pop.template[get(catvarname) == "race4", c("race_type", temp.groupby) := 'race_eth']
+      pop.template[get(catvarname) %in% omb_aic, c("race_type", temp.groupby) := 'race_aic']
 
       # Filter out non-standard AIC race/ethnicity categories that don't have population data
-      pop.template <- pop.template[!(grepl('_aic_', get(temp.varname)) &
-                                       !get(temp.varname) %in% omb_aic)]
+      pop.template <- pop.template[!(grepl('_aic_', get(catvarname)) &
+                                       !get(catvarname) %in% omb_aic)]
 
       # Set demographic grouping based on category label
-      pop.template[get(temp.cat) == "Ethnicity", c("race_type", temp.groupby) := 'race_eth']
-      pop.template[get(temp.cat) == "Gender", (temp.groupby) := 'genders']
-      pop.template[get(temp.cat) %in% c("Race", "Race/ethnicity") & get(temp.varname) == 'race4',
+      pop.template[get(catnum) == "Ethnicity", c("race_type", temp.groupby) := 'race_eth']
+      pop.template[get(catnum) == "Gender", (temp.groupby) := 'genders']
+      pop.template[get(catnum) %in% c("Race", "Race/ethnicity") & get(catvarname) == 'race4',
                    (temp.groupby) := 'race_eth']
-      pop.template[(get(temp.cat) == "Race" & get(temp.varname) == 'race3'),
+      pop.template[(get(catnum) == "Race" & get(catvarname) == 'race3'),
                    (temp.groupby) := 'race']
 
       # Set geographic type based on regions
-      pop.template[get(temp.cat) == "Regions" & (is.na(geo_type) | geo_type != 'hra'),
+      pop.template[get(catnum) == "Regions" & (is.na(geo_type) | geo_type != 'hra'),
                    geo_type := "region"]
-      pop.template[get(temp.cat) == "Big cities", geo_type := "hra"]
-      pop.template[get(temp.cat) == "Washington State", geo_type := "wa"]
+      pop.template[get(catnum) == "Big cities", geo_type := "hra"]
+      pop.template[get(catnum) == "Washington State", geo_type := "wa"]
     }
 
   # Handle special geographic cases ----
diff --git a/R/chi_get_proper_pop.R b/R/chi_get_proper_pop.R
index 0fb0e15..2f673f9 100644
--- a/R/chi_get_proper_pop.R
+++ b/R/chi_get_proper_pop.R
@@ -139,85 +139,84 @@ chi_get_proper_pop <- function(pop.template = NULL, pop.genders = NULL, pop.ages
 
       # Process demographic categories ----
       # Apply category grouping logic for both primary (cat1) and secondary (cat2) categories
-        for (catnum in c("1", "2")) {
+        for (catnum in c("cat1", "cat2")) {
           ## Add category information from template to result ----
-            temp.cat <- paste0("cat", catnum)
-            temp.varname <- paste0("cat", catnum, '_varname')
-            temp.group <- paste0("cat", catnum, '_group')
-            temp.groupby <- paste0("group_by", catnum)
+            catvarname <- paste0(catnum, '_varname')
+            catgroup <- paste0(catnum, '_group')
+            temp.groupby <- paste0("group_by", gsub('cat', '', catnum))
 
-            # had to use set function because regular := syntax caused errors b/c used temp.cat differently on both sides of :=
+            # had to use set function because regular := syntax caused errors b/c used catnum differently on both sides of :=
             data.table::set(population_data,
-                            j = temp.cat,
-                            value = current_row[[temp.cat]])
+                            j = catnum,
+                            value = current_row[[catnum]])
 
             data.table::set(population_data,
-                            j = temp.varname,
-                            value = current_row[[temp.varname]])
+                            j = catvarname,
+                            value = current_row[[catvarname]])
 
             data.table::set(population_data,
-                            j = temp.group,
+                            j = catgroup,
                             value = current_row[[temp.groupby]])
 
           ## Process geographic categories ----
             # King County
-            population_data[get(temp.cat) == "King County",
-                            c(temp.group) := "King County"]
+            population_data[get(catnum) == "King County",
+                            c(catgroup) := "King County"]
 
             # Washington State
-            population_data[get(temp.cat) == "Washington State",
-                            c(temp.group) := "Washington State"]
+            population_data[get(catnum) == "Washington State",
+                            c(catgroup) := "Washington State"]
 
             # Handle NA values
             suppressWarnings(
-              population_data[get(temp.cat) == "NA" | is.na(get(temp.cat)),
-                              c(temp.cat, temp.group, temp.varname) := "NA"]
+              population_data[get(catnum) == "NA" | is.na(get(catnum)),
+                              c(catnum, catgroup, catvarname) := "NA"]
             )
 
             # Cities/neighborhoods and Regions
-            population_data[get(temp.cat) %in% c("Cities/neighborhoods", "Regions") &
+            population_data[get(catnum) %in% c("Cities/neighborhoods", "Regions") &
                               current_row$geo_type != 'blk',
-                            c(temp.group) := geo_id]
+                            c(catgroup) := geo_id]
 
           ## Process gender ----
-            population_data[get(temp.cat) %in% c("Gender"), c(temp.group) := gender]
+            population_data[get(catnum) %in% c("Gender"), c(catgroup) := gender]
 
           ## Process 'Overall' ----
-            population_data[get(temp.cat) %in% c("Overall"), c(temp.group) := "Overall"]
+            population_data[get(catnum) %in% c("Overall"), c(catgroup) := "Overall"]
 
           ## Process race/ethnicity categories ----
-              population_data[get(temp.cat) == "Ethnicity" | get(temp.varname) %in% c('race4'),
-                              c(temp.group) := race_eth]
+              population_data[get(catnum) == "Ethnicity" | get(catvarname) %in% c('race4'),
+                              c(catgroup) := race_eth]
 
-              population_data[get(temp.cat) == 'Race' & get(temp.varname) %in% c('race3'),
-                              c(temp.group) := race]
+              population_data[get(catnum) == 'Race' & get(catvarname) %in% c('race3'),
+                              c(catgroup) := race]
 
-              population_data <- population_data[get(temp.cat) != "Ethnicity" | (get(temp.cat) == "Ethnicity" & get(temp.group) == 'Hispanic'), ]
+              population_data <- population_data[get(catnum) != "Ethnicity" | (get(catnum) == "Ethnicity" & get(catgroup) == 'Hispanic'), ]
 
-              population_data[get(temp.group) == "Multiple race",
-                              c(temp.group) := "Multiple"]
+              population_data[get(catgroup) == "Multiple race",
+                              c(catgroup) := "Multiple"]
 
           ## Process race_aic (alone or in combination) categories ----
             if (current_row$race_type == 'race_aic') {
               # Filter to keep only relevant race_aic combinations
               population_data <- population_data[
-                !(grepl('_aic_', get(temp.varname)) &
-                    !((get(temp.varname) == 'chi_race_aic_aian' & race_aic == 'AIAN') |
-                        (get(temp.varname) == 'chi_race_aic_asian' & race_aic == 'Asian') |
-                        (get(temp.varname) == 'chi_race_aic_black' & race_aic == 'Black') |
-                        (get(temp.varname) == 'chi_race_aic_his' & race_aic == 'Hispanic') |
-                        (get(temp.varname) == 'chi_race_aic_nhpi' & race_aic == 'NHPI') |
-                        (get(temp.varname) == 'chi_race_aic_wht' & race_aic == 'White'))
+                !(grepl('_aic_', get(catvarname)) &
+                    !((get(catvarname) == 'chi_race_aic_aian' & race_aic == 'AIAN') |
+                        (get(catvarname) == 'chi_race_aic_asian' & race_aic == 'Asian') |
+                        (get(catvarname) == 'chi_race_aic_black' & race_aic == 'Black') |
+                        (get(catvarname) == 'chi_race_aic_his' & race_aic == 'Hispanic') |
+                        (get(catvarname) == 'chi_race_aic_nhpi' & race_aic == 'NHPI') |
+                        (get(catvarname) == 'chi_race_aic_wht' & race_aic == 'White'))
                 )
               ]
 
               # Assign race_aic value to group
-              population_data[grep('_aic', get(temp.varname)),
-                              c(temp.group) := race_aic]
+              population_data[grep('_aic', get(catvarname)),
+                              c(catgroup) := race_aic]
             }
 
           ## Process HRAs ----
-            if (population_data[1, geo_type] == 'blk' & population_data[1, get(temp.cat)] == 'Cities/neighborhoods') {
+            if (population_data[1, geo_type] == 'blk' & population_data[1, get(catnum)] == 'Cities/neighborhoods') {
 
               hra_crosswalk <- rads.data::spatial_block20_to_hra20_to_region20[, list(geo_id = GEOID20, hra20_name)]
 
@@ -227,12 +226,12 @@ chi_get_proper_pop <- function(pop.template = NULL, pop.genders = NULL, pop.ages
                                        all.x = TRUE,
                                        all.y = FALSE)
 
-              population_data[, c(temp.group) := hra20_name]
+              population_data[, c(catgroup) := hra20_name]
             }
 
           ## Process Regions ----
             # Block to Region crosswalk
-              if (population_data[1, geo_type] == 'blk' & population_data[1, get(temp.cat)] == 'Regions') {
+              if (population_data[1, geo_type] == 'blk' & population_data[1, get(catnum)] == 'Regions') {
 
                   region_crosswalk <- rads.data::spatial_block20_to_hra20_to_region20[, list(geo_id = GEOID20, region_name)]
 
@@ -242,11 +241,11 @@ chi_get_proper_pop <- function(pop.template = NULL, pop.genders = NULL, pop.ages
                                            all.x = TRUE,
                                            all.y = FALSE)
 
-                  population_data[, c(temp.group) := region_name]
+                  population_data[, c(catgroup) := region_name]
               }
 
             # HRA to Region crosswalk
-              if (population_data[1, geo_type] == 'hra' & population_data[1, get(temp.cat)] == 'Regions') {
+              if (population_data[1, geo_type] == 'hra' & population_data[1, get(catnum)] == 'Regions') {
 
                   region_crosswalk <- rads.data::spatial_hra20_to_region20[, list(geo_id = hra20_name, region_name)]
 
@@ -256,11 +255,11 @@ chi_get_proper_pop <- function(pop.template = NULL, pop.genders = NULL, pop.ages
                                            all.x = TRUE,
                                            all.y = FALSE)
 
-                  population_data[, c(temp.group) := region_name]
+                  population_data[, c(catgroup) := region_name]
               }
 
             # ZIP to Region crosswalk with population weighting
-              if (population_data[1, geo_type] == 'zip' & population_data[1, get(temp.cat)] == 'Regions') {
+              if (population_data[1, geo_type] == 'zip' & population_data[1, get(catnum)] == 'Regions') {
 
                   # Create ZIP to region crosswalk with population weights
                   zip_region_crosswalk <- rads.data::spatial_zip_to_hra20_pop
@@ -282,11 +281,11 @@ chi_get_proper_pop <- function(pop.template = NULL, pop.genders = NULL, pop.ages
                                            all.y = FALSE,
                                            allow.cartesian = TRUE)
                   population_data[, pop := pop * s2t_fraction] # Apply weight to population
-                  population_data[, c(temp.group) := region]
+                  population_data[, c(catgroup) := region]
               }
 
           ## Process Big Cities ----
-            if (population_data[1, get(temp.cat)] == 'Big cities') {
+            if (population_data[1, get(catnum)] == 'Big cities') {
               # Block to big city crosswalk
               if (population_data[1, geo_type] == 'blk') {
 
@@ -319,50 +318,50 @@ chi_get_proper_pop <- function(pop.template = NULL, pop.genders = NULL, pop.ages
               }
 
               # Assign big city name to group
-              population_data[, c(temp.group) := bigcity]
+              population_data[, c(catgroup) := bigcity]
             }
 
           ## Process age groupings ----
             # Age 6 groups: <18, 18-24, 25-44, 45-64, 65-74, 75+
-              population_data[get(temp.varname) == "age6" & age %in% 0:17,
-                              c(temp.group) := "<18"]
-              population_data[get(temp.varname) == "age6" & age %in% 18:24,
-                              c(temp.group) := "18-24"]
-              population_data[get(temp.varname) == "age6" & age %in% 25:44,
-                              c(temp.group) := "25-44"]
-              population_data[get(temp.varname) == "age6" & age %in% 45:64,
-                              c(temp.group) := "45-64"]
-              population_data[get(temp.varname) == "age6" & age %in% 65:74,
-                              c(temp.group) := "65-74"]
-              population_data[get(temp.varname) == "age6" & age >= 75,
-                              c(temp.group) := "75+"]
+              population_data[get(catvarname) == "age6" & age %in% 0:17,
+                              c(catgroup) := "<18"]
+              population_data[get(catvarname) == "age6" & age %in% 18:24,
+                              c(catgroup) := "18-24"]
+              population_data[get(catvarname) == "age6" & age %in% 25:44,
+                              c(catgroup) := "25-44"]
+              population_data[get(catvarname) == "age6" & age %in% 45:64,
+                              c(catgroup) := "45-64"]
+              population_data[get(catvarname) == "age6" & age %in% 65:74,
+                              c(catgroup) := "65-74"]
+              population_data[get(catvarname) == "age6" & age >= 75,
+                              c(catgroup) := "75+"]
 
             # Maternal age 5 groups: 10-17, 18-24, 25-34, 35-44, 45+
-              population_data[get(temp.varname) == "mage5" & age %in% 10:17,
-                              c(temp.group) := "10-17"]
-              population_data[get(temp.varname) == "mage5" & age %in% 18:24,
-                              c(temp.group) := "18-24"]
-              population_data[get(temp.varname) == "mage5" & age %in% 25:34,
-                              c(temp.group) := "25-34"]
-              population_data[get(temp.varname) == "mage5" & age %in% 35:44,
-                              c(temp.group) := "35-44"]
-              population_data[get(temp.varname) == "mage5" & age >= 45,
-                              c(temp.group) := "45+"]
+              population_data[get(catvarname) == "mage5" & age %in% 10:17,
+                              c(catgroup) := "10-17"]
+              population_data[get(catvarname) == "mage5" & age %in% 18:24,
+                              c(catgroup) := "18-24"]
+              population_data[get(catvarname) == "mage5" & age %in% 25:34,
+                              c(catgroup) := "25-34"]
+              population_data[get(catvarname) == "mage5" & age %in% 35:44,
+                              c(catgroup) := "35-44"]
+              population_data[get(catvarname) == "mage5" & age >= 45,
+                              c(catgroup) := "45+"]
 
             # Youth age 4 groups: 0-4, 5-9, 10-14, 15-17
-              population_data[get(temp.varname) == "yage4" & age %in% 0:4,
-                              c(temp.group) := "0-4"]
-              population_data[get(temp.varname) == "yage4" & age %in% 5:9,
-                              c(temp.group) := "5-9"]
-              population_data[get(temp.varname) == "yage4" & age %in% 10:14,
-                              c(temp.group) := "10-14"]
-              population_data[get(temp.varname) == "yage4" & age %in% 15:17,
-                              c(temp.group) := "15-17"]
+              population_data[get(catvarname) == "yage4" & age %in% 0:4,
+                              c(catgroup) := "0-4"]
+              population_data[get(catvarname) == "yage4" & age %in% 5:9,
+                              c(catgroup) := "5-9"]
+              population_data[get(catvarname) == "yage4" & age %in% 10:14,
+                              c(catgroup) := "10-14"]
+              population_data[get(catvarname) == "yage4" & age %in% 15:17,
+                              c(catgroup) := "15-17"]
 
           ## Process poverty groupings ----
             # Block level poverty
               if (population_data[1, geo_type] == 'blk' &
-                  grepl("poverty$", population_data[1, get(temp.cat)], ignore.case = TRUE)) {
+                  grepl("poverty$", population_data[1, get(catnum)], ignore.case = TRUE)) {
                 # Extract tract ID from block ID (first 11 characters)
                 population_data[, geo_tract2020 := substr(geo_id, 1, 11)]
 
@@ -376,12 +375,12 @@ chi_get_proper_pop <- function(pop.template = NULL, pop.genders = NULL, pop.ages
                 )
 
                 # Assign poverty group
-                population_data[, c(temp.group) := pov200grp]
+                population_data[, c(catgroup) := pov200grp]
               }
 
             # ZIP level poverty
               if (population_data[1, geo_type] == 'zip' &
-                  grepl("poverty$", population_data[1, get(temp.cat)], ignore.case = TRUE)) {
+                  grepl("poverty$", population_data[1, get(catnum)], ignore.case = TRUE)) {
                 # Join poverty group data
                 population_data <- merge(
                   population_data,
@@ -392,7 +391,7 @@ chi_get_proper_pop <- function(pop.template = NULL, pop.genders = NULL, pop.ages
                 )
 
                 # Assign poverty group
-                population_data[, c(temp.group) := pov200grp]
+                population_data[, c(catgroup) := pov200grp]
               }
 
           } # close looping over cat1/cat2