PHSKC-APDE · rwbuie · Feb 24, 2025 · Feb 10, 2025 · Feb 10, 2025 · Feb 10, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -5,15 +5,14 @@ Version: 0.0.1
 Authors@R: person(
   "Ronald","Buie", 
   email = "rbuie@kingcounty.gov", 
-  role = c("auth", "cre"))
-Description: This package is for use by epidemiologists within Public Health Seattle
-    and King County, and aligns to analytics guidelines and practices for Community
+  role = c("aut", "cre"))
+Description: This package provides tools for epidemiologists within Public Health Seattle
+    and King County. It aligns with analytics guidelines and practices for Community
     Health Indicators (CHI) as reported by the Assessment, Policy Development, and
-    Evaluation Unit. The major activities assisted by this packages are 
-    Simplifying the execution of identical CHI analyses across multiple variables
-    Creating CHI Tableau ready output of the analysis
-    Performing quality assurance regarding data structure and formatting
-    Pushing and pulling data to APDE servers
+    Evaluation Unit. The package simplifies the execution of identical CHI analyses 
+    across multiple variables, creates CHI Tableau-ready output of the analysis, 
+    performs quality assurance regarding data structure and formatting, and 
+    facilitates pushing and pulling data to APDE servers.
 URL: https://github.com/PHSKC-APDE/apde.chi.tools
 BugReports: https://github.com/PHSKC-APDE/apde.chi.tools/issues
 License: MIT + file LICENSE
@@ -23,18 +22,17 @@ Imports:
     data.table (>= 1.14.2),
     DBI (>= 1.1.0),
     dtsurvey (>= 0.0.2.0),
+    future, 
+    future.apply,
     glue (>= 1.6.1),
-    keyring (>= 1.2.0),
-    lubridate (>= 1.6.0),
-    methods,
     odbc (>= 1.2.2),
-    rads.data (>= 1.0.7.5),
-    stats,
-    utf8 (>= 1.1.4),
+    rads, 
+    rads.data,
+    tidyr,
     yaml (>= 2.2.1),
     utils
 Remotes:
-    github::PHSKC-APDE/rads.data, github::PHSKC-APDE/dtsurvey, github::PHSKC-APDE/apde.chi.tools
+    github::PHSKC-APDE/rads, github::PHSKC-APDE/dtsurvey
 Suggests: 
     httr,
     knitr,
@@ -43,8 +41,7 @@ Suggests:
     testthat (>= 3.0.0),
     srvyr,
     survey (>= 4.0)
-VignetteBuilder: knitr
 Depends: 
-    R (>= 3.1)
+    R (>= 4.1)
 Config/testthat/edition: 3
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
diff --git a/NAMESPACE b/NAMESPACE
@@ -23,5 +23,7 @@ importFrom(data.table,setorder)
 importFrom(data.table,shift)
 importFrom(glue,glue)
 importFrom(rads,chi_qa)
+importFrom(rads,substrRight)
+importFrom(tidyr,crossing)
 importFrom(utils,write.table)
 importFrom(yaml,yaml.load)
diff --git a/R/chi_generate_metadata.R b/R/chi_generate_metadata.R
@@ -7,13 +7,14 @@
 #' @param est.current current year's tableau ready output with completed estimates
 #'
 #' @return table of metadata
+#' @importFrom rads substrRight
 #' @export
 #'
 chi_generate_metadata <- function(meta.old = NULL,
                                   est.current = NULL){
   # get new metadata ----
   meta.new <- unique(est.current[tab == "metadata",
-                                 .(indicator_key,
+                                 list(indicator_key,
                                    latest_yearx = as.integer(year),
                                    latest_year_resultx = result,
                                    run_datex = run_date,
@@ -33,7 +34,7 @@ chi_generate_metadata <- function(meta.old = NULL,
   meta.new[, c("latest_yearx", "latest_year_resultx", "run_datex", "latest_year_countx", "latest_year_kc_popx") := NULL]
 
   # update valid_years ----
-  meta.new[as.integer(latest_year) > suppressWarnings(as.integer(substrRight(valid_years, 1, 4))),
+  meta.new[as.integer(latest_year) > suppressWarnings(as.integer(rads::substrRight(valid_years, 1, 4))),
            valid_years := suppressWarnings(paste(as.integer(substr(valid_years, 1, 4)):as.integer(latest_year), collapse = " "))]
 
   # Ensure there are no missing important metadata cells ----

diff --git a/R/chi_generate_tro_shell.R b/R/chi_generate_tro_shell.R
@@ -20,6 +20,7 @@
 #' @param ph.analysis_set name of data.table to parse
 #' @param start.year the earliest year to be used for estimates
 #' @param end.year the latest year to be used for aggregate estimates (note, the earliest year for trends estimates is calculated from from the span and number of periods)
+#' @param year.span the number of years to be included in a single non-trend period
 #' @param trend.span the number of years to be included in a single trend period
 #' @param trend.periods the number of periods to be included in a trend
 #' @returns data table with a single row for each calculation to be performed in generating Tableau Ready Output for CHI reporting
@@ -36,6 +37,7 @@
 chi_generate_tro_shell <- function(ph.analysis_set,
                                       start.year,
                                       end.year,
+                                      year.span = NULL,
                                       trend.span = NULL,
                                       trend.periods = NULL){
   #parameterization checks

diff --git a/R/chi_process_nontrends.R b/R/chi_process_nontrends.R
@@ -24,6 +24,7 @@
 #' @import dtsurvey
 #' @import future
 #' @import future.apply
+#' @importFrom tidyr crossing
 chi_process_nontrends <- function(ph.analysis_set = NULL,
                                   myset = NULL){
 
@@ -35,15 +36,15 @@ chi_process_nontrends <- function(ph.analysis_set = NULL,
   tempy <- rbindlist(lapply(as.list(seq(1, length(subtabs))),
                             FUN = function(subtab){
                               tempx <- subsets[get(subtabs[subtab]) == 'x',
-                                               .(tab = subtabs[subtab], cat1, cat1_varname, cat2 = NA_character_, cat2_varname = NA_character_)]
+                                               list(tab = subtabs[subtab], cat1, cat1_varname, cat2 = NA_character_, cat2_varname = NA_character_)]
                               tempx <- setDT(tidyr::crossing(tempx, data.table(indicator_key = sub_indicators)))
 
                             }))
 
   # crosstabs are a bit more complicated
   sub_crosstabs = setDT(tidyr::crossing(
-    unique(subsets[crosstabs == 'x', .(cat1, cat1_varname)]),
-    unique(subsets[crosstabs == 'x', .(cat2 = cat1, cat2_varname = cat1_varname)]) ))
+    unique(subsets[crosstabs == 'x', list(cat1, cat1_varname)]),
+    unique(subsets[crosstabs == 'x', list(cat2 = cat1, cat2_varname = cat1_varname)]) ))
   sub_crosstabs <- sub_crosstabs[cat1 == 'King County' | cat1_varname != cat2_varname]
   sub_crosstabs <- sub_crosstabs[!(cat1_varname == 'race3' & cat2_varname %in% c('race3', 'race4'))] # do not want race x race
   sub_crosstabs <- sub_crosstabs[!(cat2_varname == 'race3' & cat1_varname %in% c('race3', 'race4'))] # do not want race x race

diff --git a/R/chi_process_trends.R b/R/chi_process_trends.R
@@ -1,7 +1,7 @@
 #' CHI Generate Trend Years
 #'
 #' @param indicator_key chi indicator key variable
-#' @param span number of years to include in a trend year estimate
+#' @param trend.span the number of years to be included in a single trend period
 #' @param end.year last year of a trend year time series
 #' @param trend.periods number of periods to calculate
 #'
@@ -15,12 +15,13 @@
 #' @returns TRO with rows for each indicator key and span of years within the provided time frame
 #' @keywords CHI, Tableau, Production
 #' @import dtsurvey
+#' @importFrom tidyr crossing
 
 chi_process_trends <- function(indicator_key = NULL,
                                      trend.span = NULL,
                                      end.year = NULL,
                                      trend.periods = NULL){
-  last.start <- end.year-(span-1)
+  last.start <- end.year-(trend.span-1)
   all.start.years <- last.start:(last.start-(trend.periods-1))
   all.end.years <- end.year:(end.year-(trend.periods-1))
   spandt <- data.table(end = all.end.years, start = all.start.years)

diff --git a/R/globals.R b/R/globals.R
@@ -0,0 +1,15 @@
+# Declare global variables for devtools::check()
+utils::globalVariables(c(
+  "varname", "group", "keepme", "reference", "chi_year", "tempstart",
+  "tempend", "chi_geo_kc", "cat1", "cat1_varname", "cat2", "cat2_varname",
+  "chi_age", "numerator", "count", "cat2_group", "indicator_key", "tab",
+  "round2", "lower_bound", "cat1_group", "cat2", "tempbv", "run_date",
+  "chi", "data_source", "time_trends", "suppression", "rse", "caution",
+  "comparison_with_kc_sig", "AgeMin", "AgeMax", "start", "race_type", "geo_type",
+  "denominator", "latest_yearx", "latest_year", "latest_year_resultx", "latest_year_result",
+  "run_datex", "latest_year_countx", "latest_year_count", "latest_year_kc_popx",
+  "latest_year_kc_pop", "valid_years", "end", "group_by1", "group_by2", "geo_id",
+  "gender", "race_eth", "race", "race_aic", "GEOID20", "hra20_name", "region_name",
+  "s2t_fraction", "source_id", "region", "pop", "bigcity", "geo_tract2020", "pov200grp",
+  "crosstabs", "vebrose", "span", "age", 'year.span'
+))
diff --git a/R/proto_chi_calc.R b/R/proto_chi_calc.R
@@ -7,9 +7,6 @@ chi_calc <- function(ph.data = NULL,
                      suppress_high = 9,
                      source_name = 'blahblah',
                      source_date = NULL){
-
-
-
   # Error if ph.instructions has no data ----
   if(nrow(ph.instructions) == 0){
     stop("\n\U0001f47f the table ph.instructions does not have any rows.")
@@ -59,21 +56,21 @@ chi_calc <- function(ph.data = NULL,
       if(length(tempbv2) == 0){tempbv2 = NA}
       tempbv <- setdiff(c(tempbv1, tempbv2), c(NA))
 
-      # send constants to global environment so it can be used by the calc() function below
-      assign("tempbv", tempbv, envir = .GlobalEnv)
-      assign("tempend", ph.instructions[X][['end']], envir = .GlobalEnv)
-      assign("tempstart", ph.instructions[X][['start']], envir = .GlobalEnv)
+      # create variables of interest used in calc function below
+      tempbv <- tempbv
+      tempend <- ph.instructions[X][['end']]
+      tempstart <- ph.instructions[X][['start']]
 
       # use calc()----
       if(rate == FALSE){ # standard proportion analysis
         if(any(grepl('wastate', tempbv))){
-          tempest <- calc(ph.data = ph.data,
+          tempest <- rads::calc(ph.data = ph.data,
                           what = ph.instructions[X][['indicator_key']],
                           where = chi_year >= tempstart & chi_year <= tempend,
                           by = tempbv,
                           metrics = c('mean', 'numerator', 'denominator', 'rse'))
         } else {
-          tempest <- calc(ph.data = ph.data,
+          tempest <- rads::calc(ph.data = ph.data,
                           what = ph.instructions[X][['indicator_key']],
                           where = chi_year >= tempstart & chi_year <= tempend & chi_geo_kc == 'King County',
                           by = tempbv,
@@ -82,14 +79,14 @@ chi_calc <- function(ph.data = NULL,
       }
       if(rate == TRUE){
         if(any(grepl('wastate', tempbv))){
-          tempest <- calc(ph.data = ph.data,
+          tempest <- rads::calc(ph.data = ph.data,
                           what = ph.instructions[X][['indicator_key']],
                           where = chi_year >= tempstart & chi_year <= tempend,
                           by = tempbv,
                           metrics = c('rate', 'numerator', 'denominator', 'rse'),
                           per = rate_per)
         } else {
-          tempest <- calc(ph.data = ph.data,
+          tempest <- rads::calc(ph.data = ph.data,
                           what = ph.instructions[X][['indicator_key']],
                           where = chi_year >= tempstart & chi_year <= tempend & chi_geo_kc == 'King County',
                           by = tempbv,
@@ -186,9 +183,6 @@ chi_calc <- function(ph.data = NULL,
                cat2 := 'Ethnicity']
   }
 
-  # drop temporary vars that were sent to global environment for calc() ----
-  rm(tempbv, tempend, tempstart, envir = .GlobalEnv)
-
   # Create additional necessary CHI columns ----
   tempCHIest[, source_date := as.Date(source_date)]
   tempCHIest[, run_date := as.Date(Sys.Date(), "%Y%m%d")]
@@ -197,7 +191,7 @@ chi_calc <- function(ph.data = NULL,
   tempCHIest[, data_source := source_name]
   tempCHIest[, time_trends := NA] # NA because no longer calculated
 
-  tempCHIest <- compare_estimate(mydt = tempCHIest,
+  tempCHIest <- rads::compare_estimate(mydt = tempCHIest,
                                  id_vars = c("indicator_key", "year"),
                                  key_where = cat1_group == "King County" & tab != "crosstabs",
                                  new_col = "comparison_with_kc",
@@ -215,7 +209,7 @@ chi_calc <- function(ph.data = NULL,
 
 
   # Keep and order standard CHI columns ----
-  tempCHIest <- tempCHIest[, c(chi_cols()[]), with = F]
+  tempCHIest <- tempCHIest[, c(rads::chi_cols()[]), with = F]
 
   tempCHIest <- tempCHIest[, cat1 := factor(cat1, levels = c("King County", sort(setdiff(unique(tempCHIest$cat1), "King County"))) )]
   tempCHIest <- tempCHIest[, tab := factor(tab, levels = c(c("_kingcounty","demgroups", "trends"),  sort(setdiff(unique(tempCHIest$tab), c("_kingcounty","demgroups", "trends")))) )]

diff --git a/R/proto_chi_count_by_age.R b/R/proto_chi_count_by_age.R
@@ -45,20 +45,20 @@ chi_count_by_age <- function(ph.data = NULL,
       tempbv <- setdiff(c(tempbv1, tempbv2), c(NA))
       tempbv <- c(tempbv, "chi_age")
 
-      # send constants to global environment so it can be used by the calc() function below
-      assign("tempbv", tempbv, envir = .GlobalEnv)
-      assign("tempend", ph.instructions[X][['end']], envir = .GlobalEnv)
-      assign("tempstart", ph.instructions[X][['start']], envir = .GlobalEnv)
+      # create variables of interest used in calc function below
+      tempbv <- tempbv
+      tempend <- ph.instructions[X][['end']]
+      tempstart <- ph.instructions[X][['start']]
 
       # use calc----
       if(any(grepl('wastate', tempbv))){
-        tempcount <- calc(ph.data = ph.data,
+        tempcount <- rads::calc(ph.data = ph.data,
                           what = ph.instructions[X][['indicator_key']],
                           where = chi_year >= tempstart & chi_year <= tempend,
                           by = tempbv,
                           metrics = c('numerator'))
       } else {
-        tempcount <- calc(ph.data = ph.data,
+        tempcount <- rads::calc(ph.data = ph.data,
                           what = ph.instructions[X][['indicator_key']],
                           where = chi_year >= tempstart & chi_year <= tempend & chi_geo_kc == 'King County',
                           by = tempbv,
@@ -103,7 +103,7 @@ chi_count_by_age <- function(ph.data = NULL,
                 year := ph.instructions[X][['end']]]
 
       # order output----
-      tempcount <- tempcount[, .(indicator_key, year, tab, cat1, cat1_varname, cat1_group, cat2, cat2_varname, cat2_group, chi_age, count)]
+      tempcount <- tempcount[, list(indicator_key, year, tab, cat1, cat1_varname, cat1_group, cat2, cat2_varname, cat2_group, chi_age, count)]
       setorder(tempcount, cat1_group, cat2_group, chi_age)
 
     }

diff --git a/R/proto_chi_generate_instructions_pop.R b/R/proto_chi_generate_instructions_pop.R
@@ -1,6 +1,6 @@
 chi_generate_instructions_pop <- function(mycount.data, povgeo = NA){
   pop.template <- copy(mycount.data)
-  pop.template <- unique(copy(pop.template)[, .(year, cat1, cat1_varname, cat2, cat2_varname, tab)])
+  pop.template <- unique(copy(pop.template)[, list(year, cat1, cat1_varname, cat2, cat2_varname, tab)])
   pop.template[, c("start", "stop") := tstrsplit(year, split = '-') ]
   pop.template[is.na(stop), stop := start] # need to have an end date even if it is just one year