From 011c30d85b103dbdbb87da247e35a64824aaf226 Mon Sep 17 00:00:00 2001 From: Buie Date: Tue, 18 Mar 2025 16:00:46 -0700 Subject: [PATCH 01/12] remove start year from generate_tro_shell --- R/chi_generate_tro_shell.R | 5 ----- 1 file changed, 5 deletions(-) diff --git a/R/chi_generate_tro_shell.R b/R/chi_generate_tro_shell.R index 05bc574..99df736 100644 --- a/R/chi_generate_tro_shell.R +++ b/R/chi_generate_tro_shell.R @@ -26,7 +26,6 @@ #' } #' #' @param ph.analysis_set name of data.table to parse -#' @param start.year the earliest year to be used for estimates #' @param end.year the latest year to be used for aggregate estimates #' @param year.span the number of years to be included in a single non-trend period #' @param trend.span the number of years to be included in a single trend period @@ -54,7 +53,6 @@ #' @export #' chi_generate_tro_shell <- function(ph.analysis_set, - start.year, end.year, year.span = NULL, trend.span = NULL, @@ -64,9 +62,6 @@ chi_generate_tro_shell <- function(ph.analysis_set, if (missing(ph.analysis_set)) stop("\n\U1F6D1 ph.analysis_set must be provided") if (!is.data.frame(ph.analysis_set)) stop("\n\U1F6D1 ph.analysis_set must be a data.frame or data.table") - if (missing(start.year)) stop("\n\U1F6D1 start.year must be provided") - if (!is.numeric(start.year) || length(start.year) != 1) stop("\n\U1F6D1 start.year must be a single numeric value") - if (missing(end.year)) stop("\n\U1F6D1 end.year must be provided") if (!is.numeric(end.year) || length(end.year) != 1) stop("\n\U1F6D1 end.year must be a single numeric value") From 8678bd4ea749d29f5260f3a0e9cd6def13e84b49 Mon Sep 17 00:00:00 2001 From: Buie Date: Wed, 19 Mar 2025 02:38:04 -0700 Subject: [PATCH 02/12] fixed bug where non age groups with "-" "<", "+" were being captured fixed type missmatch error --- R/chi_drop_illogical_ages.R | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/R/chi_drop_illogical_ages.R b/R/chi_drop_illogical_ages.R index 8452f1a..d919df3 100644 --- a/R/chi_drop_illogical_ages.R +++ b/R/chi_drop_illogical_ages.R @@ -79,14 +79,13 @@ chi_drop_illogical_ages <- function(ph.data, agevar = 'chi_age') { # Create a standardized version of the age group ph.data[, (temp_catgroup) := data.table::fcase( - get(catgroup) == '<1', '0-0', + get(catgroup) == '<1' & grepl(' age$|^age$', get(catnum), ignore.case = T), '0-0', - grepl("<", get(catgroup)), gsub("<", "0-", get(catgroup)), + grepl("<", get(catgroup)) &grepl(' age$|^age$', get(catnum), ignore.case = T), gsub("<", "0-", get(catgroup)), - grepl("\\+", get(catgroup)), gsub("\\+", "-120", get(catgroup)), + grepl("\\+", get(catgroup))& grepl(' age$|^age$', get(catnum), ignore.case = T), gsub("\\+", "-120", get(catgroup)), - grepl('-', get(catgroup)), get(catgroup) - )] + grepl('-', get(catgroup))& grepl(' age$|^age$', get(catnum), ignore.case = T), as.character(get(catgroup)))] # Extract min and max age ph.data[, "min_age" := as.numeric(gsub("-.*", "", get(temp_catgroup)))] From 333e824f9584a4f97c06a331cd804b8bcf7a103a Mon Sep 17 00:00:00 2001 From: Buie Date: Mon, 24 Mar 2025 16:13:05 -0700 Subject: [PATCH 03/12] move chi_get_cols to its own R file --- R/chi_get_cols.R | 19 +++++++++++++++++++ R/chi_get_yaml.R | 20 -------------------- 2 files changed, 19 insertions(+), 20 deletions(-) create mode 100644 R/chi_get_cols.R diff --git a/R/chi_get_cols.R b/R/chi_get_cols.R new file mode 100644 index 0000000..583aa48 --- /dev/null +++ b/R/chi_get_cols.R @@ -0,0 +1,19 @@ +#' Get CHI variable column names +#' +#' Returns a character vector of column names defined in the CHI YAML reference file. +#' This helper function provides easy access to the standardized CHI variable names. +#' +#' @return A character vector of column names +#' @importFrom yaml read_yaml +#' @export +#' +#' @examples +#' cols <- chi_get_cols() +chi_get_cols <- function() { + chi.yaml.filepath <- system.file("ref", "chi_qa.yaml", package = "apde.chi.tools") + if (chi.yaml.filepath == "") { + stop("Could not find reference file chi_qa.yaml") + } + chi.yaml <- yaml::read_yaml(chi.yaml.filepath) + return(names(chi.yaml$vars)) +} diff --git a/R/chi_get_yaml.R b/R/chi_get_yaml.R index 7d7c813..9ad1ebf 100644 --- a/R/chi_get_yaml.R +++ b/R/chi_get_yaml.R @@ -1,23 +1,3 @@ -#' Get CHI variable column names -#' -#' Returns a character vector of column names defined in the CHI YAML reference file. -#' This helper function provides easy access to the standardized CHI variable names. -#' -#' @return A character vector of column names -#' @importFrom yaml read_yaml -#' @export -#' -#' @examples -#' cols <- chi_get_cols() -chi_get_cols <- function() { - chi.yaml.filepath <- system.file("ref", "chi_qa.yaml", package = "apde.chi.tools") - if (chi.yaml.filepath == "") { - stop("Could not find reference file chi_qa.yaml") - } - chi.yaml <- yaml::read_yaml(chi.yaml.filepath) - return(names(chi.yaml$vars)) -} - #' Get CHI YAML configuration #' #' Returns the complete CHI YAML configuration as a list. From 0bd5be59acf54d1e7c35e91bc49338fdb449b9d1 Mon Sep 17 00:00:00 2001 From: Buie Date: Mon, 24 Mar 2025 16:37:40 -0700 Subject: [PATCH 04/12] update yaml to current standard --- inst/ref/chi_qa.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/inst/ref/chi_qa.yaml b/inst/ref/chi_qa.yaml index 71cdf8d..cb0f5a7 100644 --- a/inst/ref/chi_qa.yaml +++ b/inst/ref/chi_qa.yaml @@ -18,6 +18,9 @@ vars: upper_bound: float se: float rse: float + comparison_with_kc: varchar(255) + time_trends: varchar(255) + significance: varchar(255) caution: varchar(255) suppression: varchar(255) numerator: float From 1ef683c595d87acaeca8759324e77a37b5b43d03 Mon Sep 17 00:00:00 2001 From: Buie Date: Tue, 25 Mar 2025 12:22:09 -0700 Subject: [PATCH 05/12] added handling for set numbers to be any number (not required to be 1...N . Added error for missing any set data --- R/chi_generate_tro_shell.R | 58 ++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/R/chi_generate_tro_shell.R b/R/chi_generate_tro_shell.R index 99df736..2e1480b 100644 --- a/R/chi_generate_tro_shell.R +++ b/R/chi_generate_tro_shell.R @@ -53,48 +53,50 @@ #' @export #' chi_generate_tro_shell <- function(ph.analysis_set, - end.year, - year.span = NULL, - trend.span = NULL, - trend.periods = NULL){ + end.year, + year.span = NULL, + trend.span = NULL, + trend.periods = NULL){ # Input validation - if (missing(ph.analysis_set)) stop("\n\U1F6D1 ph.analysis_set must be provided") - if (!is.data.frame(ph.analysis_set)) stop("\n\U1F6D1 ph.analysis_set must be a data.frame or data.table") + if (missing(ph.analysis_set)) stop("\n\U1F6D1 ph.analysis_set must be provided") + if (!is.data.frame(ph.analysis_set)) stop("\n\U1F6D1 ph.analysis_set must be a data.frame or data.table") + if (!("set" %in% names(ph.analysis_set)) | anyNA(ph.analysis_set$set)) { + stop("\n\u1F6D1 set number must be provided for all rows") + } + if (missing(end.year)) stop("\n\U1F6D1 end.year must be provided") + if (!is.numeric(end.year) || length(end.year) != 1) stop("\n\U1F6D1 end.year must be a single numeric value") - if (missing(end.year)) stop("\n\U1F6D1 end.year must be provided") - if (!is.numeric(end.year) || length(end.year) != 1) stop("\n\U1F6D1 end.year must be a single numeric value") + if (!is.null(year.span) && (!is.numeric(year.span) || length(year.span) != 1)) { + stop("\n\U1F6D1 year.span must be NULL or a single numeric value") + } - if (!is.null(year.span) && (!is.numeric(year.span) || length(year.span) != 1)) { - stop("\n\U1F6D1 year.span must be NULL or a single numeric value") - } + if (!is.null(trend.span) && (!is.numeric(trend.span) || length(trend.span) != 1)) { + stop("\n\U1F6D1 trend.span must be NULL or a single numeric value") + } - if (!is.null(trend.span) && (!is.numeric(trend.span) || length(trend.span) != 1)) { - stop("\n\U1F6D1 trend.span must be NULL or a single numeric value") - } + if (!is.null(trend.periods) && (!is.numeric(trend.periods) || length(trend.periods) != 1)) { + stop("\n\U1F6D1 trend.periods must be NULL or a single numeric value") + } - if (!is.null(trend.periods) && (!is.numeric(trend.periods) || length(trend.periods) != 1)) { - stop("\n\U1F6D1 trend.periods must be NULL or a single numeric value") - } - # Convert to data.table if needed - if (!is.data.table(ph.analysis_set)) setDT(ph.analysis_set) + # Convert to data.table if needed + if (!is.data.table(ph.analysis_set)) setDT(ph.analysis_set) - #parameterization checks + # parameterization checks if("x" %in% ph.analysis_set$trends & (is.null(trend.span) | is.null(trend.periods))) {stop("you have indicated that a trends analysis is to be conducted, but have not indicated both the span and number of periods for this analysis.")} - #ph.analysis_set checks - - - #advisory messages + # advisory messages if("x" %in% ph.analysis_set$trends) {message("Note: trends are applied backwards from end.year")} # Race / ethnicity is a chronic headache with CHI. Need to remove rows for race4 & Ethnicity because should be Race/ethnicity ph.analysis_set <- ph.analysis_set[!(cat1_varname == 'race4' & cat1 == 'Ethnicity')] # apply the template generating function + # generate vector of sets + sets <- unique(ph.analysis_set$set) template <- rbindlist( - lapply(X = seq(1, length(unique(ph.analysis_set$set))), + lapply(X = sets, FUN = chi_process_nontrends, ph.analysis_set = ph.analysis_set)) # split trends from other tabs because processed for multiple years @@ -112,9 +114,9 @@ chi_generate_tro_shell <- function(ph.analysis_set, # add years to template (trends) if(nrow(template.trends) > 0){ trend.years <- chi_process_trends(indicator_key = intersect(unique(template$indicator_key), unique(template.trends$indicator_key)), - trend.span = trend.span, - end.year = end.year, - trend.periods = trend.periods) + trend.span = trend.span, + end.year = end.year, + trend.periods = trend.periods) template.trends <- merge(template.trends, trend.years, by = 'indicator_key', all = T, allow.cartesian = T) # append trends template to main template From 88727c340e22cf8def9d600ce75af9debc77fb48 Mon Sep 17 00:00:00 2001 From: Danny Colombara Date: Fri, 28 Mar 2025 15:04:06 -0700 Subject: [PATCH 06/12] refreshed helpfiles --- man/chi_generate_tro_shell.Rd | 3 --- man/chi_get_cols.Rd | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/man/chi_generate_tro_shell.Rd b/man/chi_generate_tro_shell.Rd index 50ab14d..74a6e17 100644 --- a/man/chi_generate_tro_shell.Rd +++ b/man/chi_generate_tro_shell.Rd @@ -6,7 +6,6 @@ \usage{ chi_generate_tro_shell( ph.analysis_set, - start.year, end.year, year.span = NULL, trend.span = NULL, @@ -16,8 +15,6 @@ chi_generate_tro_shell( \arguments{ \item{ph.analysis_set}{name of data.table to parse} -\item{start.year}{the earliest year to be used for estimates} - \item{end.year}{the latest year to be used for aggregate estimates} \item{year.span}{the number of years to be included in a single non-trend period} diff --git a/man/chi_get_cols.Rd b/man/chi_get_cols.Rd index 2047f92..8580d92 100644 --- a/man/chi_get_cols.Rd +++ b/man/chi_get_cols.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/chi_get_yaml.R +% Please edit documentation in R/chi_get_cols.R \name{chi_get_cols} \alias{chi_get_cols} \title{Get CHI variable column names} From d6f17bf2dc5599f000fbd082afccef03151e58ff Mon Sep 17 00:00:00 2001 From: Danny Colombara Date: Fri, 28 Mar 2025 15:04:50 -0700 Subject: [PATCH 07/12] updated tests to remove start.year argument no longer a valid argument for chi_generate_tro_shell --- tests/testthat/test-chi_generate_tro_shell.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-chi_generate_tro_shell.R b/tests/testthat/test-chi_generate_tro_shell.R index 5bf4a43..18370b7 100644 --- a/tests/testthat/test-chi_generate_tro_shell.R +++ b/tests/testthat/test-chi_generate_tro_shell.R @@ -2,8 +2,8 @@ test_that("chi_generate_tro_shell validates inputs correctly", { test_data <- setup_test_data() expect_error(chi_generate_tro_shell(), "ph.analysis_set must be provided") - expect_error(chi_generate_tro_shell(data.frame(), start.year = "2023"), - "start.year must be a single numeric value") + expect_error(chi_generate_tro_shell(data.frame()), + "set number must be provided for all rows") }) test_that("ingest template format", { @@ -29,7 +29,6 @@ test_that("ingest template format", { trends, set_indicator_keys) DT <- chi_generate_tro_shell(ph.analysis_set = template, - start.year = 2021, end.year = 2022, year.span = 5, trend.span = 3, From 62f418190c3349e636e8df8b7f90d34dcd1f4a82 Mon Sep 17 00:00:00 2001 From: Danny Colombara Date: Fri, 28 Mar 2025 15:05:28 -0700 Subject: [PATCH 08/12] updated vignette for chi_generate_tro_shell start.year no longer a valid argument chi_generate_tro_shell --- quarto_docs/Calculating_Prevalences.qmd | 1 - 1 file changed, 1 deletion(-) diff --git a/quarto_docs/Calculating_Prevalences.qmd b/quarto_docs/Calculating_Prevalences.qmd index db37ad1..1a72a63 100644 --- a/quarto_docs/Calculating_Prevalences.qmd +++ b/quarto_docs/Calculating_Prevalences.qmd @@ -132,7 +132,6 @@ To analyze our data consistently, we need to generate a structured set of calcul myinstructions <- chi_generate_tro_shell( ph.analysis_set = analysis_sets, - start.year = latest_year-4, # earliest year to be used for estimates end.year = latest_year, # latest year to be used for aggregate estimates year.span = 5, # number of years included in a single period trend.span = 3, # number of years included in a trend single period From b2e11dd3713da7668724384d1600e0bb37161f4b Mon Sep 17 00:00:00 2001 From: Buie Date: Fri, 28 Mar 2025 16:03:06 -0700 Subject: [PATCH 09/12] remove start year from test of chi_generate_tro_shell --- tests/testthat/test-chi_generate_tro_shell.R | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/testthat/test-chi_generate_tro_shell.R b/tests/testthat/test-chi_generate_tro_shell.R index 5bf4a43..0cd52f2 100644 --- a/tests/testthat/test-chi_generate_tro_shell.R +++ b/tests/testthat/test-chi_generate_tro_shell.R @@ -29,7 +29,6 @@ test_that("ingest template format", { trends, set_indicator_keys) DT <- chi_generate_tro_shell(ph.analysis_set = template, - start.year = 2021, end.year = 2022, year.span = 5, trend.span = 3, From 9f6febce8b9e54aa2a4ca1767d5e781433e4e613 Mon Sep 17 00:00:00 2001 From: Buie Date: Fri, 28 Mar 2025 16:21:01 -0700 Subject: [PATCH 10/12] updated chi yaml to 2025 standard, added a pre2025 yaml for validating against prior tables --- inst/ref/chi_qa.yaml | 3 -- inst/ref/chi_qa.yaml_pre2025.yaml | 62 +++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 3 deletions(-) create mode 100644 inst/ref/chi_qa.yaml_pre2025.yaml diff --git a/inst/ref/chi_qa.yaml b/inst/ref/chi_qa.yaml index cb0f5a7..71cdf8d 100644 --- a/inst/ref/chi_qa.yaml +++ b/inst/ref/chi_qa.yaml @@ -18,9 +18,6 @@ vars: upper_bound: float se: float rse: float - comparison_with_kc: varchar(255) - time_trends: varchar(255) - significance: varchar(255) caution: varchar(255) suppression: varchar(255) numerator: float diff --git a/inst/ref/chi_qa.yaml_pre2025.yaml b/inst/ref/chi_qa.yaml_pre2025.yaml new file mode 100644 index 0000000..cb0f5a7 --- /dev/null +++ b/inst/ref/chi_qa.yaml_pre2025.yaml @@ -0,0 +1,62 @@ +schema: APDE_WIP +table: +years: + +vars: + data_source: varchar(255) + indicator_key: varchar(255) + tab: varchar(255) + year: varchar(255) + cat1: varchar(255) + cat1_group: nvarchar(2000) + cat1_varname: varchar(255) + cat2: varchar(255) + cat2_group: nvarchar(2000) + cat2_varname: varchar(255) + result: float + lower_bound: float + upper_bound: float + se: float + rse: float + comparison_with_kc: varchar(255) + time_trends: varchar(255) + significance: varchar(255) + caution: varchar(255) + suppression: varchar(255) + numerator: float + denominator: float + chi: tinyint + source_date: date + run_date: date + +metadata: + data_source: varchar(255) + indicator_key: varchar(255) + result_type: varchar(255) + valence: varchar(255) + latest_year: int + latest_year_result: float + latest_year_kc_pop: int + latest_year_count: int + map_type: varchar(255) + unit: varchar(255) + valid_years: varchar(255) + chi: tinyint + run_date: date + +toc: + data_source: varchar(255) + indicator_key: varchar(255) + topic_chi: varchar(255) + topic_bsk: varchar(255) + topic_bskhs: varchar(255) + title_toc: varchar(255) + description: varchar(1000) + url_backup: varchar(500) + latest_year_bk: varchar(255) + latest_rate_bk: varchar(255) + toc_bsk: float + toc_bskhs: float + toc_chi: float + toc_cc: float + From 0a237d4acd28e5c37f6bca34180037e176777b1a Mon Sep 17 00:00:00 2001 From: Buie Date: Fri, 28 Mar 2025 16:55:12 -0700 Subject: [PATCH 11/12] added metadat option to get chi_get_cols --- R/chi_get_cols.R | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/R/chi_get_cols.R b/R/chi_get_cols.R index 583aa48..4f40acd 100644 --- a/R/chi_get_cols.R +++ b/R/chi_get_cols.R @@ -1,19 +1,25 @@ #' Get CHI variable column names #' +#' @description #' Returns a character vector of column names defined in the CHI YAML reference file. #' This helper function provides easy access to the standardized CHI variable names. #' -#' @return A character vector of column names +#' @param metadata returns metadata column names instead of primary data +#' +#' @return A character vector of column names for the chi data (Default) or metadata #' @importFrom yaml read_yaml #' @export #' #' @examples #' cols <- chi_get_cols() -chi_get_cols <- function() { +chi_get_cols <- function(metadata = FALSE) { chi.yaml.filepath <- system.file("ref", "chi_qa.yaml", package = "apde.chi.tools") if (chi.yaml.filepath == "") { stop("Could not find reference file chi_qa.yaml") } chi.yaml <- yaml::read_yaml(chi.yaml.filepath) + if(metadata){ + return(names(chi.yaml$metadata)) + } return(names(chi.yaml$vars)) } From 34b1a46e218a38bbd8988f01e20ae489df067622 Mon Sep 17 00:00:00 2001 From: Buie Date: Fri, 28 Mar 2025 16:57:57 -0700 Subject: [PATCH 12/12] updated for expanded chi_get_cols --- R/chi_qa_tro.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/chi_qa_tro.R b/R/chi_qa_tro.R index 17a1db1..4bb63f9 100644 --- a/R/chi_qa_tro.R +++ b/R/chi_qa_tro.R @@ -148,7 +148,7 @@ chi_qa_tro <- function(CHIestimates, } } - missing.var <- setdiff(names(CHImetadata), names(unlist(chi_get_yaml()$metadata))) + missing.var <- setdiff(names(CHImetadata), chi_get_cols(metadata = TRUE)) if(length(missing.var) > 0){ status <- 0 if(verbose){