Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions R/chi_drop_illogical_ages.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,13 @@ chi_drop_illogical_ages <- function(ph.data, agevar = 'chi_age') {

# Create a standardized version of the age group
ph.data[, (temp_catgroup) := data.table::fcase(
get(catgroup) == '<1', '0-0',
get(catgroup) == '<1' & grepl(' age$|^age$', get(catnum), ignore.case = T), '0-0',

grepl("<", get(catgroup)), gsub("<", "0-", get(catgroup)),
grepl("<", get(catgroup)) &grepl(' age$|^age$', get(catnum), ignore.case = T), gsub("<", "0-", get(catgroup)),

grepl("\\+", get(catgroup)), gsub("\\+", "-120", get(catgroup)),
grepl("\\+", get(catgroup))& grepl(' age$|^age$', get(catnum), ignore.case = T), gsub("\\+", "-120", get(catgroup)),

grepl('-', get(catgroup)), get(catgroup)
)]
grepl('-', get(catgroup))& grepl(' age$|^age$', get(catnum), ignore.case = T), as.character(get(catgroup)))]

# Extract min and max age
ph.data[, "min_age" := as.numeric(gsub("-.*", "", get(temp_catgroup)))]
Expand Down
63 changes: 30 additions & 33 deletions R/chi_generate_tro_shell.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
#' }
#'
#' @param ph.analysis_set name of data.table to parse
#' @param start.year the earliest year to be used for estimates
#' @param end.year the latest year to be used for aggregate estimates
#' @param year.span the number of years to be included in a single non-trend period
#' @param trend.span the number of years to be included in a single trend period
Expand Down Expand Up @@ -54,52 +53,50 @@
#' @export
#'
chi_generate_tro_shell <- function(ph.analysis_set,
start.year,
end.year,
year.span = NULL,
trend.span = NULL,
trend.periods = NULL){
end.year,
year.span = NULL,
trend.span = NULL,
trend.periods = NULL){

# Input validation
if (missing(ph.analysis_set)) stop("\n\U1F6D1 ph.analysis_set must be provided")
if (!is.data.frame(ph.analysis_set)) stop("\n\U1F6D1 ph.analysis_set must be a data.frame or data.table")

if (missing(start.year)) stop("\n\U1F6D1 start.year must be provided")
if (!is.numeric(start.year) || length(start.year) != 1) stop("\n\U1F6D1 start.year must be a single numeric value")
if (missing(ph.analysis_set)) stop("\n\U1F6D1 ph.analysis_set must be provided")
if (!is.data.frame(ph.analysis_set)) stop("\n\U1F6D1 ph.analysis_set must be a data.frame or data.table")
if (!("set" %in% names(ph.analysis_set)) | anyNA(ph.analysis_set$set)) {
stop("\n\u1F6D1 set number must be provided for all rows")
}
if (missing(end.year)) stop("\n\U1F6D1 end.year must be provided")
if (!is.numeric(end.year) || length(end.year) != 1) stop("\n\U1F6D1 end.year must be a single numeric value")

if (missing(end.year)) stop("\n\U1F6D1 end.year must be provided")
if (!is.numeric(end.year) || length(end.year) != 1) stop("\n\U1F6D1 end.year must be a single numeric value")
if (!is.null(year.span) && (!is.numeric(year.span) || length(year.span) != 1)) {
stop("\n\U1F6D1 year.span must be NULL or a single numeric value")
}

if (!is.null(year.span) && (!is.numeric(year.span) || length(year.span) != 1)) {
stop("\n\U1F6D1 year.span must be NULL or a single numeric value")
}
if (!is.null(trend.span) && (!is.numeric(trend.span) || length(trend.span) != 1)) {
stop("\n\U1F6D1 trend.span must be NULL or a single numeric value")
}

if (!is.null(trend.span) && (!is.numeric(trend.span) || length(trend.span) != 1)) {
stop("\n\U1F6D1 trend.span must be NULL or a single numeric value")
}
if (!is.null(trend.periods) && (!is.numeric(trend.periods) || length(trend.periods) != 1)) {
stop("\n\U1F6D1 trend.periods must be NULL or a single numeric value")
}

if (!is.null(trend.periods) && (!is.numeric(trend.periods) || length(trend.periods) != 1)) {
stop("\n\U1F6D1 trend.periods must be NULL or a single numeric value")
}

# Convert to data.table if needed
if (!is.data.table(ph.analysis_set)) setDT(ph.analysis_set)
# Convert to data.table if needed
if (!is.data.table(ph.analysis_set)) setDT(ph.analysis_set)

#parameterization checks
# parameterization checks
if("x" %in% ph.analysis_set$trends & (is.null(trend.span) | is.null(trend.periods))) {stop("you have indicated that a trends analysis is to be conducted, but have not indicated both the span and number of periods for this analysis.")}

#ph.analysis_set checks


#advisory messages
# advisory messages
if("x" %in% ph.analysis_set$trends) {message("Note: trends are applied backwards from end.year")}

# Race / ethnicity is a chronic headache with CHI. Need to remove rows for race4 & Ethnicity because should be Race/ethnicity
ph.analysis_set <- ph.analysis_set[!(cat1_varname == 'race4' & cat1 == 'Ethnicity')]

# apply the template generating function
# generate vector of sets
sets <- unique(ph.analysis_set$set)
template <- rbindlist(
lapply(X = seq(1, length(unique(ph.analysis_set$set))),
lapply(X = sets,
FUN = chi_process_nontrends, ph.analysis_set = ph.analysis_set))

# split trends from other tabs because processed for multiple years
Expand All @@ -117,9 +114,9 @@ chi_generate_tro_shell <- function(ph.analysis_set,
# add years to template (trends)
if(nrow(template.trends) > 0){
trend.years <- chi_process_trends(indicator_key = intersect(unique(template$indicator_key), unique(template.trends$indicator_key)),
trend.span = trend.span,
end.year = end.year,
trend.periods = trend.periods)
trend.span = trend.span,
end.year = end.year,
trend.periods = trend.periods)
template.trends <- merge(template.trends, trend.years, by = 'indicator_key', all = T, allow.cartesian = T)

# append trends template to main template
Expand Down
25 changes: 25 additions & 0 deletions R/chi_get_cols.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#' Get CHI variable column names
#'
#' @description
#' Returns a character vector of column names defined in the CHI YAML reference file.
#' This helper function provides easy access to the standardized CHI variable names.
#'
#' @param metadata returns metadata column names instead of primary data
#'
#' @return A character vector of column names for the chi data (Default) or metadata
#' @importFrom yaml read_yaml
#' @export
#'
#' @examples
#' cols <- chi_get_cols()
chi_get_cols <- function(metadata = FALSE) {
chi.yaml.filepath <- system.file("ref", "chi_qa.yaml", package = "apde.chi.tools")
if (chi.yaml.filepath == "") {
stop("Could not find reference file chi_qa.yaml")
}
chi.yaml <- yaml::read_yaml(chi.yaml.filepath)
if(metadata){
return(names(chi.yaml$metadata))
}
return(names(chi.yaml$vars))
}
20 changes: 0 additions & 20 deletions R/chi_get_yaml.R
Original file line number Diff line number Diff line change
@@ -1,23 +1,3 @@
#' Get CHI variable column names
#'
#' Returns a character vector of column names defined in the CHI YAML reference file.
#' This helper function provides easy access to the standardized CHI variable names.
#'
#' @return A character vector of column names
#' @importFrom yaml read_yaml
#' @export
#'
#' @examples
#' cols <- chi_get_cols()
chi_get_cols <- function() {
chi.yaml.filepath <- system.file("ref", "chi_qa.yaml", package = "apde.chi.tools")
if (chi.yaml.filepath == "") {
stop("Could not find reference file chi_qa.yaml")
}
chi.yaml <- yaml::read_yaml(chi.yaml.filepath)
return(names(chi.yaml$vars))
}

#' Get CHI YAML configuration
#'
#' Returns the complete CHI YAML configuration as a list.
Expand Down
2 changes: 1 addition & 1 deletion R/chi_qa_tro.R
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ chi_qa_tro <- function(CHIestimates,
}
}

missing.var <- setdiff(names(CHImetadata), names(unlist(chi_get_yaml()$metadata)))
missing.var <- setdiff(names(CHImetadata), chi_get_cols(metadata = TRUE))
if(length(missing.var) > 0){
status <- 0
if(verbose){
Expand Down
62 changes: 62 additions & 0 deletions inst/ref/chi_qa.yaml_pre2025.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
schema: APDE_WIP
table:
years:

vars:
data_source: varchar(255)
indicator_key: varchar(255)
tab: varchar(255)
year: varchar(255)
cat1: varchar(255)
cat1_group: nvarchar(2000)
cat1_varname: varchar(255)
cat2: varchar(255)
cat2_group: nvarchar(2000)
cat2_varname: varchar(255)
result: float
lower_bound: float
upper_bound: float
se: float
rse: float
comparison_with_kc: varchar(255)
time_trends: varchar(255)
significance: varchar(255)
caution: varchar(255)
suppression: varchar(255)
numerator: float
denominator: float
chi: tinyint
source_date: date
run_date: date

metadata:
data_source: varchar(255)
indicator_key: varchar(255)
result_type: varchar(255)
valence: varchar(255)
latest_year: int
latest_year_result: float
latest_year_kc_pop: int
latest_year_count: int
map_type: varchar(255)
unit: varchar(255)
valid_years: varchar(255)
chi: tinyint
run_date: date

toc:
data_source: varchar(255)
indicator_key: varchar(255)
topic_chi: varchar(255)
topic_bsk: varchar(255)
topic_bskhs: varchar(255)
title_toc: varchar(255)
description: varchar(1000)
url_backup: varchar(500)
latest_year_bk: varchar(255)
latest_rate_bk: varchar(255)
toc_bsk: float
toc_bskhs: float
toc_chi: float
toc_cc: float

3 changes: 0 additions & 3 deletions man/chi_generate_tro_shell.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/chi_get_cols.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion quarto_docs/Calculating_Prevalences.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ To analyze our data consistently, we need to generate a structured set of calcul

myinstructions <- chi_generate_tro_shell(
ph.analysis_set = analysis_sets,
start.year = latest_year-4, # earliest year to be used for estimates
end.year = latest_year, # latest year to be used for aggregate estimates
year.span = 5, # number of years included in a single period
trend.span = 3, # number of years included in a trend single period
Expand Down
5 changes: 2 additions & 3 deletions tests/testthat/test-chi_generate_tro_shell.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ test_that("chi_generate_tro_shell validates inputs correctly", {
test_data <- setup_test_data()

expect_error(chi_generate_tro_shell(), "ph.analysis_set must be provided")
expect_error(chi_generate_tro_shell(data.frame(), start.year = "2023"),
"start.year must be a single numeric value")
expect_error(chi_generate_tro_shell(data.frame()),
"set number must be provided for all rows")
})

test_that("ingest template format", {
Expand All @@ -29,7 +29,6 @@ test_that("ingest template format", {
trends,
set_indicator_keys)
DT <- chi_generate_tro_shell(ph.analysis_set = template,
start.year = 2021,
end.year = 2022,
year.span = 5,
trend.span = 3,
Expand Down