diff --git a/R/compare_cols.R b/R/compare_cols.R deleted file mode 100644 index 32202fdb..00000000 --- a/R/compare_cols.R +++ /dev/null @@ -1,38 +0,0 @@ -#' Compares contents of 2 vectors -#' -#' Function to compare contents of 2 vectors - used to summarize of which data columns are found in a given standard. Used in \code{detectStandard()} and \code{validateSettings()} -#' -#' @param data_cols A character vector with column names in the data frame -#' @param standard_cols A character vector with column names in the data standard -#' @return A list summarizing the comparison between \code{data_cols} and \code{standard_cols}. List has character vectors for "matched_columns", "extra_columns" and "missing_columns" parameters, and a boolean "match" parameter indicating that there are no missing columns. -#' -#' -#' @examples -#' #match == FALSE -#' safetyGraphics:::compare_cols(data_cols=c("a","b","c"), -#' standard_cols=c("d","e","f")) -#' -#' # match == TRUE -#' safetyGraphics:::compare_cols(names(adlbc), -#' safetyGraphics:::getRequiredColumns(standard="ADaM")) -#' @keywords internal - -compare_cols<-function(data_cols, standard_cols){ - compare_summary <- list() - compare_summary[["matched_columns"]]<-intersect(data_cols, standard_cols) - compare_summary[["extra_columns"]]<-setdiff(data_cols,standard_cols) - compare_summary[["missing_columns"]]<-setdiff(standard_cols,data_cols) - - #if there are no missing columns then call this a match - - if (length(compare_summary[["missing_columns"]])==0) { - compare_summary[["match"]] <- "Full" - } else if(length(compare_summary[["matched_columns"]])>0) { - compare_summary[["match"]] <- "Partial" - } else { - compare_summary[["match"]] <- "None" - } - - - return(compare_summary) -} diff --git a/R/detectStandard.R b/R/detectStandard.R index 936bbf41..feb3c7cc 100644 --- a/R/detectStandard.R +++ b/R/detectStandard.R @@ -1,37 +1,39 @@ #' Detect the data standard used for a data set #' -#' This function attempts to detect the data 
CDISC clinical standard used in a given R data frame. +#' This function attempts to detect the data CDISC clinical standard used in a given R data frame. #' #' This function compares the columns in the provided \code{"data"} with the required columns for a given data standard/domain combination. The function is designed to work with the SDTM and AdAM CDISC() standards for clinical trial data. Currently, only "labs" is the only domain supported. #' -#' @param data A data frame in which to detect the data standard +#' @param data A data frame in which to detect the data standard +#' @param includeFields specifies whether to check the data set for field level data in addition to columns. Default: \code{TRUE}. #' @param domain The data domain for the data set provided. Default: \code{"labs"}. -#' @return A list containing the matching \code{"standard"} ("ADaM", "SDTM" or "None") and a list of \code{"details"} describing each standard considered. #' -#' @examples +#' +#' @return A list containing the matching \code{"standard"} ("ADaM", "SDTM" or "None") and a list of \code{"details"} describing each standard considered. #' +#' +#' @examples +#' #' detectStandard(adlbc)[["standard"]] #AdAM #' detectStandard(iris)[["standard"]] #none -#' +#' #' \dontrun{ -#' detectStandard(adlbc,domain="AE") #throws error. AE domain not supported in this release. +#' detectStandard(adlbc,domain="AE") #throws error. AE domain not supported in this release. #' } -#' +#' #' @export -detectStandard <- function(data, domain="labs"){ +detectStandard <- function(data, includeFields=TRUE, domain="labs"){ stopifnot( domain=="labs", typeof(domain)=="character" ) - + + # Create placeholder list, with Standard = None. 
standard_list <- list() standard_list[["details"]] = list() - data_cols<-toupper(colnames(data)) - - # Check to see if data columns match the standards - standard_list[["details"]][["ADaM"]]<-compare_cols(data_cols,getRequiredColumns(standard="ADaM")) - standard_list[["details"]][["SDTM"]]<-compare_cols(data_cols,getRequiredColumns(standard="SDTM")) - + standard_list[["details"]][["ADaM"]]<-evaluateStandard(data,standard="ADaM", includeFields=includeFields, domain=domain) + standard_list[["details"]][["SDTM"]]<-evaluateStandard(data,standard="SDTM", includeFields=includeFields, domain=domain) + # Determine the final standard if(standard_list[["details"]][["SDTM"]][["match"]] == "Full"){ standard_list[["standard"]]<- "SDTM" @@ -40,14 +42,14 @@ detectStandard <- function(data, domain="labs"){ } else if(standard_list[["details"]][["SDTM"]][["match"]] == "Partial" | standard_list[["details"]][["ADaM"]][["match"]] == "Partial"){ standard_list[["standard"]] <- ifelse( - length(standard_list[["details"]][["ADaM"]][["matched_columns"]]) > - length(standard_list[["details"]][["SDTM"]][["matched_columns"]]), + length(standard_list[["details"]][["ADaM"]][["valid_count"]]) > + length(standard_list[["details"]][["SDTM"]][["valid_count"]]), "ADaM" , "SDTM" #SDTM if they are equal ) - + } else { standard_list[["standard"]]<-"None" } - + return(standard_list) -} \ No newline at end of file +} diff --git a/R/evaluateStandard.R b/R/evaluateStandard.R new file mode 100644 index 00000000..ae820df5 --- /dev/null +++ b/R/evaluateStandard.R @@ -0,0 +1,74 @@ +#' Evaluate a data set against a data standard +#' +#' Determines whether the required data elements in a data standard are found in a given data frame +#' +#' @param data data.frame to evaluate +#' @param standard standard to evaluate +#' @param includeFields should field level data be evaluated? +#' @param domain data domain. "labs" only for now. 
+#' +#' @return a list describing to what degree the data set matches the data standard. The "match" property describes compliance with the standard as "Full", "Partial" or "None". The "checks" property is a table (tibble) of the data elements expected for the standard and whether they are "valid" in the given data set. "valid_count" and "invalid_count" give the number of checks that passed and failed, respectively. +#' +#' @examples +#' safetyGraphics:::evaluateStandard(data=adlbc, standard="adam") # match is "Full" +#' safetyGraphics:::evaluateStandard(data=adlbc, standard="sdtm") # match is "Partial" +#' +#' @import dplyr +#' @importFrom purrr map +#' @importFrom rlang .data +#' +#' @keywords internal + + +evaluateStandard <- function(data, standard, includeFields=TRUE, domain="labs"){ + + stopifnot( + is.data.frame(data), + is.character(standard), + is.logical(includeFields), + is.character(domain), + tolower(standard) %in% c("adam","sdtm") + ) + + standard<-tolower(standard) + + compare_summary<-list() + compare_summary[["standard"]]<-standard + + # Get metadata for settings using the specified standard and see if required data elements are found + standardChecks <- getSettingsMetadata(cols=c("text_key", "column_mapping", "field_mapping", "field_column_key", "setting_required","standard_val",standard)) %>% + rename("standard_val"=standard) %>% + filter(.data$column_mapping == TRUE || .data$field_mapping ==TRUE) %>% + filter(.data$setting_required==TRUE) %>% + mutate(type = ifelse(.data$column_mapping, "column", "field")) %>% + rowwise %>% + mutate(field_column_name = ifelse(.data$field_mapping, getSettingsMetadata(cols=standard, text_keys=.data$field_column_key),"")) %>% + mutate(valid = ifelse(.data$column_mapping, + hasColumn(data=data, columnName=.data$standard_val), + hasField(data=data, columnName=.data$field_column_name, fieldValue=.data$standard_val) + )) %>% + select(.data$text_key, .data$standard_val, .data$type, .data$valid) + + # filter out the field level checks if includeFields is 
false + if(!includeFields){ + standardChecks <- standardChecks %>% filter(.data$type != "field") + } + + # compare_summary[["checks"]] <- split(standardChecks, seq(nrow(standardChecks)))%>%map(~as.list(.)) #coerce to list of lists? + compare_summary[["checks"]] <- standardChecks #or just keep the tibble ... + + # count valid/invalid data elements + compare_summary[["valid_count"]] <- standardChecks %>% filter(.data$valid) %>% nrow() + compare_summary[["invalid_count"]] <- standardChecks %>% filter(!.data$valid) %>% nrow() + + + if (compare_summary[["invalid_count"]]==0) { + compare_summary[["match"]] <- "Full" + } else if(compare_summary[["valid_count"]]>0) { + compare_summary[["match"]] <- "Partial" + } else { + compare_summary[["match"]] <- "None" + } + + return(compare_summary) +} \ No newline at end of file diff --git a/R/hasColumn.R b/R/hasColumn.R new file mode 100644 index 00000000..d64fc52d --- /dev/null +++ b/R/hasColumn.R @@ -0,0 +1,23 @@ +#' Check whether a column is found in a data set +#' +#' Checks whether a specified column is found in a specified data set +#' +#' @param columnName The column to look for. +#' @param data the data.frame to search. +#' @return logical scalar. TRUE if the column is found. FALSE otherwise +#' +#' @examples +#' safetyGraphics:::hasColumn(columnName="PARAM",data=adlbc) #TRUE +#' safetyGraphics:::hasColumn(columnName="Not_a_column",data=adlbc) #FALSE +#' +#' @keywords internal + +hasColumn <- function(columnName, data){ + stopifnot( + typeof(columnName)=="character", + length(columnName)==1, + is.data.frame(data) + ) + + return(toupper(columnName) %in% toupper(colnames(data))) +} diff --git a/R/hasField.R b/R/hasField.R new file mode 100644 index 00000000..ab038568 --- /dev/null +++ b/R/hasField.R @@ -0,0 +1,33 @@ +#' Check whether a specified field value is found in a data set +#' +#' This checks whether a specific value is found in a specified column in a specified data set +#' +#' @param fieldValue A value to check for. 
+#' @param columnName The column to check. +#' @param data the data.frame to search. +#' @return logical scalar. TRUE if \code{fieldValue} is found in the specified column. FALSE otherwise +#' +#' @examples +#' safetyGraphics:::hasField(fieldValue="Bilirubin (umol/L)",columnName="PARAM",data=adlbc) #TRUE +#' safetyGraphics:::hasField(fieldValue="Not_a_real_value",columnName="",data=adlbc) #FALSE +#' +#' @keywords internal + +hasField<- function(fieldValue, columnName, data){ + stopifnot( + length(fieldValue)==1, + typeof(columnName)=="character", + length(columnName)==1, + is.data.frame(data) + ) + + columnFound <- hasColumn(columnName=columnName, data=data) + if(columnFound){ + validFields <- unique(data[[columnName]]) + } else{ + validFields <- c() + } + + validFields <- unique(data[[columnName]]) + return(fieldValue %in% validFields) +} diff --git a/R/settingsMetadata.R b/R/settingsMetadata.R index 5f395407..3e07bf21 100644 --- a/R/settingsMetadata.R +++ b/R/settingsMetadata.R @@ -14,6 +14,7 @@ #' \item{column_type}{Expected type for the data column values. 
Should be "character","logical" or "numeric"} #' \item{column_required}{Flag indicating whether the associated data column should be considered required} #' \item{field_mapping}{Flag indicating whether the setting corresponds to a field-level mapping in the data} +#' \item{field_column_key}{Key for the column that provides options for the field-level mapping in the data} #' \item{adam}{Settings values for the ADaM standard} #' \item{sdtm}{Settings values for the SDTM standard} #' } diff --git a/data-raw/settingsMetadata.csv b/data-raw/settingsMetadata.csv index 3ac67a28..08f04869 100644 --- a/data-raw/settingsMetadata.csv +++ b/data-raw/settingsMetadata.csv @@ -1,26 +1,26 @@ -chart_edish,text_key,label,description,setting_type,setting_required,column_mapping,column_type,column_required,field_mapping,adam,sdtm -TRUE,id_col,ID column,Unique subject identifier variable name.,character,TRUE,TRUE,character,TRUE,FALSE,USUBJID,USUBJID -TRUE,value_col,Value column,Lab result variable name.,character,TRUE,TRUE,numeric,TRUE,FALSE,AVAL,STRESN -TRUE,measure_col,Measure column,Lab measure variable name,character,TRUE,TRUE,character,TRUE,FALSE,PARAM,TEST -TRUE,normal_col_low,Lower Limit of Normal column,Lower limit of normal variable name,character,TRUE,TRUE,numeric,TRUE,FALSE,A1LO,STNRLO -TRUE,normal_col_high,Upper Limit of Normal column,Upper limit of normal variable name,character,TRUE,TRUE,numeric,TRUE,FALSE,A1HI,STNRHI -TRUE,studyday_col,Study Day column,Visit day variable name,character,TRUE,TRUE,numeric,TRUE,FALSE,ADY,DY -TRUE,visit_col,Visit column,Visit variable name,character,FALSE,TRUE,character,FALSE,FALSE,VISIT,VISIT -TRUE,visitn_col,Visit Number column,Visit number variable name,character,FALSE,TRUE,numeric,FALSE,FALSE,VISITNUM,VISITNUM -TRUE,filters,Filters columns,"An optional list of specifications for filters. 
Each filter is a nested, named list (containing the filter value column: 'value_col' and associated label: 'label') within the larger list.",vector,FALSE,TRUE,NA,FALSE,FALSE,, -TRUE,group_cols,Group columns,"An optional list of specifications for grouping columns. Each group column is a nested, named list (containing the group variable column: 'value_col' and associated label: 'label') within the larger list. ",vector,FALSE,TRUE,NA,FALSE,FALSE,, -TRUE,measure_values--ALT,Alanine Aminotransferase value,Value used for Alanine Aminotransferase in the specified measure column,character,FALSE,FALSE,NA,NA,TRUE,Alanine Aminotransferase (U/L),"Aminotransferase, alanine (ALT)" -TRUE,measure_values--AST,Aspartate Aminotransferase value,Value used for Aspartate Aminotransferase in the specified measure column,character,FALSE,FALSE,NA,NA,TRUE,Aspartate Aminotransferase (U/L),"Aminotransferase, aspartate (AST)" -TRUE,measure_values--TB,Total Bilirubin value,Value used for Total Bilirubin in the specified measure column,character,FALSE,FALSE,NA,NA,TRUE,Bilirubin (umol/L),Total Bilirubin -TRUE,measure_values--ALP,Alkaline Phosphatase value,Value used for Alkaline Phosphatase in the specified measure column,character,FALSE,FALSE,NA,NA,TRUE,Alkaline Phosphatase (U/L),Alkaline phosphatase (ALP) -TRUE,baseline--value_col,Baseline column,An optional list defining which column represent the baseline visit(s) of the study.,character,FALSE,TRUE,NA,FALSE,FALSE,, -TRUE,baseline--values,Baseline values,An optional list defining which values (one or more) represent the baseline visit(s) of the study.,vector,FALSE,FALSE,NA,NA,TRUE,, -TRUE,analysisFlag--value_col,Analysis Flag column,An optional list defining which column should be used in eDish and mDish analyses.,character,FALSE,TRUE,NA,FALSE,FALSE,, -TRUE,analysisFlag--values,Analysis Flag values,An optional list defining which values should be used in eDish and mDish analyses.,vector,FALSE,FALSE,NA,NA,TRUE,, -TRUE,x_options,X axis 
options,"Specifies variable options for the x-axis using the key values from measure_values (e.g. 'ALT'). When multiple options are specified, a control allowing the user to interactively change the x variable is shown. ",vector,FALSE,FALSE,NA,NA,FALSE,, -TRUE,y_options,Y axis options,"Specifies variable options for the y-axis using the key values from measure_values (e.g. 'TB'). When multiple options are specified, a control allowing the user to interactively change the y variable is shown. ",vector,FALSE,FALSE,NA,NA,FALSE,, -TRUE,visit_window,Default Visit Window in Days ,Default visit window used to highlight eDish points where x and y measures occurred within the specified number of days. Editable by user after render. ,numeric,FALSE,FALSE,NA,NA,FALSE,, -TRUE,r_ratio_filter,Show R Ratio Filter?,Specifies whether the R Ratio filter should be shown. R ratio is defined as: (ALT value/ULN for ALT) / (ALP value/ULN for ALP).,logical,FALSE,FALSE,NA,NA,FALSE,, -TRUE,r_ratio_cut,Default R Ratio Cut,Default cut point for R Ratio filter. Ignored when r_ratio_filter = FALSE. User can update this setting via the UI when r_ratio_filter = TRUE. ,numeric,FALSE,FALSE,NA,NA,FALSE,, -TRUE,showTitle,Show Chart Title? ,Specifies whether the title should be drawn above the controls.,logical,FALSE,FALSE,NA,NA,FALSE,, -TRUE,warningText,Warning text,"Informational text to be displayed near the top of the controls (beneath the title, if any). No warning is displayed if warningText = ''. 
",Character,FALSE,FALSE,NA,NA,FALSE,, +chart_edish,text_key,label,description,setting_type,setting_required,column_mapping,column_type,column_required,field_mapping,field_column_key,adam,sdtm +TRUE,id_col,ID column,Unique subject identifier variable name.,character,TRUE,TRUE,character,TRUE,FALSE,,USUBJID,USUBJID +TRUE,value_col,Value column,Lab result variable name.,character,TRUE,TRUE,numeric,TRUE,FALSE,,AVAL,STRESN +TRUE,measure_col,Measure column,Lab measure variable name,character,TRUE,TRUE,character,TRUE,FALSE,,PARAM,TEST +TRUE,normal_col_low,Lower Limit of Normal column,Lower limit of normal variable name,character,TRUE,TRUE,numeric,TRUE,FALSE,,A1LO,STNRLO +TRUE,normal_col_high,Upper Limit of Normal column,Upper limit of normal variable name,character,TRUE,TRUE,numeric,TRUE,FALSE,,A1HI,STNRHI +TRUE,studyday_col,Study Day column,Visit day variable name,character,TRUE,TRUE,numeric,TRUE,FALSE,,ADY,DY +TRUE,visit_col,Visit column,Visit variable name,character,FALSE,TRUE,character,FALSE,FALSE,,VISIT,VISIT +TRUE,visitn_col,Visit Number column,Visit number variable name,character,FALSE,TRUE,numeric,FALSE,FALSE,,VISITNUM,VISITNUM +TRUE,filters,Filters columns,"An optional list of specifications for filters. Each filter is a nested, named list (containing the filter value column: 'value_col' and associated label: 'label') within the larger list.",vector,FALSE,TRUE,NA,FALSE,FALSE,,, +TRUE,group_cols,Group columns,"An optional list of specifications for grouping columns. Each group column is a nested, named list (containing the group variable column: 'value_col' and associated label: 'label') within the larger list. 
",vector,FALSE,TRUE,NA,FALSE,FALSE,,, +TRUE,measure_values--ALT,Alanine Aminotransferase value,Value used for Alanine Aminotransferase in the specified measure column,character,TRUE,FALSE,NA,NA,TRUE,measure_col,Alanine Aminotransferase (U/L),"Aminotransferase, alanine (ALT)" +TRUE,measure_values--AST,Aspartate Aminotransferase value,Value used for Aspartate Aminotransferase in the specified measure column,character,TRUE,FALSE,NA,NA,TRUE,measure_col,Aspartate Aminotransferase (U/L),"Aminotransferase, aspartate (AST)" +TRUE,measure_values--TB,Total Bilirubin value,Value used for Total Bilirubin in the specified measure column,character,TRUE,FALSE,NA,NA,TRUE,measure_col,Bilirubin (umol/L),Total Bilirubin +TRUE,measure_values--ALP,Alkaline Phosphatase value,Value used for Alkaline Phosphatase in the specified measure column,character,TRUE,FALSE,NA,NA,TRUE,measure_col,Alkaline Phosphatase (U/L),Alkaline phosphatase (ALP) +TRUE,baseline--value_col,Baseline column,An optional list defining which column represent the baseline visit(s) of the study.,character,FALSE,TRUE,NA,FALSE,FALSE,,, +TRUE,baseline--values,Baseline values,An optional list defining which values (one or more) represent the baseline visit(s) of the study.,vector,FALSE,FALSE,NA,NA,TRUE,baseline--value_col,, +TRUE,analysisFlag--value_col,Analysis Flag column,An optional list defining which column should be used in eDish and mDish analyses.,character,FALSE,TRUE,NA,FALSE,FALSE,,, +TRUE,analysisFlag--values,Analysis Flag values,An optional list defining which values should be used in eDish and mDish analyses.,vector,FALSE,FALSE,NA,NA,TRUE,analysisFlag--value_col,, +TRUE,x_options,X axis options,"Specifies variable options for the x-axis using the key values from measure_values (e.g. 'ALT'). When multiple options are specified, a control allowing the user to interactively change the x variable is shown. 
",vector,FALSE,FALSE,NA,NA,FALSE,,, +TRUE,y_options,Y axis options,"Specifies variable options for the y-axis using the key values from measure_values (e.g. 'TB'). When multiple options are specified, a control allowing the user to interactively change the y variable is shown. ",vector,FALSE,FALSE,NA,NA,FALSE,,, +TRUE,visit_window,Default Visit Window in Days ,Default visit window used to highlight eDish points where x and y measures occurred within the specified number of days. Editable by user after render. ,numeric,FALSE,FALSE,NA,NA,FALSE,,, +TRUE,r_ratio_filter,Show R Ratio Filter?,Specifies whether the R Ratio filter should be shown. R ratio is defined as: (ALT value/ULN for ALT) / (ALP value/ULN for ALP).,logical,FALSE,FALSE,NA,NA,FALSE,,, +TRUE,r_ratio_cut,Default R Ratio Cut,Default cut point for R Ratio filter. Ignored when r_ratio_filter = FALSE. User can update this setting via the UI when r_ratio_filter = TRUE. ,numeric,FALSE,FALSE,NA,NA,FALSE,,, +TRUE,showTitle,Show Chart Title? ,Specifies whether the title should be drawn above the controls.,logical,FALSE,FALSE,NA,NA,FALSE,,, +TRUE,warningText,Warning text,"Informational text to be displayed near the top of the controls (beneath the title, if any). No warning is displayed if warningText = ''. 
",Character,FALSE,FALSE,NA,NA,FALSE,,, \ No newline at end of file diff --git a/data/adlbc.rda b/data/adlbc.rda index aea28611..aa28c2db 100644 Binary files a/data/adlbc.rda and b/data/adlbc.rda differ diff --git a/data/settingsMetadata.rda b/data/settingsMetadata.rda index 82ebf9c0..559f7135 100644 Binary files a/data/settingsMetadata.rda and b/data/settingsMetadata.rda differ diff --git a/man/compare_cols.Rd b/man/compare_cols.Rd deleted file mode 100644 index a5a67420..00000000 --- a/man/compare_cols.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/compare_cols.R -\name{compare_cols} -\alias{compare_cols} -\title{Compares contents of 2 vectors} -\usage{ -compare_cols(data_cols, standard_cols) -} -\arguments{ -\item{data_cols}{A character vector with column names in the data frame} - -\item{standard_cols}{A character vector with column names in the data standard} -} -\value{ -A list summarizing the comparison between \code{data_cols} and \code{standard_cols}. List has character vectors for "matched_columns", "extra_columns" and "missing_columns" parameters, and a boolean "match" parameter indicating that there are no missing columns. -} -\description{ -Function to compare contents of 2 vectors - used to summarize of which data columns are found in a given standard. 
Used in \code{detectStandard()} and \code{validateSettings()} -} -\examples{ -#match == FALSE -safetyGraphics:::compare_cols(data_cols=c("a","b","c"), - standard_cols=c("d","e","f")) - -# match == TRUE -safetyGraphics:::compare_cols(names(adlbc), - safetyGraphics:::getRequiredColumns(standard="ADaM")) -} -\keyword{internal} diff --git a/man/detectStandard.Rd b/man/detectStandard.Rd index 7eacb633..bfae29b5 100644 --- a/man/detectStandard.Rd +++ b/man/detectStandard.Rd @@ -4,11 +4,13 @@ \alias{detectStandard} \title{Detect the data standard used for a data set} \usage{ -detectStandard(data, domain = "labs") +detectStandard(data, includeFields = TRUE, domain = "labs") } \arguments{ \item{data}{A data frame in which to detect the data standard} +\item{includeFields}{specifies whether to check the data set for field level data in addition to columns. Default: \code{TRUE}.} + \item{domain}{The data domain for the data set provided. Default: \code{"labs"}.} } \value{ @@ -21,11 +23,12 @@ This function attempts to detect the data CDISC clinical standard used in a give This function compares the columns in the provided \code{"data"} with the required columns for a given data standard/domain combination. The function is designed to work with the SDTM and AdAM CDISC() standards for clinical trial data. Currently, only "labs" is the only domain supported. } \examples{ + detectStandard(adlbc)[["standard"]] #AdAM detectStandard(iris)[["standard"]] #none \dontrun{ - detectStandard(adlbc,domain="AE") #throws error. AE domain not supported in this release. + detectStandard(adlbc,domain="AE") #throws error. AE domain not supported in this release. 
} } diff --git a/man/evaluateStandard.Rd b/man/evaluateStandard.Rd new file mode 100644 index 00000000..ac1a38ab --- /dev/null +++ b/man/evaluateStandard.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/evaluateStandard.R +\name{evaluateStandard} +\alias{evaluateStandard} +\title{Evaluate a data set against a data standard} +\usage{ +evaluateStandard(data, standard, includeFields = TRUE, domain = "labs") +} +\arguments{ +\item{data}{data.frame to evaluate} + +\item{standard}{standard to evaluate} + +\item{includeFields}{should field level data be evaluated?} + +\item{domain}{data domain. "labs" only for now.} +} +\value{ +a list describing to what degree the data set matches the data standard. The "match" property describes compliance with the standard as "Full", "Partial" or "None". The "checks" property is a list of the data elements expected for the standard and whether they are "valid" in the given data set. "valid_checks" and "invalid_checks" provide counts of the specified checks. +} +\description{ +Determines whether the required data elements in a data standard are found in a given data frame +} +\examples{ +safetyGraphics:::evaluateStandard(data=adlbc, standard="adam") # Match is TRUE +safetyGraphics:::evaluateStandard(data=adlbc, standard="sdtm") # Match is FALSE + +} +\keyword{internal} diff --git a/man/hasColumn.Rd b/man/hasColumn.Rd new file mode 100644 index 00000000..2373cecc --- /dev/null +++ b/man/hasColumn.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/hasColumn.R +\name{hasColumn} +\alias{hasColumn} +\title{Check whether a column is found in a data set} +\usage{ +hasColumn(columnName, data) +} +\arguments{ +\item{columnName}{The column to look for.} + +\item{data}{the data.frame to search.} +} +\value{ +logical scalar. TRUE if the column is found. 
FALSE otherwise +} +\description{ +Checks whether a specified column is found in a specified data set +} +\examples{ +safetyGraphics:::hasColumn(columnName="PARAM",data=adlbc) #TRUE +safetyGraphics:::hasColumn(columnName="Not_a_column",data=adlbc) #FALSE + +} +\keyword{internal} diff --git a/man/hasField.Rd b/man/hasField.Rd new file mode 100644 index 00000000..5ce1103a --- /dev/null +++ b/man/hasField.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/hasField.R +\name{hasField} +\alias{hasField} +\title{Check whether a specified field value is found in a data set} +\usage{ +hasField(fieldValue, columnName, data) +} +\arguments{ +\item{fieldValue}{A value to check for.} + +\item{columnName}{The column to check.} + +\item{data}{the data.frame to search.} +} +\value{ +logical scalar. TRUE if field_value is found. FALSE otherwise +} +\description{ +This checks whether a specific value is found in a specified column in a specified data set +} +\examples{ +safetyGraphics:::hasField(fieldValue="Bilirubin (umol/L)",columnName="PARAM",data=adlbc) #TRUE +safetyGraphics:::hasField(fieldValue="Not_a_real_value",columnName="",data=adlbc) #FALSE + +} +\keyword{internal} diff --git a/man/settingsMetadata.Rd b/man/settingsMetadata.Rd index 0f4678a8..00355fd0 100644 --- a/man/settingsMetadata.Rd +++ b/man/settingsMetadata.Rd @@ -16,6 +16,7 @@ \item{column_type}{Expected type for the data column values. 
Should be "character","logical" or "numeric"} \item{column_required}{Flag indicating whether the associated data column should be considered required} \item{field_mapping}{Flag indicating whether the setting corresponds to a field-level mapping in the data} + \item{field_column_key}{Key for the column that provides options for the field-level mapping in the data} \item{adam}{Settings values for the ADaM standard} \item{sdtm}{Settings values for the SDTM standard} }} diff --git a/tests/testthat/test_compareStandard.R b/tests/testthat/test_compareStandard.R new file mode 100644 index 00000000..e69de29b diff --git a/tests/testthat/test_detectStandard.R b/tests/testthat/test_detectStandard.R index 2d41343a..687e71c7 100644 --- a/tests/testthat/test_detectStandard.R +++ b/tests/testthat/test_detectStandard.R @@ -19,22 +19,24 @@ test_that("correct standards are identified",{ expect_equal(detectStandard(adlbc)[["details"]][["ADaM"]][["match"]], "Full") expect_equal(detectStandard(adlbc)[["details"]][["SDTM"]][["match"]], "Partial") - adam_test_data<-data.frame(USUBJID="001",AVAL=10,PARAM="HDL",VISIT="Visit 1",VISITNUM=1,ADY=0,A1LO=0,A1HI=20) + adam_params <- c("Alanine Aminotransferase (U/L)","Aspartate Aminotransferase (U/L)","Bilirubin (umol/L)","Alkaline Phosphatase (U/L)") + adam_test_data<-data.frame(USUBJID="001",AVAL=10,PARAM=adam_params, VISIT="Visit 1",VISITNUM=1,ADY=0,A1LO=0,A1HI=20) expect_equal(detectStandard(adam_test_data)[["standard"]],"ADaM") expect_equal(detectStandard(adam_test_data)[["details"]][["ADaM"]][["match"]], "Full") expect_equal(detectStandard(adam_test_data)[["details"]][["SDTM"]][["match"]], "Partial") - sdtm_test_data<-data.frame(USUBJID="001",STRESN=10,TEST="HDL",VISIT="Visit 1",VISITNUM=1,DY=0,STNRLO=0,STNRHI=20) + sdtm_params<-c("Aminotransferase, alanine (ALT)","Aminotransferase, aspartate (AST)","Total Bilirubin","Alkaline phosphatase (ALP)") + sdtm_test_data<-data.frame(USUBJID="001",STRESN=10,TEST=sdtm_params,VISIT="Visit 
1",VISITNUM=1,DY=0,STNRLO=0,STNRHI=20) expect_equal(detectStandard(sdtm_test_data)[["standard"]],"SDTM") expect_equal(detectStandard(sdtm_test_data)[["details"]][["ADaM"]][["match"]], "Partial") expect_equal(detectStandard(sdtm_test_data)[["details"]][["SDTM"]][["match"]], "Full") - empty_test_data<-data.frame() + empty_test_data<-data.frame("") expect_equal(detectStandard(empty_test_data)[["standard"]],"None") expect_equal(detectStandard(empty_test_data)[["details"]][["ADaM"]][["match"]], "None") expect_equal(detectStandard(empty_test_data)[["details"]][["SDTM"]][["match"]], "None") - case_sensitive_test_data<-data.frame(usubjid="001",AVAL=10,PARAM="HDL",VISIT="Visit 1",VISITNUM=1,ADY=0,A1LO=0,A1HI=20) + case_sensitive_test_data<-data.frame(usubjid="001",AVAL=10,PARAM=adam_params, VISIT="Visit 1",VISITNUM=1,ADY=0,A1LO=0,A1HI=20) expect_equal(detectStandard(case_sensitive_test_data)[["standard"]],"ADaM") expect_equal(detectStandard(case_sensitive_test_data)[["details"]][["ADaM"]][["match"]], "Full") expect_equal(detectStandard(case_sensitive_test_data)[["details"]][["SDTM"]][["match"]], "Partial") diff --git a/tests/testthat/test_evaluateStandard.R b/tests/testthat/test_evaluateStandard.R new file mode 100644 index 00000000..72dd5ec8 --- /dev/null +++ b/tests/testthat/test_evaluateStandard.R @@ -0,0 +1,47 @@ +context("Tests for the evaluateStandard() function") +library(safetyGraphics) + +test_that("basic test cases evaluate as expected",{ + expect_equal(evaluateStandard(data=adlbc, standard="adam")[["match"]],"Full") + expect_equal(evaluateStandard(data=adlbc, standard="sdtm")[["match"]],"Partial") + expect_equal(evaluateStandard(data=data.frame(), standard="sdtm")[["match"]],"None") +}) + +test_that("a list with the expected properties and structure is returned",{ + a<- evaluateStandard(data=data.frame(),standard="adam") + + expect_is(a,"list") + expect_named(a,c('standard', 'checks', 'valid_count', 'invalid_count', 'match')) + 
expect_is(a[["standard"]],"character") + expect_is(a[["match"]],"character") + expect_is(a[["checks"]],"tbl") + expect_is(a[["valid_count"]],"integer") + expect_is(a[["invalid_count"]],"integer") +}) + +test_that("expected number of checks (in)valid",{ + expect_equal(evaluateStandard(data=adlbc, standard="sdtm")[["valid_count"]],1) + expect_equal(evaluateStandard(data=adlbc, standard="sdtm")[["invalid_count"]],9) + + adlbc_edit <- adlbc + adlbc_edit$TEST <- adlbc_edit$PARAM + a<-evaluateStandard(data=adlbc_edit, standard="sdtm") + expect_equal(a[["valid_count"]],2) + expect_equal(a[["invalid_count"]],8) + expect_true(a[["checks"]]%>%filter(text_key=="measure_col")%>%select(valid)%>%unlist) +}) + + +test_that("field level data is ignored when useFields=false",{ + noFields<-evaluateStandard(data=adlbc, standard="adam", includeFields=FALSE) + expect_equal(noFields[["match"]],"Full") + expect_equal(noFields[["valid_count"]],6) +}) + +test_that("invalid options throw errors",{ + expect_error(evaluateStandard(data=list(a=1,b=2), standard="sdtm")) + expect_error(evaluateStandard(data="notadataframe", standard="sdtm")) + expect_error(evaluateStandard(data=adlbc, standard=123)) + expect_error(evaluateStandard(data=adlbc, standard="notarealstandard")) + expect_error(evaluateStandard(data=adlbc, standard="adam", includeFields="yesPlease")) +}) \ No newline at end of file diff --git a/tests/testthat/test_getRequiredSettings.R b/tests/testthat/test_getRequiredSettings.R index 1bca5f5a..6e4604e8 100644 --- a/tests/testthat/test_getRequiredSettings.R +++ b/tests/testthat/test_getRequiredSettings.R @@ -2,13 +2,17 @@ context("Tests for the getRequiredSettings() function") library(safetyGraphics) library(testthat) -defaultRequiredCols <- list( +defaultRequiredSettings <- list( list("id_col"), list("value_col"), list("measure_col"), list("normal_col_low"), list("normal_col_high"), - list("studyday_col") + list("studyday_col"), + list("measure_values","ALT"), + 
list("measure_values","AST"), + list("measure_values","TB"), + list("measure_values","ALP") ) @@ -30,7 +34,7 @@ customMetadata<- data.frame( test_that("default function call works as expected",{ a<- safetyGraphics::getRequiredSettings() expect_is(a,"list") - expect_equal(a,defaultRequiredCols) + expect_equal(a,defaultRequiredSettings) }) test_that("options work as expected",{ diff --git a/tests/testthat/test_getSettingsMetadata.R b/tests/testthat/test_getSettingsMetadata.R index 4cad3e7a..1b43492e 100644 --- a/tests/testthat/test_getSettingsMetadata.R +++ b/tests/testthat/test_getSettingsMetadata.R @@ -135,5 +135,5 @@ test_that("filter_expr parameters works as expected",{ ) expect_equal(safetyGraphics:::getSettingsMetadata(filter_expr=text_key=="id_col",cols="description"),"Unique subject identifier variable name.") expect_length(safetyGraphics:::getSettingsMetadata(filter_expr=column_type=="numeric",cols="text_key",chart="edish"),5) - expect_length(safetyGraphics:::getSettingsMetadata(filter_expr=setting_required,cols="text_key",chart="edish"),6) + expect_length(safetyGraphics:::getSettingsMetadata(filter_expr=setting_required,cols="text_key",chart="edish"),10) }) \ No newline at end of file diff --git a/tests/testthat/test_hasColumn.R b/tests/testthat/test_hasColumn.R new file mode 100644 index 00000000..5145492b --- /dev/null +++ b/tests/testthat/test_hasColumn.R @@ -0,0 +1,21 @@ +context("Tests for the hasColumn() function") +library(safetyGraphics) + +test_that("columns are found when expected",{ + # simple test case works + expect_true(hasColumn(columnName="PARAM",data=adlbc)) + expect_true(hasColumn(columnName="SUBJID",data=adlbc)) + + #doesn't care about case + expect_true(hasColumn(columnName="param",data=adlbc)) + expect_true(hasColumn(columnName="SuBjId",data=adlbc)) + + # returns false when columnName isn't found in the data + expect_false(hasColumn(columnName="PARAMETER",data=adlbc)) + 
expect_false(hasColumn(columnName="SUBJID2",data=adlbc)) + + # fails with invalid parameters + expect_error(hasColumn(columnName=123,data=adlbc)) + expect_error(hasColumn(columnName=c("PARAM","SUBJID"),data=adlbc)) + expect_error(hasColumn(columnName="PARAM",data=list(adlbc))) +}) diff --git a/tests/testthat/test_hasField.R b/tests/testthat/test_hasField.R new file mode 100644 index 00000000..a1e9197e --- /dev/null +++ b/tests/testthat/test_hasField.R @@ -0,0 +1,26 @@ +context("Tests for the hasField() function") +library(safetyGraphics) + +test_that("fields are found when expected",{ + # simple test case works + expect_true(hasField(fieldValue="Bilirubin (umol/L)",columnName="PARAM",data=adlbc)) + + # flexible regarding type + expect_true(hasField(fieldValue=1015,columnName="SUBJID",data=adlbc)) + expect_true(hasField(fieldValue="1015",columnName="SUBJID",data=adlbc)) + expect_true(hasField(fieldValue=list(1015),columnName="SUBJID",data=adlbc)) + expect_true(hasField(fieldValue=list(1015),columnName="SUBJID",data=adlbc)) + + #but not regarding length + expect_error(hasField(fieldValue=list(1015,1023),columnName="SUBJID",data=adlbc)) + expect_error(hasField(fieldValue=c("a","b"),columnName="PARAM",data=adlbc)) + + + # returns false when fieldValue isn't there or there is a type mismatch + expect_false(hasField(fieldValue="Not_a_real_value",columnName="PARAM",data=adlbc)) + expect_false(hasField(fieldValue=12,columnName="PARAM",data=adlbc)) + + # fails with invalid parameters + expect_error(hasField(fieldValue="Bilirubin (umol/L)",columnName=c("PARAM","ID"),data=adlbc)) + expect_error(hasField(columnName="PARAM",data=list(adlbc))) #fieldValue missing +})