diff --git a/R/compare_cols.R b/R/compare_cols.R
deleted file mode 100644
index 32202fdb..00000000
--- a/R/compare_cols.R
+++ /dev/null
@@ -1,38 +0,0 @@
-#' Compares contents of 2 vectors
-#'
-#' Function to compare contents of 2 vectors - used to summarize of which data columns are found in a given standard. Used in \code{detectStandard()} and \code{validateSettings()}
-#'
-#' @param data_cols A character vector with column names in the data frame
-#' @param standard_cols A character vector with column names in the data standard
-#' @return A list summarizing the comparison between \code{data_cols} and \code{standard_cols}. List has character vectors for "matched_columns", "extra_columns" and "missing_columns" parameters, and a boolean "match" parameter indicating that there are no missing columns.
-#'
-#'
-#' @examples
-#' #match == FALSE
-#' safetyGraphics:::compare_cols(data_cols=c("a","b","c"),
-#' standard_cols=c("d","e","f"))
-#'
-#' # match == TRUE
-#' safetyGraphics:::compare_cols(names(adlbc),
-#' safetyGraphics:::getRequiredColumns(standard="ADaM"))
-#' @keywords internal
-
-compare_cols<-function(data_cols, standard_cols){
- compare_summary <- list()
- compare_summary[["matched_columns"]]<-intersect(data_cols, standard_cols)
- compare_summary[["extra_columns"]]<-setdiff(data_cols,standard_cols)
- compare_summary[["missing_columns"]]<-setdiff(standard_cols,data_cols)
-
- #if there are no missing columns then call this a match
-
- if (length(compare_summary[["missing_columns"]])==0) {
- compare_summary[["match"]] <- "Full"
- } else if(length(compare_summary[["matched_columns"]])>0) {
- compare_summary[["match"]] <- "Partial"
- } else {
- compare_summary[["match"]] <- "None"
- }
-
-
- return(compare_summary)
-}
diff --git a/R/detectStandard.R b/R/detectStandard.R
index 936bbf41..feb3c7cc 100644
--- a/R/detectStandard.R
+++ b/R/detectStandard.R
@@ -1,37 +1,39 @@
#' Detect the data standard used for a data set
#'
-#' This function attempts to detect the data CDISC clinical standard used in a given R data frame.
+#' This function attempts to detect the data CDISC clinical standard used in a given R data frame.
#'
#' This function compares the columns in the provided \code{"data"} with the required columns for a given data standard/domain combination. The function is designed to work with the SDTM and AdAM CDISC() standards for clinical trial data. Currently, only "labs" is the only domain supported.
#'
-#' @param data A data frame in which to detect the data standard
+#' @param data A data frame in which to detect the data standard
+#' @param includeFields specifies whether to check the data set for field level data in addition to columns. Default: \code{TRUE}.
#' @param domain The data domain for the data set provided. Default: \code{"labs"}.
-#' @return A list containing the matching \code{"standard"} ("ADaM", "SDTM" or "None") and a list of \code{"details"} describing each standard considered. #'
-#' @examples
+#'
+#' @return A list containing the matching \code{"standard"} ("ADaM", "SDTM" or "None") and a list of \code{"details"} describing each standard considered.
+#'
+#' @examples
+#'
#' detectStandard(adlbc)[["standard"]] #AdAM
#' detectStandard(iris)[["standard"]] #none
-#'
+#'
#' \dontrun{
-#' detectStandard(adlbc,domain="AE") #throws error. AE domain not supported in this release.
+#' detectStandard(adlbc,domain="AE") #throws error. AE domain not supported in this release.
#' }
-#'
+#'
#' @export
-detectStandard <- function(data, domain="labs"){
+detectStandard <- function(data, includeFields=TRUE, domain="labs"){
stopifnot(
domain=="labs",
typeof(domain)=="character"
)
-
+
+
# Create placeholder list, with Standard = None.
standard_list <- list()
standard_list[["details"]] = list()
- data_cols<-toupper(colnames(data))
-
- # Check to see if data columns match the standards
- standard_list[["details"]][["ADaM"]]<-compare_cols(data_cols,getRequiredColumns(standard="ADaM"))
- standard_list[["details"]][["SDTM"]]<-compare_cols(data_cols,getRequiredColumns(standard="SDTM"))
-
+ standard_list[["details"]][["ADaM"]]<-evaluateStandard(data,standard="ADaM", includeFields=includeFields, domain=domain)
+ standard_list[["details"]][["SDTM"]]<-evaluateStandard(data,standard="SDTM", includeFields=includeFields, domain=domain)
+
# Determine the final standard
if(standard_list[["details"]][["SDTM"]][["match"]] == "Full"){
standard_list[["standard"]]<- "SDTM"
@@ -40,14 +42,14 @@ detectStandard <- function(data, domain="labs"){
} else if(standard_list[["details"]][["SDTM"]][["match"]] == "Partial" |
standard_list[["details"]][["ADaM"]][["match"]] == "Partial"){
standard_list[["standard"]] <- ifelse(
- length(standard_list[["details"]][["ADaM"]][["matched_columns"]]) >
- length(standard_list[["details"]][["SDTM"]][["matched_columns"]]),
+      standard_list[["details"]][["ADaM"]][["valid_count"]] > # counts are scalars: compare them directly, not their length()
+      standard_list[["details"]][["SDTM"]][["valid_count"]],
"ADaM" , "SDTM" #SDTM if they are equal
)
-
+
} else {
standard_list[["standard"]]<-"None"
}
-
+
return(standard_list)
-}
\ No newline at end of file
+}
diff --git a/R/evaluateStandard.R b/R/evaluateStandard.R
new file mode 100644
index 00000000..ae820df5
--- /dev/null
+++ b/R/evaluateStandard.R
@@ -0,0 +1,74 @@
+#' Evaluate a data set against a data standard
+#'
+#' Determines whether the required data elements in a data standard are found in a given data frame
+#'
+#' @param data data.frame to evaluate
+#' @param standard standard to evaluate
+#' @param includeFields should field level data be evaluated?
+#' @param domain data domain. "labs" only for now.
+#'
+#' @return a list describing to what degree the data set matches the data standard. The "match" property describes compliance with the standard as "Full", "Partial" or "None". The "checks" property is a tibble of the data elements expected for the standard and whether they are "valid" in the given data set. "valid_count" and "invalid_count" provide counts of the valid and invalid checks.
+#'
+#' @examples
+#' safetyGraphics:::evaluateStandard(data=adlbc, standard="adam") # match is "Full"
+#' safetyGraphics:::evaluateStandard(data=adlbc, standard="sdtm") # match is "Partial"
+#'
+#' @import dplyr
+#' @importFrom purrr map
+#' @importFrom rlang .data
+#'
+#' @keywords internal
+
+
+evaluateStandard <- function(data, standard, includeFields=TRUE, domain="labs"){
+  # Fail fast on malformed arguments; `standard` is matched case-insensitively.
+  stopifnot(
+    is.data.frame(data),
+    is.character(standard),
+    is.logical(includeFields),
+    is.character(domain),
+    tolower(standard) %in% c("adam","sdtm")
+  )
+
+  standard<-tolower(standard)
+
+  compare_summary<-list()
+  compare_summary[["standard"]]<-standard
+
+  # Get metadata for settings using the specified standard and see if required data elements are found
+  standardChecks <- getSettingsMetadata(cols=c("text_key", "column_mapping", "field_mapping", "field_column_key", "setting_required","standard_val",standard)) %>%
+    rename("standard_val"=standard) %>%
+    filter(.data$column_mapping == TRUE | .data$field_mapping == TRUE) %>% # elementwise OR per row; scalar || is not vectorized (errors on length > 1 in R >= 4.3)
+    filter(.data$setting_required==TRUE) %>%
+    mutate(type = ifelse(.data$column_mapping, "column", "field")) %>%
+    rowwise %>%
+    mutate(field_column_name = ifelse(.data$field_mapping, getSettingsMetadata(cols=standard, text_keys=.data$field_column_key),"")) %>%
+    mutate(valid = ifelse(.data$column_mapping,
+      hasColumn(data=data, columnName=.data$standard_val),
+      hasField(data=data, columnName=.data$field_column_name, fieldValue=.data$standard_val)
+    )) %>%
+    select(.data$text_key, .data$standard_val, .data$type, .data$valid)
+
+  # filter out the field level checks if includeFields is false
+  if(!includeFields){
+    standardChecks <- standardChecks %>% filter(.data$type != "field")
+  }
+
+  # compare_summary[["checks"]] <- split(standardChecks, seq(nrow(standardChecks)))%>%map(~as.list(.)) #coerce to list of lists?
+  compare_summary[["checks"]] <- standardChecks #or just keep the tibble ...
+
+  # count valid/invalid data elements
+  compare_summary[["valid_count"]] <- standardChecks %>% filter(.data$valid) %>% nrow()
+  compare_summary[["invalid_count"]] <- standardChecks %>% filter(!.data$valid) %>% nrow()
+
+  # "Full": nothing invalid; "Partial": at least one valid check; otherwise "None"
+  if (compare_summary[["invalid_count"]]==0) {
+    compare_summary[["match"]] <- "Full"
+  } else if(compare_summary[["valid_count"]]>0) {
+    compare_summary[["match"]] <- "Partial"
+  } else {
+    compare_summary[["match"]] <- "None"
+  }
+
+  return(compare_summary)
+}
\ No newline at end of file
diff --git a/R/hasColumn.R b/R/hasColumn.R
new file mode 100644
index 00000000..d64fc52d
--- /dev/null
+++ b/R/hasColumn.R
@@ -0,0 +1,23 @@
+#' Check whether a column is found in a data set
+#'
+#' Checks whether a specified column is found in a specified data set
+#'
+#' @param columnName The column to look for.
+#' @param data the data.frame to search.
+#' @return logical scalar. TRUE if the column is found. FALSE otherwise
+#'
+#' @examples
+#' safetyGraphics:::hasColumn(columnName="PARAM",data=adlbc) #TRUE
+#' safetyGraphics:::hasColumn(columnName="Not_a_column",data=adlbc) #FALSE
+#'
+#' @keywords internal
+
+hasColumn <- function(columnName, data){
+  # Validate inputs: a single character column name and a data.frame.
+  stopifnot(typeof(columnName)=="character", length(columnName)==1, is.data.frame(data))
+
+  # Upper-case both sides so that the comparison ignores case.
+  availableColumns <- toupper(colnames(data))
+  wantedColumn <- toupper(columnName)
+  wantedColumn %in% availableColumns
+}
diff --git a/R/hasField.R b/R/hasField.R
new file mode 100644
index 00000000..ab038568
--- /dev/null
+++ b/R/hasField.R
@@ -0,0 +1,33 @@
+#' Check whether a specified field value is found in a data set
+#'
+#' This checks whether a specific value is found in a specified column in a specified data set
+#'
+#' @param fieldValue A value to check for.
+#' @param columnName The column to check.
+#' @param data the data.frame to search.
+#' @return logical scalar. TRUE if fieldValue is found. FALSE otherwise
+#'
+#' @examples
+#' safetyGraphics:::hasField(fieldValue="Bilirubin (umol/L)",columnName="PARAM",data=adlbc) #TRUE
+#' safetyGraphics:::hasField(fieldValue="Not_a_real_value",columnName="",data=adlbc) #FALSE
+#'
+#' @keywords internal
+
+hasField<- function(fieldValue, columnName, data){
+  stopifnot(
+    length(fieldValue)==1,
+    typeof(columnName)=="character",
+    length(columnName)==1,
+    is.data.frame(data)
+  )
+
+  # Only read the column when it exists; a missing column has no values to match.
+  # NOTE(review): hasColumn() ignores case but `[[` is exact, so a case-mismatched name yields FALSE - confirm intended.
+  columnFound <- hasColumn(columnName=columnName, data=data)
+  if(columnFound){
+    validFields <- unique(data[[columnName]])
+  } else{
+    validFields <- c()
+  }
+  return(fieldValue %in% validFields)
+}
diff --git a/R/settingsMetadata.R b/R/settingsMetadata.R
index 5f395407..3e07bf21 100644
--- a/R/settingsMetadata.R
+++ b/R/settingsMetadata.R
@@ -14,6 +14,7 @@
#' \item{column_type}{Expected type for the data column values. Should be "character","logical" or "numeric"}
#' \item{column_required}{Flag indicating whether the associated data column should be considered required}
#' \item{field_mapping}{Flag indicating whether the setting corresponds to a field-level mapping in the data}
+#' \item{field_column_key}{Key for the column that provides options for the field-level mapping in the data}
#' \item{adam}{Settings values for the ADaM standard}
#' \item{sdtm}{Settings values for the SDTM standard}
#' }
diff --git a/data-raw/settingsMetadata.csv b/data-raw/settingsMetadata.csv
index 3ac67a28..08f04869 100644
--- a/data-raw/settingsMetadata.csv
+++ b/data-raw/settingsMetadata.csv
@@ -1,26 +1,26 @@
-chart_edish,text_key,label,description,setting_type,setting_required,column_mapping,column_type,column_required,field_mapping,adam,sdtm
-TRUE,id_col,ID column,Unique subject identifier variable name.,character,TRUE,TRUE,character,TRUE,FALSE,USUBJID,USUBJID
-TRUE,value_col,Value column,Lab result variable name.,character,TRUE,TRUE,numeric,TRUE,FALSE,AVAL,STRESN
-TRUE,measure_col,Measure column,Lab measure variable name,character,TRUE,TRUE,character,TRUE,FALSE,PARAM,TEST
-TRUE,normal_col_low,Lower Limit of Normal column,Lower limit of normal variable name,character,TRUE,TRUE,numeric,TRUE,FALSE,A1LO,STNRLO
-TRUE,normal_col_high,Upper Limit of Normal column,Upper limit of normal variable name,character,TRUE,TRUE,numeric,TRUE,FALSE,A1HI,STNRHI
-TRUE,studyday_col,Study Day column,Visit day variable name,character,TRUE,TRUE,numeric,TRUE,FALSE,ADY,DY
-TRUE,visit_col,Visit column,Visit variable name,character,FALSE,TRUE,character,FALSE,FALSE,VISIT,VISIT
-TRUE,visitn_col,Visit Number column,Visit number variable name,character,FALSE,TRUE,numeric,FALSE,FALSE,VISITNUM,VISITNUM
-TRUE,filters,Filters columns,"An optional list of specifications for filters. Each filter is a nested, named list (containing the filter value column: 'value_col' and associated label: 'label') within the larger list.",vector,FALSE,TRUE,NA,FALSE,FALSE,,
-TRUE,group_cols,Group columns,"An optional list of specifications for grouping columns. Each group column is a nested, named list (containing the group variable column: 'value_col' and associated label: 'label') within the larger list. ",vector,FALSE,TRUE,NA,FALSE,FALSE,,
-TRUE,measure_values--ALT,Alanine Aminotransferase value,Value used for Alanine Aminotransferase in the specified measure column,character,FALSE,FALSE,NA,NA,TRUE,Alanine Aminotransferase (U/L),"Aminotransferase, alanine (ALT)"
-TRUE,measure_values--AST,Aspartate Aminotransferase value,Value used for Aspartate Aminotransferase in the specified measure column,character,FALSE,FALSE,NA,NA,TRUE,Aspartate Aminotransferase (U/L),"Aminotransferase, aspartate (AST)"
-TRUE,measure_values--TB,Total Bilirubin value,Value used for Total Bilirubin in the specified measure column,character,FALSE,FALSE,NA,NA,TRUE,Bilirubin (umol/L),Total Bilirubin
-TRUE,measure_values--ALP,Alkaline Phosphatase value,Value used for Alkaline Phosphatase in the specified measure column,character,FALSE,FALSE,NA,NA,TRUE,Alkaline Phosphatase (U/L),Alkaline phosphatase (ALP)
-TRUE,baseline--value_col,Baseline column,An optional list defining which column represent the baseline visit(s) of the study.,character,FALSE,TRUE,NA,FALSE,FALSE,,
-TRUE,baseline--values,Baseline values,An optional list defining which values (one or more) represent the baseline visit(s) of the study.,vector,FALSE,FALSE,NA,NA,TRUE,,
-TRUE,analysisFlag--value_col,Analysis Flag column,An optional list defining which column should be used in eDish and mDish analyses.,character,FALSE,TRUE,NA,FALSE,FALSE,,
-TRUE,analysisFlag--values,Analysis Flag values,An optional list defining which values should be used in eDish and mDish analyses.,vector,FALSE,FALSE,NA,NA,TRUE,,
-TRUE,x_options,X axis options,"Specifies variable options for the x-axis using the key values from measure_values (e.g. 'ALT'). When multiple options are specified, a control allowing the user to interactively change the x variable is shown. ",vector,FALSE,FALSE,NA,NA,FALSE,,
-TRUE,y_options,Y axis options,"Specifies variable options for the y-axis using the key values from measure_values (e.g. 'TB'). When multiple options are specified, a control allowing the user to interactively change the y variable is shown. ",vector,FALSE,FALSE,NA,NA,FALSE,,
-TRUE,visit_window,Default Visit Window in Days ,Default visit window used to highlight eDish points where x and y measures occurred within the specified number of days. Editable by user after render. ,numeric,FALSE,FALSE,NA,NA,FALSE,,
-TRUE,r_ratio_filter,Show R Ratio Filter?,Specifies whether the R Ratio filter should be shown. R ratio is defined as: (ALT value/ULN for ALT) / (ALP value/ULN for ALP).,logical,FALSE,FALSE,NA,NA,FALSE,,
-TRUE,r_ratio_cut,Default R Ratio Cut,Default cut point for R Ratio filter. Ignored when r_ratio_filter = FALSE. User can update this setting via the UI when r_ratio_filter = TRUE. ,numeric,FALSE,FALSE,NA,NA,FALSE,,
-TRUE,showTitle,Show Chart Title? ,Specifies whether the title should be drawn above the controls.,logical,FALSE,FALSE,NA,NA,FALSE,,
-TRUE,warningText,Warning text,"Informational text to be displayed near the top of the controls (beneath the title, if any). No warning is displayed if warningText = ''. ",Character,FALSE,FALSE,NA,NA,FALSE,,
+chart_edish,text_key,label,description,setting_type,setting_required,column_mapping,column_type,column_required,field_mapping,field_column_key,adam,sdtm
+TRUE,id_col,ID column,Unique subject identifier variable name.,character,TRUE,TRUE,character,TRUE,FALSE,,USUBJID,USUBJID
+TRUE,value_col,Value column,Lab result variable name.,character,TRUE,TRUE,numeric,TRUE,FALSE,,AVAL,STRESN
+TRUE,measure_col,Measure column,Lab measure variable name,character,TRUE,TRUE,character,TRUE,FALSE,,PARAM,TEST
+TRUE,normal_col_low,Lower Limit of Normal column,Lower limit of normal variable name,character,TRUE,TRUE,numeric,TRUE,FALSE,,A1LO,STNRLO
+TRUE,normal_col_high,Upper Limit of Normal column,Upper limit of normal variable name,character,TRUE,TRUE,numeric,TRUE,FALSE,,A1HI,STNRHI
+TRUE,studyday_col,Study Day column,Visit day variable name,character,TRUE,TRUE,numeric,TRUE,FALSE,,ADY,DY
+TRUE,visit_col,Visit column,Visit variable name,character,FALSE,TRUE,character,FALSE,FALSE,,VISIT,VISIT
+TRUE,visitn_col,Visit Number column,Visit number variable name,character,FALSE,TRUE,numeric,FALSE,FALSE,,VISITNUM,VISITNUM
+TRUE,filters,Filters columns,"An optional list of specifications for filters. Each filter is a nested, named list (containing the filter value column: 'value_col' and associated label: 'label') within the larger list.",vector,FALSE,TRUE,NA,FALSE,FALSE,,,
+TRUE,group_cols,Group columns,"An optional list of specifications for grouping columns. Each group column is a nested, named list (containing the group variable column: 'value_col' and associated label: 'label') within the larger list. ",vector,FALSE,TRUE,NA,FALSE,FALSE,,,
+TRUE,measure_values--ALT,Alanine Aminotransferase value,Value used for Alanine Aminotransferase in the specified measure column,character,TRUE,FALSE,NA,NA,TRUE,measure_col,Alanine Aminotransferase (U/L),"Aminotransferase, alanine (ALT)"
+TRUE,measure_values--AST,Aspartate Aminotransferase value,Value used for Aspartate Aminotransferase in the specified measure column,character,TRUE,FALSE,NA,NA,TRUE,measure_col,Aspartate Aminotransferase (U/L),"Aminotransferase, aspartate (AST)"
+TRUE,measure_values--TB,Total Bilirubin value,Value used for Total Bilirubin in the specified measure column,character,TRUE,FALSE,NA,NA,TRUE,measure_col,Bilirubin (umol/L),Total Bilirubin
+TRUE,measure_values--ALP,Alkaline Phosphatase value,Value used for Alkaline Phosphatase in the specified measure column,character,TRUE,FALSE,NA,NA,TRUE,measure_col,Alkaline Phosphatase (U/L),Alkaline phosphatase (ALP)
+TRUE,baseline--value_col,Baseline column,An optional list defining which column represent the baseline visit(s) of the study.,character,FALSE,TRUE,NA,FALSE,FALSE,,,
+TRUE,baseline--values,Baseline values,An optional list defining which values (one or more) represent the baseline visit(s) of the study.,vector,FALSE,FALSE,NA,NA,TRUE,baseline--value_col,,
+TRUE,analysisFlag--value_col,Analysis Flag column,An optional list defining which column should be used in eDish and mDish analyses.,character,FALSE,TRUE,NA,FALSE,FALSE,,,
+TRUE,analysisFlag--values,Analysis Flag values,An optional list defining which values should be used in eDish and mDish analyses.,vector,FALSE,FALSE,NA,NA,TRUE,analysisFlag--value_col,,
+TRUE,x_options,X axis options,"Specifies variable options for the x-axis using the key values from measure_values (e.g. 'ALT'). When multiple options are specified, a control allowing the user to interactively change the x variable is shown. ",vector,FALSE,FALSE,NA,NA,FALSE,,,
+TRUE,y_options,Y axis options,"Specifies variable options for the y-axis using the key values from measure_values (e.g. 'TB'). When multiple options are specified, a control allowing the user to interactively change the y variable is shown. ",vector,FALSE,FALSE,NA,NA,FALSE,,,
+TRUE,visit_window,Default Visit Window in Days ,Default visit window used to highlight eDish points where x and y measures occurred within the specified number of days. Editable by user after render. ,numeric,FALSE,FALSE,NA,NA,FALSE,,,
+TRUE,r_ratio_filter,Show R Ratio Filter?,Specifies whether the R Ratio filter should be shown. R ratio is defined as: (ALT value/ULN for ALT) / (ALP value/ULN for ALP).,logical,FALSE,FALSE,NA,NA,FALSE,,,
+TRUE,r_ratio_cut,Default R Ratio Cut,Default cut point for R Ratio filter. Ignored when r_ratio_filter = FALSE. User can update this setting via the UI when r_ratio_filter = TRUE. ,numeric,FALSE,FALSE,NA,NA,FALSE,,,
+TRUE,showTitle,Show Chart Title? ,Specifies whether the title should be drawn above the controls.,logical,FALSE,FALSE,NA,NA,FALSE,,,
+TRUE,warningText,Warning text,"Informational text to be displayed near the top of the controls (beneath the title, if any). No warning is displayed if warningText = ''. ",Character,FALSE,FALSE,NA,NA,FALSE,,,
\ No newline at end of file
diff --git a/data/adlbc.rda b/data/adlbc.rda
index aea28611..aa28c2db 100644
Binary files a/data/adlbc.rda and b/data/adlbc.rda differ
diff --git a/data/settingsMetadata.rda b/data/settingsMetadata.rda
index 82ebf9c0..559f7135 100644
Binary files a/data/settingsMetadata.rda and b/data/settingsMetadata.rda differ
diff --git a/man/compare_cols.Rd b/man/compare_cols.Rd
deleted file mode 100644
index a5a67420..00000000
--- a/man/compare_cols.Rd
+++ /dev/null
@@ -1,29 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/compare_cols.R
-\name{compare_cols}
-\alias{compare_cols}
-\title{Compares contents of 2 vectors}
-\usage{
-compare_cols(data_cols, standard_cols)
-}
-\arguments{
-\item{data_cols}{A character vector with column names in the data frame}
-
-\item{standard_cols}{A character vector with column names in the data standard}
-}
-\value{
-A list summarizing the comparison between \code{data_cols} and \code{standard_cols}. List has character vectors for "matched_columns", "extra_columns" and "missing_columns" parameters, and a boolean "match" parameter indicating that there are no missing columns.
-}
-\description{
-Function to compare contents of 2 vectors - used to summarize of which data columns are found in a given standard. Used in \code{detectStandard()} and \code{validateSettings()}
-}
-\examples{
-#match == FALSE
-safetyGraphics:::compare_cols(data_cols=c("a","b","c"),
- standard_cols=c("d","e","f"))
-
-# match == TRUE
-safetyGraphics:::compare_cols(names(adlbc),
- safetyGraphics:::getRequiredColumns(standard="ADaM"))
-}
-\keyword{internal}
diff --git a/man/detectStandard.Rd b/man/detectStandard.Rd
index 7eacb633..bfae29b5 100644
--- a/man/detectStandard.Rd
+++ b/man/detectStandard.Rd
@@ -4,11 +4,13 @@
\alias{detectStandard}
\title{Detect the data standard used for a data set}
\usage{
-detectStandard(data, domain = "labs")
+detectStandard(data, includeFields = TRUE, domain = "labs")
}
\arguments{
\item{data}{A data frame in which to detect the data standard}
+\item{includeFields}{specifies whether to check the data set for field level data in addition to columns. Default: \code{TRUE}.}
+
\item{domain}{The data domain for the data set provided. Default: \code{"labs"}.}
}
\value{
@@ -21,11 +23,12 @@ This function attempts to detect the data CDISC clinical standard used in a give
This function compares the columns in the provided \code{"data"} with the required columns for a given data standard/domain combination. The function is designed to work with the SDTM and AdAM CDISC() standards for clinical trial data. Currently, only "labs" is the only domain supported.
}
\examples{
+
detectStandard(adlbc)[["standard"]] #AdAM
detectStandard(iris)[["standard"]] #none
\dontrun{
- detectStandard(adlbc,domain="AE") #throws error. AE domain not supported in this release.
+ detectStandard(adlbc,domain="AE") #throws error. AE domain not supported in this release.
}
}
diff --git a/man/evaluateStandard.Rd b/man/evaluateStandard.Rd
new file mode 100644
index 00000000..ac1a38ab
--- /dev/null
+++ b/man/evaluateStandard.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/evaluateStandard.R
+\name{evaluateStandard}
+\alias{evaluateStandard}
+\title{Evaluate a data set against a data standard}
+\usage{
+evaluateStandard(data, standard, includeFields = TRUE, domain = "labs")
+}
+\arguments{
+\item{data}{data.frame to evaluate}
+
+\item{standard}{standard to evaluate}
+
+\item{includeFields}{should field level data be evaluated?}
+
+\item{domain}{data domain. "labs" only for now.}
+}
+\value{
a list describing to what degree the data set matches the data standard. The "match" property describes compliance with the standard as "Full", "Partial" or "None". The "checks" property is a tibble of the data elements expected for the standard and whether they are "valid" in the given data set. "valid_count" and "invalid_count" provide counts of the valid and invalid checks.
+}
+\description{
+Determines whether the required data elements in a data standard are found in a given data frame
+}
+\examples{
safetyGraphics:::evaluateStandard(data=adlbc, standard="adam") # match is "Full"
safetyGraphics:::evaluateStandard(data=adlbc, standard="sdtm") # match is "Partial"
+
+}
+\keyword{internal}
diff --git a/man/hasColumn.Rd b/man/hasColumn.Rd
new file mode 100644
index 00000000..2373cecc
--- /dev/null
+++ b/man/hasColumn.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/hasColumn.R
+\name{hasColumn}
+\alias{hasColumn}
+\title{Check whether a column is found in a data set}
+\usage{
+hasColumn(columnName, data)
+}
+\arguments{
+\item{columnName}{The column to look for.}
+
+\item{data}{the data.frame to search.}
+}
+\value{
+logical scalar. TRUE if the column is found. FALSE otherwise
+}
+\description{
+Checks whether a specified column is found in a specified data set
+}
+\examples{
+safetyGraphics:::hasColumn(columnName="PARAM",data=adlbc) #TRUE
+safetyGraphics:::hasColumn(columnName="Not_a_column",data=adlbc) #FALSE
+
+}
+\keyword{internal}
diff --git a/man/hasField.Rd b/man/hasField.Rd
new file mode 100644
index 00000000..5ce1103a
--- /dev/null
+++ b/man/hasField.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/hasField.R
+\name{hasField}
+\alias{hasField}
+\title{Check whether a specified field value is found in a data set}
+\usage{
+hasField(fieldValue, columnName, data)
+}
+\arguments{
+\item{fieldValue}{A value to check for.}
+
+\item{columnName}{The column to check.}
+
+\item{data}{the data.frame to search.}
+}
+\value{
logical scalar. TRUE if fieldValue is found. FALSE otherwise
+}
+\description{
+This checks whether a specific value is found in a specified column in a specified data set
+}
+\examples{
+safetyGraphics:::hasField(fieldValue="Bilirubin (umol/L)",columnName="PARAM",data=adlbc) #TRUE
+safetyGraphics:::hasField(fieldValue="Not_a_real_value",columnName="",data=adlbc) #FALSE
+
+}
+\keyword{internal}
diff --git a/man/settingsMetadata.Rd b/man/settingsMetadata.Rd
index 0f4678a8..00355fd0 100644
--- a/man/settingsMetadata.Rd
+++ b/man/settingsMetadata.Rd
@@ -16,6 +16,7 @@
\item{column_type}{Expected type for the data column values. Should be "character","logical" or "numeric"}
\item{column_required}{Flag indicating whether the associated data column should be considered required}
\item{field_mapping}{Flag indicating whether the setting corresponds to a field-level mapping in the data}
+ \item{field_column_key}{Key for the column that provides options for the field-level mapping in the data}
\item{adam}{Settings values for the ADaM standard}
\item{sdtm}{Settings values for the SDTM standard}
}}
diff --git a/tests/testthat/test_compareStandard.R b/tests/testthat/test_compareStandard.R
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/testthat/test_detectStandard.R b/tests/testthat/test_detectStandard.R
index 2d41343a..687e71c7 100644
--- a/tests/testthat/test_detectStandard.R
+++ b/tests/testthat/test_detectStandard.R
@@ -19,22 +19,24 @@ test_that("correct standards are identified",{
expect_equal(detectStandard(adlbc)[["details"]][["ADaM"]][["match"]], "Full")
expect_equal(detectStandard(adlbc)[["details"]][["SDTM"]][["match"]], "Partial")
- adam_test_data<-data.frame(USUBJID="001",AVAL=10,PARAM="HDL",VISIT="Visit 1",VISITNUM=1,ADY=0,A1LO=0,A1HI=20)
+ adam_params <- c("Alanine Aminotransferase (U/L)","Aspartate Aminotransferase (U/L)","Bilirubin (umol/L)","Alkaline Phosphatase (U/L)")
+ adam_test_data<-data.frame(USUBJID="001",AVAL=10,PARAM=adam_params, VISIT="Visit 1",VISITNUM=1,ADY=0,A1LO=0,A1HI=20)
expect_equal(detectStandard(adam_test_data)[["standard"]],"ADaM")
expect_equal(detectStandard(adam_test_data)[["details"]][["ADaM"]][["match"]], "Full")
expect_equal(detectStandard(adam_test_data)[["details"]][["SDTM"]][["match"]], "Partial")
- sdtm_test_data<-data.frame(USUBJID="001",STRESN=10,TEST="HDL",VISIT="Visit 1",VISITNUM=1,DY=0,STNRLO=0,STNRHI=20)
+ sdtm_params<-c("Aminotransferase, alanine (ALT)","Aminotransferase, aspartate (AST)","Total Bilirubin","Alkaline phosphatase (ALP)")
+ sdtm_test_data<-data.frame(USUBJID="001",STRESN=10,TEST=sdtm_params,VISIT="Visit 1",VISITNUM=1,DY=0,STNRLO=0,STNRHI=20)
expect_equal(detectStandard(sdtm_test_data)[["standard"]],"SDTM")
expect_equal(detectStandard(sdtm_test_data)[["details"]][["ADaM"]][["match"]], "Partial")
expect_equal(detectStandard(sdtm_test_data)[["details"]][["SDTM"]][["match"]], "Full")
- empty_test_data<-data.frame()
+ empty_test_data<-data.frame("")
expect_equal(detectStandard(empty_test_data)[["standard"]],"None")
expect_equal(detectStandard(empty_test_data)[["details"]][["ADaM"]][["match"]], "None")
expect_equal(detectStandard(empty_test_data)[["details"]][["SDTM"]][["match"]], "None")
- case_sensitive_test_data<-data.frame(usubjid="001",AVAL=10,PARAM="HDL",VISIT="Visit 1",VISITNUM=1,ADY=0,A1LO=0,A1HI=20)
+ case_sensitive_test_data<-data.frame(usubjid="001",AVAL=10,PARAM=adam_params, VISIT="Visit 1",VISITNUM=1,ADY=0,A1LO=0,A1HI=20)
expect_equal(detectStandard(case_sensitive_test_data)[["standard"]],"ADaM")
expect_equal(detectStandard(case_sensitive_test_data)[["details"]][["ADaM"]][["match"]], "Full")
expect_equal(detectStandard(case_sensitive_test_data)[["details"]][["SDTM"]][["match"]], "Partial")
diff --git a/tests/testthat/test_evaluateStandard.R b/tests/testthat/test_evaluateStandard.R
new file mode 100644
index 00000000..72dd5ec8
--- /dev/null
+++ b/tests/testthat/test_evaluateStandard.R
@@ -0,0 +1,47 @@
+context("Tests for the evaluateStandard() function")
+library(safetyGraphics)
+
+test_that("basic test cases evaluate as expected",{
+  expect_equal(evaluateStandard(data=adlbc, standard="adam")[["match"]],"Full")
+  expect_equal(evaluateStandard(data=adlbc, standard="sdtm")[["match"]],"Partial")
+  expect_equal(evaluateStandard(data=data.frame(), standard="sdtm")[["match"]],"None")
+})
+
+test_that("a list with the expected properties and structure is returned",{
+  a<- evaluateStandard(data=data.frame(),standard="adam")
+
+  expect_is(a,"list")
+  expect_named(a,c('standard', 'checks', 'valid_count', 'invalid_count', 'match'))
+  expect_is(a[["standard"]],"character")
+  expect_is(a[["match"]],"character")
+  expect_is(a[["checks"]],"tbl")
+  expect_is(a[["valid_count"]],"integer")
+  expect_is(a[["invalid_count"]],"integer")
+})
+
+test_that("expected number of checks (in)valid",{
+  expect_equal(evaluateStandard(data=adlbc, standard="sdtm")[["valid_count"]],1)
+  expect_equal(evaluateStandard(data=adlbc, standard="sdtm")[["invalid_count"]],9)
+  # Copying PARAM into a TEST column should make the SDTM measure_col check pass.
+  adlbc_edit <- adlbc
+  adlbc_edit$TEST <- adlbc_edit$PARAM
+  a<-evaluateStandard(data=adlbc_edit, standard="sdtm")
+  expect_equal(a[["valid_count"]],2)
+  expect_equal(a[["invalid_count"]],8)
+  expect_true(a[["checks"]]%>%filter(text_key=="measure_col")%>%select(valid)%>%unlist)
+})
+
+
+test_that("field level data is ignored when includeFields=FALSE",{
+  noFields<-evaluateStandard(data=adlbc, standard="adam", includeFields=FALSE)
+  expect_equal(noFields[["match"]],"Full")
+  expect_equal(noFields[["valid_count"]],6)
+})
+
+test_that("invalid options throw errors",{
+  expect_error(evaluateStandard(data=list(a=1,b=2), standard="sdtm"))
+  expect_error(evaluateStandard(data="notadataframe", standard="sdtm"))
+  expect_error(evaluateStandard(data=adlbc, standard=123))
+  expect_error(evaluateStandard(data=adlbc, standard="notarealstandard"))
+  expect_error(evaluateStandard(data=adlbc, standard="adam", includeFields="yesPlease"))
+})
\ No newline at end of file
diff --git a/tests/testthat/test_getRequiredSettings.R b/tests/testthat/test_getRequiredSettings.R
index 1bca5f5a..6e4604e8 100644
--- a/tests/testthat/test_getRequiredSettings.R
+++ b/tests/testthat/test_getRequiredSettings.R
@@ -2,13 +2,17 @@ context("Tests for the getRequiredSettings() function")
library(safetyGraphics)
library(testthat)
-defaultRequiredCols <- list(
+defaultRequiredSettings <- list( # compared against the result of getRequiredSettings() called with defaults
 list("id_col"),
 list("value_col"),
 list("measure_col"),
 list("normal_col_low"),
 list("normal_col_high"),
- list("studyday_col")
+ list("studyday_col"),
+ list("measure_values","ALT"), # two-element entries: presumably a settings key plus a nested field name (TODO confirm)
+ list("measure_values","AST"),
+ list("measure_values","TB"),
+ list("measure_values","ALP")
)
@@ -30,7 +34,7 @@ customMetadata<- data.frame(
test_that("default function call works as expected",{
a<- safetyGraphics::getRequiredSettings()
expect_is(a,"list")
- expect_equal(a,defaultRequiredCols)
+ expect_equal(a,defaultRequiredSettings) # the default call must equal the expected list defined earlier in this test file
})
test_that("options work as expected",{
diff --git a/tests/testthat/test_getSettingsMetadata.R b/tests/testthat/test_getSettingsMetadata.R
index 4cad3e7a..1b43492e 100644
--- a/tests/testthat/test_getSettingsMetadata.R
+++ b/tests/testthat/test_getSettingsMetadata.R
@@ -135,5 +135,5 @@ test_that("filter_expr parameters works as expected",{
)
expect_equal(safetyGraphics:::getSettingsMetadata(filter_expr=text_key=="id_col",cols="description"),"Unique subject identifier variable name.")
expect_length(safetyGraphics:::getSettingsMetadata(filter_expr=column_type=="numeric",cols="text_key",chart="edish"),5)
- expect_length(safetyGraphics:::getSettingsMetadata(filter_expr=setting_required,cols="text_key",chart="edish"),6)
+ expect_length(safetyGraphics:::getSettingsMetadata(filter_expr=setting_required,cols="text_key",chart="edish"),10)
})
\ No newline at end of file
diff --git a/tests/testthat/test_hasColumn.R b/tests/testthat/test_hasColumn.R
new file mode 100644
index 00000000..5145492b
--- /dev/null
+++ b/tests/testthat/test_hasColumn.R
@@ -0,0 +1,21 @@
+context("Tests for the hasColumn() function")
+library(safetyGraphics)
+
+test_that("columns are found when expected",{
+ # column names spelled exactly as in the data are detected
+ expect_true(hasColumn(data = adlbc, columnName = "PARAM"))
+ expect_true(hasColumn(data = adlbc, columnName = "SUBJID"))
+
+ # a different letter case still counts as a match
+ expect_true(hasColumn(data = adlbc, columnName = "param"))
+ expect_true(hasColumn(data = adlbc, columnName = "SuBjId"))
+
+ # names expected not to be columns of adlbc give FALSE
+ expect_false(hasColumn(data = adlbc, columnName = "PARAMETER"))
+ expect_false(hasColumn(data = adlbc, columnName = "SUBJID2"))
+
+ # invalid arguments raise an error: numeric name, several names at once, data wrapped in a list
+ expect_error(hasColumn(data = adlbc, columnName = 123))
+ expect_error(hasColumn(data = adlbc, columnName = c("PARAM","SUBJID")))
+ expect_error(hasColumn(data = list(adlbc), columnName = "PARAM"))
+})
diff --git a/tests/testthat/test_hasField.R b/tests/testthat/test_hasField.R
new file mode 100644
index 00000000..a1e9197e
--- /dev/null
+++ b/tests/testthat/test_hasField.R
@@ -0,0 +1,26 @@
+context("Tests for the hasField() function")
+library(safetyGraphics)
+
+test_that("fields are found when expected",{
+ # simple test case works
+ expect_true(hasField(fieldValue="Bilirubin (umol/L)",columnName="PARAM",data=adlbc))
+
+ # flexible regarding type: the same id is found as a number and as a string
+ expect_true(hasField(fieldValue=1015,columnName="SUBJID",data=adlbc))
+ expect_true(hasField(fieldValue="1015",columnName="SUBJID",data=adlbc))
+ # a length-one list is accepted as well
+ expect_true(hasField(fieldValue=list(1015),columnName="SUBJID",data=adlbc))
+
+ # but not regarding length: more than one value is an error
+ expect_error(hasField(fieldValue=list(1015,1023),columnName="SUBJID",data=adlbc))
+ expect_error(hasField(fieldValue=c("a","b"),columnName="PARAM",data=adlbc))
+
+
+ # returns false when fieldValue isn't there or there is a type mismatch
+ expect_false(hasField(fieldValue="Not_a_real_value",columnName="PARAM",data=adlbc))
+ expect_false(hasField(fieldValue=12,columnName="PARAM",data=adlbc))
+
+ # fails with invalid parameters
+ expect_error(hasField(fieldValue="Bilirubin (umol/L)",columnName=c("PARAM","ID"),data=adlbc))
+ expect_error(hasField(columnName="PARAM",data=list(adlbc))) # fieldValue missing; data is also a list, not a data frame
+})