SafetyGraphics · jwildfire · Feb 25, 2019 · Feb 22, 2019 · Feb 22, 2019 · Feb 22, 2019
diff --git a/R/detectStandard.R b/R/detectStandard.R
@@ -1,16 +1,16 @@
 #' Detect the data standard used for a data set
 #'
-#' This function attempts to detect the data CDISC clinical standard used in a given R data frame.
+#' This function attempts to detect the clinical data standard used in a given R data frame.
 #'
-#' This function compares the columns in the provided \code{"data"} with the required columns for a given data standard/domain combination. The function is designed to work with the SDTM and AdAM CDISC(<https://www.cdisc.org/>) standards for clinical trial data. Currently, "labs" is the only domain supported.
+#' This function compares the columns in the provided \code{"data"} with the required columns for a given data standard/domain combination. The function is designed to work with the SDTM and AdAM CDISC(<https://www.cdisc.org/>) standards for clinical trial data by default. Additional standards can be added by modifying the \code{"standardMetadata"} data set included as part of this package. Currently, "labs" is the only domain supported.
 #'
 #' @param data A data frame in which to detect the data standard
 #' @param includeFields specifies whether to check the data set for field level data in addition to columns.  Default: \code{TRUE}.
 #' @param domain The data domain for the data set provided.  Default: \code{"labs"}.
-#' @return A list containing the matching \code{"standard"} ("ADaM", "SDTM" or "None") and a list of  \code{"details"} describing each standard considered.
+#' @return A list containing the matching \code{"standard"} from \code{"standardMetadata"} and a list of  \code{"details"} describing each standard considered.
 #' @examples 
 
-#' detectStandard(adlbc)[["standard"]] #AdAM
+#' detectStandard(adlbc)[["standard"]] #adam
 #' detectStandard(iris)[["standard"]] #none
 #'
 #' \dontrun{
@@ -26,28 +26,45 @@ detectStandard <- function(data, includeFields=TRUE, domain="labs"){
   )
 
 
-  # Create placeholder list, with Standard = None.
+  # Create placeholder list, with Standard = none.
+  available_standards <- safetyGraphics::standardsMetadata %>% select(-.data$text_key) %>% names
   standard_list <- list()
   standard_list[["details"]] = list()
-  standard_list[["details"]][["ADaM"]]<-evaluateStandard(data,standard="ADaM", includeFields=includeFields, domain=domain)
-  standard_list[["details"]][["SDTM"]]<-evaluateStandard(data,standard="SDTM", includeFields=includeFields, domain=domain)
+  standard_list[["standard"]] = "none"
+  standard_list[["standard_percent"]] = 0
+
+  for(standard in available_standards){
+    # evaluate the current standard and save the result
+    standard_list[["details"]][[standard]]<-evaluateStandard(data,standard=standard, includeFields=includeFields, domain=domain)  
+
+    # if the current standard is a better match, use it as the overall standard
+    # if there is a tie, don't change the standard - this means the column order in standardMetadata breaks ties!
+    current_percent <- standard_list[["details"]][[standard]][["match_percent"]]
+    overall_percent <- standard_list[["standard_percent"]]
+    if(current_percent > overall_percent){
+      standard_list[["standard"]] <- standard  
+      standard_list[["standard_percent"]] <- current_percent
+    }
+  }
 
   # Determine the final standard
-  if(standard_list[["details"]][["SDTM"]][["match"]] == "Full"){
-    standard_list[["standard"]]<- "SDTM"
-  } else if(standard_list[["details"]][["ADaM"]][["match"]] == "Full"){
-    standard_list[["standard"]]<- "ADaM"
-  } else if(standard_list[["details"]][["SDTM"]][["match"]] == "Partial" |
-           standard_list[["details"]][["ADaM"]][["match"]] == "Partial"){
-  standard_list[["standard"]] <- ifelse(
-    length(standard_list[["details"]][["ADaM"]][["valid_count"]]) >
-      length(standard_list[["details"]][["SDTM"]][["valid_count"]]),
-      "ADaM" , "SDTM" #SDTM if they are equal
-    )
-
-  } else {
-    standard_list[["standard"]]<-"None"
-  }
+
+  # TODO: write a general algorithm to do this ...
+  # if(standard_list[["details"]][["sdtm"]][["match"]] == "Full"){
+  #   standard_list[["standard"]]<- "sdtm"
+  # } else if(standard_list[["details"]][["adam"]][["match"]] == "Full"){
+  #   standard_list[["standard"]]<- "adam"
+  # } else if(standard_list[["details"]][["sdtm"]][["match"]] == "Partial" |
+  #          standard_list[["details"]][["adam"]][["match"]] == "Partial"){
+  # standard_list[["standard"]] <- ifelse(
+  #   length(standard_list[["details"]][["adam"]][["valid_count"]]) >
+  #     length(standard_list[["details"]][["sdtm"]][["valid_count"]]),
+  #     "adam" , "sdtm" #SDTM if they are equal
+  #   )
+  # 
+  # } else {
+  #   standard_list[["standard"]]<-"None"
+  # }
 
   return(standard_list)
 }
diff --git a/R/evaluateStandard.R b/R/evaluateStandard.R
@@ -7,7 +7,7 @@
 #' @param includeFields should field level data be evaluated? 
 #' @param domain data domain. "labs" only for now. 
 #' 
-#' @return a list describing to what degree the data set matches the data standard. The "match" property describes compliance with the standard as "Full", "Partial" or "None". The "checks" property is a list of the data elements expected for the standard and whether they are "valid" in the given data set. "valid_checks" and "invalid_checks" provide counts of the specified checks. 
+#' @return a list describing to what degree the data set matches the data standard. The "match" property describes compliance with the standard as "full", "partial" or "none". The "checks" property is a list of the data elements expected for the standard and whether they are "valid" in the given data set. "total_checks", "valid_checks" and "invalid_checks" provide counts of the specified checks. "match_percent" is calculated as valid_checks/total_checks.
 #'  
 #' @examples
 #' safetyGraphics:::evaluateStandard(data=adlbc, standard="adam") # Match is TRUE
@@ -26,8 +26,7 @@ evaluateStandard <- function(data, standard, includeFields=TRUE, domain="labs"){
     is.data.frame(data),
     is.character(standard),
     is.logical(includeFields),
-    is.character(domain),
-    tolower(standard) %in% c("adam","sdtm")
+    is.character(domain)
   )
 
   standard<-tolower(standard)
@@ -43,7 +42,8 @@ evaluateStandard <- function(data, standard, includeFields=TRUE, domain="labs"){
   mutate(type = ifelse(.data$column_mapping, "column", "field")) %>% 
   rowwise %>%
   mutate(field_column_name = ifelse(.data$field_mapping, getSettingsMetadata(cols=standard, text_keys=.data$field_column_key),"")) %>%
-  mutate(valid = ifelse(.data$column_mapping,
+  mutate(
+    valid = ifelse(.data$column_mapping,
     hasColumn(data=data, columnName=.data$standard_val),
     hasField(data=data, columnName=.data$field_column_name, fieldValue=.data$standard_val)
   )) %>%
@@ -54,20 +54,20 @@ evaluateStandard <- function(data, standard, includeFields=TRUE, domain="labs"){
     standardChecks <- standardChecks %>% filter(.data$type != "field")
   }
 
- # compare_summary[["checks"]] <- split(standardChecks, seq(nrow(standardChecks)))%>%map(~as.list(.)) #coerce to list of lists?
-  compare_summary[["checks"]] <- standardChecks #or just keep the tibble ... 
+  compare_summary[["checks"]] <- standardChecks 
 
   # count valid/invalid data elements
+  compare_summary[["total_count"]] <- standardChecks %>% nrow()
   compare_summary[["valid_count"]] <- standardChecks %>% filter(.data$valid) %>% nrow()
   compare_summary[["invalid_count"]] <- standardChecks %>% filter(!.data$valid) %>% nrow()
-
-
+  compare_summary[["match_percent"]] <- compare_summary[["valid_count"]] / compare_summary[["total_count"]]
+  
   if (compare_summary[["invalid_count"]]==0) {
-     compare_summary[["match"]] <- "Full"
+     compare_summary[["match"]] <- "full"
   } else if(compare_summary[["valid_count"]]>0) {
-    compare_summary[["match"]] <- "Partial"
+    compare_summary[["match"]] <- "partial"
   } else {
-    compare_summary[["match"]] <- "None"
+    compare_summary[["match"]] <- "none"
   }
 
   return(compare_summary)

diff --git a/R/getSettingsMetadata.R b/R/getSettingsMetadata.R
@@ -5,6 +5,7 @@
 #' @param charts optional vector of chart names used to filter the metadata. Exact matches only (case-insensitive). All rows returned by default.
 #' @param text_keys optional vector of keys used to filter the metadata. Partial matches for any of the strings are returned (case-insensitive). All rows returned by default.
 #' @param filter_expr optional filter expression used to subset the data.
+#' @param add_standards should data standard info stored in standardsMetadata be included
 #' @param cols optional vector of columns to return from the metadata. All columns returned by default. 
 #' @param metadata metadata data frame to be queried
 #' 
@@ -27,9 +28,15 @@
 #' 
 #' @export
 
-getSettingsMetadata<-function(charts=NULL, text_keys=NULL, cols=NULL, filter_expr=NULL, metadata = safetyGraphics::settingsMetadata){
+getSettingsMetadata<-function(charts=NULL, text_keys=NULL, cols=NULL, filter_expr=NULL, add_standards=TRUE, metadata = safetyGraphics::settingsMetadata){
 
-  md <- metadata
+  md <- metadata %>% mutate(text_key=as.character(.data$text_key))
+
+  if(add_standards){
+    ms<-safetyGraphics::standardsMetadata %>% mutate(text_key=as.character(.data$text_key))
+    md<-md%>%left_join(ms, by="text_key")
+  }
+
   all_columns <- names(md)
 
   #filter the metadata based on the charts option (if any)

diff --git a/R/settingsMetadata.R b/R/settingsMetadata.R
@@ -8,15 +8,13 @@
 #'    \item{text_key}{Text key indicating the setting name. \code{'--'} delimiter indicates a nested setting}
 #'    \item{label}{Label}
 #'    \item{description}{Description}
-#'    \item{setting_cat}{Setting category (data, measure, appearance)}
 #'    \item{setting_type}{Expected type for setting value. Should be "character", "vector", "numeric" or "logical"}
 #'    \item{setting_required}{Flag indicating if the setting is required}
 #'    \item{column_mapping}{Flag indicating if the setting corresponds to a column in the associated data}
 #'    \item{column_type}{Expected type for the data column values. Should be "character","logical" or "numeric"}
 #'    \item{field_mapping}{Flag indicating whether the setting corresponds to a field-level mapping in the data}
 #'    \item{field_column_key}{Key for the column that provides options for the field-level mapping in the data}
-#'    \item{adam}{Settings values for the ADaM standard}
-#'    \item{sdtm}{Settings values for the SDTM standard}
+#'    \item{setting_cat}{Setting category (data, measure, appearance)}
 #' }    
 #' 
 #' @source Created for this package

diff --git a/R/standardsMetadata.R b/R/standardsMetadata.R
@@ -0,0 +1,13 @@
+#' Standards Metadata 
+#'
+#' Metadata about the data standards used to configure safetyGraphics charts. One record per unique setting. Columns contain default setting values for clinical data standards, like the CDISC "adam" and "sdtm" standards. 
+#'
+#' @format A data frame with 25 rows and 3 columns
+#' \describe{
+#'    \item{text_key}{Text key indicating the setting name. \code{'--'} delimiter indicates a nested setting}
+#'    \item{adam}{Settings values for the ADaM standard}
+#'    \item{sdtm}{Settings values for the SDTM standard}
+#' }    
+#' 
+#' @source Created for this package
+"standardsMetadata"
diff --git a/data-raw/csv_to_rda.R b/data-raw/csv_to_rda.R
@@ -3,5 +3,8 @@ library(usethis)
 ablbc <- read.csv("data-raw/adlbc.csv")
 usethis::use_data(adlbc, overwrite = TRUE)
 
-settingsMetadata<- read.csv("data-raw/settingsMetadata.csv")
+settingsMetadata <- read.csv("data-raw/settingsMetadata.csv")
 usethis::use_data(settingsMetadata, overwrite = TRUE)
+
+standardsMetadata <- read.csv("data-raw/standardsMetadata.csv")
+usethis::use_data(standardsMetadata, overwrite = TRUE)