Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 0 additions & 38 deletions R/compare_cols.R

This file was deleted.

42 changes: 22 additions & 20 deletions R/detectStandard.R
Original file line number Diff line number Diff line change
@@ -1,37 +1,39 @@
#' Detect the data standard used for a data set
#'
#' This function attempts to detect the data CDISC clinical standard used in a given R data frame.
#' This function attempts to detect the data CDISC clinical standard used in a given R data frame.
#'
#' This function compares the columns in the provided \code{"data"} with the required columns for a given data standard/domain combination. The function is designed to work with the SDTM and ADaM CDISC (<https://www.cdisc.org/>) standards for clinical trial data. Currently, "labs" is the only domain supported.
#'
#' @param data A data frame in which to detect the data standard
#' @param data A data frame in which to detect the data standard
#' @param includeFields specifies whether to check the data set for field level data in addition to columns. Default: \code{TRUE}.
#' @param domain The data domain for the data set provided. Default: \code{"labs"}.
#' @return A list containing the matching \code{"standard"} ("ADaM", "SDTM" or "None") and a list of \code{"details"} describing each standard considered. #'
#' @examples
#'
#' @return A list containing the matching \code{"standard"} ("ADaM", "SDTM" or "None") and a list of \code{"details"} describing each standard considered. #'
#'
#' @examples
#'
#' detectStandard(adlbc)[["standard"]] #ADaM
#' detectStandard(iris)[["standard"]] #None
#'
#'
#' \dontrun{
#' detectStandard(adlbc,domain="AE") #throws error. AE domain not supported in this release.
#' detectStandard(adlbc,domain="AE") #throws error. AE domain not supported in this release.
#' }
#'
#'
#' @export

detectStandard <- function(data, domain="labs"){
detectStandard <- function(data, includeFields=TRUE, domain="labs"){
stopifnot(
domain=="labs",
typeof(domain)=="character"
)



# Create placeholder list, with Standard = None.
standard_list <- list()
standard_list[["details"]] = list()
data_cols<-toupper(colnames(data))

# Check to see if data columns match the standards
standard_list[["details"]][["ADaM"]]<-compare_cols(data_cols,getRequiredColumns(standard="ADaM"))
standard_list[["details"]][["SDTM"]]<-compare_cols(data_cols,getRequiredColumns(standard="SDTM"))

standard_list[["details"]][["ADaM"]]<-evaluateStandard(data,standard="ADaM", includeFields=includeFields, domain=domain)
standard_list[["details"]][["SDTM"]]<-evaluateStandard(data,standard="SDTM", includeFields=includeFields, domain=domain)

# Determine the final standard
if(standard_list[["details"]][["SDTM"]][["match"]] == "Full"){
standard_list[["standard"]]<- "SDTM"
Expand All @@ -40,14 +42,14 @@ detectStandard <- function(data, domain="labs"){
} else if(standard_list[["details"]][["SDTM"]][["match"]] == "Partial" |
standard_list[["details"]][["ADaM"]][["match"]] == "Partial"){
standard_list[["standard"]] <- ifelse(
length(standard_list[["details"]][["ADaM"]][["matched_columns"]]) >
length(standard_list[["details"]][["SDTM"]][["matched_columns"]]),
length(standard_list[["details"]][["ADaM"]][["valid_count"]]) >
length(standard_list[["details"]][["SDTM"]][["valid_count"]]),
"ADaM" , "SDTM" #SDTM if they are equal
)

} else {
standard_list[["standard"]]<-"None"
}

return(standard_list)
}
}
74 changes: 74 additions & 0 deletions R/evaluateStandard.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#' Evaluate a data set against a data standard
#'
#' Determines whether the required data elements in a data standard are found in a given data frame
#'
#' The checks are built from the settings metadata: every setting that is flagged as required and
#' that maps to either a data column or a field-level value is looked up in \code{data}.
#'
#' @param data data.frame to evaluate
#' @param standard standard to evaluate. Must be "adam" or "sdtm" (compared case-insensitively).
#' @param includeFields should field level data be evaluated?
#' @param domain data domain. "labs" only for now. Currently only validated as a character value; it is not otherwise used by the evaluation.
#'
#' @return a list describing to what degree the data set matches the data standard. The "standard" property is the lower-cased name of the standard evaluated. The "match" property describes compliance with the standard as "Full" (no invalid checks), "Partial" (some valid and some invalid checks) or "None" (no valid checks). The "checks" property is a tibble of the data elements expected for the standard (\code{text_key}, \code{standard_val}, \code{type}) and whether they are "valid" in the given data set. "valid_count" and "invalid_count" provide counts of the valid and invalid checks.
#'
#' @examples
#' safetyGraphics:::evaluateStandard(data=adlbc, standard="adam")[["match"]] # expected "Full"
#' safetyGraphics:::evaluateStandard(data=adlbc, standard="sdtm")[["match"]] # expected not "Full"
#'
#' @import dplyr
#' @importFrom purrr map
#' @importFrom rlang .data
#'
#' @keywords internal


evaluateStandard <- function(data, standard, includeFields=TRUE, domain="labs"){

  stopifnot(
    is.data.frame(data),
    is.character(standard),
    is.logical(includeFields),
    is.character(domain),
    tolower(standard) %in% c("adam","sdtm")
  )

  standard<-tolower(standard)

  compare_summary<-list()
  compare_summary[["standard"]]<-standard

  # Get metadata for settings using the specified standard and see if required data elements are found
  standardChecks <- getSettingsMetadata(cols=c("text_key", "column_mapping", "field_mapping", "field_column_key", "setting_required","standard_val",standard)) %>%
    rename("standard_val"=standard) %>%
    # Elementwise `|` is required here: the scalar `||` only inspects the first
    # row on older versions of R and is an error for vectors in R >= 4.3.
    filter(.data$column_mapping == TRUE | .data$field_mapping == TRUE) %>%
    filter(.data$setting_required==TRUE) %>%
    mutate(type = ifelse(.data$column_mapping, "column", "field")) %>%
    # hasColumn()/hasField() only accept a single column name, so evaluate row by row
    rowwise %>%
    mutate(field_column_name = ifelse(.data$field_mapping, getSettingsMetadata(cols=standard, text_keys=.data$field_column_key),"")) %>%
    mutate(valid = ifelse(.data$column_mapping,
      hasColumn(data=data, columnName=.data$standard_val),
      hasField(data=data, columnName=.data$field_column_name, fieldValue=.data$standard_val)
    )) %>%
    # drop the row-wise grouping so it does not leak into the returned tibble
    ungroup() %>%
    select(.data$text_key, .data$standard_val, .data$type, .data$valid)

  # filter out the field level checks if includeFields is false
  if(!includeFields){
    standardChecks <- standardChecks %>% filter(.data$type != "field")
  }

  # the checks are returned as a tibble, one row per expected data element
  compare_summary[["checks"]] <- standardChecks

  # count valid/invalid data elements
  compare_summary[["valid_count"]] <- standardChecks %>% filter(.data$valid) %>% nrow()
  compare_summary[["invalid_count"]] <- standardChecks %>% filter(!.data$valid) %>% nrow()

  # "Full" when nothing is invalid, "Partial" when at least one element is valid, otherwise "None"
  if (compare_summary[["invalid_count"]]==0) {
    compare_summary[["match"]] <- "Full"
  } else if(compare_summary[["valid_count"]]>0) {
    compare_summary[["match"]] <- "Partial"
  } else {
    compare_summary[["match"]] <- "None"
  }

  return(compare_summary)
}
23 changes: 23 additions & 0 deletions R/hasColumn.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#' Test for the presence of a column in a data set
#'
#' Reports whether a data frame has a column with the given name. Both the
#' requested name and the data frame's column names are upper-cased before
#' they are compared, so the check ignores case.
#'
#' @param columnName A single character string naming the column to look for.
#' @param data the data.frame whose column names are searched.
#' @return logical scalar. TRUE if a matching column name exists. FALSE otherwise
#'
#' @examples
#' safetyGraphics:::hasColumn(columnName="PARAM",data=adlbc) #TRUE
#' safetyGraphics:::hasColumn(columnName="Not_a_column",data=adlbc) #FALSE
#'
#' @keywords internal

hasColumn <- function(columnName, data){
  stopifnot(
    typeof(columnName)=="character",
    length(columnName)==1,
    is.data.frame(data)
  )

  # Normalise case on both sides, then look the name up among the columns.
  wanted <- toupper(columnName)
  available <- toupper(colnames(data))
  match(wanted, available, nomatch = 0L) > 0L
}
33 changes: 33 additions & 0 deletions R/hasField.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#' Check whether a specified field value is found in a data set
#'
#' This checks whether a specific value is found in a specified column in a specified data set
#'
#' The column is located by an exact name match first. If there is no exact
#' match, the name is compared ignoring case (the same comparison
#' \code{hasColumn} uses), so a column that \code{hasColumn} reports as present
#' is also the column that gets searched here.
#'
#' @param fieldValue A single value to check for.
#' @param columnName A single character string naming the column to check.
#' @param data the data.frame to search.
#' @return logical scalar. TRUE if \code{fieldValue} is found in the column. FALSE if it is not found or if the column does not exist.
#'
#' @examples
#' safetyGraphics:::hasField(fieldValue="Bilirubin (umol/L)",columnName="PARAM",data=adlbc) #TRUE
#' safetyGraphics:::hasField(fieldValue="Not_a_real_value",columnName="",data=adlbc) #FALSE
#'
#' @keywords internal

hasField<- function(fieldValue, columnName, data){
  stopifnot(
    length(fieldValue)==1,
    typeof(columnName)=="character",
    length(columnName)==1,
    is.data.frame(data)
  )

  dataColumns <- colnames(data)

  # Prefer an exact name match; otherwise fall back to a case-insensitive one
  # so the lookup agrees with the case-insensitive existence check in hasColumn.
  columnIndex <- match(columnName, dataColumns)
  if(is.na(columnIndex)){
    columnIndex <- match(toupper(columnName), toupper(dataColumns))
  }

  # No such column: the value cannot be present.
  if(is.na(columnIndex)){
    return(FALSE)
  }

  # Extract by position so the column that was actually matched is searched.
  validFields <- unique(data[[columnIndex]])
  fieldValue %in% validFields
}
1 change: 1 addition & 0 deletions R/settingsMetadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#' \item{column_type}{Expected type for the data column values. Should be "character","logical" or "numeric"}
#' \item{column_required}{Flag indicating whether the associated data column should be considered required}
#' \item{field_mapping}{Flag indicating whether the setting corresponds to a field-level mapping in the data}
#' \item{field_column_key}{Key for the column that provides options for the field-level mapping in the data}
#' \item{adam}{Settings values for the ADaM standard}
#' \item{sdtm}{Settings values for the SDTM standard}
#' }
Expand Down
52 changes: 26 additions & 26 deletions data-raw/settingsMetadata.csv
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
chart_edish,text_key,label,description,setting_type,setting_required,column_mapping,column_type,column_required,field_mapping,adam,sdtm
TRUE,id_col,ID column,Unique subject identifier variable name.,character,TRUE,TRUE,character,TRUE,FALSE,USUBJID,USUBJID
TRUE,value_col,Value column,Lab result variable name.,character,TRUE,TRUE,numeric,TRUE,FALSE,AVAL,STRESN
TRUE,measure_col,Measure column,Lab measure variable name,character,TRUE,TRUE,character,TRUE,FALSE,PARAM,TEST
TRUE,normal_col_low,Lower Limit of Normal column,Lower limit of normal variable name,character,TRUE,TRUE,numeric,TRUE,FALSE,A1LO,STNRLO
TRUE,normal_col_high,Upper Limit of Normal column,Upper limit of normal variable name,character,TRUE,TRUE,numeric,TRUE,FALSE,A1HI,STNRHI
TRUE,studyday_col,Study Day column,Visit day variable name,character,TRUE,TRUE,numeric,TRUE,FALSE,ADY,DY
TRUE,visit_col,Visit column,Visit variable name,character,FALSE,TRUE,character,FALSE,FALSE,VISIT,VISIT
TRUE,visitn_col,Visit Number column,Visit number variable name,character,FALSE,TRUE,numeric,FALSE,FALSE,VISITNUM,VISITNUM
TRUE,filters,Filters columns,"An optional list of specifications for filters. Each filter is a nested, named list (containing the filter value column: 'value_col' and associated label: 'label') within the larger list.",vector,FALSE,TRUE,NA,FALSE,FALSE,,
TRUE,group_cols,Group columns,"An optional list of specifications for grouping columns. Each group column is a nested, named list (containing the group variable column: 'value_col' and associated label: 'label') within the larger list. ",vector,FALSE,TRUE,NA,FALSE,FALSE,,
TRUE,measure_values--ALT,Alanine Aminotransferase value,Value used for Alanine Aminotransferase in the specified measure column,character,FALSE,FALSE,NA,NA,TRUE,Alanine Aminotransferase (U/L),"Aminotransferase, alanine (ALT)"
TRUE,measure_values--AST,Aspartate Aminotransferase value,Value used for Aspartate Aminotransferase in the specified measure column,character,FALSE,FALSE,NA,NA,TRUE,Aspartate Aminotransferase (U/L),"Aminotransferase, aspartate (AST)"
TRUE,measure_values--TB,Total Bilirubin value,Value used for Total Bilirubin in the specified measure column,character,FALSE,FALSE,NA,NA,TRUE,Bilirubin (umol/L),Total Bilirubin
TRUE,measure_values--ALP,Alkaline Phosphatase value,Value used for Alkaline Phosphatase in the specified measure column,character,FALSE,FALSE,NA,NA,TRUE,Alkaline Phosphatase (U/L),Alkaline phosphatase (ALP)
TRUE,baseline--value_col,Baseline column,An optional list defining which column represent the baseline visit(s) of the study.,character,FALSE,TRUE,NA,FALSE,FALSE,,
TRUE,baseline--values,Baseline values,An optional list defining which values (one or more) represent the baseline visit(s) of the study.,vector,FALSE,FALSE,NA,NA,TRUE,,
TRUE,analysisFlag--value_col,Analysis Flag column,An optional list defining which column should be used in eDish and mDish analyses.,character,FALSE,TRUE,NA,FALSE,FALSE,,
TRUE,analysisFlag--values,Analysis Flag values,An optional list defining which values should be used in eDish and mDish analyses.,vector,FALSE,FALSE,NA,NA,TRUE,,
TRUE,x_options,X axis options,"Specifies variable options for the x-axis using the key values from measure_values (e.g. 'ALT'). When multiple options are specified, a control allowing the user to interactively change the x variable is shown. ",vector,FALSE,FALSE,NA,NA,FALSE,,
TRUE,y_options,Y axis options,"Specifies variable options for the y-axis using the key values from measure_values (e.g. 'TB'). When multiple options are specified, a control allowing the user to interactively change the y variable is shown. ",vector,FALSE,FALSE,NA,NA,FALSE,,
TRUE,visit_window,Default Visit Window in Days ,Default visit window used to highlight eDish points where x and y measures occurred within the specified number of days. Editable by user after render. ,numeric,FALSE,FALSE,NA,NA,FALSE,,
TRUE,r_ratio_filter,Show R Ratio Filter?,Specifies whether the R Ratio filter should be shown. R ratio is defined as: (ALT value/ULN for ALT) / (ALP value/ULN for ALP).,logical,FALSE,FALSE,NA,NA,FALSE,,
TRUE,r_ratio_cut,Default R Ratio Cut,Default cut point for R Ratio filter. Ignored when r_ratio_filter = FALSE. User can update this setting via the UI when r_ratio_filter = TRUE. ,numeric,FALSE,FALSE,NA,NA,FALSE,,
TRUE,showTitle,Show Chart Title? ,Specifies whether the title should be drawn above the controls.,logical,FALSE,FALSE,NA,NA,FALSE,,
TRUE,warningText,Warning text,"Informational text to be displayed near the top of the controls (beneath the title, if any). No warning is displayed if warningText = ''. ",Character,FALSE,FALSE,NA,NA,FALSE,,
chart_edish,text_key,label,description,setting_type,setting_required,column_mapping,column_type,column_required,field_mapping,field_column_key,adam,sdtm
TRUE,id_col,ID column,Unique subject identifier variable name.,character,TRUE,TRUE,character,TRUE,FALSE,,USUBJID,USUBJID
TRUE,value_col,Value column,Lab result variable name.,character,TRUE,TRUE,numeric,TRUE,FALSE,,AVAL,STRESN
TRUE,measure_col,Measure column,Lab measure variable name,character,TRUE,TRUE,character,TRUE,FALSE,,PARAM,TEST
TRUE,normal_col_low,Lower Limit of Normal column,Lower limit of normal variable name,character,TRUE,TRUE,numeric,TRUE,FALSE,,A1LO,STNRLO
TRUE,normal_col_high,Upper Limit of Normal column,Upper limit of normal variable name,character,TRUE,TRUE,numeric,TRUE,FALSE,,A1HI,STNRHI
TRUE,studyday_col,Study Day column,Visit day variable name,character,TRUE,TRUE,numeric,TRUE,FALSE,,ADY,DY
TRUE,visit_col,Visit column,Visit variable name,character,FALSE,TRUE,character,FALSE,FALSE,,VISIT,VISIT
TRUE,visitn_col,Visit Number column,Visit number variable name,character,FALSE,TRUE,numeric,FALSE,FALSE,,VISITNUM,VISITNUM
TRUE,filters,Filters columns,"An optional list of specifications for filters. Each filter is a nested, named list (containing the filter value column: 'value_col' and associated label: 'label') within the larger list.",vector,FALSE,TRUE,NA,FALSE,FALSE,,,
TRUE,group_cols,Group columns,"An optional list of specifications for grouping columns. Each group column is a nested, named list (containing the group variable column: 'value_col' and associated label: 'label') within the larger list. ",vector,FALSE,TRUE,NA,FALSE,FALSE,,,
TRUE,measure_values--ALT,Alanine Aminotransferase value,Value used for Alanine Aminotransferase in the specified measure column,character,TRUE,FALSE,NA,NA,TRUE,measure_col,Alanine Aminotransferase (U/L),"Aminotransferase, alanine (ALT)"
TRUE,measure_values--AST,Aspartate Aminotransferase value,Value used for Aspartate Aminotransferase in the specified measure column,character,TRUE,FALSE,NA,NA,TRUE,measure_col,Aspartate Aminotransferase (U/L),"Aminotransferase, aspartate (AST)"
TRUE,measure_values--TB,Total Bilirubin value,Value used for Total Bilirubin in the specified measure column,character,TRUE,FALSE,NA,NA,TRUE,measure_col,Bilirubin (umol/L),Total Bilirubin
TRUE,measure_values--ALP,Alkaline Phosphatase value,Value used for Alkaline Phosphatase in the specified measure column,character,TRUE,FALSE,NA,NA,TRUE,measure_col,Alkaline Phosphatase (U/L),Alkaline phosphatase (ALP)
TRUE,baseline--value_col,Baseline column,An optional list defining which column represent the baseline visit(s) of the study.,character,FALSE,TRUE,NA,FALSE,FALSE,,,
TRUE,baseline--values,Baseline values,An optional list defining which values (one or more) represent the baseline visit(s) of the study.,vector,FALSE,FALSE,NA,NA,TRUE,baseline--value_col,,
TRUE,analysisFlag--value_col,Analysis Flag column,An optional list defining which column should be used in eDish and mDish analyses.,character,FALSE,TRUE,NA,FALSE,FALSE,,,
TRUE,analysisFlag--values,Analysis Flag values,An optional list defining which values should be used in eDish and mDish analyses.,vector,FALSE,FALSE,NA,NA,TRUE,analysisFlag--value_col,,
TRUE,x_options,X axis options,"Specifies variable options for the x-axis using the key values from measure_values (e.g. 'ALT'). When multiple options are specified, a control allowing the user to interactively change the x variable is shown. ",vector,FALSE,FALSE,NA,NA,FALSE,,,
TRUE,y_options,Y axis options,"Specifies variable options for the y-axis using the key values from measure_values (e.g. 'TB'). When multiple options are specified, a control allowing the user to interactively change the y variable is shown. ",vector,FALSE,FALSE,NA,NA,FALSE,,,
TRUE,visit_window,Default Visit Window in Days ,Default visit window used to highlight eDish points where x and y measures occurred within the specified number of days. Editable by user after render. ,numeric,FALSE,FALSE,NA,NA,FALSE,,,
TRUE,r_ratio_filter,Show R Ratio Filter?,Specifies whether the R Ratio filter should be shown. R ratio is defined as: (ALT value/ULN for ALT) / (ALP value/ULN for ALP).,logical,FALSE,FALSE,NA,NA,FALSE,,,
TRUE,r_ratio_cut,Default R Ratio Cut,Default cut point for R Ratio filter. Ignored when r_ratio_filter = FALSE. User can update this setting via the UI when r_ratio_filter = TRUE. ,numeric,FALSE,FALSE,NA,NA,FALSE,,,
TRUE,showTitle,Show Chart Title? ,Specifies whether the title should be drawn above the controls.,logical,FALSE,FALSE,NA,NA,FALSE,,,
TRUE,warningText,Warning text,"Informational text to be displayed near the top of the controls (beneath the title, if any). No warning is displayed if warningText = ''. ",Character,FALSE,FALSE,NA,NA,FALSE,,,
Binary file modified data/adlbc.rda
Binary file not shown.
Binary file modified data/settingsMetadata.rda
Binary file not shown.
Loading