Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 39 additions & 22 deletions R/detectStandard.R
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
#' Detect the data standard used for a data set
#'
#' This function attempts to detect the data CDISC clinical standard used in a given R data frame.
#' This function attempts to detect the clinical data standard used in a given R data frame.
#'
#' This function compares the columns in the provided \code{"data"} with the required columns for a given data standard/domain combination. The function is designed to work with the SDTM and AdAM CDISC(<https://www.cdisc.org/>) standards for clinical trial data. Currently, "labs" is the only domain supported.
#' This function compares the columns in the provided \code{"data"} with the required columns for a given data standard/domain combination. The function is designed to work with the SDTM and AdAM CDISC(<https://www.cdisc.org/>) standards for clinical trial data by default. Additional standards can be added by modifying the \code{"standardMetadata"} data set included as part of this package. Currently, "labs" is the only domain supported.
#'
#' @param data A data frame in which to detect the data standard
#' @param includeFields specifies whether to check the data set for field level data in addition to columns. Default: \code{TRUE}.
#' @param domain The data domain for the data set provided. Default: \code{"labs"}.
#' @return A list containing the matching \code{"standard"} ("ADaM", "SDTM" or "None") and a list of \code{"details"} describing each standard considered.
#' @return A list containing the matching \code{"standard"} from \code{"standardMetadata"} and a list of \code{"details"} describing each standard considered.
#' @examples

#' detectStandard(adlbc)[["standard"]] #AdAM
#' detectStandard(adlbc)[["standard"]] #adam
#' detectStandard(iris)[["standard"]] #none
#'
#' \dontrun{
Expand All @@ -26,28 +26,45 @@ detectStandard <- function(data, includeFields=TRUE, domain="labs"){
)


# Create placeholder list, with Standard = None.
# Create placeholder list, with Standard = none.
available_standards <- safetyGraphics::standardsMetadata %>% select(-.data$text_key) %>% names
standard_list <- list()
standard_list[["details"]] = list()
standard_list[["details"]][["ADaM"]]<-evaluateStandard(data,standard="ADaM", includeFields=includeFields, domain=domain)
standard_list[["details"]][["SDTM"]]<-evaluateStandard(data,standard="SDTM", includeFields=includeFields, domain=domain)
standard_list[["standard"]] = "none"
standard_list[["standard_percent"]] = 0

for(standard in available_standards){
# evaluate the current standard and save the result
standard_list[["details"]][[standard]]<-evaluateStandard(data,standard=standard, includeFields=includeFields, domain=domain)

# if the current standard is a better match, use it as the overall standard
# if there is a tie, don't change the standard - this means the column order in standardMetadata breaks ties!
current_percent <- standard_list[["details"]][[standard]][["match_percent"]]
overall_percent <- standard_list[["standard_percent"]]
if(current_percent > overall_percent){
standard_list[["standard"]] <- standard
standard_list[["standard_percent"]] <- current_percent
}
}

# Determine the final standard
if(standard_list[["details"]][["SDTM"]][["match"]] == "Full"){
standard_list[["standard"]]<- "SDTM"
} else if(standard_list[["details"]][["ADaM"]][["match"]] == "Full"){
standard_list[["standard"]]<- "ADaM"
} else if(standard_list[["details"]][["SDTM"]][["match"]] == "Partial" |
standard_list[["details"]][["ADaM"]][["match"]] == "Partial"){
standard_list[["standard"]] <- ifelse(
length(standard_list[["details"]][["ADaM"]][["valid_count"]]) >
length(standard_list[["details"]][["SDTM"]][["valid_count"]]),
"ADaM" , "SDTM" #SDTM if they are equal
)

} else {
standard_list[["standard"]]<-"None"
}

# TODO: write a general algorithm to do this ...
# if(standard_list[["details"]][["sdtm"]][["match"]] == "Full"){
# standard_list[["standard"]]<- "sdtm"
# } else if(standard_list[["details"]][["adam"]][["match"]] == "Full"){
# standard_list[["standard"]]<- "adam"
# } else if(standard_list[["details"]][["sdtm"]][["match"]] == "Partial" |
# standard_list[["details"]][["adam"]][["match"]] == "Partial"){
# standard_list[["standard"]] <- ifelse(
# length(standard_list[["details"]][["adam"]][["valid_count"]]) >
# length(standard_list[["details"]][["sdtm"]][["valid_count"]]),
# "adam" , "sdtm" #SDTM if they are equal
# )
#
# } else {
# standard_list[["standard"]]<-"None"
# }

return(standard_list)
}
22 changes: 11 additions & 11 deletions R/evaluateStandard.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#' @param includeFields should field level data be evaluated?
#' @param domain data domain. "labs" only for now.
#'
#' @return a list describing to what degree the data set matches the data standard. The "match" property describes compliance with the standard as "Full", "Partial" or "None". The "checks" property is a list of the data elements expected for the standard and whether they are "valid" in the given data set. "valid_checks" and "invalid_checks" provide counts of the specified checks.
#' @return a list describing to what degree the data set matches the data standard. The "match" property describes compliance with the standard as "full", "partial" or "none". The "checks" property is a list of the data elements expected for the standard and whether they are "valid" in the given data set. "total_checks", "valid_checks" and "invalid_checks" provide counts of the specified checks. "match_percent" is calculated as valid_checks/total_checks.
#'
#' @examples
#' safetyGraphics:::evaluateStandard(data=adlbc, standard="adam") # Match is TRUE
Expand All @@ -26,8 +26,7 @@ evaluateStandard <- function(data, standard, includeFields=TRUE, domain="labs"){
is.data.frame(data),
is.character(standard),
is.logical(includeFields),
is.character(domain),
tolower(standard) %in% c("adam","sdtm")
is.character(domain)
)

standard<-tolower(standard)
Expand All @@ -43,7 +42,8 @@ evaluateStandard <- function(data, standard, includeFields=TRUE, domain="labs"){
mutate(type = ifelse(.data$column_mapping, "column", "field")) %>%
rowwise %>%
mutate(field_column_name = ifelse(.data$field_mapping, getSettingsMetadata(cols=standard, text_keys=.data$field_column_key),"")) %>%
mutate(valid = ifelse(.data$column_mapping,
mutate(
valid = ifelse(.data$column_mapping,
hasColumn(data=data, columnName=.data$standard_val),
hasField(data=data, columnName=.data$field_column_name, fieldValue=.data$standard_val)
)) %>%
Expand All @@ -54,20 +54,20 @@ evaluateStandard <- function(data, standard, includeFields=TRUE, domain="labs"){
standardChecks <- standardChecks %>% filter(.data$type != "field")
}

# compare_summary[["checks"]] <- split(standardChecks, seq(nrow(standardChecks)))%>%map(~as.list(.)) #coerce to list of lists?
compare_summary[["checks"]] <- standardChecks #or just keep the tibble ...
compare_summary[["checks"]] <- standardChecks

# count valid/invalid data elements
compare_summary[["total_count"]] <- standardChecks %>% nrow()
compare_summary[["valid_count"]] <- standardChecks %>% filter(.data$valid) %>% nrow()
compare_summary[["invalid_count"]] <- standardChecks %>% filter(!.data$valid) %>% nrow()


compare_summary[["match_percent"]] <- compare_summary[["valid_count"]] / compare_summary[["total_count"]]
if (compare_summary[["invalid_count"]]==0) {
compare_summary[["match"]] <- "Full"
compare_summary[["match"]] <- "full"
} else if(compare_summary[["valid_count"]]>0) {
compare_summary[["match"]] <- "Partial"
compare_summary[["match"]] <- "partial"
} else {
compare_summary[["match"]] <- "None"
compare_summary[["match"]] <- "none"
}

return(compare_summary)
Expand Down
11 changes: 9 additions & 2 deletions R/getSettingsMetadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#' @param charts optional vector of chart names used to filter the metadata. Exact matches only (case-insensitive). All rows returned by default.
#' @param text_keys optional vector of keys used to filter the metadata. Partial matches for any of the strings are returned (case-insensitive). All rows returned by default.
#' @param filter_expr optional filter expression used to subset the data.
#' @param add_standards should data standard info stored in standardsMetadata be included
#' @param cols optional vector of columns to return from the metadata. All columns returned by default.
#' @param metadata metadata data frame to be queried
#'
Expand All @@ -27,9 +28,15 @@
#'
#' @export

getSettingsMetadata<-function(charts=NULL, text_keys=NULL, cols=NULL, filter_expr=NULL, metadata = safetyGraphics::settingsMetadata){
getSettingsMetadata<-function(charts=NULL, text_keys=NULL, cols=NULL, filter_expr=NULL, add_standards=TRUE, metadata = safetyGraphics::settingsMetadata){

md <- metadata
md <- metadata %>% mutate(text_key=as.character(.data$text_key))

if(add_standards){
ms<-safetyGraphics::standardsMetadata %>% mutate(text_key=as.character(.data$text_key))
md<-md%>%left_join(ms, by="text_key")
}

all_columns <- names(md)

#filter the metadata based on the charts option (if any)
Expand Down
4 changes: 1 addition & 3 deletions R/settingsMetadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,13 @@
#' \item{text_key}{Text key indicating the setting name. \code{'--'} delimiter indicates a nested setting}
#' \item{label}{Label}
#' \item{description}{Description}
#' \item{setting_cat}{Setting category (data, measure, appearance)}
#' \item{setting_type}{Expected type for setting value. Should be "character", "vector", "numeric" or "logical"}
#' \item{setting_required}{Flag indicating if the setting is required}
#' \item{column_mapping}{Flag indicating if the setting corresponds to a column in the associated data}
#' \item{column_type}{Expected type for the data column values. Should be "character","logical" or "numeric"}
#' \item{field_mapping}{Flag indicating whether the setting corresponds to a field-level mapping in the data}
#' \item{field_column_key}{Key for the column that provides options for the field-level mapping in the data}
#' \item{adam}{Settings values for the ADaM standard}
#' \item{sdtm}{Settings values for the SDTM standard}
#' \item{setting_cat}{Setting category (data, measure, appearance)}
#' }
#'
#' @source Created for this package
Expand Down
13 changes: 13 additions & 0 deletions R/standardsMetadata.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#' Standards Metadata
#'
#' Metadata about the data standards used to configure safetyGraphics charts. One record per unique setting. Columns contain default setting values for clinical data standards, like the CDISC "adam" and "sdtm" standards.
#'
#' @format A data frame with 25 rows and 3 columns
#' \describe{
#' \item{text_key}{Text key indicating the setting name. \code{'--'} delimiter indicates a nested setting}
#' \item{adam}{Settings values for the ADaM standard}
#' \item{sdtm}{Settings values for the SDTM standard}
#' }
#'
#' @source Created for this package
"standardsMetadata"
5 changes: 4 additions & 1 deletion data-raw/csv_to_rda.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,8 @@ library(usethis)
ablbc <- read.csv("data-raw/adlbc.csv")
usethis::use_data(adlbc, overwrite = TRUE)

settingsMetadata<- read.csv("data-raw/settingsMetadata.csv")
settingsMetadata <- read.csv("data-raw/settingsMetadata.csv")
usethis::use_data(settingsMetadata, overwrite = TRUE)

standardsMetadata <- read.csv("data-raw/standardsMetadata.csv")
usethis::use_data(standardsMetadata, overwrite = TRUE)
Loading