diff --git a/R/utils_checks.R b/R/utils_checks.R
index 1d5537ac..601f1685 100644
--- a/R/utils_checks.R
+++ b/R/utils_checks.R
@@ -142,6 +142,36 @@ MSstatsPrepareForDataProcess = function(input, log_base, fix_missing) {
 }
 
 
+#' Map IsotopeLabelType values to canonical "H"/"L" levels
+#'
+#' Matching ignores case and surrounding whitespace: "h" and "heavy" become
+#' "H", "l" and "light" become "L". Every other value, including NA, becomes
+#' NA; a warning lists the non-missing values that could not be mapped.
+#' @param x character or factor vector of IsotopeLabelType values
+#' @return factor with levels from \code{c("H","L")} restricted to those
+#'   present; unrecognized and missing values are \code{NA}
+#' @keywords internal
+#' @noRd
+.mapIsotopeLabelType = function(x) {
+    label_map <- c(
+        "h" = "H",
+        "l" = "L",
+        "heavy" = "H",
+        "light" = "L"
+    )
+    raw <- as.character(x)
+    key <- tolower(trimws(raw))
+    mapped <- unname(label_map[key])
+    # Unknown labels turn into NA; report them so rows are not lost silently.
+    unmapped <- unique(raw[is.na(mapped) & !is.na(raw)])
+    if (length(unmapped) > 0) {
+        warning("Unrecognized IsotopeLabelType value(s) set to NA: ",
+                paste(unmapped, collapse = ", "), call. = FALSE)
+    }
+    factor(mapped, levels = intersect(c("H", "L"), mapped))
+}
+
+
 #' Check validity of data that were not processed by MSstats converter
 #' @param input data.table
 #' @inheritParams MSstatsPrepareForDataProcess
@@ -208,12 +238,7 @@ MSstatsPrepareForDataProcess = function(input, log_base, fix_missing) {
         stop("Statistical tools in MSstats are only proper for label-free or with reference peptide experiments.")
     }
 
-    input$ISOTOPELABELTYPE = factor(input$ISOTOPELABELTYPE)
-    if (data.table::uniqueN(input$ISOTOPELABELTYPE) == 2) {
-        levels(input$ISOTOPELABELTYPE) = c("H", "L")
-    } else {
-        levels(input$ISOTOPELABELTYPE) = "L"
-    }
+    input$ISOTOPELABELTYPE <- .mapIsotopeLabelType(input$ISOTOPELABELTYPE)
     input
 }
 
@@ -231,12 +256,7 @@ MSstatsPrepareForDataProcess = function(input, log_base, fix_missing) {
     }
     input$PEPTIDE = paste(input$PEPTIDESEQUENCE, input$PRECURSORCHARGE, sep = "_")
     input$TRANSITION = paste(input$FRAGMENTION, input$PRODUCTCHARGE, sep = "_")
-    input$ISOTOPELABELTYPE = factor(input$ISOTOPELABELTYPE)
-    if (data.table::uniqueN(input$ISOTOPELABELTYPE) == 2) {
-        levels(input$ISOTOPELABELTYPE) = c("H", "L")
-    } else {
-        levels(input$ISOTOPELABELTYPE) = "L"
-    }
+    input$ISOTOPELABELTYPE <- .mapIsotopeLabelType(input$ISOTOPELABELTYPE)
     input
 }
 
diff --git a/inst/tinytest/test_dataProcess.R b/inst/tinytest/test_dataProcess.R
index b8b01faa..4c979d2b 100644
--- a/inst/tinytest/test_dataProcess.R
+++ b/inst/tinytest/test_dataProcess.R
@@ -21,6 +21,14 @@ expect_true(
     "H" %in% QuantDataDefault$FeatureLevelData$LABEL,
     info = "FeatureLevelData should contain heavy-label (H) rows for label-based SRM data"
 )
+expect_true(
+    "L" %in% QuantDataDefault$FeatureLevelData$LABEL,
+    info = "SRMRawData FeatureLevelData must contain L rows"
+)
+expect_true(
+    nrow(QuantDataDefault$ProteinLevelData) > 0,
+    info = "SRMRawData must produce non-empty ProteinLevelData"
+)
 
 # Test dataProcess with technical replicates & fractions ------------------
 
diff --git a/inst/tinytest/test_utils_checks.R b/inst/tinytest/test_utils_checks.R
new file mode 100644
index 00000000..90aa95f2
--- /dev/null
+++ b/inst/tinytest/test_utils_checks.R
@@ -0,0 +1,92 @@
+# .prepareForDataProcess tests ---------------------------------------------
+input <- data.frame(
+    PeptideModifiedSequence = c("Apeptide", "BPEPTIDE"),
+    PrecursorCharge = c(2L, 3L),
+    FragmentIon = c("b2", "y3"),
+    ProductCharge = c(1L, 1L),
+    IsotopeLabelType = c("H", "L")
+)
+result_peptideModifiedSequence <- MSstats:::.prepareForDataProcess(input)
+expect_true(
+    "PEPTIDESEQUENCE" %in% colnames(result_peptideModifiedSequence),
+    info = "PEPTIDESEQUENCE should exist after renaming PEPTIDEMODIFIEDSEQUENCE"
+)
+expect_false(
+    "PEPTIDEMODIFIEDSEQUENCE" %in% colnames(result_peptideModifiedSequence),
+    info = "PEPTIDEMODIFIEDSEQUENCE should no longer exist after renaming"
+)
+
+
+make_input <- function(labels) {
+    n_rows <- length(labels)
+    data.table::data.table(
+        PeptideSequence = rep("PEPT", n_rows),
+        PrecursorCharge = rep(2L, n_rows),
+        FragmentIon = rep("y3", n_rows),
+        ProductCharge = rep(1L, n_rows),
+        IsotopeLabelType = labels
+    )
+}
+
+result <- MSstats:::.prepareForDataProcess(make_input(c("heavy", "light")))
+expect_true(
+    "H" %in% as.character(result$ISOTOPELABELTYPE),
+    info = "label_map: 'heavy' must map to factor level 'H'"
+)
+expect_true(
+    "L" %in% as.character(result$ISOTOPELABELTYPE),
+    info = "label_map: 'light' must map to factor level 'L'"
+)
+expect_equal(
+    levels(result$ISOTOPELABELTYPE), c("H", "L"),
+    info = "label_map: factor levels must be exactly c('H', 'L')"
+)
+
+
+result_light <- MSstats:::.prepareForDataProcess(make_input(rep("light", 4)))
+expect_equal(
+    unique(as.character(result_light$ISOTOPELABELTYPE)), "L",
+    info = "label_map: all 'light' input must produce all 'L' output"
+)
+expect_equal(
+    levels(result_light$ISOTOPELABELTYPE), c("L"),
+    info = "label_map: factor levels for all-'light' input must be exactly c('L')"
+)
+
+result_hl <- MSstats:::.prepareForDataProcess(make_input(c("H", "L", "H", "L")))
+expect_equal(
+    as.character(result_hl$ISOTOPELABELTYPE), c("H", "L", "H", "L"),
+    info = "label_map: 'H' / 'L' strings must still pass through unchanged"
+)
+expect_equal(
+    levels(result_hl$ISOTOPELABELTYPE), c("H", "L"),
+    info = "label_map: factor levels for H/L input must still be c('H', 'L')"
+)
+
+result_ll <- MSstats:::.prepareForDataProcess(make_input(rep("L", 5)))
+expect_equal(
+    unique(as.character(result_ll$ISOTOPELABELTYPE)), "L",
+    info = "label_map: all 'L' input must produce all 'L' output"
+)
+
+result_other <- MSstats:::.prepareForDataProcess(make_input(rep("test", 5)))
+expect_true(
+    all(is.na(result_other$ISOTOPELABELTYPE)),
+    info = "Other IsotopeLabelType maps to NA"
+)
+
+result_na <- MSstats:::.prepareForDataProcess(make_input(rep(NA, 5)))
+expect_true(
+    all(is.na(result_na$ISOTOPELABELTYPE)),
+    info = "NA IsotopeLabelType maps to NA"
+)
+
+result_na_l_h <- MSstats:::.prepareForDataProcess(make_input(c(NA, "L", "H")))
+expect_true(
+    any(is.na(result_na_l_h$ISOTOPELABELTYPE)),
+    info = "NA IsotopeLabelType stays NA when mixed with 'L' and 'H'"
+)
+expect_equal(
+    levels(result_na_l_h$ISOTOPELABELTYPE), c("H", "L"),
+    info = "label_map: NA entries must not change factor levels c('H', 'L')"
+)