From f1892bd8d2f49df6f10fef7ee2643819ec541925 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Sun, 12 Apr 2026 16:37:13 -0400 Subject: [PATCH 1/6] fix(checks): support 'heavy'/'light' IsotopeLabelType explicitly --- R/utils_checks.R | 16 +++++--- inst/tinytest/test_dataProcess.R | 8 ++++ inst/tinytest/test_utils_checks.R | 61 +++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 6 deletions(-) create mode 100644 inst/tinytest/test_utils_checks.R diff --git a/R/utils_checks.R b/R/utils_checks.R index 1d5537ac..97ced22c 100644 --- a/R/utils_checks.R +++ b/R/utils_checks.R @@ -231,12 +231,16 @@ MSstatsPrepareForDataProcess = function(input, log_base, fix_missing) { } input$PEPTIDE = paste(input$PEPTIDESEQUENCE, input$PRECURSORCHARGE, sep = "_") input$TRANSITION = paste(input$FRAGMENTION, input$PRODUCTCHARGE, sep = "_") - input$ISOTOPELABELTYPE = factor(input$ISOTOPELABELTYPE) - if (data.table::uniqueN(input$ISOTOPELABELTYPE) == 2) { - levels(input$ISOTOPELABELTYPE) = c("H", "L") - } else { - levels(input$ISOTOPELABELTYPE) = "L" - } + label_map <- c( + "H" = "H", + "L" = "L", + "heavy" = "H", + "light" = "L" + ) + input$ISOTOPELABELTYPE <- factor( + label_map[as.character(input$ISOTOPELABELTYPE)], + levels = c("H", "L", "NA") + ) input } diff --git a/inst/tinytest/test_dataProcess.R b/inst/tinytest/test_dataProcess.R index b8b01faa..4c979d2b 100644 --- a/inst/tinytest/test_dataProcess.R +++ b/inst/tinytest/test_dataProcess.R @@ -21,6 +21,14 @@ expect_true( "H" %in% QuantDataDefault$FeatureLevelData$LABEL, info = "FeatureLevelData should contain heavy-label (H) rows for label-based SRM data" ) +expect_true( + "L" %in% QuantDataDefault$FeatureLevelData$LABEL, + info = "SRMRawData FeatureLevelData must contain L rows" +) +expect_true( + nrow(QuantDataDefault$ProteinLevelData) > 0, + info = "SRMRawData must produce non-empty ProteinLevelData" +) # Test dataProcess with technical replicates & fractions ------------------ diff --git a/inst/tinytest/test_utils_checks.R b/inst/tinytest/test_utils_checks.R new file mode 100644 index 00000000..606da492 --- /dev/null +++ b/inst/tinytest/test_utils_checks.R @@ -0,0 +1,61 @@ +# .prepareForDataProcess tests --------------------------------------------- + +make_input <- function(labels) { + dt <- data.table::data.table( + PeptideSequence = rep("PEPT", length(labels)), + PrecursorCharge = rep(2L, length(labels)), + FragmentIon = rep("y3", length(labels)), + ProductCharge = rep(1L, length(labels)), + IsotopeLabelType = labels + ) + return(dt) +} + +result <- MSstats:::.prepareForDataProcess(make_input(c("heavy", "light"))) +expect_true( + "H" %in% as.character(result$ISOTOPELABELTYPE), + info = "label_map: 'heavy' must map to factor level 'H'" +) +expect_true( + "L" %in% as.character(result$ISOTOPELABELTYPE), + info = "label_map: 'light' must map to factor level 'L'" +) +expect_equal( + levels(result$ISOTOPELABELTYPE), c("H", "L", "NA"), + info = "label_map: factor levels must be exactly c('H', 'L', 'NA')" +) + + +result_light <- MSstats:::.prepareForDataProcess(make_input(rep("light", 4))) +expect_equal( + unique(as.character(result_light$ISOTOPELABELTYPE)), "L", + info = "label_map: all 'light' input must produce all 'L' output" +) + +result_hl <- MSstats:::.prepareForDataProcess(make_input(c("H", "L", "H", "L"))) +expect_equal( + as.character(result_hl$ISOTOPELABELTYPE), c("H", "L", "H", "L"), + info = "label_map: 'H' / 'L' strings must still pass through unchanged" +) +expect_equal( + levels(result_hl$ISOTOPELABELTYPE), c("H", "L", "NA"), + info = "label_map: factor levels for H/L input must still be c('H', 'L', 'NA')" +) + +result_ll <- MSstats:::.prepareForDataProcess(make_input(rep("L", 5))) +expect_equal( + unique(as.character(result_ll$ISOTOPELABELTYPE)), "L", + info = "label_map: all 'L' input must produce all 'L' output" +) + +result_other <- MSstats:::.prepareForDataProcess(make_input(rep("bruH", 5))) +expect_true( + all(is.na(result_other$ISOTOPELABELTYPE)), + info = "Other IsotopeLabelType maps to NA" +) + +result_na <- MSstats:::.prepareForDataProcess(make_input(rep(NA, 5))) +expect_true( + all(is.na(result_na$ISOTOPELABELTYPE)), + info = "NA IsotopeLabelType maps to NA" +) From 26333178c40526b55aaf94678b0bc30f4b9d43e7 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Sun, 12 Apr 2026 16:44:43 -0400 Subject: [PATCH 2/6] added unit tests --- R/utils_checks.R | 2 +- inst/tinytest/test_utils_checks.R | 35 ++++++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/R/utils_checks.R b/R/utils_checks.R index 97ced22c..9ff54857 100644 --- a/R/utils_checks.R +++ b/R/utils_checks.R @@ -239,7 +239,7 @@ MSstatsPrepareForDataProcess = function(input, log_base, fix_missing) { ) input$ISOTOPELABELTYPE <- factor( label_map[as.character(input$ISOTOPELABELTYPE)], - levels = c("H", "L", "NA") + levels = c("H", "L") ) input } diff --git a/inst/tinytest/test_utils_checks.R b/inst/tinytest/test_utils_checks.R index 606da492..a1c36034 100644 --- a/inst/tinytest/test_utils_checks.R +++ b/inst/tinytest/test_utils_checks.R @@ -1,4 +1,21 @@ # .prepareForDataProcess tests --------------------------------------------- +input <- data.frame( + PeptideModifiedSequence = c("Apeptide", "BPEPTIDE"), + PrecursorCharge = c(2L, 3L), + FragmentIon = c("b2", "y3"), + ProductCharge = c(1L, 1L), + IsotopeLabelType = c("H", "L") +) +result_peptideModifiedSequence <- MSstats:::.prepareForDataProcess(input) +expect_true( + "PEPTIDESEQUENCE" %in% colnames(result_peptideModifiedSequence), + info = "PEPTIDESEQUENCE should exist after renaming PEPTIDEMODIFIEDSEQUENCE" +) +expect_false( + "PEPTIDEMODIFIEDSEQUENCE" %in% colnames(result_peptideModifiedSequence), + info = "PEPTIDEMODIFIEDSEQUENCE should no longer exist after renaming" +) + make_input <- function(labels) { dt <- data.table::data.table( @@ -21,8 +38,8 @@ expect_true( info = "label_map: 'light' must map to factor level 'L'" ) expect_equal( - levels(result$ISOTOPELABELTYPE), c("H", "L", "NA"), - info = "label_map: factor levels must be exactly c('H', 'L', 'NA')" + levels(result$ISOTOPELABELTYPE), c("H", "L"), + info = "label_map: factor levels must be exactly c('H', 'L')" ) @@ -38,8 +55,8 @@ expect_equal( info = "label_map: 'H' / 'L' strings must still pass through unchanged" ) expect_equal( - levels(result_hl$ISOTOPELABELTYPE), c("H", "L", "NA"), - info = "label_map: factor levels for H/L input must still be c('H', 'L', 'NA')" + levels(result_hl$ISOTOPELABELTYPE), c("H", "L"), + info = "label_map: factor levels for H/L input must still be c('H', 'L')" ) result_ll <- MSstats:::.prepareForDataProcess(make_input(rep("L", 5))) @@ -48,14 +65,22 @@ expect_equal( info = "label_map: all 'L' input must produce all 'L' output" ) -result_other <- MSstats:::.prepareForDataProcess(make_input(rep("bruH", 5))) +result_other <- MSstats:::.prepareForDataProcess(make_input(rep("test", 5))) expect_true( all(is.na(result_other$ISOTOPELABELTYPE)), info = "Other IsotopeLabelType maps to NA" ) +expect_equal( + levels(result_other$ISOTOPELABELTYPE), c("H", "L"), + info = "label_map: factor levels must be exactly c('H', 'L')" +) result_na <- MSstats:::.prepareForDataProcess(make_input(rep(NA, 5))) expect_true( all(is.na(result_na$ISOTOPELABELTYPE)), info = "NA IsotopeLabelType maps to NA" ) +expect_equal( + levels(result_na$ISOTOPELABELTYPE), c("H", "L"), + info = "label_map: factor levels must be exactly c('H', 'L')" +) From b0f8c8e03c599f86872e0c7ca04b9135b8522a8b Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Sun, 12 Apr 2026 16:50:41 -0400 Subject: [PATCH 3/6] fix unit tests --- R/utils_checks.R | 2 +- inst/tinytest/test_utils_checks.R | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/R/utils_checks.R b/R/utils_checks.R index 9ff54857..982065a5 100644 --- a/R/utils_checks.R +++ b/R/utils_checks.R @@ -239,7 +239,7 @@ MSstatsPrepareForDataProcess = function(input, log_base, fix_missing) { ) input$ISOTOPELABELTYPE <- factor( label_map[as.character(input$ISOTOPELABELTYPE)], - levels = c("H", "L") + levels = intersect(c("H", "L"), label_map[as.character(input$ISOTOPELABELTYPE)]) ) input } diff --git a/inst/tinytest/test_utils_checks.R b/inst/tinytest/test_utils_checks.R index a1c36034..0d0983b2 100644 --- a/inst/tinytest/test_utils_checks.R +++ b/inst/tinytest/test_utils_checks.R @@ -48,6 +48,10 @@ expect_equal( unique(as.character(result_light$ISOTOPELABELTYPE)), "L", info = "label_map: all 'light' input must produce all 'L' output" ) +expect_equal( + levels(result_hl$ISOTOPELABELTYPE), c("L"), + info = "label_map: factor levels for H/L input must still be c('L')" +) result_hl <- MSstats:::.prepareForDataProcess(make_input(c("H", "L", "H", "L"))) expect_equal( @@ -70,17 +74,9 @@ expect_true( all(is.na(result_other$ISOTOPELABELTYPE)), info = "Other IsotopeLabelType maps to NA" ) -expect_equal( - levels(result_other$ISOTOPELABELTYPE), c("H", "L"), - info = "label_map: factor levels must be exactly c('H', 'L')" -) result_na <- MSstats:::.prepareForDataProcess(make_input(rep(NA, 5))) expect_true( all(is.na(result_na$ISOTOPELABELTYPE)), info = "NA IsotopeLabelType maps to NA" ) -expect_equal( - levels(result_na$ISOTOPELABELTYPE), c("H", "L"), - info = "label_map: factor levels must be exactly c('H', 'L')" -) From f4c41891a350126ab5ece7f66f9143e61e60edbc Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Sun, 12 Apr 2026 16:54:21 -0400 Subject: [PATCH 4/6] really fix unit tests --- inst/tinytest/test_utils_checks.R | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/inst/tinytest/test_utils_checks.R b/inst/tinytest/test_utils_checks.R index 0d0983b2..90aa95f2 100644 --- a/inst/tinytest/test_utils_checks.R +++ b/inst/tinytest/test_utils_checks.R @@ -49,7 +49,7 @@ expect_equal( info = "label_map: all 'light' input must produce all 'L' output" ) expect_equal( - levels(result_hl$ISOTOPELABELTYPE), c("L"), + levels(result_light$ISOTOPELABELTYPE), c("L"), info = "label_map: factor levels for H/L input must still be c('L')" ) @@ -80,3 +80,13 @@ expect_true( all(is.na(result_na$ISOTOPELABELTYPE)), info = "NA IsotopeLabelType maps to NA" ) + +result_na_l_h <- MSstats:::.prepareForDataProcess(make_input(c(NA, "L", "H"))) +expect_true( + any(is.na(result_na_l_h$ISOTOPELABELTYPE)), + info = "NA IsotopeLabelType maps to NA" +) +expect_equal( + levels(result_na_l_h$ISOTOPELABELTYPE), c("H", "L"), + info = "label_map: factor levels for H/L input must still be c('H', 'L')" +) From 13fadf0e0be4ab22ff475d4e6e69e8b219ce69ab Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Sun, 12 Apr 2026 17:04:52 -0400 Subject: [PATCH 5/6] address coderabbit comment --- R/utils_checks.R | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/R/utils_checks.R b/R/utils_checks.R index 982065a5..b735ce1d 100644 --- a/R/utils_checks.R +++ b/R/utils_checks.R @@ -142,6 +142,22 @@ MSstatsPrepareForDataProcess = function(input, log_base, fix_missing) { } +#' Map IsotopeLabelType values to canonical "H"/"L" levels +#' @param x character or factor vector of IsotopeLabelType values +#' @return factor with levels from \code{c("H","L")} restricted to those present +#' @keywords internal +.mapIsotopeLabelType = function(x) { + label_map <- c( + "H" = "H", + "L" = "L", + "heavy" = "H", + "light" = "L" + ) + mapped <- label_map[as.character(x)] + factor(mapped, levels = intersect(c("H", "L"), mapped)) +} + + #' Check validity of data that were not processed by MSstats converter #' @param input data.table #' @inheritParams MSstatsPrepareForDataProcess @@ -208,12 +224,7 @@ MSstatsPrepareForDataProcess = function(input, log_base, fix_missing) { stop("Statistical tools in MSstats are only proper for label-free or with reference peptide experiments.") } - input$ISOTOPELABELTYPE = factor(input$ISOTOPELABELTYPE) - if (data.table::uniqueN(input$ISOTOPELABELTYPE) == 2) { - levels(input$ISOTOPELABELTYPE) = c("H", "L") - } else { - levels(input$ISOTOPELABELTYPE) = "L" - } + input$ISOTOPELABELTYPE <- .mapIsotopeLabelType(input$ISOTOPELABELTYPE) input } @@ -231,16 +242,7 @@ MSstatsPrepareForDataProcess = function(input, log_base, fix_missing) { } input$PEPTIDE = paste(input$PEPTIDESEQUENCE, input$PRECURSORCHARGE, sep = "_") input$TRANSITION = paste(input$FRAGMENTION, input$PRODUCTCHARGE, sep = "_") - label_map <- c( - "H" = "H", - "L" = "L", - "heavy" = "H", - "light" = "L" - ) - input$ISOTOPELABELTYPE <- factor( - label_map[as.character(input$ISOTOPELABELTYPE)], - levels = intersect(c("H", "L"), label_map[as.character(input$ISOTOPELABELTYPE)]) - ) + input$ISOTOPELABELTYPE <- .mapIsotopeLabelType(input$ISOTOPELABELTYPE) input } From 7030c0083cdd78ddb13511da330c194c0d935da5 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Sun, 12 Apr 2026 17:13:33 -0400 Subject: [PATCH 6/6] address coderabbit comment 2 --- R/utils_checks.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/utils_checks.R b/R/utils_checks.R index b735ce1d..601f1685 100644 --- a/R/utils_checks.R +++ b/R/utils_checks.R @@ -146,14 +146,16 @@ MSstatsPrepareForDataProcess = function(input, log_base, fix_missing) { #' @param x character or factor vector of IsotopeLabelType values #' @return factor with levels from \code{c("H","L")} restricted to those present #' @keywords internal +#' @noRd .mapIsotopeLabelType = function(x) { label_map <- c( - "H" = "H", - "L" = "L", + "h" = "H", + "l" = "L", "heavy" = "H", "light" = "L" ) - mapped <- label_map[as.character(x)] + key <- tolower(trimws(as.character(x))) + mapped <- unname(label_map[key]) factor(mapped, levels = intersect(c("H", "L"), mapped)) }