Vitek-Lab · tonywu1999 · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026
diff --git a/R/dataProcess.R b/R/dataProcess.R
@@ -557,8 +557,9 @@ MSstatsSummarizeSingleTMP = function(single_protein, impute, censored_symbol,
         return(list(NULL, NULL))
     } else {
         single_protein = single_protein[!is.na(newABUNDANCE), ]
-        is_labeled = nlevels(single_protein$LABEL) > 1
-        result = .runTukey(single_protein, is_labeled, censored_symbol, 
+        is_labeled_reference = "is_labeled_ref" %in% colnames(single_protein) &&
+            any(single_protein$is_labeled_ref, na.rm = TRUE)
+        result = .runTukey(single_protein, is_labeled_reference, censored_symbol,
                            remove50missing)
     }
     list(result, survival)

diff --git a/R/utils_censored.R b/R/utils_censored.R
@@ -31,49 +31,47 @@ MSstatsHandleMissing = function(input, summary_method, impute,
 
     if (impute & !is.null(missing_symbol)) {
         input$censored = FALSE
+        use_for_analysis = if ("is_labeled_ref" %in% colnames(input)) !input$is_labeled_ref else rep(TRUE, nrow(input))
         ## if intensity = 1, but abundance > cutoff after normalization, it also should be censored.
         if (!is.null(censored_cutoff)) {
-            quantiles = input[!is.na(INTENSITY) & INTENSITY > 1 & LABEL == "L", 
-                              quantile(ABUNDANCE, 
-                                       prob = c(0.01, 0.25, 0.5, 0.75, 
-                                                censored_cutoff), 
+            quantiles = input[use_for_analysis & !is.na(INTENSITY) & INTENSITY > 1,
+                              quantile(ABUNDANCE,
+                                       prob = c(0.01, 0.25, 0.5, 0.75,
+                                                censored_cutoff),
                                        na.rm = TRUE)]
             iqr = quantiles[4] - quantiles[2]
             multiplier = (quantiles[5] - quantiles[4]) / iqr
-            cutoff_lower = (quantiles[2] - multiplier * iqr) 
-            input$censored = !is.na(input$INTENSITY) & 
-                input$LABEL == "L" &
+            cutoff_lower = (quantiles[2] - multiplier * iqr)
+            input$censored = use_for_analysis & !is.na(input$INTENSITY) &
                 input$ABUNDANCE < cutoff_lower
             if (cutoff_lower <= 0 & !is.null(missing_symbol) & missing_symbol == "0") {
-                zero_one_filter = !is.na(input$ABUNDANCE) & input$ABUNDANCE <= 0
+                zero_one_filter = use_for_analysis & !is.na(input$ABUNDANCE) & input$ABUNDANCE <= 0
                 input$censored = ifelse(zero_one_filter, TRUE, input$censored)
             }
             if (!is.null(missing_symbol) & missing_symbol == "NA") {
-                input$censored = ifelse(is.na(input$INTENSITY), TRUE, 
+                input$censored = ifelse(use_for_analysis & is.na(input$INTENSITY), TRUE,
                                         input$censored)
             }
-            
-            msg = paste('** Log2 intensities under cutoff =', 
-                        format(cutoff_lower, digits = 5), 
+
+            msg = paste('** Log2 intensities under cutoff =',
+                        format(cutoff_lower, digits = 5),
                         ' were considered as censored missing values.')
             msg_2 = paste("** Log2 intensities =", missing_symbol, "were considered as censored missing values.")
-            
+
             getOption("MSstatsMsg")("INFO", msg)
             getOption("MSstatsMsg")("INFO", msg_2)
-            
+
             getOption("MSstatsLog")("INFO", msg)
             getOption("MSstatsLog")("INFO", msg_2)
-            
+
         } else {
             if (missing_symbol == '0') {
-                input$censored = input$LABEL == "L" & 
-                    !is.na(input$INTENSITY) &
+                input$censored = use_for_analysis & !is.na(input$INTENSITY) &
                     (input$INTENSITY == 1 | input$ABUNDANCE <= 0)
             } else if (missing_symbol == 'NA') {
-                input$censored = input$LABEL == "L" & is.na(input$ABUNDANCE)
+                input$censored = use_for_analysis & is.na(input$ABUNDANCE)
             }
         }
-        input[, censored := ifelse(LABEL == "H", FALSE, censored)]
     } else {
         input$censored = FALSE
     }
@@ -128,16 +126,17 @@ MSstatsHandleMissing = function(input, summary_method, impute,
 #' @param censored_symbol `censoredInt` parameter to dataProcess
 #' @keywords internal
 .getNonMissingFilter = function(input, impute, censored_symbol) {
+    use_for_analysis = if ("is_labeled_ref" %in% colnames(input)) !input$is_labeled_ref else rep(TRUE, nrow(input))
     if (impute) {
         if (!is.null(censored_symbol)) {
             if (censored_symbol == "0") {
-                nonmissing_filter = input$LABEL == "L" & !is.na(input$newABUNDANCE) & input$newABUNDANCE != 0
+                nonmissing_filter = use_for_analysis & !is.na(input$newABUNDANCE) & input$newABUNDANCE != 0
             } else if (censored_symbol == "NA") {
-                nonmissing_filter = input$LABEL == "L" & !is.na(input$newABUNDANCE)
-            }  
-        } 
+                nonmissing_filter = use_for_analysis & !is.na(input$newABUNDANCE)
+            }
+        }
     } else {
-        nonmissing_filter = input$LABEL == "L" & !is.na(input$newABUNDANCE) & input$newABUNDANCE != 0
+        nonmissing_filter = use_for_analysis & !is.na(input$newABUNDANCE) & input$newABUNDANCE != 0
     }
     nonmissing_filter
 }
diff --git a/R/utils_normalize.R b/R/utils_normalize.R
@@ -28,8 +28,13 @@ MSstatsNormalize = function(input, normalization_method, peptides_dict = NULL, s
     normalization_method = toupper(normalization_method)
     if (normalization_method == "NONE" | normalization_method == "FALSE") {
         return(input)
-    } else if (normalization_method == "EQUALIZEMEDIANS") {
+    }
+
+    if (normalization_method == "EQUALIZEMEDIANS") {
         input = .normalizeMedian(input)
+        if ("H" %in% input$LABEL) {
+            input[, is_labeled_ref := LABEL == "H"]
+        }
     } else if (normalization_method == "QUANTILE") {
         input = .normalizeQuantile(input)
     } else if (normalization_method == "GLOBALSTANDARDS") {
@@ -191,15 +196,24 @@ MSstatsNormalize = function(input, normalization_method, peptides_dict = NULL, s
 #' @keywords internal
 .normalizeGlobalStandards = function(input, peptides_dict, standards) {
     PeptideSequence = PEPTIDE = PROTEIN = median_by_fraction = NULL
-    Standard = FRACTION = LABEL = ABUNDANCE = RUN = GROUP = NULL
+    Standard = FRACTION = LABEL = ABUNDANCE = RUN = mean_by_run = NULL
 
     input_with_peptides <- merge(input, peptides_dict, by = "PEPTIDE", all.x = TRUE)
+    if (length(standards) == 1 && standards == "unlabeled") {
+        standards = unique(input_with_peptides[is.na(input_with_peptides$LABEL), ]$PeptideSequence)
+        if (length(standards) == 0) {
+            msg = "nameStandards = 'unlabeled' but no unlabeled peptides found in data."
+            getOption("MSstatsLog")("ERROR", msg)
+            stop(msg)
+        }
+    }
     standards_data <- input_with_peptides[
-        (PeptideSequence %in% standards | PROTEIN %in% standards) & 
-            GROUP != "0" & 
+        (PeptideSequence %in% standards | PROTEIN %in% standards) &
             !is.na(ABUNDANCE)
     ]
-    missing_standards <- standards[!standards %in% c(standards_data$PeptideSequence, standards_data$PROTEIN)]
+    missing_standards <- standards[
+        !standards %in% c(standards_data$PeptideSequence, standards_data$PROTEIN)
+    ]
     if (length(missing_standards) > 0) {
         msg <- paste("Global standard peptides or proteins,",
                      paste(missing_standards, collapse = ", "),
@@ -210,12 +224,12 @@ MSstatsNormalize = function(input, normalization_method, peptides_dict = NULL, s
     standards_data[, standard := ifelse(!is.na(PeptideSequence) & PeptideSequence %in% standards,
                                         PeptideSequence,
                                         PROTEIN)]
-    means_by_standard <- standards_data[, 
+    means_by_standard <- standards_data[,
                                         list(mean_abundance = mean(ABUNDANCE, na.rm = TRUE)),
                                         by = .(RUN, standard)
     ]
-    means_by_standard <- dcast(means_by_standard, 
-                               RUN ~ standard, 
+    means_by_standard <- dcast(means_by_standard,
+                               RUN ~ standard,
                                value.var = "mean_abundance")
     means_by_standard = data.table::melt(means_by_standard, id.vars = "RUN",
                                          variable.name = "Standard", value.name = "ABUNDANCE")
@@ -227,16 +241,11 @@ MSstatsNormalize = function(input, normalization_method, peptides_dict = NULL, s
     means_by_standard[, ABUNDANCE := NULL]
     means_by_standard[, Standard := NULL]
     means_by_standard = unique(means_by_standard)
-    
+
     input = merge(input, means_by_standard, all.x = TRUE, by = c("RUN", "FRACTION"))
-    input[, ABUNDANCE := ifelse(LABEL == "L", ABUNDANCE - mean_by_run + median_by_fraction, ABUNDANCE)]
-
-    if (data.table::uniqueN(input$FRACTION) == 1L) {
-        msg = "Normalization : normalization with global standards protein - okay"
-    } else {
-        msg = "Normalization : normalization with global standards protein - okay"
-    }
-    getOption("MSstatsLog")("INFO", msg)
+    input[, ABUNDANCE := ABUNDANCE - mean_by_run + median_by_fraction]
+
+    getOption("MSstatsLog")("INFO", "Normalization : normalization with global standards protein - okay")
     input[ , !(colnames(input) %in% c("mean_by_run", "median_by_fraction")), with = FALSE]
 }
 

diff --git a/R/utils_summarization_prepare.R b/R/utils_summarization_prepare.R
@@ -31,9 +31,9 @@ MSstatsPrepareForSummarization = function(input, method, impute, censored_symbol
         input[, ANOMALYSCORES := NA]
     }
 
-    label = data.table::uniqueN(input$LABEL) == 2
-    if (label) {
-        input[, ref_covariate := factor(ifelse(LABEL == "L", RUN, 0))]
+    add_ref_covariate = "is_labeled_ref" %in% colnames(input) && any(input$is_labeled_ref, na.rm = TRUE)
+    if (add_ref_covariate) {
+      input[, ref_covariate := factor(ifelse(LABEL == "L", RUN, 0))]
     }
 
     if (is.element("remove", colnames(input))) {

diff --git a/inst/tinytest/test_dataProcess.R b/inst/tinytest/test_dataProcess.R
@@ -239,3 +239,42 @@ for (i in 1:nrow(protein_runs_4_5)) {
                            "than to AFPLAEWQPSDVDQR", afpla_abundance,
                            "(dist =", round(dist_to_afpla, 4), ")"))
 }
+
+
+# MSstatsSummarizeSingleTMP tests ------------------------------------------
+
+MSstatsConvert::MSstatsLogsSettings(FALSE)
+
+single_protein_labeled <- data.table::data.table(
+    PROTEIN       = rep("P1", 4),
+    FEATURE       = rep("F1", 4),
+    LABEL         = c("H",   "L",   "H",   "L"),
+    RUN           = c("R1",  "R1",  "R2",  "R2"),
+    newABUNDANCE  = c(10.0,  14.0,  12.0,  16.0),
+    is_labeled_ref = c(TRUE,  FALSE, TRUE,  FALSE),
+    censored      = rep(FALSE, 4),
+    n_obs         = rep(2L,  4),
+    n_obs_run     = rep(1L,  4),
+    prop_features = rep(1.0, 4)
+)
+
+result_labeled <- MSstatsSummarizeSingleTMP(
+    single_protein_labeled,
+    impute          = FALSE,
+    censored_symbol = NULL,
+    remove50missing = FALSE,
+    aft_iterations  = 90
+)
+
+protein_level_labeled <- result_labeled[[1]]
+
+expect_equal(
+    sort(as.numeric(protein_level_labeled$LogIntensities)),
+    c(15, 15),
+    info = paste(
+        "MSstatsSummarizeSingleTMP: 'is_labeled_ref' column present with TRUE values must set",
+        "is_labeled_reference=TRUE, causing .runTukey to call .adjustLRuns;",
+        "both runs should converge to H-adjusted LogIntensities of 15,",
+        "not the raw L values (14, 16) that the unlabeled path would return"
+    )
+)
diff --git a/inst/tinytest/test_utils_censored.R b/inst/tinytest/test_utils_censored.R
@@ -65,3 +65,111 @@ imputed_val_p2 <- dt_zero[
 ]
 expect_equal(imputed_val_p2, expected_val_p2)
 
+
+# Test MSstatsHandleMissing
+make_cens_input <- function() {
+  data.table::data.table(
+    PROTEIN   = rep("P1", 8),
+    FEATURE   = rep("F1", 8),
+    LABEL     = c("L","L","L","L","H","H","H","H"),
+    RUN       = rep(c("R1","R2","R3","R4"), 2),
+    GROUP     = rep(c("G1","G1","G2","G2"), 2),
+    FRACTION  = rep(1L, 8),
+    INTENSITY = c(1L, 1L, 100L, 200L,   # L: first two will be censored
+                  1L, 1L, 100L, 200L),  # H: same intensities but must NOT be censored
+    ABUNDANCE = c(0, 0, log2(100), log2(200),
+                  0, 0, log2(100), log2(200)),
+    is_labeled_ref = c(FALSE,FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE)
+  )
+}
+
+cens_in <- make_cens_input()
+out_cens <- MSstatsHandleMissing(
+  cens_in,
+  summary_method  = "TMP",
+  impute          = TRUE,
+  missing_symbol  = "0",
+  censored_cutoff = NULL
+)
+
+# H rows must never be flagged as censored regardless of intensity
+expect_false(
+  any(out_cens[LABEL == "H", censored]),
+  info = "H rows (is_labeled_ref=TRUE) must never be flagged censored (use_for_analysis=FALSE)"
+)
+
+# L rows with intensity==1 must be flagged censored
+expect_true(
+  any(out_cens[LABEL == "L" & INTENSITY == 1L, censored]),
+  info = "L rows with intensity=1 (use_for_analysis=TRUE) must be flagged censored"
+)
+
+# L rows with intensity>1 must NOT be flagged censored
+expect_false(
+  any(out_cens[LABEL == "L" & INTENSITY > 1L, censored]),
+  info = "L rows with high intensity must not be flagged censored"
+)
+
+
+# Tests for .getNonMissingFilter -----------------------------------------------
+
+dt_nmf_no_ref <- data.table::data.table(
+    newABUNDANCE = c(1.5, 0.0, NA_real_, 2.0)
+)
+result_no_ref <- MSstats:::.getNonMissingFilter(dt_nmf_no_ref, impute = FALSE, censored_symbol = "0")
+expect_equal(
+    result_no_ref, c(TRUE, FALSE, FALSE, TRUE),
+    info = ".getNonMissingFilter: without 'is_labeled_ref' column all rows have use_for_analysis=TRUE; zero and NA excluded"
+)
+
+dt_nmf_with_ref <- data.table::data.table(
+    newABUNDANCE   = c(1.5, 2.0, 3.0, 1.0),
+    is_labeled_ref = c(TRUE, FALSE, FALSE, TRUE)
+)
+result_with_ref <- MSstats:::.getNonMissingFilter(dt_nmf_with_ref, impute = FALSE, censored_symbol = "0")
+expect_equal(
+    result_with_ref, c(FALSE, TRUE, TRUE, FALSE),
+    info = ".getNonMissingFilter: is_labeled_ref=TRUE rows must be excluded (use_for_analysis=FALSE) regardless of abundance"
+)
+
+dt_nmf_ref_valid <- data.table::data.table(
+    newABUNDANCE   = c(5.0, 3.0),
+    is_labeled_ref = c(TRUE, FALSE)
+)
+result_ref_valid <- MSstats:::.getNonMissingFilter(dt_nmf_ref_valid, impute = FALSE, censored_symbol = "0")
+expect_false(
+    result_ref_valid[1],
+    info = ".getNonMissingFilter: is_labeled_ref=TRUE row with valid abundance must still yield FALSE"
+)
+expect_true(
+    result_ref_valid[2],
+    info = ".getNonMissingFilter: is_labeled_ref=FALSE row with valid abundance must yield TRUE"
+)
+
+dt_nmf_impute_na <- data.table::data.table(
+    newABUNDANCE = c(1.5, 0.0, NA_real_, 2.0)
+)
+result_impute_na <- MSstats:::.getNonMissingFilter(dt_nmf_impute_na, impute = TRUE, censored_symbol = "NA")
+expect_equal(
+    result_impute_na, c(TRUE, TRUE, FALSE, TRUE),
+    info = ".getNonMissingFilter: impute=TRUE, censored_symbol='NA' treats zero as non-missing"
+)
+
+dt_nmf_impute_zero <- data.table::data.table(
+    newABUNDANCE = c(1.5, 0.0, NA_real_, 2.0)
+)
+result_impute_zero <- MSstats:::.getNonMissingFilter(dt_nmf_impute_zero, impute = TRUE, censored_symbol = "0")
+expect_equal(
+    result_impute_zero, c(TRUE, FALSE, FALSE, TRUE),
+    info = ".getNonMissingFilter: impute=TRUE, censored_symbol='0' treats zero as missing"
+)
+
+dt_nmf_ref_impute_na <- data.table::data.table(
+    newABUNDANCE   = c(1.5, 2.0, 0.0, 3.0),
+    is_labeled_ref = c(TRUE, FALSE, FALSE, TRUE)
+)
+result_ref_impute_na <- MSstats:::.getNonMissingFilter(dt_nmf_ref_impute_na, impute = TRUE, censored_symbol = "NA")
+expect_equal(
+    result_ref_impute_na, c(FALSE, TRUE, TRUE, FALSE),
+    info = ".getNonMissingFilter: is_labeled_ref=TRUE rows excluded even when impute=TRUE, censored_symbol='NA'"
+)