From 4c11e1d424e3286c988a51c3c9e70edc420af5ac Mon Sep 17 00:00:00 2001
From: tonywu1999 <wu.anthon@northeastern.edu>
Date: Fri, 24 Apr 2026 12:50:15 -0400
Subject: [PATCH 1/2] docs(impute): Update documentation w.r.t. censoredInt

---
 R/dataProcess.R                          | 43 +++++++++++++-----------
 man/MSstatsSummarizeSingleTMP.Rd         | 22 ++++++------
 man/MSstatsSummarizeWithMultipleCores.Rd | 22 ++++++------
 man/MSstatsSummarizeWithSingleCore.Rd    | 22 ++++++------
 man/dataProcess.Rd                       | 23 +++++++------
 man/dot-getNonMissingFilterStats.Rd      | 13 +++----
 man/dot-runTukey.Rd                      | 15 +++++----
 7 files changed, 86 insertions(+), 74 deletions(-)

diff --git a/R/dataProcess.R b/R/dataProcess.R
index a922fb5d..624c9846 100755
--- a/R/dataProcess.R
+++ b/R/dataProcess.R
@@ -41,16 +41,17 @@
 #' variance among intensities from features. FALSE means that we cannot assume equal 
 #' variance among intensities from features, then we will account for heterogeneous 
 #' variation from different features.
-#' @param censoredInt Missing values are censored or at random. 
-#' 'NA' (default) assumes that all 'NA's in 'Intensity' column are censored. 
-#' '0' uses zero intensities as censored intensity. 
-#' In this case, NA intensities are missing at random. 
-#' The output from Skyline should use '0'. 
-#' Null assumes that all NA intensites are randomly missing.
-#' @param MBimpute only for summaryMethod = "TMP" and censoredInt = 'NA' or '0'. 
-#' TRUE (default) imputes missing values with 'NA' or '0' (depending on censoredInt option) 
-#' by Accelerated failure model. If set to FALSE, no missing values are imputed. 
-#' FALSE is appropriate only when missingness is assumed to be at random.
+#' @param censoredInt Indicates how censored missing values are encoded in the
+#' 'Intensity' column. 'NA' (default) treats all NA intensities as left-censored
+#' (i.e., below the limit of detection). '0' treats zero intensities as
+#' left-censored; in this case NA intensities are assumed to be missing at
+#' random and are not censored. Skyline output should use '0'. NULL assumes
+#' that all missing values are missing at random — no values are treated as
+#' censored, and imputation is disabled (MBimpute is ignored).
+#' @param MBimpute only for summaryMethod = "TMP" and censoredInt = 'NA' or '0'.
+#' TRUE (default) imputes censored missing values using an Accelerated Failure
+#' Time model. FALSE leaves censored values at their cutoff without imputation.
+#' Has no effect when censoredInt = NULL, since no values are considered censored.
 #' See MSstats vignettes for recommendations on which imputation option to use.
 #' @param remove50missing only for summaryMethod = "TMP". TRUE removes the proteins 
 #' where every run has at least 50\% missing values for each peptide. FALSE is default.
@@ -186,17 +187,19 @@ dataProcess = function(
 #' variance among intensities from features. FALSE means that we cannot assume 
 #' equal variance among intensities from features, then we will account for
 #' heterogeneous variation from different features.
-#' @param censored_symbol Missing values are censored or at random. 
-#' 'NA' (default) assumes that all 'NA's in 'Intensity' column are censored. 
-#' '0' uses zero intensities as censored intensity. 
-#' In this case, NA intensities are missing at random. 
-#' The output from Skyline should use '0'. 
-#' Null assumes that all NA intensites are randomly missing.
-#' @param remove50missing only for summaryMethod = "TMP". TRUE removes the proteins 
+#' @param censored_symbol Indicates how censored missing values are encoded in
+#' the 'Intensity' column. 'NA' (default) treats all NA intensities as
+#' left-censored (i.e., below the limit of detection). '0' treats zero
+#' intensities as left-censored; in this case NA intensities are assumed to be
+#' missing at random and are not censored. Skyline output should use '0'. NULL
+#' assumes that all missing values are missing at random — no values are treated
+#' as censored, and imputation is disabled (impute is ignored).
+#' @param remove50missing only for summaryMethod = "TMP". TRUE removes the proteins
 #' where every run has at least 50\% missing values for each peptide. FALSE is default.
-#' @param impute only for summaryMethod = "TMP" and censoredInt = 'NA' or '0'. 
-#' TRUE (default) imputes 'NA' or '0' (depending on censoredInt option) by Accelated failure model. 
-#' FALSE uses the values assigned by cutoffCensored
+#' @param impute only for summaryMethod = "TMP" and censored_symbol = 'NA' or '0'.
+#' TRUE (default) imputes censored missing values using an Accelerated Failure
+#' Time model. FALSE leaves censored values at their cutoff without imputation.
+#' Has no effect when censored_symbol = NULL, since no values are considered censored.
 #' @param numberOfCores Number of cores for parallel processing. When > 1, 
 #' a logfile named `MSstats_dataProcess_log_progress.log` is created to 
 #' track progress. Only works for Linux & Mac OS. Default is 1.
diff --git a/man/MSstatsSummarizeSingleTMP.Rd b/man/MSstatsSummarizeSingleTMP.Rd
index 7ee20acc..3d0198d8 100644
--- a/man/MSstatsSummarizeSingleTMP.Rd
+++ b/man/MSstatsSummarizeSingleTMP.Rd
@@ -15,18 +15,20 @@ MSstatsSummarizeSingleTMP(
 \arguments{
 \item{single_protein}{feature-level data for a single protein}
 
-\item{impute}{only for summaryMethod = "TMP" and censoredInt = 'NA' or '0'. 
-TRUE (default) imputes 'NA' or '0' (depending on censoredInt option) by Accelated failure model. 
-FALSE uses the values assigned by cutoffCensored}
+\item{impute}{only for summaryMethod = "TMP" and censored_symbol = 'NA' or '0'.
+TRUE (default) imputes censored missing values using an Accelerated Failure
+Time model. FALSE leaves censored values at their cutoff without imputation.
+Has no effect when censored_symbol = NULL, since no values are considered censored.}
 
-\item{censored_symbol}{Missing values are censored or at random. 
-'NA' (default) assumes that all 'NA's in 'Intensity' column are censored. 
-'0' uses zero intensities as censored intensity. 
-In this case, NA intensities are missing at random. 
-The output from Skyline should use '0'. 
-Null assumes that all NA intensites are randomly missing.}
+\item{censored_symbol}{Indicates how censored missing values are encoded in
+the 'Intensity' column. 'NA' (default) treats all NA intensities as
+left-censored (i.e., below the limit of detection). '0' treats zero
+intensities as left-censored; in this case NA intensities are assumed to be
+missing at random and are not censored. Skyline output should use '0'. NULL
+assumes that all missing values are missing at random — no values are treated
+as censored, and imputation is disabled (impute is ignored).}
 
-\item{remove50missing}{only for summaryMethod = "TMP". TRUE removes the proteins 
+\item{remove50missing}{only for summaryMethod = "TMP". TRUE removes the proteins
 where every run has at least 50\% missing values for each peptide. FALSE is default.}
 
 \item{aft_iterations}{number of iterations for AFT model fitting}
diff --git a/man/MSstatsSummarizeWithMultipleCores.Rd b/man/MSstatsSummarizeWithMultipleCores.Rd
index 57a96e91..245188e8 100644
--- a/man/MSstatsSummarizeWithMultipleCores.Rd
+++ b/man/MSstatsSummarizeWithMultipleCores.Rd
@@ -20,18 +20,20 @@ MSstatsSummarizeWithMultipleCores(
 
 \item{method}{summarization method: "linear" or "TMP"}
 
-\item{impute}{only for summaryMethod = "TMP" and censoredInt = 'NA' or '0'. 
-TRUE (default) imputes 'NA' or '0' (depending on censoredInt option) by Accelated failure model. 
-FALSE uses the values assigned by cutoffCensored}
+\item{impute}{only for summaryMethod = "TMP" and censored_symbol = 'NA' or '0'.
+TRUE (default) imputes censored missing values using an Accelerated Failure
+Time model. FALSE leaves censored values at their cutoff without imputation.
+Has no effect when censored_symbol = NULL, since no values are considered censored.}
 
-\item{censored_symbol}{Missing values are censored or at random. 
-'NA' (default) assumes that all 'NA's in 'Intensity' column are censored. 
-'0' uses zero intensities as censored intensity. 
-In this case, NA intensities are missing at random. 
-The output from Skyline should use '0'. 
-Null assumes that all NA intensites are randomly missing.}
+\item{censored_symbol}{Indicates how censored missing values are encoded in
+the 'Intensity' column. 'NA' (default) treats all NA intensities as
+left-censored (i.e., below the limit of detection). '0' treats zero
+intensities as left-censored; in this case NA intensities are assumed to be
+missing at random and are not censored. Skyline output should use '0'. NULL
+assumes that all missing values are missing at random — no values are treated
+as censored, and imputation is disabled (impute is ignored).}
 
-\item{remove50missing}{only for summaryMethod = "TMP". TRUE removes the proteins 
+\item{remove50missing}{only for summaryMethod = "TMP". TRUE removes the proteins
 where every run has at least 50\% missing values for each peptide. FALSE is default.}
 
 \item{equal_variance}{only for summaryMethod = "linear". Default is TRUE. 
diff --git a/man/MSstatsSummarizeWithSingleCore.Rd b/man/MSstatsSummarizeWithSingleCore.Rd
index 68f44e4d..40d309af 100644
--- a/man/MSstatsSummarizeWithSingleCore.Rd
+++ b/man/MSstatsSummarizeWithSingleCore.Rd
@@ -19,18 +19,20 @@ MSstatsSummarizeWithSingleCore(
 
 \item{method}{summarization method: "linear" or "TMP"}
 
-\item{impute}{only for summaryMethod = "TMP" and censoredInt = 'NA' or '0'. 
-TRUE (default) imputes 'NA' or '0' (depending on censoredInt option) by Accelated failure model. 
-FALSE uses the values assigned by cutoffCensored}
+\item{impute}{only for summaryMethod = "TMP" and censored_symbol = 'NA' or '0'.
+TRUE (default) imputes censored missing values using an Accelerated Failure
+Time model. FALSE leaves censored values at their cutoff without imputation.
+Has no effect when censored_symbol = NULL, since no values are considered censored.}
 
-\item{censored_symbol}{Missing values are censored or at random. 
-'NA' (default) assumes that all 'NA's in 'Intensity' column are censored. 
-'0' uses zero intensities as censored intensity. 
-In this case, NA intensities are missing at random. 
-The output from Skyline should use '0'. 
-Null assumes that all NA intensites are randomly missing.}
+\item{censored_symbol}{Indicates how censored missing values are encoded in
+the 'Intensity' column. 'NA' (default) treats all NA intensities as
+left-censored (i.e., below the limit of detection). '0' treats zero
+intensities as left-censored; in this case NA intensities are assumed to be
+missing at random and are not censored. Skyline output should use '0'. NULL
+assumes that all missing values are missing at random — no values are treated
+as censored, and imputation is disabled (impute is ignored).}
 
-\item{remove50missing}{only for summaryMethod = "TMP". TRUE removes the proteins 
+\item{remove50missing}{only for summaryMethod = "TMP". TRUE removes the proteins
 where every run has at least 50\% missing values for each peptide. FALSE is default.}
 
 \item{equal_variance}{only for summaryMethod = "linear". Default is TRUE. 
diff --git a/man/dataProcess.Rd b/man/dataProcess.Rd
index 813e6130..95e32fc7 100644
--- a/man/dataProcess.Rd
+++ b/man/dataProcess.Rd
@@ -80,17 +80,18 @@ variance among intensities from features. FALSE means that we cannot assume equa
 variance among intensities from features, then we will account for heterogeneous 
 variation from different features.}
 
-\item{censoredInt}{Missing values are censored or at random. 
-'NA' (default) assumes that all 'NA's in 'Intensity' column are censored. 
-'0' uses zero intensities as censored intensity. 
-In this case, NA intensities are missing at random. 
-The output from Skyline should use '0'. 
-Null assumes that all NA intensites are randomly missing.}
-
-\item{MBimpute}{only for summaryMethod = "TMP" and censoredInt = 'NA' or '0'. 
-TRUE (default) imputes missing values with 'NA' or '0' (depending on censoredInt option) 
-by Accelerated failure model. If set to FALSE, no missing values are imputed. 
-FALSE is appropriate only when missingness is assumed to be at random.
+\item{censoredInt}{Indicates how censored missing values are encoded in the
+'Intensity' column. 'NA' (default) treats all NA intensities as left-censored
+(i.e., below the limit of detection). '0' treats zero intensities as
+left-censored; in this case NA intensities are assumed to be missing at
+random and are not censored. Skyline output should use '0'. NULL assumes
+that all missing values are missing at random — no values are treated as
+censored, and imputation is disabled (MBimpute is ignored).}
+
+\item{MBimpute}{only for summaryMethod = "TMP" and censoredInt = 'NA' or '0'.
+TRUE (default) imputes censored missing values using an Accelerated Failure
+Time model. FALSE leaves censored values at their cutoff without imputation.
+Has no effect when censoredInt = NULL, since no values are considered censored.
 See MSstats vignettes for recommendations on which imputation option to use.}
 
 \item{remove50missing}{only for summaryMethod = "TMP". TRUE removes the proteins 
diff --git a/man/dot-getNonMissingFilterStats.Rd b/man/dot-getNonMissingFilterStats.Rd
index e2c59980..7f8b8970 100644
--- a/man/dot-getNonMissingFilterStats.Rd
+++ b/man/dot-getNonMissingFilterStats.Rd
@@ -9,12 +9,13 @@
 \arguments{
 \item{input}{data.table with data for a single protein}
 
-\item{censored_symbol}{Missing values are censored or at random. 
-'NA' (default) assumes that all 'NA's in 'Intensity' column are censored. 
-'0' uses zero intensities as censored intensity. 
-In this case, NA intensities are missing at random. 
-The output from Skyline should use '0'. 
-Null assumes that all NA intensites are randomly missing.}
+\item{censored_symbol}{Indicates how censored missing values are encoded in
+the 'Intensity' column. 'NA' (default) treats all NA intensities as
+left-censored (i.e., below the limit of detection). '0' treats zero
+intensities as left-censored; in this case NA intensities are assumed to be
+missing at random and are not censored. Skyline output should use '0'. NULL
+assumes that all missing values are missing at random — no values are treated
+as censored, and imputation is disabled (impute is ignored).}
 }
 \value{
 data.table
diff --git a/man/dot-runTukey.Rd b/man/dot-runTukey.Rd
index 76b6ab1b..b6425a04 100644
--- a/man/dot-runTukey.Rd
+++ b/man/dot-runTukey.Rd
@@ -15,14 +15,15 @@ subtracting the H value and adding back the H median, and only L results
 are returned. If FALSE (e.g. protein turnover), each label is summarized
 independently and results for all labels are returned.}
 
-\item{censored_symbol}{Missing values are censored or at random. 
-'NA' (default) assumes that all 'NA's in 'Intensity' column are censored. 
-'0' uses zero intensities as censored intensity. 
-In this case, NA intensities are missing at random. 
-The output from Skyline should use '0'. 
-Null assumes that all NA intensites are randomly missing.}
+\item{censored_symbol}{Indicates how censored missing values are encoded in
+the 'Intensity' column. 'NA' (default) treats all NA intensities as
+left-censored (i.e., below the limit of detection). '0' treats zero
+intensities as left-censored; in this case NA intensities are assumed to be
+missing at random and are not censored. Skyline output should use '0'. NULL
+assumes that all missing values are missing at random — no values are treated
+as censored, and imputation is disabled (impute is ignored).}
 
-\item{remove50missing}{only for summaryMethod = "TMP". TRUE removes the proteins 
+\item{remove50missing}{only for summaryMethod = "TMP". TRUE removes the proteins
 where every run has at least 50\% missing values for each peptide. FALSE is default.}
 }
 \value{

From e0210d79336a403ebbb47ebef52ca4a4ba8e2329 Mon Sep 17 00:00:00 2001
From: tonywu1999 <wu.anthon@northeastern.edu>
Date: Fri, 24 Apr 2026 14:16:58 -0400
Subject: [PATCH 2/2] docs(impute): Correct description of MBimpute/impute = FALSE

---
 R/dataProcess.R                          | 6 ++++--
 man/MSstatsSummarizeSingleTMP.Rd         | 3 ++-
 man/MSstatsSummarizeWithMultipleCores.Rd | 3 ++-
 man/MSstatsSummarizeWithSingleCore.Rd    | 3 ++-
 man/dataProcess.Rd                       | 3 ++-
 5 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/R/dataProcess.R b/R/dataProcess.R
index 624c9846..e9581c8b 100755
--- a/R/dataProcess.R
+++ b/R/dataProcess.R
@@ -50,7 +50,8 @@
 #' censored, and imputation is disabled (MBimpute is ignored).
 #' @param MBimpute only for summaryMethod = "TMP" and censoredInt = 'NA' or '0'.
 #' TRUE (default) imputes censored missing values using an Accelerated Failure
-#' Time model. FALSE leaves censored values at their cutoff without imputation.
+#' Time model. FALSE excludes censored observations from summarization entirely,
+#' treating them as missing at random; no imputed values are introduced.
 #' Has no effect when censoredInt = NULL, since no values are considered censored.
 #' See MSstats vignettes for recommendations on which imputation option to use.
 #' @param remove50missing only for summaryMethod = "TMP". TRUE removes the proteins 
@@ -198,7 +199,8 @@ dataProcess = function(
 #' where every run has at least 50\% missing values for each peptide. FALSE is default.
 #' @param impute only for summaryMethod = "TMP" and censored_symbol = 'NA' or '0'.
 #' TRUE (default) imputes censored missing values using an Accelerated Failure
-#' Time model. FALSE leaves censored values at their cutoff without imputation.
+#' Time model. FALSE excludes censored observations from summarization entirely,
+#' treating them as missing at random; no imputed values are introduced.
 #' Has no effect when censored_symbol = NULL, since no values are considered censored.
 #' @param numberOfCores Number of cores for parallel processing. When > 1, 
 #' a logfile named `MSstats_dataProcess_log_progress.log` is created to 
diff --git a/man/MSstatsSummarizeSingleTMP.Rd b/man/MSstatsSummarizeSingleTMP.Rd
index 3d0198d8..041ef76e 100644
--- a/man/MSstatsSummarizeSingleTMP.Rd
+++ b/man/MSstatsSummarizeSingleTMP.Rd
@@ -17,7 +17,8 @@ MSstatsSummarizeSingleTMP(
 
 \item{impute}{only for summaryMethod = "TMP" and censored_symbol = 'NA' or '0'.
 TRUE (default) imputes censored missing values using an Accelerated Failure
-Time model. FALSE leaves censored values at their cutoff without imputation.
+Time model. FALSE excludes censored observations from summarization entirely,
+treating them as missing at random; no imputed values are introduced.
 Has no effect when censored_symbol = NULL, since no values are considered censored.}
 
 \item{censored_symbol}{Indicates how censored missing values are encoded in
diff --git a/man/MSstatsSummarizeWithMultipleCores.Rd b/man/MSstatsSummarizeWithMultipleCores.Rd
index 245188e8..dfd7749d 100644
--- a/man/MSstatsSummarizeWithMultipleCores.Rd
+++ b/man/MSstatsSummarizeWithMultipleCores.Rd
@@ -22,7 +22,8 @@ MSstatsSummarizeWithMultipleCores(
 
 \item{impute}{only for summaryMethod = "TMP" and censored_symbol = 'NA' or '0'.
 TRUE (default) imputes censored missing values using an Accelerated Failure
-Time model. FALSE leaves censored values at their cutoff without imputation.
+Time model. FALSE excludes censored observations from summarization entirely,
+treating them as missing at random; no imputed values are introduced.
 Has no effect when censored_symbol = NULL, since no values are considered censored.}
 
 \item{censored_symbol}{Indicates how censored missing values are encoded in
diff --git a/man/MSstatsSummarizeWithSingleCore.Rd b/man/MSstatsSummarizeWithSingleCore.Rd
index 40d309af..4892dccb 100644
--- a/man/MSstatsSummarizeWithSingleCore.Rd
+++ b/man/MSstatsSummarizeWithSingleCore.Rd
@@ -21,7 +21,8 @@ MSstatsSummarizeWithSingleCore(
 
 \item{impute}{only for summaryMethod = "TMP" and censored_symbol = 'NA' or '0'.
 TRUE (default) imputes censored missing values using an Accelerated Failure
-Time model. FALSE leaves censored values at their cutoff without imputation.
+Time model. FALSE excludes censored observations from summarization entirely,
+treating them as missing at random; no imputed values are introduced.
 Has no effect when censored_symbol = NULL, since no values are considered censored.}
 
 \item{censored_symbol}{Indicates how censored missing values are encoded in
diff --git a/man/dataProcess.Rd b/man/dataProcess.Rd
index 95e32fc7..1ed4fe03 100644
--- a/man/dataProcess.Rd
+++ b/man/dataProcess.Rd
@@ -90,7 +90,8 @@ censored, and imputation is disabled (MBimpute is ignored).}
 
 \item{MBimpute}{only for summaryMethod = "TMP" and censoredInt = 'NA' or '0'.
 TRUE (default) imputes censored missing values using an Accelerated Failure
-Time model. FALSE leaves censored values at their cutoff without imputation.
+Time model. FALSE excludes censored observations from summarization entirely,
+treating them as missing at random; no imputed values are introduced.
 Has no effect when censoredInt = NULL, since no values are considered censored.
 See MSstats vignettes for recommendations on which imputation option to use.}