Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
35747e9
added weights to groupComparison
devonjkohler Apr 4, 2025
c0a8ac6
adding anomaly scores to dataProcess
devonjkohler Apr 13, 2025
89c031e
anomaly weighted summarization
devonjkohler Apr 13, 2025
1574fd9
anomaly scores into summaraization and model
devonjkohler Apr 15, 2025
bb7ca01
fixed anomaly bug
devonjkohler Apr 29, 2025
56dd620
imputation convergence bug
devonjkohler May 9, 2025
4655736
anomaly score to column validator
devonjkohler May 9, 2025
1b3c60f
Merge branch 'feature-model_weights' of https://github.com/Vitek-Lab/…
devonjkohler May 9, 2025
e14d871
bug fix
devonjkohler May 13, 2025
8ee6fd5
testing remove pca
devonjkohler May 13, 2025
2978d18
minor bug fixes
devonjkohler Jul 9, 2025
323a9bc
lm model
devonjkohler Jul 9, 2025
5aff109
bug fix
devonjkohler Jul 15, 2025
aa3f5b0
MSstats+ vignette
devonjkohler Jul 16, 2025
be2a8d6
added manual converter example
devonjkohler Jul 25, 2025
f7a957b
documentation bugs
devonjkohler Jul 25, 2025
e019a9a
MSstatsBig example
devonjkohler Aug 5, 2025
3a184ca
test(anomaly_scores): Add unit tests for dataProcess and groupCompari…
tonywu1999 Aug 26, 2025
c71c16c
add defaults for aft_iterations parameter to prevent Shiny from breaking
tonywu1999 Sep 5, 2025
c140362
Add more granular unit tests for data summarization subfunctions
tonywu1999 Sep 8, 2025
dcdfd78
Merge pull request #173 from Vitek-Lab/tony-feature-model_weights
devonjkohler Sep 9, 2025
beecaae
merge with changes in devel
devonjkohler Sep 9, 2025
5b972e7
minor text edits to msstatsplus vignette
devonjkohler Sep 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ make_contrast_run_quant <- function(input, coefs, contrast_matrix, counts, is_la
.Call(`_MSstats_make_contrast_run_quant`, input, coefs, contrast_matrix, counts, is_labeled, is_reference)
}

get_linear_summary <- function(input, coefs, counts, is_labeled) {
.Call(`_MSstats_get_linear_summary`, input, coefs, counts, is_labeled)
get_linear_summary <- function(input, coefs, counts, is_labeled, cov_mat) {
.Call(`_MSstats_get_linear_summary`, input, coefs, counts, is_labeled, cov_mat)
}

median_polish_summary <- function(x, eps = 0.01, maxiter = 10L) {
Expand Down
147 changes: 116 additions & 31 deletions R/dataProcess.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@
#' Smaller numbers can be set to improve processing times. This option is by default on
#' at a high number (100) to improve processing times without affecting differential analysis.
#' @param summaryMethod "TMP" (default) means Tukey's median polish,
#' which is robust estimation method. "linear" uses linear mixed model.
#' which is robust estimation method. "linear" uses linear mixed model. If
#' anomaly detection algorithm is performed, "linear" must be used.
#' @param equalFeatureVar only for summaryMethod = "linear". default is TRUE.
#' Logical variable for whether the model should account for heterogeneous variation
#' among intensities from different features. Default is TRUE, which assume equal
Expand All @@ -58,6 +59,7 @@
#' @param numberOfCores Number of cores for parallel processing. When > 1,
#' a logfile named `MSstats_dataProcess_log_progress.log` is created to
#' track progress. Only works for Linux & Mac OS. Default is 1.
#' @param aft_iterations Number of iterations for AFT model fitting. Default is 90.
#' @inheritParams .documentFunction
#'
#' @importFrom utils sessionInfo
Expand Down Expand Up @@ -126,7 +128,7 @@ dataProcess = function(
equalFeatureVar = TRUE, censoredInt = "NA", MBimpute = TRUE,
remove50missing = FALSE, fix_missing = NULL, maxQuantileforCensored = 0.999,
use_log_file = TRUE, append = FALSE, verbose = TRUE, log_file_path = NULL,
numberOfCores = 1
numberOfCores = 1, aft_iterations=90
) {
MSstatsConvert::MSstatsLogsSettings(use_log_file, append, verbose,
log_file_path,
Expand All @@ -137,7 +139,8 @@ dataProcess = function(
list(method = featureSubset, n_top = n_top_feature,
remove_uninformative = remove_uninformative_feature_outlier),
list(method = summaryMethod, equal_var = equalFeatureVar),
list(symbol = censoredInt, MB = MBimpute))
list(symbol = censoredInt, MB = MBimpute),
colnames(raw))

peptides_dict = makePeptidesDictionary(as.data.table(unclass(raw)), normalization)
input = MSstatsPrepareForDataProcess(raw, logTrans, fix_missing)
Expand All @@ -159,7 +162,7 @@ dataProcess = function(
summarized = tryCatch(MSstatsSummarizeWithMultipleCores(input, summaryMethod,
MBimpute, censoredInt,
remove50missing, equalFeatureVar,
numberOfCores),
numberOfCores, aft_iterations),
error = function(e) {
print(e)
NULL
Expand Down Expand Up @@ -197,13 +200,15 @@ dataProcess = function(
#' @param numberOfCores Number of cores for parallel processing. When > 1,
#' a logfile named `MSstats_dataProcess_log_progress.log` is created to
#' track progress. Only works for Linux & Mac OS. Default is 1.
#' @param aft_iterations Number of iterations for AFT model fitting. Default is 90.
#'
#' @importFrom parallel makeCluster parLapply stopCluster clusterExport
#' @importFrom parallel makeCluster parLapply stopCluster clusterExport
#'
#' @return list of length one with run-level data.
#'
MSstatsSummarizeWithMultipleCores = function(input, method, impute, censored_symbol,
remove50missing, equal_variance, numberOfCores = 1) {
remove50missing, equal_variance, numberOfCores = 1,
aft_iterations = 90) {
if (numberOfCores > 1) {
protein_indices = split(seq_len(nrow(input)), list(input$PROTEIN))
num_proteins = length(protein_indices)
Expand All @@ -227,7 +232,8 @@ MSstatsSummarizeWithMultipleCores = function(input, method, impute, censored_sym
}
single_protein = input[protein_indices[[i]],]
MSstatsSummarizeSingleTMP(
single_protein, impute, censored_symbol, remove50missing)
single_protein, impute, censored_symbol, remove50missing,
aft_iterations)
})
} else {
summarized_results = parallel::parLapply(cl, seq_len(num_proteins), function(i) {
Expand All @@ -236,20 +242,29 @@ MSstatsSummarizeWithMultipleCores = function(input, method, impute, censored_sym
sep = "\n", file = "MSstats_dataProcess_log_progress.log", append = TRUE)
}
single_protein = input[protein_indices[[i]],]
MSstatsSummarizeSingleLinear(single_protein, equal_variance)
MSstatsSummarizeSingleLinear(
Copy link
Copy Markdown
Contributor

@tonywu1999 tonywu1999 Sep 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

equal_variance is not passed into MSstatsSummarizeSingleLinear

single_protein,
impute,
censored_symbol,
remove50missing,
aft_iterations)
})
}
parallel::stopCluster(cl)
return(summarized_results)
} else {
return(MSstatsSummarizeWithSingleCore(input, method, impute, censored_symbol,
remove50missing, equal_variance))
return(MSstatsSummarizeWithSingleCore(input, method, impute,
censored_symbol,
remove50missing,
equal_variance,
aft_iterations))
}
}

#' Feature-level data summarization with 1 core
#'
#' @inheritParams MSstatsSummarizeWithMultipleCores
#' @param aft_iterations Number of iterations for AFT model fitting. Default is 90.
#'
#' @importFrom data.table uniqueN
#' @importFrom utils setTxtProgressBar
Expand All @@ -271,12 +286,12 @@ MSstatsSummarizeWithMultipleCores = function(input, method, impute, censored_sym
#' input = MSstatsSelectFeatures(input, "all")
#' processed = getProcessed(input)
#' input = MSstatsPrepareForSummarization(input, method, impute, cens, FALSE)
#' summarized = MSstatsSummarizeWithSingleCore(input, method, impute, cens, FALSE, TRUE)
#' summarized = MSstatsSummarizeWithSingleCore(input, method, impute, cens, FALSE, TRUE, 100)
#' length(summarized) # list of summarization outputs for each protein
#' head(summarized[[1]][[1]]) # run-level summary
#'
MSstatsSummarizeWithSingleCore = function(input, method, impute, censored_symbol,
remove50missing, equal_variance) {
remove50missing, equal_variance, aft_iterations = 90) {


protein_indices = split(seq_len(nrow(input)), list(input$PROTEIN))
Expand All @@ -287,16 +302,19 @@ MSstatsSummarizeWithSingleCore = function(input, method, impute, censored_symbol
for (protein_id in seq_len(num_proteins)) {
single_protein = input[protein_indices[[protein_id]],]
summarized_results[[protein_id]] = MSstatsSummarizeSingleTMP(
single_protein, impute, censored_symbol, remove50missing)
single_protein, impute, censored_symbol, remove50missing,
aft_iterations)
setTxtProgressBar(pb, protein_id)
}
close(pb)
} else {
pb = utils::txtProgressBar(min = 0, max = num_proteins, style = 3)
for (protein_id in seq_len(num_proteins)) {
single_protein = input[protein_indices[[protein_id]],]
summarized_result = MSstatsSummarizeSingleLinear(single_protein,
equal_variance)
summarized_result = MSstatsSummarizeSingleLinear(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

equal_variance is not passed into MSstatsSummarizeSingleLinear

single_protein, impute, censored_symbol,
remove50missing, aft_iterations)

summarized_results[[protein_id]] = summarized_result
setTxtProgressBar(pb, protein_id)
}
Expand All @@ -308,6 +326,10 @@ MSstatsSummarizeWithSingleCore = function(input, method, impute, censored_symbol
#' Linear model-based summarization for a single protein
#'
#' @param single_protein feature-level data for a single protein
#' @param impute boolean for whether imputation should be performed
#' @param censored_symbol Character string indicating how censored values are represented
#' @param remove50missing if TRUE, proteins with more than 50\% missing values in each run are removed
#' @param aft_iterations number of iterations for AFT model fitting
#' @param equal_variances if TRUE, observation are assumed to be homoskedastic
#'
#' @return list with protein-level data
Expand All @@ -330,14 +352,53 @@ MSstatsSummarizeWithSingleCore = function(input, method, impute, censored_symbol
#' input = MSstatsSelectFeatures(input, "all")
#' input = MSstatsPrepareForSummarization(input, method, impute, cens, FALSE)
#' input_split = split(input, input$PROTEIN)
#' single_protein_summary = MSstatsSummarizeSingleLinear(input_split[[1]])
#' single_protein_summary = MSstatsSummarizeSingleLinear(input_split[[1]], impute, cens, TRUE, 100)
#' head(single_protein_summary[[1]])
#'
MSstatsSummarizeSingleLinear = function(single_protein, equal_variances = TRUE) {
MSstatsSummarizeSingleLinear = function(single_protein,
impute,
censored_symbol,
remove50missing,
aft_iterations = 90,
equal_variances = TRUE) {
ABUNDANCE = RUN = FEATURE = PROTEIN = LogIntensities = NULL

cols = intersect(colnames(single_protein), c("newABUNDANCE", "cen", "RUN",
"FEATURE", "ref"))
single_protein = single_protein[(n_obs > 1 & !is.na(n_obs)) &
(n_obs_run > 0 & !is.na(n_obs_run))]
if (nrow(single_protein) == 0) {
return(list(NULL, NULL))
}
single_protein[, RUN := factor(RUN)]
single_protein[, FEATURE := factor(FEATURE)]
if (impute & any(single_protein[["censored"]])) {
survival_fit = .fitSurvival(single_protein[LABEL == "L", cols,
with = FALSE],
aft_iterations)
sigma2 = survival_fit$scale^2
single_protein[, c("predicted", "imputation_var") := {
pred = predict(survival_fit, newdata = .SD, se.fit = TRUE)
list(pred$fit, pred$se.fit^2 + sigma2)
}]
Comment on lines +375 to +383
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Improve error handling for survival model fitting

When imputation is enabled and the survival model fitting fails, the code silently continues with NA predictions. This could mask important issues. Consider logging when prediction fails.

     if (impute & any(single_protein[["censored"]])) {
         survival_fit = .fitSurvival(single_protein[LABEL == "L", cols,
                                                    with = FALSE],
                                     aft_iterations)
-        sigma2 = survival_fit$scale^2
-        single_protein[, c("predicted", "imputation_var") := {
-            pred = predict(survival_fit, newdata = .SD, se.fit = TRUE)
-            list(pred$fit, pred$se.fit^2 + sigma2)
-        }]
+        if (!is.null(survival_fit)) {
+            sigma2 = survival_fit$scale^2
+            single_protein[, c("predicted", "imputation_var") := {
+                pred = predict(survival_fit, newdata = .SD, se.fit = TRUE)
+                list(pred$fit, pred$se.fit^2 + sigma2)
+            }]
+        } else {
+            msg = paste("*** warning: Survival model fitting failed for protein",
+                       unique(single_protein$PROTEIN))
+            getOption("MSstatsLog")("WARN", msg)
+            single_protein[, c("predicted", "imputation_var") := list(NA_real_, NA_real_)]
+        }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if (impute & any(single_protein[["censored"]])) {
survival_fit = .fitSurvival(single_protein[LABEL == "L", cols,
with = FALSE],
aft_iterations)
sigma2 = survival_fit$scale^2
single_protein[, c("predicted", "imputation_var") := {
pred = predict(survival_fit, newdata = .SD, se.fit = TRUE)
list(pred$fit, pred$se.fit^2 + sigma2)
}]
if (impute & any(single_protein[["censored"]])) {
survival_fit = .fitSurvival(single_protein[LABEL == "L", cols,
with = FALSE],
aft_iterations)
if (!is.null(survival_fit)) {
sigma2 = survival_fit$scale^2
single_protein[, c("predicted", "imputation_var") := {
pred = predict(survival_fit, newdata = .SD, se.fit = TRUE)
list(pred$fit, pred$se.fit^2 + sigma2)
}]
} else {
msg = paste("*** warning: Survival model fitting failed for protein",
unique(single_protein$PROTEIN))
getOption("MSstatsLog")("WARN", msg)
single_protein[, c("predicted", "imputation_var") := list(NA_real_, NA_real_)]
}
🤖 Prompt for AI Agents
In R/dataProcess.R around lines 436 to 444, the survival model fitting and
prediction are not wrapped with error handling so failures silently produce NA;
wrap the call to .fitSurvival in a tryCatch (or try) to capture and log the
error (including error message and protein identifier) and handle the failure
path explicitly, and likewise wrap the predict(...) call in a tryCatch that logs
prediction failures and ensures imputation columns are set to NA or a safe
fallback; ensure logs include enough context (protein id, LABEL, and whether
imputation was requested) and that the code returns/continues deterministically
after a caught error.

single_protein[, predicted := ifelse(censored & (LABEL == "L"),
predicted, NA)]
single_protein[, newABUNDANCE := ifelse(censored & LABEL == "L",
predicted, newABUNDANCE)]
survival = single_protein[, c(cols, "predicted"), with = FALSE]
} else {
survival = single_protein[, cols, with = FALSE]
survival[, predicted := NA]
}

Comment on lines +388 to +393
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Preserve and return imputation variance with survival output

imputation_var is computed but dropped from survival; include it for downstream DA that uses weights/variance.

Apply:

-        survival = single_protein[, c(cols, "predicted"), with = FALSE]
+        survival = single_protein[, c(cols, "predicted", "imputation_var"), with = FALSE]
@@
-        survival = single_protein[, cols, with = FALSE]
-        survival[, predicted := NA]
+        survival = single_protein[, cols, with = FALSE]
+        survival[, `:=`(predicted = NA_real_, imputation_var = NA_real_)]

Also applies to: 429-432

🤖 Prompt for AI Agents
In R/dataProcess.R around lines 388-393, the imputation_var column is computed
but not included in the survival data.frame; update both branches so survival
includes imputation_var (e.g., select cols plus "predicted" and "imputation_var"
in the first branch, and cols plus "imputation_var" in the else branch while
still adding predicted := NA), and make the identical change at lines ~429-432
to ensure imputation variance is preserved and returned for downstream DA
weighting/variance use.

if (all(!is.na(single_protein$ANOMALYSCORES))){
single_protein$weights = 1 / single_protein$ANOMALYSCORES
} else {
single_protein$weights = NA
}
Comment on lines +394 to +398
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Validate ANOMALYSCORES before computing weights

Similar to the Variance validation issue, the code should check that ANOMALYSCORES are positive before computing weights as their reciprocal.

     if (all(!is.na(single_protein$ANOMALYSCORES))){
-        single_protein$weights = 1 / single_protein$ANOMALYSCORES
+        if (any(single_protein$ANOMALYSCORES <= 0 | is.infinite(single_protein$ANOMALYSCORES))) {
+            msg = paste("*** warning: Invalid anomaly scores detected for protein",
+                       unique(single_protein$PROTEIN), 
+                       "- weights will not be applied")
+            getOption("MSstatsLog")("WARN", msg)
+            single_protein$weights = NA
+        } else {
+            single_protein$weights = 1 / single_protein$ANOMALYSCORES
+        }
     } else {
         single_protein$weights = NA
     }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if (all(!is.na(single_protein$ANOMALYSCORES))){
single_protein$weights = 1 / single_protein$ANOMALYSCORES
} else {
single_protein$weights = NA
}
if (all(!is.na(single_protein$ANOMALYSCORES))) {
if (any(single_protein$ANOMALYSCORES <= 0 |
is.infinite(single_protein$ANOMALYSCORES))) {
msg = paste(
"*** warning: Invalid anomaly scores detected for protein",
unique(single_protein$PROTEIN),
"- weights will not be applied"
)
getOption("MSstatsLog")("WARN", msg)
single_protein$weights = NA
} else {
single_protein$weights = 1 / single_protein$ANOMALYSCORES
}
} else {
single_protein$weights = NA
}
🤖 Prompt for AI Agents
In R/dataProcess.R around lines 455 to 459, the code computes weights as
1/ANOMALYSCORES without verifying ANOMALYSCORES are positive and non-zero;
update the logic to first confirm ANOMALYSCORES are numeric, finite and strictly
greater than zero for all entries before taking reciprocals, otherwise set
single_protein$weights = NA (or NA_real_) and optionally log or flag invalid
scores; ensure division by zero and negative/NA/Inf values are handled to
prevent NaN/Inf results.


label = data.table::uniqueN(single_protein$LABEL) > 1
single_protein = single_protein[!is.na(ABUNDANCE)]
single_protein = single_protein[!is.na(newABUNDANCE)]
single_protein[, RUN := factor(RUN)]
single_protein[, FEATURE := factor(FEATURE)]

Expand All @@ -346,28 +407,35 @@ MSstatsSummarizeSingleLinear = function(single_protein, equal_variances = TRUE)
counts = as.matrix(counts)
is_single_feature = .checkSingleFeature(single_protein)

fit = try(.fitLinearModel(single_protein, is_single_feature, is_labeled = label,
equal_variances), silent = TRUE)
# fit = try(, silent = TRUE)
fit = .fitLinearModel(single_protein, is_single_feature,
is_labeled = label, equal_variances)

if (inherits(fit, "try-error")) {
msg = paste("*** error : can't fit the model for ", unique(single_protein$PROTEIN))
msg = paste("*** error : can't fit the model for ",
unique(single_protein$PROTEIN))
Comment on lines +410 to +416
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Restore try/tryCatch around .fitLinearModel; current error check is ineffective

fit is not wrapped in try, yet you test inherits(fit, "try-error"). A model failure will raise and skip the handler. Wrap the call.

Apply:

-    # fit = try(, silent = TRUE)
-    fit = .fitLinearModel(single_protein, is_single_feature, 
-                          is_labeled = label, equal_variances)
+    fit <- try(
+        .fitLinearModel(single_protein, is_single_feature, 
+                        is_labeled = label, equal_variances),
+        silent = TRUE
+    )
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
# fit = try(, silent = TRUE)
fit = .fitLinearModel(single_protein, is_single_feature,
is_labeled = label, equal_variances)
if (inherits(fit, "try-error")) {
msg = paste("*** error : can't fit the model for ", unique(single_protein$PROTEIN))
msg = paste("*** error : can't fit the model for ",
unique(single_protein$PROTEIN))
fit <- try(
.fitLinearModel(single_protein, is_single_feature,
is_labeled = label, equal_variances),
silent = TRUE
)
if (inherits(fit, "try-error")) {
msg = paste("*** error : can't fit the model for ",
unique(single_protein$PROTEIN))
...
🤖 Prompt for AI Agents
In R/dataProcess.R around lines 410-416, the call to .fitLinearModel is not
wrapped in try/tryCatch yet the code tests inherits(fit, "try-error"), so model
failures will raise and skip the handler; wrap the .fitLinearModel call in
try(..., silent = TRUE) or use tryCatch to catch errors and return an object
that can be detected (e.g., assign the error to fit with class "try-error" or a
sentinel), then keep the existing inherits(fit, "try-error") branch to handle
failures and log the message as before.

getOption("MSstatsLog")("WARN", msg)
getOption("MSstatsMsg")("WARN", msg)
result = NULL
} else {
cf = summary(fit)$coefficients[, 1]
cov_mat = vcov(fit)

result = unique(single_protein[, list(Protein = PROTEIN, RUN = RUN)])
log_intensities = get_linear_summary(single_protein, cf,
counts, label)
result[, LogIntensities := log_intensities]
extracted_values = get_linear_summary(single_protein, cf,
counts, label, cov_mat)
# extracted_values = get_run_estimates_simple(fit, single_protein, counts)

result = cbind(result, extracted_values)
}
list(result)
list(result, survival)
}


#' Tukey Median Polish summarization for a single protein
#'
#' @param single_protein feature-level data for a single protein
#' @param aft_iterations number of iterations for AFT model fitting
#' @inheritParams MSstatsSummarizeWithSingleCore
#'
#' @return list of two data.tables: one with fitted survival model,
Expand All @@ -392,11 +460,11 @@ MSstatsSummarizeSingleLinear = function(single_protein, equal_variances = TRUE)
#' input = MSstatsPrepareForSummarization(input, method, impute, cens, FALSE)
#' input_split = split(input, input$PROTEIN)
#' single_protein_summary = MSstatsSummarizeSingleTMP(input_split[[1]],
#' impute, cens, FALSE)
#' impute, cens, FALSE, 100)
#' head(single_protein_summary[[1]])
#'
MSstatsSummarizeSingleTMP = function(single_protein, impute, censored_symbol,
remove50missing) {
remove50missing, aft_iterations = 90) {
newABUNDANCE = n_obs = n_obs_run = RUN = FEATURE = LABEL = NULL
predicted = censored = NULL
cols = intersect(colnames(single_protein), c("newABUNDANCE", "cen", "RUN",
Expand All @@ -409,10 +477,27 @@ MSstatsSummarizeSingleTMP = function(single_protein, impute, censored_symbol,
single_protein[, RUN := factor(RUN)]
single_protein[, FEATURE := factor(FEATURE)]
if (impute & any(single_protein[["censored"]])) {
survival_fit = .fitSurvival(single_protein[LABEL == "L", cols,
with = FALSE])
single_protein[, predicted := predict(survival_fit,
newdata = .SD)]

# Flag to track convergence warning
converged = TRUE

# Try to fit survival model and catch convergence warnings
survival_fit = withCallingHandlers({
.fitSurvival(single_protein[LABEL == "L", cols, with = FALSE],
aft_iterations)
}, warning = function(w) {
if (grepl("converge", conditionMessage(w), ignore.case = TRUE)) {
message("Convergence warning caught: ", conditionMessage(w))
converged <<- FALSE
}
})

if (converged) {
single_protein[, predicted := predict(survival_fit, newdata = .SD)]
} else {
single_protein[, predicted := NA_real_]
}

single_protein[, predicted := ifelse(censored & (LABEL == "L"), predicted, NA)]
single_protein[, newABUNDANCE := ifelse(censored & LABEL == "L",
predicted, newABUNDANCE)]
Expand Down
27 changes: 15 additions & 12 deletions R/groupComparison.R
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,13 @@ groupComparison = function(contrast.matrix, data,
samples_info = getSamplesInfo(data)
groups = unique(data$ProteinLevelData$GROUP)
contrast_matrix = MSstatsContrastMatrix(contrast.matrix, groups)
getOption("MSstatsLog")("INFO",
"== Start to test and get inference in whole plot")
getOption("MSstatsMsg")("INFO",
" == Start to test and get inference in whole plot ...")
getOption("MSstatsLog")(
"INFO", "== Start to test and get inference in whole plot")
getOption("MSstatsMsg")(
"INFO", " == Start to test and get inference in whole plot ...")
testing_results = MSstatsGroupComparison(split_summarized, contrast_matrix,
save_fitted_models, repeated, samples_info,
save_fitted_models,
repeated, samples_info,
numberOfCores)
getOption("MSstatsLog")("INFO",
"== Comparisons for all proteins are done.")
Expand Down Expand Up @@ -179,13 +180,14 @@ MSstatsGroupComparison = function(summarized_list, contrast_matrix,
save_fitted_models, repeated, samples_info,
numberOfCores = 1) {
if (numberOfCores > 1) {
return(.groupComparisonWithMultipleCores(summarized_list, contrast_matrix,
return(.groupComparisonWithMultipleCores(summarized_list,
contrast_matrix,
save_fitted_models, repeated,
samples_info, numberOfCores))
} else {
return(.groupComparisonWithSingleCore(summarized_list, contrast_matrix,
save_fitted_models, repeated,
samples_info))
save_fitted_models,
repeated, samples_info))
}
}

Expand Down Expand Up @@ -276,16 +278,17 @@ MSstatsGroupComparisonOutput = function(input, summarization_output, log_base =
#' single_output # same as a single element of MSstatsGroupComparison output
#'
MSstatsGroupComparisonSingleProtein = function(single_protein, contrast_matrix,
repeated, groups, samples_info,
save_fitted_models,
repeated, groups,
samples_info, save_fitted_models,
has_imputed) {
single_protein = .prepareSingleProteinForGC(single_protein)
is_single_subject = .checkSingleSubject(single_protein)
has_tech_reps = .checkTechReplicate(single_protein)

fitted_model = try(.fitModelSingleProtein(single_protein, contrast_matrix,
has_tech_reps, is_single_subject,
repeated, groups, samples_info,
has_tech_reps,
is_single_subject, repeated,
groups, samples_info,
save_fitted_models, has_imputed),
silent = TRUE)
if (inherits(fitted_model, "try-error")) {
Expand Down
Loading
Loading