Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions R/dataProcess.R
Original file line number Diff line number Diff line change
Expand Up @@ -557,8 +557,9 @@ MSstatsSummarizeSingleTMP = function(single_protein, impute, censored_symbol,
return(list(NULL, NULL))
} else {
single_protein = single_protein[!is.na(newABUNDANCE), ]
is_labeled = nlevels(single_protein$LABEL) > 1
result = .runTukey(single_protein, is_labeled, censored_symbol,
is_labeled_reference = "is_labeled_ref" %in% colnames(single_protein) &&
any(single_protein$is_labeled_ref, na.rm = TRUE)
result = .runTukey(single_protein, is_labeled_reference, censored_symbol,
remove50missing)
}
list(result, survival)
Expand Down
47 changes: 23 additions & 24 deletions R/utils_censored.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,49 +31,47 @@ MSstatsHandleMissing = function(input, summary_method, impute,

if (impute & !is.null(missing_symbol)) {
input$censored = FALSE
use_for_analysis = if ("is_labeled_ref" %in% colnames(input)) !input$is_labeled_ref else rep(TRUE, nrow(input))
## if intensity = 1, but abundance > cutoff after normalization, it also should be censored.
if (!is.null(censored_cutoff)) {
quantiles = input[!is.na(INTENSITY) & INTENSITY > 1 & LABEL == "L",
quantile(ABUNDANCE,
prob = c(0.01, 0.25, 0.5, 0.75,
censored_cutoff),
quantiles = input[use_for_analysis & !is.na(INTENSITY) & INTENSITY > 1,
quantile(ABUNDANCE,
prob = c(0.01, 0.25, 0.5, 0.75,
censored_cutoff),
na.rm = TRUE)]
iqr = quantiles[4] - quantiles[2]
multiplier = (quantiles[5] - quantiles[4]) / iqr
cutoff_lower = (quantiles[2] - multiplier * iqr)
input$censored = !is.na(input$INTENSITY) &
input$LABEL == "L" &
cutoff_lower = (quantiles[2] - multiplier * iqr)
input$censored = use_for_analysis & !is.na(input$INTENSITY) &
input$ABUNDANCE < cutoff_lower
if (cutoff_lower <= 0 & !is.null(missing_symbol) & missing_symbol == "0") {
zero_one_filter = !is.na(input$ABUNDANCE) & input$ABUNDANCE <= 0
zero_one_filter = use_for_analysis & !is.na(input$ABUNDANCE) & input$ABUNDANCE <= 0
input$censored = ifelse(zero_one_filter, TRUE, input$censored)
}
if (!is.null(missing_symbol) & missing_symbol == "NA") {
input$censored = ifelse(is.na(input$INTENSITY), TRUE,
input$censored = ifelse(use_for_analysis & is.na(input$INTENSITY), TRUE,
input$censored)
}
msg = paste('** Log2 intensities under cutoff =',
format(cutoff_lower, digits = 5),

msg = paste('** Log2 intensities under cutoff =',
format(cutoff_lower, digits = 5),
' were considered as censored missing values.')
msg_2 = paste("** Log2 intensities =", missing_symbol, "were considered as censored missing values.")

getOption("MSstatsMsg")("INFO", msg)
getOption("MSstatsMsg")("INFO", msg_2)

getOption("MSstatsLog")("INFO", msg)
getOption("MSstatsLog")("INFO", msg_2)

} else {
if (missing_symbol == '0') {
input$censored = input$LABEL == "L" &
!is.na(input$INTENSITY) &
input$censored = use_for_analysis & !is.na(input$INTENSITY) &
(input$INTENSITY == 1 | input$ABUNDANCE <= 0)
} else if (missing_symbol == 'NA') {
input$censored = input$LABEL == "L" & is.na(input$ABUNDANCE)
input$censored = use_for_analysis & is.na(input$ABUNDANCE)
}
}
input[, censored := ifelse(LABEL == "H", FALSE, censored)]
} else {
input$censored = FALSE
}
Expand Down Expand Up @@ -128,16 +126,17 @@ MSstatsHandleMissing = function(input, summary_method, impute,
#' @param censored_symbol `censoredInt` parameter to dataProcess
#' @keywords internal
.getNonMissingFilter = function(input, impute, censored_symbol) {
use_for_analysis = if ("is_labeled_ref" %in% colnames(input)) !input$is_labeled_ref else rep(TRUE, nrow(input))
if (impute) {
if (!is.null(censored_symbol)) {
if (censored_symbol == "0") {
nonmissing_filter = input$LABEL == "L" & !is.na(input$newABUNDANCE) & input$newABUNDANCE != 0
nonmissing_filter = use_for_analysis & !is.na(input$newABUNDANCE) & input$newABUNDANCE != 0
} else if (censored_symbol == "NA") {
nonmissing_filter = input$LABEL == "L" & !is.na(input$newABUNDANCE)
}
}
nonmissing_filter = use_for_analysis & !is.na(input$newABUNDANCE)
}
}
} else {
nonmissing_filter = input$LABEL == "L" & !is.na(input$newABUNDANCE) & input$newABUNDANCE != 0
nonmissing_filter = use_for_analysis & !is.na(input$newABUNDANCE) & input$newABUNDANCE != 0
}
nonmissing_filter
}
43 changes: 26 additions & 17 deletions R/utils_normalize.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,13 @@ MSstatsNormalize = function(input, normalization_method, peptides_dict = NULL, s
normalization_method = toupper(normalization_method)
if (normalization_method == "NONE" | normalization_method == "FALSE") {
return(input)
} else if (normalization_method == "EQUALIZEMEDIANS") {
}

if (normalization_method == "EQUALIZEMEDIANS") {
input = .normalizeMedian(input)
if ("H" %in% input$LABEL) {
input[, is_labeled_ref := LABEL == "H"]
}
Comment thread
tonywu1999 marked this conversation as resolved.
} else if (normalization_method == "QUANTILE") {
input = .normalizeQuantile(input)
} else if (normalization_method == "GLOBALSTANDARDS") {
Expand Down Expand Up @@ -191,15 +196,24 @@ MSstatsNormalize = function(input, normalization_method, peptides_dict = NULL, s
#' @keywords internal
.normalizeGlobalStandards = function(input, peptides_dict, standards) {
PeptideSequence = PEPTIDE = PROTEIN = median_by_fraction = NULL
Standard = FRACTION = LABEL = ABUNDANCE = RUN = GROUP = NULL
Standard = FRACTION = LABEL = ABUNDANCE = RUN = mean_by_run = NULL

input_with_peptides <- merge(input, peptides_dict, by = "PEPTIDE", all.x = TRUE)
if (length(standards) == 1 && standards == "unlabeled") {
standards = unique(input_with_peptides[is.na(input_with_peptides$LABEL), ]$PeptideSequence)
if (length(standards) == 0) {
msg = "nameStandards = 'unlabeled' but no unlabeled peptides found in data."
getOption("MSstatsLog")("ERROR", msg)
stop(msg)
}
}
standards_data <- input_with_peptides[
(PeptideSequence %in% standards | PROTEIN %in% standards) &
GROUP != "0" &
(PeptideSequence %in% standards | PROTEIN %in% standards) &
!is.na(ABUNDANCE)
]
missing_standards <- standards[!standards %in% c(standards_data$PeptideSequence, standards_data$PROTEIN)]
missing_standards <- standards[
!standards %in% c(standards_data$PeptideSequence, standards_data$PROTEIN)
]
if (length(missing_standards) > 0) {
msg <- paste("Global standard peptides or proteins,",
paste(missing_standards, collapse = ", "),
Expand All @@ -210,12 +224,12 @@ MSstatsNormalize = function(input, normalization_method, peptides_dict = NULL, s
standards_data[, standard := ifelse(!is.na(PeptideSequence) & PeptideSequence %in% standards,
PeptideSequence,
PROTEIN)]
means_by_standard <- standards_data[,
means_by_standard <- standards_data[,
list(mean_abundance = mean(ABUNDANCE, na.rm = TRUE)),
by = .(RUN, standard)
]
means_by_standard <- dcast(means_by_standard,
RUN ~ standard,
means_by_standard <- dcast(means_by_standard,
RUN ~ standard,
value.var = "mean_abundance")
means_by_standard = data.table::melt(means_by_standard, id.vars = "RUN",
variable.name = "Standard", value.name = "ABUNDANCE")
Expand All @@ -227,16 +241,11 @@ MSstatsNormalize = function(input, normalization_method, peptides_dict = NULL, s
means_by_standard[, ABUNDANCE := NULL]
means_by_standard[, Standard := NULL]
means_by_standard = unique(means_by_standard)

input = merge(input, means_by_standard, all.x = TRUE, by = c("RUN", "FRACTION"))
input[, ABUNDANCE := ifelse(LABEL == "L", ABUNDANCE - mean_by_run + median_by_fraction, ABUNDANCE)]

if (data.table::uniqueN(input$FRACTION) == 1L) {
msg = "Normalization : normalization with global standards protein - okay"
} else {
msg = "Normalization : normalization with global standards protein - okay"
}
getOption("MSstatsLog")("INFO", msg)
input[, ABUNDANCE := ABUNDANCE - mean_by_run + median_by_fraction]

getOption("MSstatsLog")("INFO", "Normalization : normalization with global standards protein - okay")
input[ , !(colnames(input) %in% c("mean_by_run", "median_by_fraction")), with = FALSE]
}

Expand Down
6 changes: 3 additions & 3 deletions R/utils_summarization_prepare.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ MSstatsPrepareForSummarization = function(input, method, impute, censored_symbol
input[, ANOMALYSCORES := NA]
}

label = data.table::uniqueN(input$LABEL) == 2
if (label) {
input[, ref_covariate := factor(ifelse(LABEL == "L", RUN, 0))]
add_ref_covariate = "is_labeled_ref" %in% colnames(input) && any(input$is_labeled_ref, na.rm = TRUE)
if (add_ref_covariate) {
input[, ref_covariate := factor(ifelse(LABEL == "L", RUN, 0))]
}

if (is.element("remove", colnames(input))) {
Expand Down
39 changes: 39 additions & 0 deletions inst/tinytest/test_dataProcess.R
Original file line number Diff line number Diff line change
Expand Up @@ -239,3 +239,42 @@ for (i in 1:nrow(protein_runs_4_5)) {
"than to AFPLAEWQPSDVDQR", afpla_abundance,
"(dist =", round(dist_to_afpla, 4), ")"))
}


# MSstatsSummarizeSingleTMP tests ------------------------------------------

MSstatsConvert::MSstatsLogsSettings(FALSE)

single_protein_labeled <- data.table::data.table(
PROTEIN = rep("P1", 4),
FEATURE = rep("F1", 4),
LABEL = c("H", "L", "H", "L"),
RUN = c("R1", "R1", "R2", "R2"),
newABUNDANCE = c(10.0, 14.0, 12.0, 16.0),
is_labeled_ref = c(TRUE, FALSE, TRUE, FALSE),
censored = rep(FALSE, 4),
n_obs = rep(2L, 4),
n_obs_run = rep(1L, 4),
prop_features = rep(1.0, 4)
)

result_labeled <- MSstatsSummarizeSingleTMP(
single_protein_labeled,
impute = FALSE,
censored_symbol = NULL,
remove50missing = FALSE,
aft_iterations = 90
)

protein_level_labeled <- result_labeled[[1]]

expect_equal(
sort(as.numeric(protein_level_labeled$LogIntensities)),
c(15, 15),
info = paste(
"MSstatsSummarizeSingleTMP: 'is_labeled_ref' column present with TRUE values must set",
"is_labeled_reference=TRUE, causing .runTukey to call .adjustLRuns;",
"both runs should converge to H-adjusted LogIntensities of 15,",
"not the raw L values (14, 16) that the unlabeled path would return"
)
)
108 changes: 108 additions & 0 deletions inst/tinytest/test_utils_censored.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,111 @@ imputed_val_p2 <- dt_zero[
]
expect_equal(imputed_val_p2, expected_val_p2)


# Test MSstatsHandleMissing
make_cens_input <- function() {
data.table::data.table(
PROTEIN = rep("P1", 8),
FEATURE = rep("F1", 8),
LABEL = c("L","L","L","L","H","H","H","H"),
RUN = rep(c("R1","R2","R3","R4"), 2),
GROUP = rep(c("G1","G1","G2","G2"), 2),
FRACTION = rep(1L, 8),
INTENSITY = c(1L, 1L, 100L, 200L, # L: first two will be censored
1L, 1L, 100L, 200L), # H: same intensities but must NOT be censored
ABUNDANCE = c(0, 0, log2(100), log2(200),
0, 0, log2(100), log2(200)),
is_labeled_ref = c(FALSE,FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE)
)
}

cens_in <- make_cens_input()
out_cens <- MSstatsHandleMissing(
cens_in,
summary_method = "TMP",
impute = TRUE,
missing_symbol = "0",
censored_cutoff = NULL
)

# H rows must never be flagged as censored regardless of intensity
expect_false(
any(out_cens[LABEL == "H", censored]),
info = "H rows (is_labeled_ref=TRUE) must never be flagged censored (use_for_analysis=FALSE)"
)

# L rows with intensity==1 must be flagged censored
expect_true(
any(out_cens[LABEL == "L" & INTENSITY == 1L, censored]),
info = "L rows with intensity=1 (use_for_analysis=TRUE) must be flagged censored"
)

# L rows with intensity>1 must NOT be flagged censored
expect_false(
any(out_cens[LABEL == "L" & INTENSITY > 1L, censored]),
info = "L rows with high intensity must not be flagged censored"
)


# Tests for .getNonMissingFilter -----------------------------------------------

dt_nmf_no_ref <- data.table::data.table(
newABUNDANCE = c(1.5, 0.0, NA_real_, 2.0)
)
result_no_ref <- MSstats:::.getNonMissingFilter(dt_nmf_no_ref, impute = FALSE, censored_symbol = "0")
expect_equal(
result_no_ref, c(TRUE, FALSE, FALSE, TRUE),
info = ".getNonMissingFilter: without 'is_labeled_ref' column all rows have use_for_analysis=TRUE; zero and NA excluded"
)

dt_nmf_with_ref <- data.table::data.table(
newABUNDANCE = c(1.5, 2.0, 3.0, 1.0),
is_labeled_ref = c(TRUE, FALSE, FALSE, TRUE)
)
result_with_ref <- MSstats:::.getNonMissingFilter(dt_nmf_with_ref, impute = FALSE, censored_symbol = "0")
expect_equal(
result_with_ref, c(FALSE, TRUE, TRUE, FALSE),
info = ".getNonMissingFilter: is_labeled_ref=TRUE rows must be excluded (use_for_analysis=FALSE) regardless of abundance"
)

dt_nmf_ref_valid <- data.table::data.table(
newABUNDANCE = c(5.0, 3.0),
is_labeled_ref = c(TRUE, FALSE)
)
result_ref_valid <- MSstats:::.getNonMissingFilter(dt_nmf_ref_valid, impute = FALSE, censored_symbol = "0")
expect_false(
result_ref_valid[1],
info = ".getNonMissingFilter: is_labeled_ref=TRUE row with valid abundance must still yield FALSE"
)
expect_true(
result_ref_valid[2],
info = ".getNonMissingFilter: is_labeled_ref=FALSE row with valid abundance must yield TRUE"
)

dt_nmf_impute_na <- data.table::data.table(
newABUNDANCE = c(1.5, 0.0, NA_real_, 2.0)
)
result_impute_na <- MSstats:::.getNonMissingFilter(dt_nmf_impute_na, impute = TRUE, censored_symbol = "NA")
expect_equal(
result_impute_na, c(TRUE, TRUE, FALSE, TRUE),
info = ".getNonMissingFilter: impute=TRUE, censored_symbol='NA' treats zero as non-missing"
)

dt_nmf_impute_zero <- data.table::data.table(
newABUNDANCE = c(1.5, 0.0, NA_real_, 2.0)
)
result_impute_zero <- MSstats:::.getNonMissingFilter(dt_nmf_impute_zero, impute = TRUE, censored_symbol = "0")
expect_equal(
result_impute_zero, c(TRUE, FALSE, FALSE, TRUE),
info = ".getNonMissingFilter: impute=TRUE, censored_symbol='0' treats zero as missing"
)

dt_nmf_ref_impute_na <- data.table::data.table(
newABUNDANCE = c(1.5, 2.0, 0.0, 3.0),
is_labeled_ref = c(TRUE, FALSE, FALSE, TRUE)
)
result_ref_impute_na <- MSstats:::.getNonMissingFilter(dt_nmf_ref_impute_na, impute = TRUE, censored_symbol = "NA")
expect_equal(
result_ref_impute_na, c(FALSE, TRUE, TRUE, FALSE),
info = ".getNonMissingFilter: is_labeled_ref=TRUE rows excluded even when impute=TRUE, censored_symbol='NA'"
)
Loading
Loading