Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 36 additions & 24 deletions R/utils_fractions.R
Original file line number Diff line number Diff line change
Expand Up @@ -264,41 +264,53 @@
#' @return data.table
#' @keywords internal
.removeOverlappingFeatures = function(input) {
    # Column names used via data.table non-standard evaluation; bound to NULL
    # so that R CMD check does not report undefined global variables.
    Fraction = Intensity = Run = feature = n_obs = is_max = NULL

    # Nothing to resolve when all rows come from a single fraction.
    if (data.table::uniqueN(input$Fraction) > 1) {
        # For every (feature, Fraction) pair, count the distinct runs that
        # carry a usable (non-missing, positive) intensity.
        measurement_count = input[
            !is.na(Intensity) & Intensity > 0,
            .(n_obs = data.table::uniqueN(Run)),
            by = .(feature, Fraction)
        ]
        # Candidate fractions: those reaching the per-feature maximum count.
        measurement_count[, is_max := n_obs == max(n_obs), by = "feature"]
        max_fractions = measurement_count[(is_max)]

        # One (feature, Fraction) row per feature after tie-breaking.
        fraction_map = .resolveFractionTies(input, max_fractions)

        # A plain `input[fraction_map, on = ...]` join returns rows in the
        # order of `fraction_map`. Fetching the matching row numbers and
        # sorting them keeps the rows in their original input order instead.
        # Features without any usable intensity have no entry in
        # `fraction_map` and are therefore not part of the result.
        kept_rows = input[fraction_map, on = .(feature, Fraction),
                          which = TRUE, nomatch = 0]
        input = input[sort(kept_rows)]
    }
    input
}


#' Get a name of fraction with the largest number of measurements or the largest
#' average intensity
#' @param input data.table with columns `Fraction`, `Run` and `Intensity`,
#' holding the rows of a single feature
#' @return label of the fraction that has most measurements or, when several
#' fractions share that maximum, the highest mean intensity among them
#' @keywords internal
.getCorrectFraction = function(input) {
    Intensity = Run = Fraction = NULL

    # Number of distinct runs with a usable intensity in each fraction.
    measurement_count = input[!is.na(Intensity) & Intensity > 0,
                              list(n_obs = data.table::uniqueN(Run)),
                              by = "Fraction"]
    which_max_measurements = which(measurement_count$n_obs == max(measurement_count$n_obs))
    if (length(which_max_measurements) == 1L) {
        return(unique(measurement_count$Fraction[which_max_measurements]))
    } else {
        # Several fractions share the maximum: compare mean intensities of
        # those fractions only.
        input = input[Fraction %in% measurement_count$Fraction[which_max_measurements]]
        average_abundance = input[!is.na(Intensity) & Intensity > 0,
                                  list(mean_abundance = mean(Intensity)),
                                  by = "Fraction"]
        which_max_abundance = which.max(average_abundance$mean_abundance)
        unique(average_abundance$Fraction[which_max_abundance])
    }
}


#' Resolve ties when multiple fractions share the maximum number of measurements
#' for a given feature. In the case of a tie, the fraction with the highest
#' mean intensity is selected.
#' @param input data.table with columns `feature`, `Fraction` and `Intensity`
#' @param max_fractions data.table of fractions that share the maximum number
#' of unique runs per feature, as produced by `.removeOverlappingFeatures`
#' @return data.table with columns `feature` and `Fraction`, containing one
#' selected fraction per feature
#' @keywords internal
.resolveFractionTies = function(input, max_fractions) {
    # Column names used via data.table non-standard evaluation; bound to NULL
    # so that R CMD check does not report undefined global variables.
    Intensity = Fraction = feature = n_ties = mean_abundance = NULL

    # Features for which more than one candidate fraction is listed.
    tie_features = max_fractions[, .(n_ties = .N), by = "feature"][n_ties > 1, feature]

    # Features with a single candidate fraction need no tie-breaking.
    best_simple = max_fractions[
        !(feature %in% tie_features),
        .(Fraction = Fraction[1]),
        by = "feature"
    ]
    if (length(tie_features) == 0) {
        return(best_simple)
    }

    # Mean usable intensity, restricted to the tied candidate fractions so
    # that a fraction with fewer measurements can never win on intensity.
    tied_fractions = max_fractions[feature %in% tie_features, .(feature, Fraction)]
    avg_abundance = input[
        tied_fractions, on = .(feature, Fraction), nomatch = 0
    ][!is.na(Intensity) & Intensity > 0,
      .(mean_abundance = mean(Intensity)),
      by = .(feature, Fraction)]
    # which.max() returns the first maximum, so equal means resolve to the
    # candidate that appears first.
    best_tied = avg_abundance[
        ,
        .(Fraction = Fraction[which.max(mean_abundance)]),
        by = "feature"
    ]

    rbind(best_simple, best_tied)
}

Expand Down
27 changes: 22 additions & 5 deletions inst/tinytest/test_fractions.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,34 @@ fractionated = data.table::data.table(
Run = 1:12,
Intensity = c(NA, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2)
)
### More observations win
picked_A = MSstatsConvert:::.getCorrectFraction(fractionated[feature == "A"])
expect_equal(picked_A, 2)
### Higher average intensity wins
picked_B = MSstatsConvert:::.getCorrectFraction(fractionated[feature == "B"])
expect_equal(picked_B, 2)
### Same selection when going through .removeOverlappingFeatures
kept_A = MSstatsConvert:::.removeOverlappingFeatures(fractionated[feature == "A"])
expect_equal(unique(kept_A$Fraction), 2)
### Higher average intensity wins on tie
kept_B = MSstatsConvert:::.removeOverlappingFeatures(fractionated[feature == "B"])
expect_equal(unique(kept_B$Fraction), 2)
### For full data
kept_full = MSstatsConvert:::.removeOverlappingFeatures(data.table::copy(fractionated))
expect_identical(kept_full, fractionated[Fraction == 2])
### Non-tied fraction with high mean intensity should not be selected over tied fractions
fractionated_third = data.table::data.table(
    feature = rep("A", 9),
    Fraction = rep(c(1, 2, 3), each = 3),
    Run = seq_len(9),
    Intensity = c(
        1, 1, 1,    # Fraction 1: 3 observations, mean 1 (shares the maximum count)
        2, 2, 2,    # Fraction 2: 3 observations, mean 2 (shares the maximum count)
        10, NA, NA  # Fraction 3: 1 observation, mean 10 (fewer observations)
    )
)
kept_third = MSstatsConvert:::.removeOverlappingFeatures(fractionated_third)
expect_equal(unique(kept_third$Fraction), 2)
fractionated_tmt = fractionated = data.table::data.table(
feature = rep(c("A", "B"), each = 6),
Fraction = rep(rep(c(1, 2), each = 3), times = 2),
Expand Down
Loading