From f4812d32e76d4cf4d6d3bcfc1250715ceaaf7a20 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Thu, 23 Apr 2026 23:21:55 -0400 Subject: [PATCH 1/4] add plotting code for anomaly scores and other metrics --- R/plot_quality_metrics.R | 100 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 R/plot_quality_metrics.R diff --git a/R/plot_quality_metrics.R b/R/plot_quality_metrics.R new file mode 100644 index 00000000..c9512408 --- /dev/null +++ b/R/plot_quality_metrics.R @@ -0,0 +1,100 @@ +#' Plot quality metrics from converter output +#' +#' Visualizes a quality metric column from the output of MSstats converter +#' functions against run order. When the \code{Run} column is a factor with +#' levels sorted by run order (as produced automatically when \code{runOrder} +#' is supplied to the converter), the x-axis follows that temporal ordering. +#' +#' @param input data.frame or data.table returned by an MSstatsConvert +#' converter function (e.g. \code{SpectronauttoMSstatsFormat}). +#' @param metric character, name of the column to plot on the y-axis. +#' Defaults to \code{"AnomalyScores"}. Must be a column of \code{input}. +#' @param address prefix for the filename used when saving the plot. +#' If \code{FALSE} (default), the plot is returned without saving. +#' When \code{isPlotly = FALSE} a PDF is saved; when \code{isPlotly = TRUE} +#' an HTML file is saved. +#' @param isPlotly logical. If \code{TRUE} returns an interactive +#' \code{\link[plotly]{plotly}} object (and saves as HTML when +#' \code{address} is provided). If \code{FALSE} (default) returns a +#' \code{\link[ggplot2]{ggplot}} object. +#' +#' @return A \code{\link[ggplot2]{ggplot}} object, or a \code{plotly} object +#' when \code{isPlotly = TRUE}. +#' +#' @details +#' Each point represents a single feature (precursor / fragment) measurement. +#' A boxplot layer summarises the distribution per run, and individual points +#' are overlaid with jitter to avoid over-plotting. +#' +#' The x-axis order is determined by the factor levels of the \code{Run} +#' column. When \code{runOrder} is passed to the converter the \code{Run} +#' column is automatically set to an ordered factor; otherwise the runs appear +#' in alphabetical order. +#' +#' @import ggplot2 +#' @importFrom plotly ggplotly +#' @importFrom htmltools save_html +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' result <- SpectronauttoMSstatsFormat( +#' input, calculateAnomalyScores = TRUE, +#' anomalyModelFeatures = c("FGShapeQualityScoreMS2", "EGDeltaRT"), +#' anomalyModelFeatureTemporal = c("mean_decrease", "dispersion_increase"), +#' runOrder = my_run_order +#' ) +#' MSstatsQualityMetricsPlot(result) +#' MSstatsQualityMetricsPlot(result, metric = "EGDeltaRT") +#' MSstatsQualityMetricsPlot(result, isPlotly = TRUE) +#' } +MSstatsQualityMetricsPlot <- function(input, metric = "AnomalyScores", + address = FALSE, isPlotly = FALSE) { + input_df <- as.data.frame(input) + + if (!metric %in% colnames(input_df)) { + stop(paste0( + "Column '", metric, "' not found in input. ", + "Available columns: ", paste(colnames(input_df), collapse = ", ") + )) + } + if (!"Run" %in% colnames(input_df)) { + stop("'Run' column not found in input.") + } + + if (!is.factor(input_df$Run)) { + input_df$Run <- factor(input_df$Run) + } + + p <- ggplot(input_df, aes(x = .data[["Run"]], y = .data[[metric]])) + + geom_boxplot(outlier.shape = NA, fill = "lightblue", + alpha = 0.6, width = 0.5) + + geom_jitter(width = 0.2, alpha = 0.3, size = 0.8, + color = "steelblue") + + scale_x_discrete(guide = guide_axis(angle = 45)) + + theme_bw() + + theme(axis.text.x = element_text(size = 8)) + + labs( + x = "Run (temporal order)", + y = metric, + title = paste("Quality Metric:", metric) + ) + + if (isPlotly) { + plotly_p <- ggplotly(p) + if (!identical(address, FALSE)) { + save_html(plotly_p, + file = paste0(address, "QualityMetricsPlot.html")) + } + return(plotly_p) + } + + if (!identical(address, FALSE)) { + pdf(paste0(address, "QualityMetricsPlot.pdf")) + print(p) + dev.off() + } + + p +} From 9841a0b7abd77d41487e18eaeb08d9517da3f1d3 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Thu, 23 Apr 2026 23:42:18 -0400 Subject: [PATCH 2/4] per protein, precursor level anomaly metric tracking --- R/plot_quality_metrics.R | 68 +++++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 21 deletions(-) diff --git a/R/plot_quality_metrics.R b/R/plot_quality_metrics.R index c9512408..ceb62745 100644 --- a/R/plot_quality_metrics.R +++ b/R/plot_quality_metrics.R @@ -1,14 +1,16 @@ #' Plot quality metrics from converter output #' #' Visualizes a quality metric column from the output of MSstats converter -#' functions against run order. When the \code{Run} column is a factor with -#' levels sorted by run order (as produced automatically when \code{runOrder} -#' is supplied to the converter), the x-axis follows that temporal ordering. +#' functions against run order for a single protein. Each +#' PeptideSequence + PrecursorCharge combination is drawn as a distinct +#' coloured line, mirroring the feature-level view in +#' \code{\link[MSstats]{dataProcessPlots}}. #' #' @param input data.frame or data.table returned by an MSstatsConvert #' converter function (e.g. \code{SpectronauttoMSstatsFormat}). #' @param metric character, name of the column to plot on the y-axis. #' Defaults to \code{"AnomalyScores"}. Must be a column of \code{input}. +#' @param which.Protein character, name of the protein to plot. Required. #' @param address prefix for the filename used when saving the plot. #' If \code{FALSE} (default), the plot is returned without saving. #' When \code{isPlotly = FALSE} a PDF is saved; when \code{isPlotly = TRUE} @@ -22,15 +24,15 @@ #' when \code{isPlotly = TRUE}. #' #' @details -#' Each point represents a single feature (precursor / fragment) measurement. -#' A boxplot layer summarises the distribution per run, and individual points -#' are overlaid with jitter to avoid over-plotting. -#' #' The x-axis order is determined by the factor levels of the \code{Run} #' column. When \code{runOrder} is passed to the converter the \code{Run} #' column is automatically set to an ordered factor; otherwise the runs appear #' in alphabetical order. #' +#' Metric values are averaged across fragment ions within each +#' PeptideSequence + PrecursorCharge + Run combination before plotting, so +#' each precursor contributes exactly one point per run. +#' #' @import ggplot2 #' @importFrom plotly ggplotly #' @importFrom htmltools save_html @@ -45,11 +47,12 @@ #' anomalyModelFeatureTemporal = c("mean_decrease", "dispersion_increase"), #' runOrder = my_run_order #' ) -#' MSstatsQualityMetricsPlot(result) -#' MSstatsQualityMetricsPlot(result, metric = "EGDeltaRT") -#' MSstatsQualityMetricsPlot(result, isPlotly = TRUE) +#' MSstatsQualityMetricsPlot(result, which.Protein = "ProteinA") +#' MSstatsQualityMetricsPlot(result, metric = "EGDeltaRT", +#' which.Protein = "ProteinA", isPlotly = TRUE) #' } MSstatsQualityMetricsPlot <- function(input, metric = "AnomalyScores", + which.Protein, address = FALSE, isPlotly = FALSE) { input_df <- as.data.frame(input) @@ -62,24 +65,47 @@ MSstatsQualityMetricsPlot <- function(input, metric = "AnomalyScores", if (!"Run" %in% colnames(input_df)) { stop("'Run' column not found in input.") } + if (!which.Protein %in% input_df$ProteinName) { + stop(paste0("Protein '", which.Protein, "' not found in input.")) + } + + input_df <- input_df[input_df$ProteinName == which.Protein, ] if (!is.factor(input_df$Run)) { input_df$Run <- factor(input_df$Run) } - p <- ggplot(input_df, aes(x = .data[["Run"]], y = .data[[metric]])) + - geom_boxplot(outlier.shape = NA, fill = "lightblue", - alpha = 0.6, width = 0.5) + - geom_jitter(width = 0.2, alpha = 0.3, size = 0.8, - color = "steelblue") + + input_df$Precursor <- paste(input_df$PeptideSequence, + input_df$PrecursorCharge, sep = "_") + + # Average across fragment ions so each precursor has one value per run + plot_df <- aggregate( + input_df[[metric]], + by = list(Run = input_df$Run, Precursor = input_df$Precursor), + FUN = mean, na.rm = TRUE + ) + colnames(plot_df)[colnames(plot_df) == "x"] <- metric + + # Preserve run factor ordering from the original data + plot_df$Run <- factor(plot_df$Run, levels = levels(input_df$Run)) + + p <- ggplot(plot_df, + aes(x = .data[["Run"]], + y = .data[[metric]], + color = .data[["Precursor"]], + group = .data[["Precursor"]])) + + geom_line(linewidth = 0.6) + + geom_point(size = 1.5) + scale_x_discrete(guide = guide_axis(angle = 45)) + theme_bw() + - theme(axis.text.x = element_text(size = 8)) + - labs( - x = "Run (temporal order)", - y = metric, - title = paste("Quality Metric:", metric) - ) + theme(axis.text.x = element_text(size = 8), + legend.title = element_text(size = 9), + legend.text = element_text(size = 7)) + + labs(x = "Run (temporal order)", + y = metric, + title = paste("Quality Metric:", metric), + subtitle = which.Protein, + color = "Peptide_Charge") if (isPlotly) { plotly_p <- ggplotly(p) From 064b9179698335703723fa2ff0fc68138a50533a Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Thu, 23 Apr 2026 23:57:19 -0400 Subject: [PATCH 3/4] add exports --- NAMESPACE | 1 + man/MSstatsQualityMetricsPlot.Rd | 67 ++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 man/MSstatsQualityMetricsPlot.Rd diff --git a/NAMESPACE b/NAMESPACE index 514752c5..02ffdf42 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,7 @@ export(MSstatsNormalize) export(MSstatsPrepareForDataProcess) export(MSstatsPrepareForGroupComparison) export(MSstatsPrepareForSummarization) +export(MSstatsQualityMetricsPlot) export(MSstatsSelectFeatures) export(MSstatsSummarizationOutput) export(MSstatsSummarizeSingleLinear) diff --git a/man/MSstatsQualityMetricsPlot.Rd b/man/MSstatsQualityMetricsPlot.Rd new file mode 100644 index 00000000..6ef07a56 --- /dev/null +++ b/man/MSstatsQualityMetricsPlot.Rd @@ -0,0 +1,67 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_quality_metrics.R +\name{MSstatsQualityMetricsPlot} +\alias{MSstatsQualityMetricsPlot} +\title{Plot quality metrics from converter output} +\usage{ +MSstatsQualityMetricsPlot( + input, + metric = "AnomalyScores", + which.Protein, + address = FALSE, + isPlotly = FALSE +) +} +\arguments{ +\item{input}{data.frame or data.table returned by an MSstatsConvert +converter function (e.g. \code{SpectronauttoMSstatsFormat}).} + +\item{metric}{character, name of the column to plot on the y-axis. +Defaults to \code{"AnomalyScores"}. Must be a column of \code{input}.} + +\item{which.Protein}{character, name of the protein to plot. Required.} + +\item{address}{prefix for the filename used when saving the plot. +If \code{FALSE} (default), the plot is returned without saving. +When \code{isPlotly = FALSE} a PDF is saved; when \code{isPlotly = TRUE} +an HTML file is saved.} + +\item{isPlotly}{logical. If \code{TRUE} returns an interactive +\code{\link[plotly]{plotly}} object (and saves as HTML when +\code{address} is provided). If \code{FALSE} (default) returns a +\code{\link[ggplot2]{ggplot}} object.} +} +\value{ +A \code{\link[ggplot2]{ggplot}} object, or a \code{plotly} object + when \code{isPlotly = TRUE}. +} +\description{ +Visualizes a quality metric column from the output of MSstats converter +functions against run order for a single protein. Each +PeptideSequence + PrecursorCharge combination is drawn as a distinct +coloured line, mirroring the feature-level view in +\code{\link[MSstats]{dataProcessPlots}}. +} +\details{ +The x-axis order is determined by the factor levels of the \code{Run} +column. When \code{runOrder} is passed to the converter the \code{Run} +column is automatically set to an ordered factor; otherwise the runs appear +in alphabetical order. + +Metric values are averaged across fragment ions within each +PeptideSequence + PrecursorCharge + Run combination before plotting, so +each precursor contributes exactly one point per run. +} +\examples{ +\dontrun{ +result <- SpectronauttoMSstatsFormat( + input, calculateAnomalyScores = TRUE, + anomalyModelFeatures = c("FGShapeQualityScoreMS2", "EGDeltaRT"), + anomalyModelFeatureTemporal = c("mean_decrease", "dispersion_increase"), + runOrder = my_run_order +) +MSstatsQualityMetricsPlot(result, which.Protein = "ProteinA") +MSstatsQualityMetricsPlot(result, metric = "EGDeltaRT", + which.Protein = "ProteinA", isPlotly = TRUE) +} +} From 4173c34da4c92ebdedc1c64159f8dd7f19c3d5ab Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Fri, 24 Apr 2026 09:31:11 -0400 Subject: [PATCH 4/4] address feedback --- DESCRIPTION | 2 +- R/plot_quality_metrics.R | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b3f0df1b..b8df4641 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,7 @@ Imports: survival, utils, Rcpp, - ggplot2, + ggplot2 (>= 3.4.0), ggrepel, gplots, plotly, diff --git a/R/plot_quality_metrics.R b/R/plot_quality_metrics.R index ceb62745..f64368f6 100644 --- a/R/plot_quality_metrics.R +++ b/R/plot_quality_metrics.R @@ -54,17 +54,26 @@ MSstatsQualityMetricsPlot <- function(input, metric = "AnomalyScores", which.Protein, address = FALSE, isPlotly = FALSE) { + if (missing(which.Protein)) { + stop("'which.Protein' is required. Please specify a protein name.") + } + input_df <- as.data.frame(input) + required_cols <- c("ProteinName", "PeptideSequence", "PrecursorCharge", "Run") + missing_cols <- setdiff(required_cols, colnames(input_df)) + if (length(missing_cols) > 0) { + stop(paste0( + "Required column(s) not found in input: ", + paste(missing_cols, collapse = ", ") + )) + } if (!metric %in% colnames(input_df)) { stop(paste0( "Column '", metric, "' not found in input. ", "Available columns: ", paste(colnames(input_df), collapse = ", ") )) } - if (!"Run" %in% colnames(input_df)) { - stop("'Run' column not found in input.") - } if (!which.Protein %in% input_df$ProteinName) { stop(paste0("Protein '", which.Protein, "' not found in input.")) }