From 5a26bdcd0aead215ec60509d8d274e8ffcc10576 Mon Sep 17 00:00:00 2001 From: Swaraj Patil Date: Fri, 20 Mar 2026 13:12:56 -0400 Subject: [PATCH 01/11] Transfer the TPR simulation function from MSstatsShiny and export it for external use --- DESCRIPTION | 1 + NAMESPACE | 5 ++ R/TPR_Power_Curve.R | 142 ++++++++++++++++++++++++++++++ man/ConvertGroupToNumericDose.Rd | 35 -------- man/DoseResponseFit.Rd | 82 ----------------- man/FutureExperimentSimulation.Rd | 113 ------------------------ man/PredictIC50Parallel.Rd | 73 --------------- man/VisualizeResponseProtein.Rd | 93 ------------------- man/plot_tpr_power_curve.Rd | 18 ++++ man/run_tpr_simulation.Rd | 20 +++++ 10 files changed, 186 insertions(+), 396 deletions(-) create mode 100644 R/TPR_Power_Curve.R delete mode 100644 man/ConvertGroupToNumericDose.Rd delete mode 100644 man/DoseResponseFit.Rd delete mode 100644 man/FutureExperimentSimulation.Rd delete mode 100644 man/PredictIC50Parallel.Rd delete mode 100644 man/VisualizeResponseProtein.Rd create mode 100644 man/plot_tpr_power_curve.Rd create mode 100644 man/run_tpr_simulation.Rd diff --git a/DESCRIPTION b/DESCRIPTION index d86ec6a..dd07d37 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -39,6 +39,7 @@ Suggests: boot, purrr, gridExtra, + plotly, knitr, rmarkdown, BiocStyle, diff --git a/NAMESPACE b/NAMESPACE index d7a1451..7ea17c0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,8 +4,10 @@ export(MSstatsPrepareDoseResponseFit) export(convertGroupToNumericDose) export(doseResponseFit) export(futureExperimentSimulation) +export(plot_tpr_power_curve) export(predictIC50) export(predictIC50Parallel) +export(run_tpr_simulation) export(visualizeResponseProtein) import(dplyr) importFrom(BiocParallel,bplapply) @@ -15,6 +17,7 @@ importFrom(dplyr,arrange) importFrom(dplyr,distinct) importFrom(dplyr,filter) importFrom(dplyr,group_by) +importFrom(dplyr,if_else) importFrom(dplyr,mutate) importFrom(dplyr,select) importFrom(dplyr,summarise) @@ -32,9 +35,11 @@ importFrom(ggplot2,ggplot) importFrom(ggplot2,labs) importFrom(ggplot2,scale_color_manual) importFrom(ggplot2,scale_fill_manual) +importFrom(ggplot2,scale_linetype_manual) importFrom(ggplot2,scale_x_continuous) importFrom(ggplot2,scale_y_continuous) importFrom(ggplot2,theme) +importFrom(ggplot2,theme_bw) importFrom(ggplot2,theme_classic) importFrom(ggplot2,theme_minimal) importFrom(ggplot2,ylim) diff --git a/R/TPR_Power_Curve.R b/R/TPR_Power_Curve.R new file mode 100644 index 0000000..d68621e --- /dev/null +++ b/R/TPR_Power_Curve.R @@ -0,0 +1,142 @@ +# Hardcoded concentration ladders: control (0) and highest dose always present, +# intermediate doses filled from a biologically motivated set. +CONC_MAP <- list( + "2" = c(0, 3000), + "3" = c(0, 1000, 3000), + "4" = c(0, 1, 1000, 3000), + "5" = c(0, 1, 100, 1000, 3000), + "6" = c(0, 1, 100, 300, 1000, 3000), + "7" = c(0, 1, 10, 100, 300, 1000, 3000), + "8" = c(0, 1, 10, 30, 100, 300, 1000, 3000), + "9" = c(0, 1, 3, 10, 30, 100, 300, 1000, 3000) +) + +#' Run TPR simulation across a grid of concentration counts and replicate counts +#' +#' Sweeps over combinations of dose counts (2-9) and replicate counts, +#' calling \code{futureExperimentSimulation()} for each combination. +#' +#' @param rep_range Integer vector of length 2, c(min, max) for replicate sweep. +#' @param n_proteins Integer. Number of proteins to simulate. Default: 1000. +#' +#' @return A data.frame with columns: Interaction, TPR, N_rep, NumConcs. +#' +#' @importFrom dplyr filter mutate select if_else +#' @export +run_tpr_simulation <- function(rep_range, n_proteins = 1000) { + k_grid <- as.integer(names(CONC_MAP)) + rep_grid <- seq(rep_range[1], rep_range[2]) + + run_one <- function(n_rep, k_conc, seed = 123) { + set.seed(seed + n_rep * 100 + k_conc) + concs_k <- CONC_MAP[[as.character(k_conc)]] + + sim_args <- list( + N_proteins = n_proteins, + N_rep = n_rep, + Concentrations = concs_k, + IC50_Prediction = FALSE + ) + + temp_res <- futureExperimentSimulation( + N_proteins = sim_args$N_proteins, + N_rep = sim_args$N_rep, + Concentrations = sim_args$Concentrations, + IC50_Prediction = sim_args$IC50_Prediction + ) + temp_res$Hit_Rates_Data |> + dplyr::filter(Category %in% c("TPR (Strong)", "TPR (Weak)")) |> + dplyr::mutate( + N_rep = n_rep, + NumConcs = length(concs_k), + Interaction = dplyr::if_else(Category == "TPR (Strong)", "Strong", "Weak") + ) |> + dplyr::select(Interaction, TPR = Percent, N_rep, NumConcs) + } + + grid_df <- expand.grid(N_rep = rep_grid, k_conc = k_grid) + results <- do.call(rbind, lapply(seq_len(nrow(grid_df)), function(i) { + tryCatch( + run_one(n_rep = grid_df$N_rep[i], k_conc = grid_df$k_conc[i]), + error = function(e) { + warning(paste("Simulation failed for N_rep=", grid_df$N_rep[i], + ", k_conc=", grid_df$k_conc[i], ":", conditionMessage(e))) + NULL + } + ) + })) + + if (is.null(results) || nrow(results) == 0) { + stop("All simulation runs failed. Please check your input parameters.") + } + return(results) +} + +#' Plot TPR power curves for dose response proteomics experiments +#' +#' Creates a two-panel interactive plot showing True Positive Rate +#' for Strong and Weak interactions across concentration and replicate counts. +#' +#' @param simulation_results A data.frame from \code{run_tpr_simulation()}. +#' +#' @return A plotly object with two panels (Strong and Weak interaction). +#' +#' @importFrom ggplot2 ggplot aes geom_line geom_point scale_x_continuous +#' scale_y_continuous scale_linetype_manual labs theme_bw theme element_text +#' @export +plot_tpr_power_curve <- function(simulation_results) { + k_grid <- sort(unique(simulation_results$NumConcs)) + rep_levels <- sort(unique(simulation_results$N_rep)) + + ltypes <- c("dotted", "dotdash", "dashed", "longdash", "solid") + ltype_values <- setNames(ltypes[seq_along(rep_levels)], as.character(rep_levels)) + + make_panel <- function(data, color, show_legend = FALSE) { + p <- ggplot2::ggplot(data, + ggplot2::aes(x = NumConcs, y = TPR, linetype = factor(N_rep))) + + ggplot2::geom_line(linewidth = 1.2, color = color) + + ggplot2::geom_point(size = 2, color = color) + + ggplot2::scale_x_continuous(breaks = k_grid) + + ggplot2::scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, 20)) + + ggplot2::scale_linetype_manual(values = ltype_values) + + ggplot2::labs( + x = "Number of concentrations", + y = "True Positive Rate (%)", + linetype = "Replicates" + ) + + ggplot2::theme_bw(base_size = 14) + + ggplot2::theme( + plot.title = ggplot2::element_text(face = "bold", hjust = 0.5) + ) + + if (!show_legend) { + p <- p + ggplot2::theme(legend.position = "none") + } + p + } + + results_strong <- simulation_results[simulation_results$Interaction == "Strong", ] + results_weak <- simulation_results[simulation_results$Interaction == "Weak", ] + + if (!requireNamespace("plotly", quietly = TRUE)) { + stop("Package 'plotly' is required for interactive plots. Please install it.") + } + + p_strong <- plotly::ggplotly(make_panel(results_strong, "#1b9e77", FALSE)) + p_weak <- plotly::ggplotly(make_panel(results_weak, "#d95f02", TRUE)) + + plotly::subplot(p_strong, p_weak, + nrows = 1, shareY = TRUE, titleX = TRUE, titleY = TRUE, + margin = 0.12 + ) |> plotly::layout( + margin = list(t = 60), + annotations = list( + list(text = "Strong interaction detection power", x = 0.22, y = 1.08, + xref = "paper", yref = "paper", showarrow = FALSE, + font = list(size = 12), xanchor = "center"), + list(text = "Weak interaction detection power", x = 0.78, y = 1.08, + xref = "paper", yref = "paper", showarrow = FALSE, + font = list(size = 12), xanchor = "center") + ) + ) +} \ No newline at end of file diff --git a/man/ConvertGroupToNumericDose.Rd b/man/ConvertGroupToNumericDose.Rd deleted file mode 100644 index 89e407d..0000000 --- a/man/ConvertGroupToNumericDose.Rd +++ /dev/null @@ -1,35 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/PrepareData.R -\name{convertGroupToNumericDose} -\alias{convertGroupToNumericDose} -\title{Convert MSstats GROUP labels to numeric dose in nM and extract drug name} -\usage{ -convertGroupToNumericDose(group_vector) -} -\arguments{ -\item{group_vector}{A character or factor vector with GROUP labels (e.g., "Dasatinib_003uM")} -} -\value{ -A data frame with two columns: dose_nM (numeric), and drug (character). -} -\description{ -Convert MSstats GROUP labels to numeric dose in nM and extract drug name -} -\examples{ -# Example 1: Basic conversion with mixed units -groups <- c("DMSO", "Dasatinib_001uM", "Dasatinib_010uM", - "Dasatinib_100nM", "Dasatinib_1000nM") -dose_info <- convertGroupToNumericDose(groups) -print(dose_info) - -# Example 2: Handle multiple drugs -multi_drug_groups <- c("DMSO", - "Dasatinib_001uM", "Dasatinib_010uM", - "Imatinib_001uM", "Imatinib_010uM") -multi_dose_info <- convertGroupToNumericDose(multi_drug_groups) -print(multi_dose_info) - -# Show unique drugs found -print(unique(multi_dose_info$drug)) - -} diff --git a/man/DoseResponseFit.Rd b/man/DoseResponseFit.Rd deleted file mode 100644 index 07dd21f..0000000 --- a/man/DoseResponseFit.Rd +++ /dev/null @@ -1,82 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Fit_Isotonic_Regression.R -\name{doseResponseFit} -\alias{doseResponseFit} -\title{Drug-protein interaction detection tested by F-test (fitted curve vs average response)} -\usage{ -doseResponseFit( - data, - weights = NULL, - increasing = FALSE, - transform_dose = TRUE, - ratio_response = FALSE -) -} -\arguments{ -\item{data}{Protein-level data, formatted with MSstatsPreparedoseResponseFit().} - -\item{weights}{Optional numeric vector of weights. Defaults to equal weights.} - -\item{increasing}{Logical. If TRUE, fits a non-decreasing model. If FALSE, fits non-increasing.} - -\item{transform_dose}{Logical. If TRUE, applies log10(dose + 1) transformation. Default is TRUE.} - -\item{ratio_response}{Logical. If TRUE, converts log2 abundance to ratios relative to DMSO. Default is FALSE.} -} -\value{ -A data frame with protein-wise F-test results and BH-adjusted p-values. -} -\description{ -Fits an isotonic regression model to protein abundance data. -Performs an F-test to assess the significance of the dose-response curve and applies FDR correction. -} -\examples{ -# Load example data -data_path <- system.file("extdata", "DIA_MSstats_Normalized.RDS", - package = "MSstatsResponse") -dia_data <- readRDS(data_path) - -# Convert GROUP to dose -dose_info <- convertGroupToNumericDose(dia_data$ProteinLevelData$GROUP) -dia_data$ProteinLevelData$dose <- dose_info$dose_nM * 1e-9 -dia_data$ProteinLevelData$drug <- dose_info$drug - -# Prepare data for analysis -prepared_data <- MSstatsPrepareDoseResponseFit( - dia_data$ProteinLevelData, - dose_column = "dose", - drug_column = "drug", - protein_column = "Protein", - log_abundance_column = "LogIntensities" -) - -# Subset for quick example -example_data <- prepared_data[prepared_data$protein \%in\% - unique(prepared_data$protein)[1:5], ] - -# Example 1: Basic interaction detection on log2 scale -interaction_results <- doseResponseFit( - data = example_data, - increasing = FALSE, - transform_dose = TRUE, - ratio_response = FALSE -) - -# View results -print(interaction_results) - -# Check significant interactions -significant <- interaction_results[interaction_results$adjust_pval < 0.05, ] -print(paste("Found", nrow(significant), "significant interactions")) - -\dontrun{ -# Example 2: Full dataset analysis -full_results <- doseResponseFit( - data = prepared_data, - increasing = FALSE, - transform_dose = TRUE, - ratio_response = FALSE -) -} - -} diff --git a/man/FutureExperimentSimulation.Rd b/man/FutureExperimentSimulation.Rd deleted file mode 100644 index a07f49c..0000000 --- a/man/FutureExperimentSimulation.Rd +++ /dev/null @@ -1,113 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ExperimentalDesignSimulation.R -\name{futureExperimentSimulation} -\alias{futureExperimentSimulation} -\title{Test future experimental design using simulated data with user-defined or default templates} -\usage{ -futureExperimentSimulation( - N_proteins = 300, - N_rep = 3, - N_Control_Rep = NULL, - Concentrations = c(0, 1, 3, 10, 30, 100, 300, 1000, 3000), - IC50_Prediction = FALSE, - data = NULL, - strong_proteins = NULL, - weak_proteins = NULL, - no_interaction_proteins = NULL, - drug_name = NULL -) -} -\arguments{ -\item{N_proteins}{Number of proteins to simulate. Default = 300.} - -\item{N_rep}{Number of replicates for each drug concentration. Default = 3.} - -\item{N_Control_Rep}{Number of control replicates. If NULL, uses N_rep.} - -\item{Concentrations}{Numeric vector of drug concentrations (in nM scale). -Default = c(0, 1, 3, 10, 30, 100, 300, 1000, 3000).} - -\item{IC50_Prediction}{Logical. If TRUE, perform IC50 prediction. Default = FALSE.} - -\item{data}{Optional. User's prepared dose-response data (e.g., from MSstatsPrepareDoseResponseFit). -If provided, will extract templates from this data instead of using defaults.} - -\item{strong_proteins}{Character vector of protein IDs to use as strong interaction templates. -Only used if data is provided.} - -\item{weak_proteins}{Character vector of protein IDs to use as weak interaction templates. -Only used if data is provided.} - -\item{no_interaction_proteins}{Character vector of protein IDs to use as no interaction templates. -Only used if data is provided.} - -\item{drug_name}{Character. Name of drug to extract templates for. Default = first non-DMSO drug in data.} -} -\value{ -A list containing simulated data, MSstats formatted data, dose-response -fit results, hit rate plots, and optionally IC50 predictions. -} -\description{ -Test future experimental design using simulated data with user-defined or default templates -} -\examples{ -# Example 1: Quick simulation with default templates (small scale for speed) -sim_results <- futureExperimentSimulation( - N_proteins = 50, # Small number for quick example - N_rep = 2, - N_Control_Rep = 3, - Concentrations = c(0, 10, 100, 1000), # Fewer doses for speed - IC50_Prediction = FALSE -) - -# View hit rates -print(sim_results$Hit_Rates_Data) - -# Check simulation results -print(paste("Simulated", nrow(sim_results$Simulated_Data), "data points")) - -\dontrun{ -# Example 2: Full simulation with standard parameters -full_sim <- futureExperimentSimulation( - N_proteins = 3000, - N_rep = 3, - N_Control_Rep = 6, - Concentrations = c(0, 1, 3, 10, 30, 100, 300, 1000, 3000), - IC50_Prediction = TRUE -) - -# Display power analysis plot -print(full_sim$Hit_Rates_Plot) - -# Example 3: Using custom templates from your own data -# Load and prepare your data -data_path <- system.file("extdata", "DIA_MSstats_Normalized.RDS", - package = "MSstatsResponse") -dia_data <- readRDS(data_path) - -dose_info <- convertGroupToNumericDose(dia_data$ProteinLevelData$GROUP) -dia_data$ProteinLevelData$dose <- dose_info$dose_nM * 1e-9 -dia_data$ProteinLevelData$drug <- dose_info$drug - -prepared_data <- MSstatsPrepareDoseResponseFit( - dia_data$ProteinLevelData, - dose_column = "dose", - drug_column = "drug", - protein_column = "Protein", - log_abundance_column = "LogIntensities" -) - -# Run simulation with custom templates -custom_sim <- futureExperimentSimulation( - N_proteins = 1000, - N_rep = 3, - data = prepared_data, - strong_proteins = c("PROTEIN_A"), - weak_proteins = c("PROTEIN_B"), - no_interaction_proteins = c("PROTEIN_C"), - drug_name = "Drug1", - Concentrations = c(0, 1, 10, 100, 1000, 3000) -) -} - -} diff --git a/man/PredictIC50Parallel.Rd b/man/PredictIC50Parallel.Rd deleted file mode 100644 index 4f25b76..0000000 --- a/man/PredictIC50Parallel.Rd +++ /dev/null @@ -1,73 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/IC50_prediction_datatable.R -\name{predictIC50Parallel} -\alias{predictIC50Parallel} -\title{Parallel version of predictIC50 function} -\usage{ -predictIC50Parallel( - data, - n_samples = 1000, - alpha = 0.1, - increasing = FALSE, - transform_dose = TRUE, - ratio_response = TRUE, - bootstrap = TRUE, - numberOfCores = 2 -) -} -\arguments{ -\item{data}{A data frame with columns: protein, drug, dose, response.} - -\item{n_samples}{Number of bootstrap samples. Default = 1000.} - -\item{alpha}{Confidence level. Default = 0.10.} - -\item{increasing}{Logical. If TRUE, fit non-decreasing trend. Default = FALSE.} - -\item{transform_dose}{Logical. If TRUE, applies log10(dose + 1) transformation. Default = TRUE.} - -\item{ratio_response}{Logical. If TRUE, use ratio response; else use log2 scale. Default = TRUE.} - -\item{bootstrap}{Logical. If TRUE, compute bootstrap CIs. Default = TRUE.} - -\item{numberOfCores}{Number of cores for parallel processing. Default = 2.} -} -\value{ -A data frame with columns: protein, drug, IC50, lower CI, upper CI. -} -\description{ -Runs predictIC50 on the entire dataset in parallel across proteins. -} -\examples{ -# Load example data -data_path <- system.file("extdata", "DIA_MSstats_Normalized.RDS", - package = "MSstatsResponse") -dia_data <- readRDS(data_path) - -# Convert GROUP to dose -dose_info <- convertGroupToNumericDose(dia_data$ProteinLevelData$GROUP) -dia_data$ProteinLevelData$dose <- dose_info$dose_nM * 1e-9 -dia_data$ProteinLevelData$drug <- dose_info$drug - -# Prepare data for analysis -prepared_data <- MSstatsPrepareDoseResponseFit( - dia_data$ProteinLevelData, - dose_column = "dose", - drug_column = "drug", - protein_column = "Protein", - log_abundance_column = "LogIntensities" -) - -# Subset for quick example -example_data <- prepared_data[prepared_data$protein \%in\% - unique(prepared_data$protein)[1:5], ] - -# Example 1: Quick parallel IC50 without bootstrap (2 cores) -ic50_quick_parallel <- predictIC50Parallel( - data = example_data, - bootstrap = FALSE, - numberOfCores = 2 -) -print(ic50_quick_parallel) - -} diff --git a/man/VisualizeResponseProtein.Rd b/man/VisualizeResponseProtein.Rd deleted file mode 100644 index 7d89203..0000000 --- a/man/VisualizeResponseProtein.Rd +++ /dev/null @@ -1,93 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Visualize_Isotonic_Fit.R -\name{visualizeResponseProtein} -\alias{visualizeResponseProtein} -\title{Plot isotonic regression fit with optional IC50 for a single protein and drug} -\usage{ -visualizeResponseProtein( - data, - protein_name, - drug_name, - ratio_response = TRUE, - transform_dose = TRUE, - show_ic50 = TRUE, - target_response = 0.5, - add_ci = FALSE, - n_samples = 1000, - alpha = 0.1, - increasing = FALSE, - y_lab = "Ratio Response" -) -} -\arguments{ -\item{data}{Protein-level dataset (e.g., output of MSstatsPrepareDoseResponseFit).} - -\item{protein_name}{Character. Protein name to plot.} - -\item{drug_name}{Character. Drug name to plot.} - -\item{ratio_response}{Logical. If TRUE, compute IC50 on ratio scale; if FALSE, use log2 intensities.} - -\item{transform_dose}{Logical. If TRUE, applies log10(dose + 1). Default is TRUE.} - -\item{show_ic50}{Logical. If TRUE, adds vertical line and annotation for IC50.} - -\item{add_ci}{Logical. Include IC50 95\% confidence interval bands if TRUE. Default is FALSE.} - -\item{n_samples}{Number of bootstrap samples if including confidence intervals. Default is 1000.} - -\item{alpha}{Alpha level for confidence intervals. Default is 0.05.} - -\item{increasing}{Logical. If TRUE, fits a non-decreasing model. If FALSE, fits non-increasing.} - -\item{y_lab}{Character. Label for the y-axis. Default is "Ratio Response".} -} -\value{ -A ggplot object. -} -\description{ -Plot isotonic regression fit with optional IC50 for a single protein and drug -} -\examples{ -# Load example data -data_path <- system.file("extdata", "DIA_MSstats_Normalized.RDS", - package = "MSstatsResponse") -dia_data <- readRDS(data_path) - -# Convert GROUP to dose -dose_info <- convertGroupToNumericDose(dia_data$ProteinLevelData$GROUP) -dia_data$ProteinLevelData$dose <- dose_info$dose_nM * 1e-9 -dia_data$ProteinLevelData$drug <- dose_info$drug - -# Prepare data for analysis -prepared_data <- MSstatsPrepareDoseResponseFit( - dia_data$ProteinLevelData, - dose_column = "dose", - drug_column = "drug", - protein_column = "Protein", - log_abundance_column = "LogIntensities" -) - -# Example 1: Basic dose-response visualization -plot1 <- visualizeResponseProtein( - data = prepared_data, - protein_name = "PROTEIN_A", - drug_name = "Drug1", - ratio_response = TRUE, - show_ic50 = FALSE, - add_ci = FALSE -) -print(plot1) - -# Example 2: Add IC50 annotation -plot2 <- visualizeResponseProtein( - data = prepared_data, - protein_name = "PROTEIN_A", - drug_name = "Drug1", - ratio_response = TRUE, - show_ic50 = TRUE, - add_ci = FALSE -) -print(plot2) - -} diff --git a/man/plot_tpr_power_curve.Rd b/man/plot_tpr_power_curve.Rd new file mode 100644 index 0000000..5df86f9 --- /dev/null +++ b/man/plot_tpr_power_curve.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/TPR_Power_Curve.R +\name{plot_tpr_power_curve} +\alias{plot_tpr_power_curve} +\title{Plot TPR power curves for dose response proteomics experiments} +\usage{ +plot_tpr_power_curve(simulation_results) +} +\arguments{ +\item{simulation_results}{A data.frame from \code{run_tpr_simulation()}.} +} +\value{ +A plotly object with two panels (Strong and Weak interaction). +} +\description{ +Creates a two-panel interactive plot showing True Positive Rate +for Strong and Weak interactions across concentration and replicate counts. +} diff --git a/man/run_tpr_simulation.Rd b/man/run_tpr_simulation.Rd new file mode 100644 index 0000000..e221c4c --- /dev/null +++ b/man/run_tpr_simulation.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/TPR_Power_Curve.R +\name{run_tpr_simulation} +\alias{run_tpr_simulation} +\title{Run TPR simulation across a grid of concentration counts and replicate counts} +\usage{ +run_tpr_simulation(rep_range, n_proteins = 1000) +} +\arguments{ +\item{rep_range}{Integer vector of length 2, c(min, max) for replicate sweep.} + +\item{n_proteins}{Integer. Number of proteins to simulate. Default: 1000.} +} +\value{ +A data.frame with columns: Interaction, TPR, N_rep, NumConcs. +} +\description{ +Sweeps over combinations of dose counts (2-9) and replicate counts, +calling \code{futureExperimentSimulation()} for each combination. +} From 8b7eaf9d67558523df145c5ed47f7f56785770ed Mon Sep 17 00:00:00 2001 From: Swaraj Patil Date: Fri, 20 Mar 2026 17:27:04 -0400 Subject: [PATCH 02/11] Resolve TPR function nitpicks --- R/TPR_Power_Curve.R | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/R/TPR_Power_Curve.R b/R/TPR_Power_Curve.R index d68621e..ddd801a 100644 --- a/R/TPR_Power_Curve.R +++ b/R/TPR_Power_Curve.R @@ -24,6 +24,12 @@ CONC_MAP <- list( #' @importFrom dplyr filter mutate select if_else #' @export run_tpr_simulation <- function(rep_range, n_proteins = 1000) { + if (!is.numeric(rep_range) || length(rep_range) != 2 || rep_range[1] > rep_range[2]) { + stop("rep_range must be a numeric vector of length 2 with c(min, max) where min <= max.") + } + if (rep_range[2] > 5) { + stop("Maximum replicates is 5 (limited by available line styles for plotting).") + } k_grid <- as.integer(names(CONC_MAP)) rep_grid <- seq(rep_range[1], rep_range[2]) @@ -89,6 +95,9 @@ plot_tpr_power_curve <- function(simulation_results) { rep_levels <- sort(unique(simulation_results$N_rep)) ltypes <- c("dotted", "dotdash", "dashed", "longdash", "solid") + if (length(rep_levels) > length(ltypes)) { + stop("Too many replicate levels (max 5). Reduce the replicate range.") + } ltype_values <- setNames(ltypes[seq_along(rep_levels)], as.character(rep_levels)) make_panel <- function(data, color, show_legend = FALSE) { From 5d8718813f74f07b2338d68890c07bcc84ef24cc Mon Sep 17 00:00:00 2001 From: Swaraj Patil Date: Thu, 26 Mar 2026 10:52:39 -0400 Subject: [PATCH 03/11] Rollback the documentation files from dev branch --- man/ConvertGroupToNumericDose.Rd | 35 +++++++++ man/DoseResponseFit.Rd | 82 ++++++++++++++++++++++ man/FutureExperimentSimulation.Rd | 113 ++++++++++++++++++++++++++++++ man/PredictIC50Parallel.Rd | 73 +++++++++++++++++++ man/VisualizeResponseProtein.Rd | 93 ++++++++++++++++++++++++ 5 files changed, 396 insertions(+) create mode 100644 man/ConvertGroupToNumericDose.Rd create mode 100644 man/DoseResponseFit.Rd create mode 100644 man/FutureExperimentSimulation.Rd create mode 100644 man/PredictIC50Parallel.Rd create mode 100644 man/VisualizeResponseProtein.Rd diff --git a/man/ConvertGroupToNumericDose.Rd b/man/ConvertGroupToNumericDose.Rd new file mode 100644 index 0000000..89e407d --- /dev/null +++ b/man/ConvertGroupToNumericDose.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PrepareData.R +\name{convertGroupToNumericDose} +\alias{convertGroupToNumericDose} +\title{Convert MSstats GROUP labels to numeric dose in nM and extract drug name} +\usage{ +convertGroupToNumericDose(group_vector) +} +\arguments{ +\item{group_vector}{A character or factor vector with GROUP labels (e.g., "Dasatinib_003uM")} +} +\value{ +A data frame with two columns: dose_nM (numeric), and drug (character). +} +\description{ +Convert MSstats GROUP labels to numeric dose in nM and extract drug name +} +\examples{ +# Example 1: Basic conversion with mixed units +groups <- c("DMSO", "Dasatinib_001uM", "Dasatinib_010uM", + "Dasatinib_100nM", "Dasatinib_1000nM") +dose_info <- convertGroupToNumericDose(groups) +print(dose_info) + +# Example 2: Handle multiple drugs +multi_drug_groups <- c("DMSO", + "Dasatinib_001uM", "Dasatinib_010uM", + "Imatinib_001uM", "Imatinib_010uM") +multi_dose_info <- convertGroupToNumericDose(multi_drug_groups) +print(multi_dose_info) + +# Show unique drugs found +print(unique(multi_dose_info$drug)) + +} diff --git a/man/DoseResponseFit.Rd b/man/DoseResponseFit.Rd new file mode 100644 index 0000000..07dd21f --- /dev/null +++ b/man/DoseResponseFit.Rd @@ -0,0 +1,82 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Fit_Isotonic_Regression.R +\name{doseResponseFit} +\alias{doseResponseFit} +\title{Drug-protein interaction detection tested by F-test (fitted curve vs average response)} +\usage{ +doseResponseFit( + data, + weights = NULL, + increasing = FALSE, + transform_dose = TRUE, + ratio_response = FALSE +) +} +\arguments{ +\item{data}{Protein-level data, formatted with MSstatsPreparedoseResponseFit().} + +\item{weights}{Optional numeric vector of weights. Defaults to equal weights.} + +\item{increasing}{Logical. If TRUE, fits a non-decreasing model. If FALSE, fits non-increasing.} + +\item{transform_dose}{Logical. If TRUE, applies log10(dose + 1) transformation. Default is TRUE.} + +\item{ratio_response}{Logical. If TRUE, converts log2 abundance to ratios relative to DMSO. Default is FALSE.} +} +\value{ +A data frame with protein-wise F-test results and BH-adjusted p-values. +} +\description{ +Fits an isotonic regression model to protein abundance data. +Performs an F-test to assess the significance of the dose-response curve and applies FDR correction. +} +\examples{ +# Load example data +data_path <- system.file("extdata", "DIA_MSstats_Normalized.RDS", + package = "MSstatsResponse") +dia_data <- readRDS(data_path) + +# Convert GROUP to dose +dose_info <- convertGroupToNumericDose(dia_data$ProteinLevelData$GROUP) +dia_data$ProteinLevelData$dose <- dose_info$dose_nM * 1e-9 +dia_data$ProteinLevelData$drug <- dose_info$drug + +# Prepare data for analysis +prepared_data <- MSstatsPrepareDoseResponseFit( + dia_data$ProteinLevelData, + dose_column = "dose", + drug_column = "drug", + protein_column = "Protein", + log_abundance_column = "LogIntensities" +) + +# Subset for quick example +example_data <- prepared_data[prepared_data$protein \%in\% + unique(prepared_data$protein)[1:5], ] + +# Example 1: Basic interaction detection on log2 scale +interaction_results <- doseResponseFit( + data = example_data, + increasing = FALSE, + transform_dose = TRUE, + ratio_response = FALSE +) + +# View results +print(interaction_results) + +# Check significant interactions +significant <- interaction_results[interaction_results$adjust_pval < 0.05, ] +print(paste("Found", nrow(significant), "significant interactions")) + +\dontrun{ +# Example 2: Full dataset analysis +full_results <- doseResponseFit( + data = prepared_data, + increasing = FALSE, + transform_dose = TRUE, + ratio_response = FALSE +) +} + +} diff --git a/man/FutureExperimentSimulation.Rd b/man/FutureExperimentSimulation.Rd new file mode 100644 index 0000000..a07f49c --- /dev/null +++ b/man/FutureExperimentSimulation.Rd @@ -0,0 +1,113 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ExperimentalDesignSimulation.R +\name{futureExperimentSimulation} +\alias{futureExperimentSimulation} +\title{Test future experimental design using simulated data with user-defined or default templates} +\usage{ +futureExperimentSimulation( + N_proteins = 300, + N_rep = 3, + N_Control_Rep = NULL, + Concentrations = c(0, 1, 3, 10, 30, 100, 300, 1000, 3000), + IC50_Prediction = FALSE, + data = NULL, + strong_proteins = NULL, + weak_proteins = NULL, + no_interaction_proteins = NULL, + drug_name = NULL +) +} +\arguments{ +\item{N_proteins}{Number of proteins to simulate. Default = 300.} + +\item{N_rep}{Number of replicates for each drug concentration. Default = 3.} + +\item{N_Control_Rep}{Number of control replicates. If NULL, uses N_rep.} + +\item{Concentrations}{Numeric vector of drug concentrations (in nM scale). +Default = c(0, 1, 3, 10, 30, 100, 300, 1000, 3000).} + +\item{IC50_Prediction}{Logical. If TRUE, perform IC50 prediction. Default = FALSE.} + +\item{data}{Optional. User's prepared dose-response data (e.g., from MSstatsPrepareDoseResponseFit). +If provided, will extract templates from this data instead of using defaults.} + +\item{strong_proteins}{Character vector of protein IDs to use as strong interaction templates. +Only used if data is provided.} + +\item{weak_proteins}{Character vector of protein IDs to use as weak interaction templates. +Only used if data is provided.} + +\item{no_interaction_proteins}{Character vector of protein IDs to use as no interaction templates. +Only used if data is provided.} + +\item{drug_name}{Character. Name of drug to extract templates for. Default = first non-DMSO drug in data.} +} +\value{ +A list containing simulated data, MSstats formatted data, dose-response +fit results, hit rate plots, and optionally IC50 predictions. +} +\description{ +Test future experimental design using simulated data with user-defined or default templates +} +\examples{ +# Example 1: Quick simulation with default templates (small scale for speed) +sim_results <- futureExperimentSimulation( + N_proteins = 50, # Small number for quick example + N_rep = 2, + N_Control_Rep = 3, + Concentrations = c(0, 10, 100, 1000), # Fewer doses for speed + IC50_Prediction = FALSE +) + +# View hit rates +print(sim_results$Hit_Rates_Data) + +# Check simulation results +print(paste("Simulated", nrow(sim_results$Simulated_Data), "data points")) + +\dontrun{ +# Example 2: Full simulation with standard parameters +full_sim <- futureExperimentSimulation( + N_proteins = 3000, + N_rep = 3, + N_Control_Rep = 6, + Concentrations = c(0, 1, 3, 10, 30, 100, 300, 1000, 3000), + IC50_Prediction = TRUE +) + +# Display power analysis plot +print(full_sim$Hit_Rates_Plot) + +# Example 3: Using custom templates from your own data +# Load and prepare your data +data_path <- system.file("extdata", "DIA_MSstats_Normalized.RDS", + package = "MSstatsResponse") +dia_data <- readRDS(data_path) + +dose_info <- convertGroupToNumericDose(dia_data$ProteinLevelData$GROUP) +dia_data$ProteinLevelData$dose <- dose_info$dose_nM * 1e-9 +dia_data$ProteinLevelData$drug <- dose_info$drug + +prepared_data <- MSstatsPrepareDoseResponseFit( + dia_data$ProteinLevelData, + dose_column = "dose", + drug_column = "drug", + protein_column = "Protein", + log_abundance_column = "LogIntensities" +) + +# Run simulation with custom templates +custom_sim <- futureExperimentSimulation( + N_proteins = 1000, + N_rep = 3, + data = prepared_data, + strong_proteins = c("PROTEIN_A"), + weak_proteins = c("PROTEIN_B"), + no_interaction_proteins = c("PROTEIN_C"), + drug_name = "Drug1", + Concentrations = c(0, 1, 10, 100, 1000, 3000) +) +} + +} diff --git a/man/PredictIC50Parallel.Rd b/man/PredictIC50Parallel.Rd new file mode 100644 index 0000000..4f25b76 --- /dev/null +++ b/man/PredictIC50Parallel.Rd @@ -0,0 +1,73 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/IC50_prediction_datatable.R +\name{predictIC50Parallel} +\alias{predictIC50Parallel} +\title{Parallel version of predictIC50 function} +\usage{ +predictIC50Parallel( + data, + n_samples = 1000, + alpha = 0.1, + increasing = FALSE, + transform_dose = TRUE, + ratio_response = TRUE, + bootstrap = TRUE, + numberOfCores = 2 +) +} +\arguments{ +\item{data}{A data frame with columns: protein, drug, dose, response.} + +\item{n_samples}{Number of bootstrap samples. Default = 1000.} + +\item{alpha}{Confidence level. Default = 0.10.} + +\item{increasing}{Logical. If TRUE, fit non-decreasing trend. Default = FALSE.} + +\item{transform_dose}{Logical. If TRUE, applies log10(dose + 1) transformation. Default = TRUE.} + +\item{ratio_response}{Logical. If TRUE, use ratio response; else use log2 scale. Default = TRUE.} + +\item{bootstrap}{Logical. If TRUE, compute bootstrap CIs. Default = TRUE.} + +\item{numberOfCores}{Number of cores for parallel processing. Default = 2.} +} +\value{ +A data frame with columns: protein, drug, IC50, lower CI, upper CI. +} +\description{ +Runs predictIC50 on the entire dataset in parallel across proteins. +} +\examples{ +# Load example data +data_path <- system.file("extdata", "DIA_MSstats_Normalized.RDS", + package = "MSstatsResponse") +dia_data <- readRDS(data_path) + +# Convert GROUP to dose +dose_info <- convertGroupToNumericDose(dia_data$ProteinLevelData$GROUP) +dia_data$ProteinLevelData$dose <- dose_info$dose_nM * 1e-9 +dia_data$ProteinLevelData$drug <- dose_info$drug + +# Prepare data for analysis +prepared_data <- MSstatsPrepareDoseResponseFit( + dia_data$ProteinLevelData, + dose_column = "dose", + drug_column = "drug", + protein_column = "Protein", + log_abundance_column = "LogIntensities" +) + +# Subset for quick example +example_data <- prepared_data[prepared_data$protein \%in\% + unique(prepared_data$protein)[1:5], ] + +# Example 1: Quick parallel IC50 without bootstrap (2 cores) +ic50_quick_parallel <- predictIC50Parallel( + data = example_data, + bootstrap = FALSE, + numberOfCores = 2 +) +print(ic50_quick_parallel) + +} diff --git a/man/VisualizeResponseProtein.Rd b/man/VisualizeResponseProtein.Rd new file mode 100644 index 0000000..7d89203 --- /dev/null +++ b/man/VisualizeResponseProtein.Rd @@ -0,0 +1,93 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Visualize_Isotonic_Fit.R +\name{visualizeResponseProtein} +\alias{visualizeResponseProtein} +\title{Plot isotonic regression fit with optional IC50 for a single protein and drug} +\usage{ +visualizeResponseProtein( + data, + protein_name, + drug_name, + ratio_response = TRUE, + transform_dose = TRUE, + show_ic50 = TRUE, + target_response = 0.5, + add_ci = FALSE, + n_samples = 1000, + alpha = 0.1, + increasing = FALSE, + y_lab = "Ratio Response" +) +} +\arguments{ +\item{data}{Protein-level dataset (e.g., output of MSstatsPrepareDoseResponseFit).} + +\item{protein_name}{Character. Protein name to plot.} + +\item{drug_name}{Character. Drug name to plot.} + +\item{ratio_response}{Logical. If TRUE, compute IC50 on ratio scale; if FALSE, use log2 intensities.} + +\item{transform_dose}{Logical. If TRUE, applies log10(dose + 1). Default is TRUE.} + +\item{show_ic50}{Logical. If TRUE, adds vertical line and annotation for IC50.} + +\item{add_ci}{Logical. Include IC50 95\% confidence interval bands if TRUE. Default is FALSE.} + +\item{n_samples}{Number of bootstrap samples if including confidence intervals. Default is 1000.} + +\item{alpha}{Alpha level for confidence intervals. Default is 0.05.} + +\item{increasing}{Logical. If TRUE, fits a non-decreasing model. If FALSE, fits non-increasing.} + +\item{y_lab}{Character. Label for the y-axis. Default is "Ratio Response".} +} +\value{ +A ggplot object. +} +\description{ +Plot isotonic regression fit with optional IC50 for a single protein and drug +} +\examples{ +# Load example data +data_path <- system.file("extdata", "DIA_MSstats_Normalized.RDS", + package = "MSstatsResponse") +dia_data <- readRDS(data_path) + +# Convert GROUP to dose +dose_info <- convertGroupToNumericDose(dia_data$ProteinLevelData$GROUP) +dia_data$ProteinLevelData$dose <- dose_info$dose_nM * 1e-9 +dia_data$ProteinLevelData$drug <- dose_info$drug + +# Prepare data for analysis +prepared_data <- MSstatsPrepareDoseResponseFit( + dia_data$ProteinLevelData, + dose_column = "dose", + drug_column = "drug", + protein_column = "Protein", + log_abundance_column = "LogIntensities" +) + +# Example 1: Basic dose-response visualization +plot1 <- visualizeResponseProtein( + data = prepared_data, + protein_name = "PROTEIN_A", + drug_name = "Drug1", + ratio_response = TRUE, + show_ic50 = FALSE, + add_ci = FALSE +) +print(plot1) + +# Example 2: Add IC50 annotation +plot2 <- visualizeResponseProtein( + data = prepared_data, + protein_name = "PROTEIN_A", + drug_name = "Drug1", + ratio_response = TRUE, + show_ic50 = TRUE, + add_ci = FALSE +) +print(plot2) + +} From b1ed032643ae575270e115658054af60c7aa02a9 Mon Sep 17 00:00:00 2001 From: Swaraj Patil Date: Mon, 30 Mar 2026 17:23:36 -0400 Subject: [PATCH 04/11] Sample size calculation task: - Replace with actual dataset and protein ID - Replace hard-coded concentrations with a mechanism to pick based on farthest distance from the log(median) - Replace the linestyles with colot palettes for plot representation and removed maximum replicate rage - Provide examples for TPR simulation usage - Re-write the comments in a biologist friendly tone - Extract the single TPR simulation logic into a helper function - Improve the plotly import method - Write unit test cases for the plotly function - Move plotly from suggested into required dependency and remove the installation check from code --- DESCRIPTION | 4 +- NAMESPACE | 4 +- R/ExperimentalDesignSimulation.R | 62 ++- R/TPR_Power_Curve.R | 361 +++++++++++++----- man/DoseResponseFit.Rd | 2 +- man/plot_tpr_power_curve.Rd | 25 +- man/run_tpr_simulation.Rd | 81 +++- .../test-ExperimentalDesignSimulation.R | 16 +- tests/testthat/test-TPR_Power_Curve.R | 106 +++++ 9 files changed, 521 insertions(+), 140 deletions(-) create mode 100644 tests/testthat/test-TPR_Power_Curve.R diff --git a/DESCRIPTION b/DESCRIPTION index dd07d37..e3e40c1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,7 +31,8 @@ Imports: dplyr, stats, parallel, - data.table + data.table, + plotly Suggests: MSstats, MSstatsTMT, @@ -39,7 +40,6 @@ Suggests: boot, purrr, gridExtra, - plotly, knitr, rmarkdown, BiocStyle, diff --git a/NAMESPACE b/NAMESPACE index 7ea17c0..5dbaa32 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -35,7 +35,6 @@ importFrom(ggplot2,ggplot) importFrom(ggplot2,labs) importFrom(ggplot2,scale_color_manual) importFrom(ggplot2,scale_fill_manual) -importFrom(ggplot2,scale_linetype_manual) importFrom(ggplot2,scale_x_continuous) importFrom(ggplot2,scale_y_continuous) importFrom(ggplot2,theme) @@ -43,10 +42,13 @@ importFrom(ggplot2,theme_bw) importFrom(ggplot2,theme_classic) importFrom(ggplot2,theme_minimal) importFrom(ggplot2,ylim) +importFrom(grDevices,colorRampPalette) importFrom(parallel,clusterExport) importFrom(parallel,makeCluster) importFrom(parallel,parLapply) importFrom(parallel,stopCluster) +importFrom(plotly,ggplotly) +importFrom(plotly,layout) importFrom(stats,approx) importFrom(stats,p.adjust) importFrom(stats,pf) diff --git a/R/ExperimentalDesignSimulation.R b/R/ExperimentalDesignSimulation.R index e05f794..cd0e5b5 100644 --- a/R/ExperimentalDesignSimulation.R +++ b/R/ExperimentalDesignSimulation.R @@ -222,23 +222,38 @@ futureExperimentSimulation = function(N_proteins = 300, )) } - # Get mean profile across proteins + # Get mean profile across proteins using string-based grouping to avoid floating point precision issues profile = drug_data %>% filter(protein %in% protein_ids) %>% - mutate(dose_nM = round(dose * 1e9)) %>% # Convert M to nM and round to handle precision - group_by(dose_nM) %>% + mutate(dose_str = as.character(dose)) %>% + group_by(dose_str) %>% summarise( LogIntensities = mean(response, na.rm = TRUE), + dose_numeric = dose[1], .groups = 'drop' - ) %>% - rename(dose = dose_nM) + ) - # Create complete profile with all concentrations + # Match profile doses to target concentrations using nearest-neighbor on log scale. + # This handles unit mismatches without assuming a fixed conversion factor. complete_profile = data.frame(dose = concentrations) - - # Merge with existing data - complete_profile = complete_profile %>% - left_join(profile, by = "dose") + complete_profile$LogIntensities = NA_real_ + + for (i in seq_len(nrow(complete_profile))) { + target = complete_profile$dose[i] + if (target == 0) { + # Control: match any zero-dose entry + zero_match = profile[profile$dose_numeric == 0, ] + if (nrow(zero_match) > 0) { + complete_profile$LogIntensities[i] = zero_match$LogIntensities[1] + } + } else if (any(profile$dose_numeric != 0)) { + # Non-zero: find closest on log scale + non_zero = profile[profile$dose_numeric != 0, ] + log_ratios = abs(log10(non_zero$dose_numeric) - log10(target)) + best = which.min(log_ratios) + complete_profile$LogIntensities[i] = non_zero$LogIntensities[best] + } + } # Fill missing values with interpolation or nearest neighbor for (i in which(is.na(complete_profile$LogIntensities))) { @@ -306,11 +321,32 @@ futureExperimentSimulation = function(N_proteins = 300, return(complete_profile) } - # Extract templates for each category + # Load default templates as fallback for unspecified categories + default_template <- NULL + if (is.null(weak_proteins) || is.null(no_interaction_proteins)) { + template1_path <- system.file("extdata", "template1.RDS", package = "MSstatsResponse") + if (file.exists(template1_path)) { + default_template <- readRDS(template1_path) + } + } + + # Extract templates: use user data for specified proteins, defaults for others template = list( strong_interaction = .getMeanProfile(strong_proteins, drug_data, concentrations), - weak_interaction = .getMeanProfile(weak_proteins, drug_data, concentrations), - no_interaction = .getMeanProfile(no_interaction_proteins, drug_data, concentrations) + weak_interaction = if (!is.null(weak_proteins)) { + .getMeanProfile(weak_proteins, drug_data, concentrations) + } else if (!is.null(default_template)) { + default_template$weak_interaction[default_template$weak_interaction$dose %in% concentrations, ] + } else { + .getMeanProfile(NULL, drug_data, concentrations) + }, + no_interaction = if (!is.null(no_interaction_proteins)) { + .getMeanProfile(no_interaction_proteins, drug_data, concentrations) + } else if (!is.null(default_template)) { + default_template$no_interaction[default_template$no_interaction$dose %in% concentrations, ] + } else { + .getMeanProfile(NULL, drug_data, concentrations) + } ) # Validate template format diff --git a/R/TPR_Power_Curve.R b/R/TPR_Power_Curve.R index ddd801a..7f871ec 100644 --- a/R/TPR_Power_Curve.R +++ b/R/TPR_Power_Curve.R @@ -1,69 +1,200 @@ -# Hardcoded concentration ladders: control (0) and highest dose always present, -# intermediate doses filled from a biologically motivated set. -CONC_MAP <- list( - "2" = c(0, 3000), - "3" = c(0, 1000, 3000), - "4" = c(0, 1, 1000, 3000), - "5" = c(0, 1, 100, 1000, 3000), - "6" = c(0, 1, 100, 300, 1000, 3000), - "7" = c(0, 1, 10, 100, 300, 1000, 3000), - "8" = c(0, 1, 10, 30, 100, 300, 1000, 3000), - "9" = c(0, 1, 3, 10, 30, 100, 300, 1000, 3000) -) - -#' Run TPR simulation across a grid of concentration counts and replicate counts -#' -#' Sweeps over combinations of dose counts (2-9) and replicate counts, -#' calling \code{futureExperimentSimulation()} for each combination. -#' -#' @param rep_range Integer vector of length 2, c(min, max) for replicate sweep. -#' @param n_proteins Integer. Number of proteins to simulate. Default: 1000. +# ============================================================================ +# TPR Power Curve - Simulation and Visualization +# ============================================================================ + +#' Build concentration subsets for dose-response power analysis +#' +#' Given a user's full set of experimental concentrations, generates subsets +#' of size 2 through N by greedily selecting doses that are farthest from +#' the log-median of the already-selected set. Control (0) and the highest +#' dose are always included. +#' +#' @param concentrations Numeric vector of all available concentrations. +#' @param dose_range Integer vector of length 2, c(min, max) specifying the +#' range of subset sizes to generate. +#' +#' @return A named list where each element is a numeric vector of concentrations. +#' Names are the subset sizes (e.g., "2", "3", ...). +#' @noRd +.build_concentration_ladders <- function(concentrations, dose_range) { + concentrations <- sort(unique(concentrations)) + if (length(concentrations) < 2) { + stop("At least 2 unique concentrations are required.") + } + + control <- min(concentrations) + highest <- max(concentrations) + candidates <- setdiff(concentrations, c(control, highest)) + + # Greedily build up from {control, highest} by picking the dose + # farthest from log(median) of the current set at each step + selected <- c(control, highest) + selection_order <- list() + selection_order[["2"]] <- sort(selected) + + remaining <- candidates + max_k <- min(dose_range[2], length(concentrations)) + + for (k in 3:max_k) { + if (length(remaining) == 0) break + + log_selected <- log10(selected + 1) + log_median <- median(log_selected) + log_remaining <- log10(remaining + 1) + + distances <- abs(log_remaining - log_median) + best_idx <- which.max(distances) + pick <- remaining[best_idx] + + selected <- c(selected, pick) + remaining <- remaining[-best_idx] + selection_order[[as.character(k)]] <- sort(selected) + } + + # Filter to requested dose_range + k_range <- seq(dose_range[1], min(dose_range[2], length(concentrations))) + result <- selection_order[as.character(k_range)] + result <- result[!sapply(result, is.null)] + + if (length(result) == 0) { + stop("No valid concentration subsets could be generated for the given dose_range.") + } + return(result) +} + +#' Run a single TPR simulation for one replicate/concentration combination +#' +#' @param n_rep Integer. Number of replicates per dose. +#' @param concs Numeric vector. Concentrations to use. +#' @param n_proteins Integer. Number of proteins to simulate. +#' @param data Optional prepared dose-response data for user-based templates. +#' @param protein Optional protein ID for strong interaction template. +#' @param seed Integer. Base seed for reproducibility. #' #' @return A data.frame with columns: Interaction, TPR, N_rep, NumConcs. +#' @noRd +.run_one_tpr_simulation <- function(n_rep, concs, n_proteins, data = NULL, + protein = NULL, seed = 123) { + set.seed(seed + n_rep * 100 + length(concs)) + + sim_args <- list( + N_proteins = n_proteins, + N_rep = n_rep, + Concentrations = concs, + IC50_Prediction = FALSE + ) + if (!is.null(data)) { + sim_args$data <- data + } + if (!is.null(protein) && !is.null(data)) { + sim_args$strong_proteins <- protein + } + + temp_res <- do.call(futureExperimentSimulation, sim_args) + temp_res$Hit_Rates_Data |> + dplyr::filter(Category %in% c("TPR (Strong)", "TPR (Weak)")) |> + dplyr::mutate( + N_rep = n_rep, + NumConcs = length(concs), + Interaction = dplyr::if_else(Category == "TPR (Strong)", "Strong", "Weak") + ) |> + dplyr::select(Interaction, TPR = Percent, N_rep, NumConcs) +} + +#' Simulate detection power across experimental design configurations +#' +#' Evaluates how well a dose-response proteomics experiment can detect +#' drug-protein interactions under different experimental designs. For each +#' combination of replicate count and number of doses, the function simulates +#' a full experiment and measures the true positive rate (TPR); the +#' percentage of known interacting proteins that are correctly identified. +#' +#' This helps experimentalists answer practical questions like: "If I can +#' only afford 3 replicates per dose, how many dose levels do I need to +#' reliably detect weak interactions?" +#' +#' Concentration subsets are selected automatically from the user's available +#' doses: control (0) and the highest dose are always included, and +#' intermediate doses are added by choosing the one farthest from the +#' log-median of the already-selected set. +#' +#' @param rep_range Integer vector of length 2, \code{c(min, max)}. The range +#' of biological replicates per dose to evaluate. Each value in the sequence +#' \code{min:max} produces one line in the resulting power curve. +#' @param concentrations Numeric vector. The full set of drug concentrations +#' available in the experiment (e.g., \code{c(0, 1, 3, 10, 30, 100, 300, 1000, 3000)}). +#' @param dose_range Integer vector of length 2, \code{c(min, max)}. The range +#' of dose counts to sweep. For example, \code{c(2, 9)} evaluates designs +#' with 2 doses, 3 doses, ..., up to 9 doses. +#' @param data Optional. User's prepared dose-response data (e.g., from +#' \code{MSstatsPrepareDoseResponseFit}). If provided, protein-specific +#' templates are extracted from this data instead of using defaults. +#' @param protein Optional. Character string specifying a protein ID to use as +#' the strong interaction template. Only used when \code{data} is provided. +#' @param n_proteins Integer. Number of proteins to simulate per run. Larger +#' values give more stable estimates but increase runtime. Default: 1000. +#' +#' @return A \code{data.frame} with columns: +#' \describe{ +#' \item{Interaction}{Character. "Strong" or "Weak".} +#' \item{TPR}{Numeric. True positive rate as a percentage (0-100).} +#' \item{N_rep}{Integer. Number of replicates used in this simulation.} +#' \item{NumConcs}{Integer. Number of concentrations used in this simulation.} +#' } +#' +#' @examples +#' # Quick example with small simulation (for speed) +#' results <- run_tpr_simulation( +#' rep_range = c(1, 2), +#' concentrations = c(0, 10, 100, 1000), +#' dose_range = c(2, 4), +#' n_proteins = 50 +#' ) +#' head(results) +#' +#' \dontrun{ +#' # Full power analysis with standard chemoproteomic doses +#' results <- run_tpr_simulation( +#' rep_range = c(1, 5), +#' concentrations = c(0, 1, 3, 10, 30, 100, 300, 1000, 3000), +#' dose_range = c(2, 9), +#' n_proteins = 1000 +#' ) +#' +#' # Visualize results +#' plot_tpr_power_curve(results) +#' } #' #' @importFrom dplyr filter mutate select if_else #' @export -run_tpr_simulation <- function(rep_range, n_proteins = 1000) { +run_tpr_simulation <- function(rep_range, concentrations, dose_range, + data = NULL, protein = NULL, n_proteins = 1000) { if (!is.numeric(rep_range) || length(rep_range) != 2 || rep_range[1] > rep_range[2]) { stop("rep_range must be a numeric vector of length 2 with c(min, max) where min <= max.") } - if (rep_range[2] > 5) { - stop("Maximum replicates is 5 (limited by available line styles for plotting).") + if (!is.numeric(dose_range) || length(dose_range) != 2 || dose_range[1] > dose_range[2]) { + stop("dose_range must be a numeric vector of length 2 with c(min, max) where min <= max.") } - k_grid <- as.integer(names(CONC_MAP)) - rep_grid <- seq(rep_range[1], rep_range[2]) - - run_one <- function(n_rep, k_conc, seed = 123) { - set.seed(seed + n_rep * 100 + k_conc) - concs_k <- CONC_MAP[[as.character(k_conc)]] - - sim_args <- list( - N_proteins = n_proteins, - N_rep = n_rep, - Concentrations = concs_k, - IC50_Prediction = FALSE - ) - - temp_res <- futureExperimentSimulation( - N_proteins = sim_args$N_proteins, - N_rep = sim_args$N_rep, - Concentrations = sim_args$Concentrations, - IC50_Prediction = sim_args$IC50_Prediction - ) - temp_res$Hit_Rates_Data |> - dplyr::filter(Category %in% c("TPR (Strong)", "TPR (Weak)")) |> - dplyr::mutate( - N_rep = n_rep, - NumConcs = length(concs_k), - Interaction = dplyr::if_else(Category == "TPR (Strong)", "Strong", "Weak") - ) |> - dplyr::select(Interaction, TPR = Percent, N_rep, NumConcs) + if (dose_range[1] < 2) { + stop("dose_range minimum must be at least 2 (control + one treatment).") } + conc_subsets <- .build_concentration_ladders(concentrations, dose_range) + k_grid <- as.integer(names(conc_subsets)) + rep_grid <- seq(rep_range[1], rep_range[2]) + grid_df <- expand.grid(N_rep = rep_grid, k_conc = k_grid) results <- do.call(rbind, lapply(seq_len(nrow(grid_df)), function(i) { + k <- as.character(grid_df$k_conc[i]) + concs <- conc_subsets[[k]] tryCatch( - run_one(n_rep = grid_df$N_rep[i], k_conc = grid_df$k_conc[i]), + .run_one_tpr_simulation( + n_rep = grid_df$N_rep[i], + concs = concs, + n_proteins = n_proteins, + data = data, + protein = protein + ), error = function(e) { warning(paste("Simulation failed for N_rep=", grid_df$N_rep[i], ", k_conc=", grid_df$k_conc[i], ":", conditionMessage(e))) @@ -78,74 +209,92 @@ run_tpr_simulation <- function(rep_range, n_proteins = 1000) { return(results) } -#' Plot TPR power curves for dose response proteomics experiments +#' Create a single TPR panel plot with color gradient by replicate count +#' +#' @param data Data frame with TPR results for one interaction type. +#' @param color_low Character. Light shade for fewest replicates. +#' @param color_high Character. Dark shade for most replicates. +#' @param k_grid Integer vector. X-axis breaks. +#' @param show_legend Logical. Whether to display the legend. +#' +#' @return A ggplot object. +#' @importFrom ggplot2 ggplot aes geom_line geom_point scale_x_continuous +#' scale_y_continuous scale_color_manual labs theme_bw theme element_text +#' @noRd +.make_tpr_panel <- function(data, color_low, color_high, k_grid, show_legend = FALSE) { + rep_levels <- sort(unique(data$N_rep)) + n_levels <- length(rep_levels) + + # Generate gradient from light to dark + color_palette <- grDevices::colorRampPalette(c(color_low, color_high))(n_levels) + names(color_palette) <- as.character(rep_levels) + + p <- ggplot2::ggplot(data, + ggplot2::aes(x = NumConcs, y = TPR, + color = factor(N_rep), group = factor(N_rep))) + + ggplot2::geom_line(linewidth = 1.2) + + ggplot2::geom_point(size = 2) + + ggplot2::scale_x_continuous(breaks = k_grid) + + ggplot2::scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, 20)) + + ggplot2::scale_color_manual(values = color_palette) + + ggplot2::labs( + x = "Number of concentrations", + y = "True Positive Rate (%)", + color = "Replicates" + ) + + ggplot2::theme_bw(base_size = 14) + + ggplot2::theme( + plot.title = ggplot2::element_text(face = "bold", hjust = 0.5) + ) + + if (!show_legend) { + p <- p + ggplot2::theme(legend.position = "none") + } + p +} + +#' Visualize detection power across experimental designs #' -#' Creates a two-panel interactive plot showing True Positive Rate -#' for Strong and Weak interactions across concentration and replicate counts. +#' Creates an interactive plot showing how the true positive rate +#' (detection power) changes with the number of doses and replicates +#' for the user-selected protein template. Line color shading goes +#' from light (fewest replicates) to dark (most replicates). #' -#' @param simulation_results A data.frame from \code{run_tpr_simulation()}. +#' @param simulation_results A \code{data.frame} returned by +#' \code{\link{run_tpr_simulation}}. #' -#' @return A plotly object with two panels (Strong and Weak interaction). +#' @return An interactive \code{plotly} object. +#' +#' @examples +#' \dontrun{ +#' results <- run_tpr_simulation( +#' rep_range = c(1, 3), +#' concentrations = c(0, 10, 100, 1000), +#' dose_range = c(2, 4), +#' n_proteins = 300 +#' ) +#' plot_tpr_power_curve(results) +#' } #' #' @importFrom ggplot2 ggplot aes geom_line geom_point scale_x_continuous -#' scale_y_continuous scale_linetype_manual labs theme_bw theme element_text +#' scale_y_continuous scale_color_manual labs theme_bw theme element_text +#' @importFrom plotly ggplotly layout +#' @importFrom grDevices colorRampPalette #' @export plot_tpr_power_curve <- function(simulation_results) { k_grid <- sort(unique(simulation_results$NumConcs)) - rep_levels <- sort(unique(simulation_results$N_rep)) - ltypes <- c("dotted", "dotdash", "dashed", "longdash", "solid") - if (length(rep_levels) > length(ltypes)) { - stop("Too many replicate levels (max 5). Reduce the replicate range.") - } - ltype_values <- setNames(ltypes[seq_along(rep_levels)], as.character(rep_levels)) - - make_panel <- function(data, color, show_legend = FALSE) { - p <- ggplot2::ggplot(data, - ggplot2::aes(x = NumConcs, y = TPR, linetype = factor(N_rep))) + - ggplot2::geom_line(linewidth = 1.2, color = color) + - ggplot2::geom_point(size = 2, color = color) + - ggplot2::scale_x_continuous(breaks = k_grid) + - ggplot2::scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, 20)) + - ggplot2::scale_linetype_manual(values = ltype_values) + - ggplot2::labs( - x = "Number of concentrations", - y = "True Positive Rate (%)", - linetype = "Replicates" - ) + - ggplot2::theme_bw(base_size = 14) + - ggplot2::theme( - plot.title = ggplot2::element_text(face = "bold", hjust = 0.5) - ) - - if (!show_legend) { - p <- p + ggplot2::theme(legend.position = "none") - } - p - } + # Use only the Strong interaction results (user-selected protein template) + results_protein <- simulation_results[simulation_results$Interaction == "Strong", ] - results_strong <- simulation_results[simulation_results$Interaction == "Strong", ] - results_weak <- simulation_results[simulation_results$Interaction == "Weak", ] + p <- .make_tpr_panel(results_protein, "#a6dba0", "#1b7837", k_grid, TRUE) - if (!requireNamespace("plotly", quietly = TRUE)) { - stop("Package 'plotly' is required for interactive plots. Please install it.") - } - - p_strong <- plotly::ggplotly(make_panel(results_strong, "#1b9e77", FALSE)) - p_weak <- plotly::ggplotly(make_panel(results_weak, "#d95f02", TRUE)) - - plotly::subplot(p_strong, p_weak, - nrows = 1, shareY = TRUE, titleX = TRUE, titleY = TRUE, - margin = 0.12 - ) |> plotly::layout( + plotly::ggplotly(p) |> plotly::layout( margin = list(t = 60), annotations = list( - list(text = "Strong interaction detection power", x = 0.22, y = 1.08, - xref = "paper", yref = "paper", showarrow = FALSE, - font = list(size = 12), xanchor = "center"), - list(text = "Weak interaction detection power", x = 0.78, y = 1.08, + list(text = "Interaction detection power", x = 0.5, y = 1.08, xref = "paper", yref = "paper", showarrow = FALSE, - font = list(size = 12), xanchor = "center") + font = list(size = 14), xanchor = "center") ) ) } \ No newline at end of file diff --git a/man/DoseResponseFit.Rd b/man/DoseResponseFit.Rd index 07dd21f..3033264 100644 --- a/man/DoseResponseFit.Rd +++ b/man/DoseResponseFit.Rd @@ -66,7 +66,7 @@ interaction_results <- doseResponseFit( print(interaction_results) # Check significant interactions -significant <- interaction_results[interaction_results$adjust_pval < 0.05, ] +significant <- interaction_results[interaction_results$adj.pvalue < 0.05, ] print(paste("Found", nrow(significant), "significant interactions")) \dontrun{ diff --git a/man/plot_tpr_power_curve.Rd b/man/plot_tpr_power_curve.Rd index 5df86f9..8f92b0e 100644 --- a/man/plot_tpr_power_curve.Rd +++ b/man/plot_tpr_power_curve.Rd @@ -2,17 +2,32 @@ % Please edit documentation in R/TPR_Power_Curve.R \name{plot_tpr_power_curve} \alias{plot_tpr_power_curve} -\title{Plot TPR power curves for dose response proteomics experiments} +\title{Visualize detection power across experimental designs} \usage{ plot_tpr_power_curve(simulation_results) } \arguments{ -\item{simulation_results}{A data.frame from \code{run_tpr_simulation()}.} +\item{simulation_results}{A \code{data.frame} returned by +\code{\link{run_tpr_simulation}}.} } \value{ -A plotly object with two panels (Strong and Weak interaction). +An interactive \code{plotly} object. } \description{ -Creates a two-panel interactive plot showing True Positive Rate -for Strong and Weak interactions across concentration and replicate counts. +Creates an interactive plot showing how the true positive rate +(detection power) changes with the number of doses and replicates +for the user-selected protein template. Line color shading goes +from light (fewest replicates) to dark (most replicates). +} +\examples{ +\dontrun{ +results <- run_tpr_simulation( + rep_range = c(1, 3), + concentrations = c(0, 10, 100, 1000), + dose_range = c(2, 4), + n_proteins = 300 +) +plot_tpr_power_curve(results) +} + } diff --git a/man/run_tpr_simulation.Rd b/man/run_tpr_simulation.Rd index e221c4c..2e5fa53 100644 --- a/man/run_tpr_simulation.Rd +++ b/man/run_tpr_simulation.Rd @@ -2,19 +2,86 @@ % Please edit documentation in R/TPR_Power_Curve.R \name{run_tpr_simulation} \alias{run_tpr_simulation} -\title{Run TPR simulation across a grid of concentration counts and replicate counts} +\title{Simulate detection power across experimental design configurations} \usage{ -run_tpr_simulation(rep_range, n_proteins = 1000) +run_tpr_simulation( + rep_range, + concentrations, + dose_range, + data = NULL, + protein = NULL, + n_proteins = 1000 +) } \arguments{ -\item{rep_range}{Integer vector of length 2, c(min, max) for replicate sweep.} +\item{rep_range}{Integer vector of length 2, \code{c(min, max)}. The range +of biological replicates per dose to evaluate. Each value in the sequence +\code{min:max} produces one line in the resulting power curve.} -\item{n_proteins}{Integer. Number of proteins to simulate. Default: 1000.} +\item{concentrations}{Numeric vector. The full set of drug concentrations +available in the experiment (e.g., \code{c(0, 1, 3, 10, 30, 100, 300, 1000, 3000)}).} + +\item{dose_range}{Integer vector of length 2, \code{c(min, max)}. The range +of dose counts to sweep. For example, \code{c(2, 9)} evaluates designs +with 2 doses, 3 doses, ..., up to 9 doses.} + +\item{data}{Optional. User's prepared dose-response data (e.g., from +\code{MSstatsPrepareDoseResponseFit}). If provided, protein-specific +templates are extracted from this data instead of using defaults.} + +\item{protein}{Optional. Character string specifying a protein ID to use as +the strong interaction template. Only used when \code{data} is provided.} + +\item{n_proteins}{Integer. Number of proteins to simulate per run. Larger +values give more stable estimates but increase runtime. Default: 1000.} } \value{ -A data.frame with columns: Interaction, TPR, N_rep, NumConcs. +A \code{data.frame} with columns: +\describe{ +\item{Interaction}{Character. "Strong" or "Weak".} +\item{TPR}{Numeric. True positive rate as a percentage (0-100).} +\item{N_rep}{Integer. Number of replicates used in this simulation.} +\item{NumConcs}{Integer. Number of concentrations used in this simulation.} +} } \description{ -Sweeps over combinations of dose counts (2-9) and replicate counts, -calling \code{futureExperimentSimulation()} for each combination. +Evaluates how well a dose-response proteomics experiment can detect +drug-protein interactions under different experimental designs. For each +combination of replicate count and number of doses, the function simulates +a full experiment and measures the true positive rate (TPR); the +percentage of known interacting proteins that are correctly identified. +} +\details{ +This helps experimentalists answer practical questions like: "If I can +only afford 3 replicates per dose, how many dose levels do I need to +reliably detect weak interactions?" + +Concentration subsets are selected automatically from the user's available +doses: control (0) and the highest dose are always included, and +intermediate doses are added by choosing the one farthest from the +log-median of the already-selected set. +} +\examples{ +# Quick example with small simulation (for speed) +results <- run_tpr_simulation( + rep_range = c(1, 2), + concentrations = c(0, 10, 100, 1000), + dose_range = c(2, 4), + n_proteins = 50 +) +head(results) + +\dontrun{ +# Full power analysis with standard chemoproteomic doses +results <- run_tpr_simulation( + rep_range = c(1, 5), + concentrations = c(0, 1, 3, 10, 30, 100, 300, 1000, 3000), + dose_range = c(2, 9), + n_proteins = 1000 +) + +# Visualize results +plot_tpr_power_curve(results) +} + } diff --git a/tests/testthat/test-ExperimentalDesignSimulation.R b/tests/testthat/test-ExperimentalDesignSimulation.R index 95963fc..c853264 100644 --- a/tests/testthat/test-ExperimentalDesignSimulation.R +++ b/tests/testthat/test-ExperimentalDesignSimulation.R @@ -133,12 +133,12 @@ test_that(".extractTemplatesFromData handles missing doses via interpolation", { weak_proteins = NULL, no_interaction_proteins = NULL, drug_name = "Drug1", - concentrations = c(0, 10, 100, 1000, 10000) # More concentrations than data + concentrations = c(0, 10, 100, 1000, 3000) # All within default template range ) # Should have all requested concentrations expect_equal(nrow(template$strong_interaction), 5) - expect_equal(template$strong_interaction$dose, c(0, 10, 100, 1000, 10000)) + expect_equal(template$strong_interaction$dose, c(0, 10, 100, 1000, 3000)) }) test_that(".extractTemplatesFromData handles empty protein lists", { @@ -158,9 +158,15 @@ test_that(".extractTemplatesFromData handles empty protein lists", { concentrations = c(0, 1000) ) - # Null protein lists should get flat profiles - expect_equal(template$weak_interaction$ratio, c(1, 1)) - expect_equal(template$no_interaction$ratio, c(1, 1)) + # Null protein lists fall back to default templates from template1.RDS + # (not flat profiles): verify structure is valid + expect_equal(nrow(template$weak_interaction), 2) + expect_equal(nrow(template$no_interaction), 2) + expect_true(all(c("dose", "Intensity", "LogIntensities", "ratio") %in% names(template$weak_interaction))) + expect_true(all(c("dose", "Intensity", "LogIntensities", "ratio") %in% names(template$no_interaction))) + # Control dose should have ratio = 1 + expect_equal(template$weak_interaction$ratio[template$weak_interaction$dose == 0], 1) + expect_equal(template$no_interaction$ratio[template$no_interaction$dose == 0], 1) }) diff --git a/tests/testthat/test-TPR_Power_Curve.R b/tests/testthat/test-TPR_Power_Curve.R new file mode 100644 index 0000000..b9dab68 --- /dev/null +++ b/tests/testthat/test-TPR_Power_Curve.R @@ -0,0 +1,106 @@ +test_that("build_concentration_ladders returns correct subset sizes", { + concs <- c(0, 1, 3, 10, 30, 100, 300, 1000, 3000) + result <- MSstatsResponse:::.build_concentration_ladders(concs, c(2, 5)) + + expect_equal(length(result), 4) + expect_equal(names(result), c("2", "3", "4", "5")) + + # All subsets should contain control and highest dose + for (k in names(result)) { + expect_true(0 %in% result[[k]]) + expect_true(3000 %in% result[[k]]) + expect_equal(length(result[[k]]), as.integer(k)) + } +}) + +test_that("build_concentration_ladders always includes control and max", { + concs <- c(0, 5, 50, 200, 1000) + result <- MSstatsResponse:::.build_concentration_ladders(concs, c(2, 5)) + + for (k in names(result)) { + expect_true(0 %in% result[[k]]) + expect_true(1000 %in% result[[k]]) + } +}) + +test_that("build_concentration_ladders errors on fewer than 2 concentrations", { + expect_error( + MSstatsResponse:::.build_concentration_ladders(c(0), c(2, 3)), + "At least 2 unique" + ) +}) + +test_that("build_concentration_ladders handles arbitrary user concentrations", { + concs <- c(0, 2, 4, 20, 50, 100, 500, 1000) + result <- MSstatsResponse:::.build_concentration_ladders(concs, c(2, 6)) + + expect_equal(length(result), 5) + for (k in names(result)) { + subset <- result[[k]] + expect_true(all(subset %in% concs)) + } +}) + +test_that("run_tpr_simulation validates rep_range", { + expect_error( + run_tpr_simulation(c(3, 1), c(0, 100, 1000), c(2, 3)), + "min <= max" + ) + expect_error( + run_tpr_simulation("bad", c(0, 100, 1000), c(2, 3)), + "numeric vector" + ) +}) + +test_that("run_tpr_simulation validates dose_range", { + expect_error( + run_tpr_simulation(c(1, 2), c(0, 100, 1000), c(5, 2)), + "min <= max" + ) + expect_error( + run_tpr_simulation(c(1, 2), c(0, 100, 1000), c(1, 3)), + "at least 2" + ) +}) + +test_that("run_tpr_simulation returns correct structure", { + results <- run_tpr_simulation( + rep_range = c(1, 2), + concentrations = c(0, 10, 100, 1000), + dose_range = c(2, 3), + n_proteins = 50 + ) + + expect_true(is.data.frame(results)) + expect_true(all(c("Interaction", "TPR", "N_rep", "NumConcs") %in% colnames(results))) + expect_true(all(results$Interaction %in% c("Strong", "Weak"))) + expect_true(all(results$TPR >= 0 & results$TPR <= 100)) +}) + +test_that("plot_tpr_power_curve returns a plotly object", { + results <- run_tpr_simulation( + rep_range = c(1, 2), + concentrations = c(0, 10, 100, 1000), + dose_range = c(2, 3), + n_proteins = 50 + ) + + p <- plot_tpr_power_curve(results) + expect_true(inherits(p, "plotly")) +}) + +test_that("run_tpr_simulation accepts data and protein parameters", { + # Verify the function signature accepts these params without error + # (actual template extraction is tested via integration/manual testing) + expect_error( + run_tpr_simulation( + rep_range = c(1, 1), + concentrations = c(0, 100, 1000), + dose_range = c(2, 3), + n_proteins = 10, + data = NULL, + protein = NULL + ), + NA # NA means "expect NO error" + ) +}) \ No newline at end of file From cfabb47851643c7b4a79c1f2ae83b1cd301616ea Mon Sep 17 00:00:00 2001 From: Swaraj Patil Date: Mon, 30 Mar 2026 18:12:20 -0400 Subject: [PATCH 05/11] Resolve nitpicks in TPR power curve function --- R/ExperimentalDesignSimulation.R | 4 ++++ R/TPR_Power_Curve.R | 8 ++++++-- man/plot_tpr_power_curve.Rd | 5 +++-- tests/testthat/test-TPR_Power_Curve.R | 17 +++++++++++++++++ 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/R/ExperimentalDesignSimulation.R b/R/ExperimentalDesignSimulation.R index cd0e5b5..b253db5 100644 --- a/R/ExperimentalDesignSimulation.R +++ b/R/ExperimentalDesignSimulation.R @@ -238,6 +238,10 @@ futureExperimentSimulation = function(N_proteins = 300, complete_profile = data.frame(dose = concentrations) complete_profile$LogIntensities = NA_real_ + if (nrow(profile) > 0 && !any(profile$dose_numeric != 0)) { + warning("No non-zero dose measurements found for the selected protein. Template will use fallback values.") + } + for (i in seq_len(nrow(complete_profile))) { target = complete_profile$dose[i] if (target == 0) { diff --git a/R/TPR_Power_Curve.R b/R/TPR_Power_Curve.R index 7f871ec..c7fa998 100644 --- a/R/TPR_Power_Curve.R +++ b/R/TPR_Power_Curve.R @@ -178,6 +178,9 @@ run_tpr_simulation <- function(rep_range, concentrations, dose_range, if (dose_range[1] < 2) { stop("dose_range minimum must be at least 2 (control + one treatment).") } + if (!is.null(data) && is.null(protein)) { + stop("protein must be specified when data is provided.") + } conc_subsets <- .build_concentration_ladders(concentrations, dose_range) k_grid <- as.integer(names(conc_subsets)) @@ -256,8 +259,9 @@ run_tpr_simulation <- function(rep_range, concentrations, dose_range, #' Visualize detection power across experimental designs #' #' Creates an interactive plot showing how the true positive rate -#' (detection power) changes with the number of doses and replicates -#' for the user-selected protein template. Line color shading goes +#' (detection power) changes with the number of doses and replicates. +#' Only the results for the user-selected protein template (passed as +#' the strong interaction category) are displayed. Line color shading goes #' from light (fewest replicates) to dark (most replicates). #' #' @param simulation_results A \code{data.frame} returned by diff --git a/man/plot_tpr_power_curve.Rd b/man/plot_tpr_power_curve.Rd index 8f92b0e..3d79719 100644 --- a/man/plot_tpr_power_curve.Rd +++ b/man/plot_tpr_power_curve.Rd @@ -15,8 +15,9 @@ An interactive \code{plotly} object. } \description{ Creates an interactive plot showing how the true positive rate -(detection power) changes with the number of doses and replicates -for the user-selected protein template. Line color shading goes +(detection power) changes with the number of doses and replicates. +Only the results for the user-selected protein template (passed as +the strong interaction category) are displayed. Line color shading goes from light (fewest replicates) to dark (most replicates). } \examples{ diff --git a/tests/testthat/test-TPR_Power_Curve.R b/tests/testthat/test-TPR_Power_Curve.R index b9dab68..19f7ef8 100644 --- a/tests/testthat/test-TPR_Power_Curve.R +++ b/tests/testthat/test-TPR_Power_Curve.R @@ -103,4 +103,21 @@ test_that("run_tpr_simulation accepts data and protein parameters", { ), NA # NA means "expect NO error" ) +}) + +test_that("run_tpr_simulation errors when data provided without protein", { + mock_data <- data.frame( + protein = "P1", drug = "Drug1", dose = c(0, 1e-6), response = c(20, 18) + ) + expect_error( + run_tpr_simulation( + rep_range = c(1, 1), + concentrations = c(0, 1000), + dose_range = c(2, 2), + n_proteins = 10, + data = mock_data, + protein = NULL + ), + "protein must be specified when data is provided" + ) }) \ No newline at end of file From 38a10d52e281450c450de02c403b5c6e40ec07fe Mon Sep 17 00:00:00 2001 From: Swaraj Patil Date: Mon, 30 Mar 2026 18:20:14 -0400 Subject: [PATCH 06/11] Resolved a documentation nitpick in TPR simulation function --- R/TPR_Power_Curve.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/TPR_Power_Curve.R b/R/TPR_Power_Curve.R index c7fa998..37f42a9 100644 --- a/R/TPR_Power_Curve.R +++ b/R/TPR_Power_Curve.R @@ -21,6 +21,9 @@ if (length(concentrations) < 2) { stop("At least 2 unique concentrations are required.") } + if (!(0 %in% concentrations)) { + stop("Concentrations must include 0 (control).") + } control <- min(concentrations) highest <- max(concentrations) From bf82171d9c75c8f5e2996e5ddbd4078b580420b7 Mon Sep 17 00:00:00 2001 From: Swaraj Patil Date: Fri, 3 Apr 2026 00:05:56 -0400 Subject: [PATCH 07/11] TPR Simulation task: - Simplify the profile matching loop - Use Stop instead of warning if there are no non-zero dose - Remove template1.RDS fallback, use baseline - Enhance the message for not generating any valid concentration subsets - Make data and protein required and remove superfluous validations - Update corresponding docs for turning the optional fields mandatory - Simplify seed in one tpr simulation - Filter only TPR strong - Update the test cases --- R/ExperimentalDesignSimulation.R | 51 +++---------- R/TPR_Power_Curve.R | 37 +++++----- man/run_tpr_simulation.Rd | 14 ++-- .../test-ExperimentalDesignSimulation.R | 9 ++- tests/testthat/test-TPR_Power_Curve.R | 71 +++++++++---------- 5 files changed, 69 insertions(+), 113 deletions(-) diff --git a/R/ExperimentalDesignSimulation.R b/R/ExperimentalDesignSimulation.R index b253db5..41bfc1d 100644 --- a/R/ExperimentalDesignSimulation.R +++ b/R/ExperimentalDesignSimulation.R @@ -236,28 +236,16 @@ futureExperimentSimulation = function(N_proteins = 300, # Match profile doses to target concentrations using nearest-neighbor on log scale. # This handles unit mismatches without assuming a fixed conversion factor. complete_profile = data.frame(dose = concentrations) - complete_profile$LogIntensities = NA_real_ if (nrow(profile) > 0 && !any(profile$dose_numeric != 0)) { - warning("No non-zero dose measurements found for the selected protein. Template will use fallback values.") + stop("No non-zero dose measurements found for the selected protein.") } - for (i in seq_len(nrow(complete_profile))) { - target = complete_profile$dose[i] - if (target == 0) { - # Control: match any zero-dose entry - zero_match = profile[profile$dose_numeric == 0, ] - if (nrow(zero_match) > 0) { - complete_profile$LogIntensities[i] = zero_match$LogIntensities[1] - } - } else if (any(profile$dose_numeric != 0)) { - # Non-zero: find closest on log scale - non_zero = profile[profile$dose_numeric != 0, ] - log_ratios = abs(log10(non_zero$dose_numeric) - log10(target)) - best = which.min(log_ratios) - complete_profile$LogIntensities[i] = non_zero$LogIntensities[best] - } - } + # Match by string representation to avoid floating point issues + complete_profile = complete_profile %>% + mutate(dose_str = as.character(dose)) %>% + left_join(profile %>% select(dose_str, LogIntensities), by = "dose_str") %>% + select(-dose_str) # Fill missing values with interpolation or nearest neighbor for (i in which(is.na(complete_profile$LogIntensities))) { @@ -325,32 +313,11 @@ futureExperimentSimulation = function(N_proteins = 300, return(complete_profile) } - # Load default templates as fallback for unspecified categories - default_template <- NULL - if (is.null(weak_proteins) || is.null(no_interaction_proteins)) { - template1_path <- system.file("extdata", "template1.RDS", package = "MSstatsResponse") - if (file.exists(template1_path)) { - default_template <- readRDS(template1_path) - } - } - - # Extract templates: use user data for specified proteins, defaults for others + # Extract templates: use user data for specified proteins, baseline for others template = list( strong_interaction = .getMeanProfile(strong_proteins, drug_data, concentrations), - weak_interaction = if (!is.null(weak_proteins)) { - .getMeanProfile(weak_proteins, drug_data, concentrations) - } else if (!is.null(default_template)) { - default_template$weak_interaction[default_template$weak_interaction$dose %in% concentrations, ] - } else { - .getMeanProfile(NULL, drug_data, concentrations) - }, - no_interaction = if (!is.null(no_interaction_proteins)) { - .getMeanProfile(no_interaction_proteins, drug_data, concentrations) - } else if (!is.null(default_template)) { - default_template$no_interaction[default_template$no_interaction$dose %in% concentrations, ] - } else { - .getMeanProfile(NULL, drug_data, concentrations) - } + weak_interaction = .getMeanProfile(weak_proteins, drug_data, concentrations), + no_interaction = .getMeanProfile(no_interaction_proteins, drug_data, concentrations) ) # Validate template format diff --git a/R/TPR_Power_Curve.R b/R/TPR_Power_Curve.R index 37f42a9..980a2de 100644 --- a/R/TPR_Power_Curve.R +++ b/R/TPR_Power_Curve.R @@ -60,7 +60,11 @@ result <- result[!sapply(result, is.null)] if (length(result) == 0) { - stop("No valid concentration subsets could be generated for the given dose_range.") + stop(paste0("Cannot build concentration subsets for dose_range c(", + dose_range[1], ", ", dose_range[2], + "). You have ", length(concentrations), + " unique concentrations. dose_range[2] must be <= ", + length(concentrations), ".")) } return(result) } @@ -76,9 +80,9 @@ #' #' @return A data.frame with columns: Interaction, TPR, N_rep, NumConcs. #' @noRd -.run_one_tpr_simulation <- function(n_rep, concs, n_proteins, data = NULL, - protein = NULL, seed = 123) { - set.seed(seed + n_rep * 100 + length(concs)) +.run_one_tpr_simulation <- function(n_rep, concs, n_proteins, data, + protein, seed = 123) { + set.seed(seed) sim_args <- list( N_proteins = n_proteins, @@ -86,16 +90,12 @@ Concentrations = concs, IC50_Prediction = FALSE ) - if (!is.null(data)) { - sim_args$data <- data - } - if (!is.null(protein) && !is.null(data)) { - sim_args$strong_proteins <- protein - } + sim_args$data <- data + sim_args$strong_proteins <- protein temp_res <- do.call(futureExperimentSimulation, sim_args) temp_res$Hit_Rates_Data |> - dplyr::filter(Category %in% c("TPR (Strong)", "TPR (Weak)")) |> + dplyr::filter(Category == "TPR (Strong)") |> dplyr::mutate( N_rep = n_rep, NumConcs = length(concs), @@ -129,11 +129,11 @@ #' @param dose_range Integer vector of length 2, \code{c(min, max)}. The range #' of dose counts to sweep. For example, \code{c(2, 9)} evaluates designs #' with 2 doses, 3 doses, ..., up to 9 doses. -#' @param data Optional. User's prepared dose-response data (e.g., from -#' \code{MSstatsPrepareDoseResponseFit}). If provided, protein-specific -#' templates are extracted from this data instead of using defaults. -#' @param protein Optional. Character string specifying a protein ID to use as -#' the strong interaction template. Only used when \code{data} is provided. +#' @param data User's prepared dose-response data (e.g., from +#' \code{MSstatsPrepareDoseResponseFit}). Protein-specific templates are +#' extracted from this data for the simulation. +#' @param protein Character string specifying a protein ID to use as +#' the interaction template for the power analysis. #' @param n_proteins Integer. Number of proteins to simulate per run. Larger #' values give more stable estimates but increase runtime. Default: 1000. #' @@ -171,7 +171,7 @@ #' @importFrom dplyr filter mutate select if_else #' @export run_tpr_simulation <- function(rep_range, concentrations, dose_range, - data = NULL, protein = NULL, n_proteins = 1000) { + data, protein, n_proteins = 1000) { if (!is.numeric(rep_range) || length(rep_range) != 2 || rep_range[1] > rep_range[2]) { stop("rep_range must be a numeric vector of length 2 with c(min, max) where min <= max.") } @@ -181,9 +181,6 @@ run_tpr_simulation <- function(rep_range, concentrations, dose_range, if (dose_range[1] < 2) { stop("dose_range minimum must be at least 2 (control + one treatment).") } - if (!is.null(data) && is.null(protein)) { - stop("protein must be specified when data is provided.") - } conc_subsets <- .build_concentration_ladders(concentrations, dose_range) k_grid <- as.integer(names(conc_subsets)) diff --git a/man/run_tpr_simulation.Rd b/man/run_tpr_simulation.Rd index 2e5fa53..1df666e 100644 --- a/man/run_tpr_simulation.Rd +++ b/man/run_tpr_simulation.Rd @@ -8,8 +8,8 @@ run_tpr_simulation( rep_range, concentrations, dose_range, - data = NULL, - protein = NULL, + data, + protein, n_proteins = 1000 ) } @@ -25,12 +25,12 @@ available in the experiment (e.g., \code{c(0, 1, 3, 10, 30, 100, 300, 1000, 3000 of dose counts to sweep. For example, \code{c(2, 9)} evaluates designs with 2 doses, 3 doses, ..., up to 9 doses.} -\item{data}{Optional. User's prepared dose-response data (e.g., from -\code{MSstatsPrepareDoseResponseFit}). If provided, protein-specific -templates are extracted from this data instead of using defaults.} +\item{data}{User's prepared dose-response data (e.g., from +\code{MSstatsPrepareDoseResponseFit}). Protein-specific templates are +extracted from this data for the simulation.} -\item{protein}{Optional. Character string specifying a protein ID to use as -the strong interaction template. Only used when \code{data} is provided.} +\item{protein}{Character string specifying a protein ID to use as +the interaction template for the power analysis.} \item{n_proteins}{Integer. Number of proteins to simulate per run. Larger values give more stable estimates but increase runtime. Default: 1000.} diff --git a/tests/testthat/test-ExperimentalDesignSimulation.R b/tests/testthat/test-ExperimentalDesignSimulation.R index c853264..8e85747 100644 --- a/tests/testthat/test-ExperimentalDesignSimulation.R +++ b/tests/testthat/test-ExperimentalDesignSimulation.R @@ -158,15 +158,14 @@ test_that(".extractTemplatesFromData handles empty protein lists", { concentrations = c(0, 1000) ) - # Null protein lists fall back to default templates from template1.RDS - # (not flat profiles): verify structure is valid + # Null protein lists get flat baseline profiles expect_equal(nrow(template$weak_interaction), 2) expect_equal(nrow(template$no_interaction), 2) expect_true(all(c("dose", "Intensity", "LogIntensities", "ratio") %in% names(template$weak_interaction))) expect_true(all(c("dose", "Intensity", "LogIntensities", "ratio") %in% names(template$no_interaction))) - # Control dose should have ratio = 1 - expect_equal(template$weak_interaction$ratio[template$weak_interaction$dose == 0], 1) - expect_equal(template$no_interaction$ratio[template$no_interaction$dose == 0], 1) + # Flat profiles should have ratio = 1 for all doses + expect_equal(template$weak_interaction$ratio, c(1, 1)) + expect_equal(template$no_interaction$ratio, c(1, 1)) }) diff --git a/tests/testthat/test-TPR_Power_Curve.R b/tests/testthat/test-TPR_Power_Curve.R index 19f7ef8..2d3f55f 100644 --- a/tests/testthat/test-TPR_Power_Curve.R +++ b/tests/testthat/test-TPR_Power_Curve.R @@ -5,7 +5,6 @@ test_that("build_concentration_ladders returns correct subset sizes", { expect_equal(length(result), 4) expect_equal(names(result), c("2", "3", "4", "5")) - # All subsets should contain control and highest dose for (k in names(result)) { expect_true(0 %in% result[[k]]) expect_true(3000 %in% result[[k]]) @@ -30,6 +29,13 @@ test_that("build_concentration_ladders errors on fewer than 2 concentrations", { ) }) +test_that("build_concentration_ladders errors without control dose", { + expect_error( + MSstatsResponse:::.build_concentration_ladders(c(1, 10, 100), c(2, 3)), + "must include 0" + ) +}) + test_that("build_concentration_ladders handles arbitrary user concentrations", { concs <- c(0, 2, 4, 20, 50, 100, 500, 1000) result <- MSstatsResponse:::.build_concentration_ladders(concs, c(2, 6)) @@ -42,82 +48,69 @@ test_that("build_concentration_ladders handles arbitrary user concentrations", { }) test_that("run_tpr_simulation validates rep_range", { + mock_data <- data.frame(protein = "P1", drug = "D1", dose = 0, response = 20) expect_error( - run_tpr_simulation(c(3, 1), c(0, 100, 1000), c(2, 3)), + run_tpr_simulation(c(3, 1), c(0, 100, 1000), c(2, 3), data = mock_data, protein = "P1"), "min <= max" ) expect_error( - run_tpr_simulation("bad", c(0, 100, 1000), c(2, 3)), + run_tpr_simulation("bad", c(0, 100, 1000), c(2, 3), data = mock_data, protein = "P1"), "numeric vector" ) }) test_that("run_tpr_simulation validates dose_range", { + mock_data <- data.frame(protein = "P1", drug = "D1", dose = 0, response = 20) expect_error( - run_tpr_simulation(c(1, 2), c(0, 100, 1000), c(5, 2)), + run_tpr_simulation(c(1, 2), c(0, 100, 1000), c(5, 2), data = mock_data, protein = "P1"), "min <= max" ) expect_error( - run_tpr_simulation(c(1, 2), c(0, 100, 1000), c(1, 3)), + run_tpr_simulation(c(1, 2), c(0, 100, 1000), c(1, 3), data = mock_data, protein = "P1"), "at least 2" ) }) test_that("run_tpr_simulation returns correct structure", { + mock_data <- data.frame( + protein = rep("P1", 6), + drug = c("DMSO", "DMSO", "Drug1", "Drug1", "Drug1", "Drug1"), + dose = c(0, 0, 10, 100, 1000, 1000), + response = c(20, 20, 18, 15, 12, 12) + ) + results <- run_tpr_simulation( rep_range = c(1, 2), concentrations = c(0, 10, 100, 1000), dose_range = c(2, 3), + data = mock_data, + protein = "P1", n_proteins = 50 ) expect_true(is.data.frame(results)) expect_true(all(c("Interaction", "TPR", "N_rep", "NumConcs") %in% colnames(results))) - expect_true(all(results$Interaction %in% c("Strong", "Weak"))) + expect_true(all(results$Interaction == "Strong")) expect_true(all(results$TPR >= 0 & results$TPR <= 100)) }) test_that("plot_tpr_power_curve returns a plotly object", { + mock_data <- data.frame( + protein = rep("P1", 6), + drug = c("DMSO", "DMSO", "Drug1", "Drug1", "Drug1", "Drug1"), + dose = c(0, 0, 10, 100, 1000, 1000), + response = c(20, 20, 18, 15, 12, 12) + ) + results <- run_tpr_simulation( rep_range = c(1, 2), concentrations = c(0, 10, 100, 1000), dose_range = c(2, 3), + data = mock_data, + protein = "P1", n_proteins = 50 ) p <- plot_tpr_power_curve(results) expect_true(inherits(p, "plotly")) -}) - -test_that("run_tpr_simulation accepts data and protein parameters", { - # Verify the function signature accepts these params without error - # (actual template extraction is tested via integration/manual testing) - expect_error( - run_tpr_simulation( - rep_range = c(1, 1), - concentrations = c(0, 100, 1000), - dose_range = c(2, 3), - n_proteins = 10, - data = NULL, - protein = NULL - ), - NA # NA means "expect NO error" - ) -}) - -test_that("run_tpr_simulation errors when data provided without protein", { - mock_data <- data.frame( - protein = "P1", drug = "Drug1", dose = c(0, 1e-6), response = c(20, 18) - ) - expect_error( - run_tpr_simulation( - rep_range = c(1, 1), - concentrations = c(0, 1000), - dose_range = c(2, 2), - n_proteins = 10, - data = mock_data, - protein = NULL - ), - "protein must be specified when data is provided" - ) }) \ No newline at end of file From 6236e60b143ff38b8520b0e63cb214d0a93a366a Mon Sep 17 00:00:00 2001 From: Swaraj Patil Date: Fri, 3 Apr 2026 00:21:37 -0400 Subject: [PATCH 08/11] Update the example in the docs for TPR simulation function --- R/TPR_Power_Curve.R | 27 ++++++++++++++------------- man/run_tpr_simulation.Rd | 27 ++++++++++++++------------- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/R/TPR_Power_Curve.R b/R/TPR_Power_Curve.R index 980a2de..9ca2942 100644 --- a/R/TPR_Power_Curve.R +++ b/R/TPR_Power_Curve.R @@ -146,22 +146,23 @@ #' } #' #' @examples -#' # Quick example with small simulation (for speed) -#' results <- run_tpr_simulation( -#' rep_range = c(1, 2), -#' concentrations = c(0, 10, 100, 1000), -#' dose_range = c(2, 4), -#' n_proteins = 50 +#' \dontrun{ +#' # Prepare user data +#' mock_data <- data.frame( +#' protein = rep("P1", 6), +#' drug = c("DMSO", "DMSO", "Drug1", "Drug1", "Drug1", "Drug1"), +#' dose = c(0, 0, 10, 100, 1000, 1000), +#' response = c(20, 20, 18, 15, 12, 12) #' ) -#' head(results) #' -#' \dontrun{ -#' # Full power analysis with standard chemoproteomic doses +#' # Run power analysis #' results <- run_tpr_simulation( -#' rep_range = c(1, 5), -#' concentrations = c(0, 1, 3, 10, 30, 100, 300, 1000, 3000), -#' dose_range = c(2, 9), -#' n_proteins = 1000 +#' rep_range = c(1, 3), +#' concentrations = c(0, 10, 100, 1000), +#' dose_range = c(2, 4), +#' data = mock_data, +#' protein = "P1", +#' n_proteins = 300 #' ) #' #' # Visualize results diff --git a/man/run_tpr_simulation.Rd b/man/run_tpr_simulation.Rd index 1df666e..db7a1c2 100644 --- a/man/run_tpr_simulation.Rd +++ b/man/run_tpr_simulation.Rd @@ -62,22 +62,23 @@ intermediate doses are added by choosing the one farthest from the log-median of the already-selected set. } \examples{ -# Quick example with small simulation (for speed) -results <- run_tpr_simulation( - rep_range = c(1, 2), - concentrations = c(0, 10, 100, 1000), - dose_range = c(2, 4), - n_proteins = 50 +\dontrun{ +# Prepare user data +mock_data <- data.frame( + protein = rep("P1", 6), + drug = c("DMSO", "DMSO", "Drug1", "Drug1", "Drug1", "Drug1"), + dose = c(0, 0, 10, 100, 1000, 1000), + response = c(20, 20, 18, 15, 12, 12) ) -head(results) -\dontrun{ -# Full power analysis with standard chemoproteomic doses +# Run power analysis results <- run_tpr_simulation( - rep_range = c(1, 5), - concentrations = c(0, 1, 3, 10, 30, 100, 300, 1000, 3000), - dose_range = c(2, 9), - n_proteins = 1000 + rep_range = c(1, 3), + concentrations = c(0, 10, 100, 1000), + dose_range = c(2, 4), + data = mock_data, + protein = "P1", + n_proteins = 300 ) # Visualize results From 73527a2112fc39bd8d9c2bfb21c4c34b6fb4e160 Mon Sep 17 00:00:00 2001 From: Swaraj Patil Date: Sat, 4 Apr 2026 17:54:13 -0400 Subject: [PATCH 09/11] Update the TPR simulation plot color palettes for replicate levels --- NAMESPACE | 1 - R/TPR_Power_Curve.R | 24 ++++++++++++++++-------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 5dbaa32..24d4bd3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,7 +42,6 @@ importFrom(ggplot2,theme_bw) importFrom(ggplot2,theme_classic) importFrom(ggplot2,theme_minimal) importFrom(ggplot2,ylim) -importFrom(grDevices,colorRampPalette) importFrom(parallel,clusterExport) importFrom(parallel,makeCluster) importFrom(parallel,parLapply) diff --git a/R/TPR_Power_Curve.R b/R/TPR_Power_Curve.R index 9ca2942..601d655 100644 --- a/R/TPR_Power_Curve.R +++ b/R/TPR_Power_Curve.R @@ -216,8 +216,6 @@ run_tpr_simulation <- function(rep_range, concentrations, dose_range, #' Create a single TPR panel plot with color gradient by replicate count #' #' @param data Data frame with TPR results for one interaction type. -#' @param color_low Character. Light shade for fewest replicates. -#' @param color_high Character. Dark shade for most replicates. #' @param k_grid Integer vector. X-axis breaks. #' @param show_legend Logical. Whether to display the legend. #' @@ -225,13 +223,24 @@ run_tpr_simulation <- function(rep_range, concentrations, dose_range, #' @importFrom ggplot2 ggplot aes geom_line geom_point scale_x_continuous #' scale_y_continuous scale_color_manual labs theme_bw theme element_text #' @noRd -.make_tpr_panel <- function(data, color_low, color_high, k_grid, show_legend = FALSE) { +.make_tpr_panel <- function(data, k_grid, show_legend = FALSE) { rep_levels <- sort(unique(data$N_rep)) n_levels <- length(rep_levels) - # Generate gradient from light to dark - color_palette <- grDevices::colorRampPalette(c(color_low, color_high))(n_levels) - names(color_palette) <- as.character(rep_levels) + # 10 distinct colors for up to 10 replicate levels + all_colors <- c( + "#1f78b4", # strong blue + "#ff964f", # orange + "#17becf", # cyan + "#1b9e77", # teal + "#f0b6d5", # pink + "#DC143C", # crimson + "#6a3d9a", # purple + "#9e9ac8", # lavender + "#525252", # cool dark gray + "#08306b" # deep navy + ) + color_palette <- setNames(all_colors[seq_len(n_levels)], as.character(rep_levels)) p <- ggplot2::ggplot(data, ggplot2::aes(x = NumConcs, y = TPR, @@ -284,7 +293,6 @@ run_tpr_simulation <- function(rep_range, concentrations, dose_range, #' @importFrom ggplot2 ggplot aes geom_line geom_point scale_x_continuous #' scale_y_continuous scale_color_manual labs theme_bw theme element_text #' @importFrom plotly ggplotly layout -#' @importFrom grDevices colorRampPalette #' @export plot_tpr_power_curve <- function(simulation_results) { k_grid <- sort(unique(simulation_results$NumConcs)) @@ -292,7 +300,7 @@ plot_tpr_power_curve <- function(simulation_results) { # Use only the Strong interaction results (user-selected protein template) results_protein <- simulation_results[simulation_results$Interaction == "Strong", ] - p <- .make_tpr_panel(results_protein, "#a6dba0", "#1b7837", k_grid, TRUE) + p <- .make_tpr_panel(results_protein, k_grid, TRUE) plotly::ggplotly(p) |> plotly::layout( margin = list(t = 60), From 71a46e94585d2a8409f8eda30f92159e5c99364a Mon Sep 17 00:00:00 2001 From: Swaraj Patil Date: Mon, 6 Apr 2026 13:01:26 -0400 Subject: [PATCH 10/11] - Throw an exception that prevents a user from simulating with more than the number of available concentrations - Verify that the TPR increases as the number of replicates/doses increases? --- R/TPR_Power_Curve.R | 21 +++++++++------------ tests/testthat/test-TPR_Power_Curve.R | 21 +++++++++++++++++++++ 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/R/TPR_Power_Curve.R b/R/TPR_Power_Curve.R index 601d655..7295903 100644 --- a/R/TPR_Power_Curve.R +++ b/R/TPR_Power_Curve.R @@ -25,6 +25,12 @@ stop("Concentrations must include 0 (control).") } + if (dose_range[2] > length(concentrations)) { + stop(paste0("dose_range maximum (", dose_range[2], + ") exceeds the number of available concentrations (", + length(concentrations), ").")) + } + control <- min(concentrations) highest <- max(concentrations) candidates <- setdiff(concentrations, c(control, highest)) @@ -55,17 +61,8 @@ } # Filter to requested dose_range - k_range <- seq(dose_range[1], min(dose_range[2], length(concentrations))) + k_range <- seq(dose_range[1], dose_range[2]) result <- selection_order[as.character(k_range)] - result <- result[!sapply(result, is.null)] - - if (length(result) == 0) { - stop(paste0("Cannot build concentration subsets for dose_range c(", - dose_range[1], ", ", dose_range[2], - "). You have ", length(concentrations), - " unique concentrations. dose_range[2] must be <= ", - length(concentrations), ".")) - } return(result) } @@ -74,8 +71,8 @@ #' @param n_rep Integer. Number of replicates per dose. #' @param concs Numeric vector. Concentrations to use. #' @param n_proteins Integer. Number of proteins to simulate. -#' @param data Optional prepared dose-response data for user-based templates. -#' @param protein Optional protein ID for strong interaction template. +#' @param data Prepared dose-response data for user-based templates. +#' @param protein Protein ID for strong interaction template. #' @param seed Integer. Base seed for reproducibility. #' #' @return A data.frame with columns: Interaction, TPR, N_rep, NumConcs. diff --git a/tests/testthat/test-TPR_Power_Curve.R b/tests/testthat/test-TPR_Power_Curve.R index 2d3f55f..ade6877 100644 --- a/tests/testthat/test-TPR_Power_Curve.R +++ b/tests/testthat/test-TPR_Power_Curve.R @@ -92,6 +92,27 @@ test_that("run_tpr_simulation returns correct structure", { expect_true(all(c("Interaction", "TPR", "N_rep", "NumConcs") %in% colnames(results))) expect_true(all(results$Interaction == "Strong")) expect_true(all(results$TPR >= 0 & results$TPR <= 100)) + + # TPR should generally increase with more replicates for a given concentration count + for (nc in unique(results$NumConcs)) { + subset <- results[results$NumConcs == nc, ] + subset <- subset[order(subset$N_rep), ] + if (nrow(subset) >= 2) { + # Allow for some noise, but overall trend should be non-decreasing + expect_true(subset$TPR[nrow(subset)] >= subset$TPR[1], + info = paste("TPR should increase with replicates at NumConcs =", nc)) + } + } + + # TPR should generally increase with more concentrations for a given replicate count + for (nr in unique(results$N_rep)) { + subset <- results[results$N_rep == nr, ] + subset <- subset[order(subset$NumConcs), ] + if (nrow(subset) >= 2) { + expect_true(subset$TPR[nrow(subset)] >= subset$TPR[1], + info = paste("TPR should increase with concentrations at N_rep =", nr)) + } + } }) test_that("plot_tpr_power_curve returns a plotly object", { From 308e2d86c78efcfee50f250d288044dd64b8d463 Mon Sep 17 00:00:00 2001 From: Swaraj Patil Date: Tue, 7 Apr 2026 15:00:37 -0400 Subject: [PATCH 11/11] - Enhance the test cases for ensuring the TPR simulation returns correct structure - Small arrangement in the power curve color palette application --- R/TPR_Power_Curve.R | 14 +++++++---- man/plot_tpr_power_curve.Rd | 9 ++++--- tests/testthat/test-TPR_Power_Curve.R | 35 +++++++++++++++++---------- 3 files changed, 36 insertions(+), 22 deletions(-) diff --git a/R/TPR_Power_Curve.R b/R/TPR_Power_Curve.R index 7295903..8f066e6 100644 --- a/R/TPR_Power_Curve.R +++ b/R/TPR_Power_Curve.R @@ -271,10 +271,11 @@ run_tpr_simulation <- function(rep_range, concentrations, dose_range, #' the strong interaction category) are displayed. Line color shading goes #' from light (fewest replicates) to dark (most replicates). #' -#' @param simulation_results A \code{data.frame} returned by -#' \code{\link{run_tpr_simulation}}. +#' @param static Logical. If \code{TRUE}, returns a static \code{ggplot} +#' object suitable for PDF export. Default: \code{FALSE}. #' -#' @return An interactive \code{plotly} object. +#' @return An interactive \code{plotly} object, or a static \code{ggplot} +#' object if \code{static = TRUE}. #' #' @examples #' \dontrun{ @@ -291,13 +292,16 @@ run_tpr_simulation <- function(rep_range, concentrations, dose_range, #' scale_y_continuous scale_color_manual labs theme_bw theme element_text #' @importFrom plotly ggplotly layout #' @export -plot_tpr_power_curve <- function(simulation_results) { +plot_tpr_power_curve <- function(simulation_results, static = FALSE) { k_grid <- sort(unique(simulation_results$NumConcs)) # Use only the Strong interaction results (user-selected protein template) results_protein <- simulation_results[simulation_results$Interaction == "Strong", ] - p <- .make_tpr_panel(results_protein, k_grid, TRUE) + p <- .make_tpr_panel(results_protein, k_grid, TRUE) + + ggplot2::ggtitle("Interaction detection power") + + if (static) return(p) plotly::ggplotly(p) |> plotly::layout( margin = list(t = 60), diff --git a/man/plot_tpr_power_curve.Rd b/man/plot_tpr_power_curve.Rd index 3d79719..b3c74d4 100644 --- a/man/plot_tpr_power_curve.Rd +++ b/man/plot_tpr_power_curve.Rd @@ -4,14 +4,15 @@ \alias{plot_tpr_power_curve} \title{Visualize detection power across experimental designs} \usage{ -plot_tpr_power_curve(simulation_results) +plot_tpr_power_curve(simulation_results, static = FALSE) } \arguments{ -\item{simulation_results}{A \code{data.frame} returned by -\code{\link{run_tpr_simulation}}.} +\item{static}{Logical. If \code{TRUE}, returns a static \code{ggplot} +object suitable for PDF export. Default: \code{FALSE}.} } \value{ -An interactive \code{plotly} object. +An interactive \code{plotly} object, or a static \code{ggplot} +object if \code{static = TRUE}. } \description{ Creates an interactive plot showing how the true positive rate diff --git a/tests/testthat/test-TPR_Power_Curve.R b/tests/testthat/test-TPR_Power_Curve.R index ade6877..35c0b9b 100644 --- a/tests/testthat/test-TPR_Power_Curve.R +++ b/tests/testthat/test-TPR_Power_Curve.R @@ -80,12 +80,12 @@ test_that("run_tpr_simulation returns correct structure", { ) results <- run_tpr_simulation( - rep_range = c(1, 2), + rep_range = c(1, 3), concentrations = c(0, 10, 100, 1000), - dose_range = c(2, 3), + dose_range = c(2, 4), data = mock_data, protein = "P1", - n_proteins = 50 + n_proteins = 200 ) expect_true(is.data.frame(results)) @@ -93,24 +93,33 @@ test_that("run_tpr_simulation returns correct structure", { expect_true(all(results$Interaction == "Strong")) expect_true(all(results$TPR >= 0 & results$TPR <= 100)) - # TPR should generally increase with more replicates for a given concentration count + noise_margin <- 5 + for (nc in unique(results$NumConcs)) { subset <- results[results$NumConcs == nc, ] subset <- subset[order(subset$N_rep), ] if (nrow(subset) >= 2) { - # Allow for some noise, but overall trend should be non-decreasing - expect_true(subset$TPR[nrow(subset)] >= subset$TPR[1], - info = paste("TPR should increase with replicates at NumConcs =", nc)) + for (j in 2:nrow(subset)) { + expect_true(subset$TPR[j] >= subset$TPR[j - 1] - noise_margin, + info = paste0("TPR should not drop significantly between rep ", + subset$N_rep[j - 1], " and rep ", subset$N_rep[j], + " at NumConcs = ", nc, + " (was ", subset$TPR[j - 1], ", got ", subset$TPR[j], ")")) + } } } - # TPR should generally increase with more concentrations for a given replicate count for (nr in unique(results$N_rep)) { subset <- results[results$N_rep == nr, ] subset <- subset[order(subset$NumConcs), ] if (nrow(subset) >= 2) { - expect_true(subset$TPR[nrow(subset)] >= subset$TPR[1], - info = paste("TPR should increase with concentrations at N_rep =", nr)) + for (j in 2:nrow(subset)) { + expect_true(subset$TPR[j] >= subset$TPR[j - 1] - noise_margin, + info = paste0("TPR should not drop significantly between ", + subset$NumConcs[j - 1], " and ", subset$NumConcs[j], + " concentrations at N_rep = ", nr, + " (was ", subset$TPR[j - 1], ", got ", subset$TPR[j], ")")) + } } } }) @@ -124,12 +133,12 @@ test_that("plot_tpr_power_curve returns a plotly object", { ) results <- run_tpr_simulation( - rep_range = c(1, 2), + rep_range = c(1, 3), concentrations = c(0, 10, 100, 1000), - dose_range = c(2, 3), + dose_range = c(2, 4), data = mock_data, protein = "P1", - n_proteins = 50 + n_proteins = 200 ) p <- plot_tpr_power_curve(results)