# Experimentalists need to know how many samples are required to reproduce
# results in a future experiment. Typically, they run a pilot study with a few
# samples, assess the variation, and then use that variation to plan a larger,
# real study. However, more samples cost more money.
# Create interaction detection power curves using simulations --
library(MSstatsResponse)
library(tidyverse)
library(data.table)
# Single example run: 300 proteins, 4 replicates, four concentrations,
# with IC50 prediction switched off.
temp_res <- futureExperimentSimulation(
  N_proteins = 300,
  N_rep = 4,
  Concentrations = c(0, 1, 1000, 3000),
  IC50_Prediction = FALSE
)

# Show the hit-rate table returned by the simulation
temp_res$Hit_Rates_Data
# Concentration sets by number of concentrations:
# 2 concentrations : 0, 3000
# 3 concentrations : 0, 1000, 3000
# 4 concentrations : 0, 1, 1000, 3000
# 5 concentrations : 0, 1, 100, 1000, 3000
# 6 concentrations : 0, 1, 100, 300, 1000, 3000
# 7 concentrations : 0, 1, 10, 100, 300, 1000, 3000
# 8 concentrations : 0, 1, 10, 30, 100, 300, 1000, 3000
# 9 concentrations : 0, 1, 3, 10, 30, 100, 300, 1000, 3000
# ---- Setup -------------------------------------------------------------------
library(dplyr)
library(purrr)
library(tidyr)
library(ggplot2)
# Concentrations that exist in the experiment; the per-K sets below are
# chosen from these values.
allowed_concs <- c(0, 1, 3, 10, 30, 100, 300, 1000, 3000)

# Hand-picked concentration set for each number of concentrations (K),
# keyed by K as a string.
conc_map <- list(
  "2" = c(0, 3000),
  "3" = c(0, 1000, 3000),
  "4" = c(0, 1, 1000, 3000),
  "5" = c(0, 1, 100, 1000, 3000),
  "6" = c(0, 1, 100, 300, 1000, 3000),
  "7" = c(0, 1, 10, 100, 300, 1000, 3000),
  "8" = c(0, 1, 10, 30, 100, 300, 1000, 3000),
  "9" = c(0, 1, 3, 10, 30, 100, 300, 1000, 3000)
)

# Grid to sweep: K taken from the keys of conc_map (2..9), replicates 1-5
k_grid <- as.integer(names(conc_map))
# rep_grid <- 1:4
rep_grid <- seq_len(5)
# Run the simulation for one (replicates, number-of-concentrations) setting
# and return its strong/weak true-positive rates.
#
# Arguments:
#   n_rep  - value passed as N_rep to futureExperimentSimulation()
#   k_conc - number of concentrations; selects the entry of conc_map to use
#   seed   - base seed; the RNG is seeded with seed + n_rep * 100 + k_conc
#
# Returns the "TPR (Strong)" / "TPR (Weak)" rows of Hit_Rates_Data with the
# columns Interaction, TPR, N_rep and NumConcs.
#
# Stops with an error when conc_map has no entry for k_conc, or when that
# entry contains a value that is not in allowed_concs.
run_one <- function(n_rep, k_conc, seed = 123) {
  set.seed(seed + n_rep * 100 + k_conc)

  # conc_map is keyed by K as a string
  dose_set <- conc_map[[as.character(k_conc)]]
  if (is.null(dose_set)) {
    stop("No concentration set defined for K = ", k_conc)
  }
  if (any(!(dose_set %in% allowed_concs))) {
    stop("Concentration set for K=", k_conc, " includes values outside allowed_concs.")
  }

  sim <- futureExperimentSimulation(
    N_proteins = 300,
    N_rep = n_rep,
    Concentrations = dose_set,
    IC50_Prediction = FALSE
  )

  # Keep only the two true-positive-rate categories
  tpr_rows <- dplyr::filter(
    sim$Hit_Rates_Data,
    Category %in% c("TPR (Strong)", "TPR (Weak)")
  )

  # Tag each row with the configuration that produced it
  tpr_rows <- dplyr::mutate(
    tpr_rows,
    N_rep = n_rep,
    NumConcs = length(dose_set),
    Interaction = if_else(Category == "TPR (Strong)", "Strong", "Weak")
  )

  dplyr::select(tpr_rows, Interaction, TPR = Percent, N_rep, NumConcs)
}
# Run every (replicates, K) combination and stack the per-run tables by row.
# expand.grid() varies N_rep fastest, so runs proceed replicate-first within
# each K.
grid_df <- as_tibble(expand.grid(N_rep = rep_grid, k_conc = k_grid))
results_long <- pmap_dfr(
  grid_df,
  function(N_rep, k_conc) run_one(n_rep = N_rep, k_conc = k_conc)
)
# Visualize: split the sweep results by interaction strength
results_strong <- dplyr::filter(results_long, Interaction == "Strong")
results_weak <- dplyr::filter(results_long, Interaction == "Weak")

# Power curve built from the weak-interaction rows only: TPR against the
# number of concentrations, with one line type per replicate count.
p_power <- ggplot(
  data = results_weak,
  mapping = aes(
    x = NumConcs,
    y = TPR,
    color = Interaction,
    linetype = factor(N_rep)
  )
) +
  geom_line(linewidth = 1.5) +
  geom_point(size = 2) +
  scale_x_continuous(breaks = k_grid) +
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, 20)) +
  scale_color_manual(
    values = c("Strong" = "#1b9e77", "Weak" = "#d95f02")
  ) +
  # Four-replicate alternative:
  # scale_linetype_manual(values = c("1" = "solid", "2" = "dashed", "3" = "dotdash", "4" = "dotted")) +
  scale_linetype_manual(
    values = c(
      "1" = "dotted",
      "2" = "dotdash",
      "3" = "dashed",
      "4" = "longdash",
      "5" = "solid"
    )
  ) +
  labs(
    title = "Interaction Detection Power Curve",
    # subtitle = "Lines = Strong vs. Weak; Line type = Replicates per dose (1–4)",
    x = "Number of concentrations ",
    y = "True Positive Rate (%)",
    color = "Interaction",
    linetype = "Replicates"
  ) +
  theme_bw(base_size = 16) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    legend.position = "right"
  )

# Render the plot
p_power
# Story
# Experimentalists need to know how many samples are required to reproduce
# results in a future experiment. Typically, they run a pilot study with a few
# samples, assess the variation, and then use that variation to plan a larger,
# real study. However, more samples cost more money.
# Subtasks
# Script