diff --git a/R/getSubnetworkFromIndra.R b/R/getSubnetworkFromIndra.R index e875977..af40f3f 100644 --- a/R/getSubnetworkFromIndra.R +++ b/R/getSubnetworkFromIndra.R @@ -24,6 +24,12 @@ #' 0.3 #' @param sources_filter filtering only on specific sources. Default is no filter, i.e. NULL. #' Otherwise, should be a list, e.g. c('reach', 'medscan'). +#' @param logfc_cutoff absolute log fold change cutoff for filtering proteins. +#' Only proteins with |logFC| greater than this value will be retained. Default +#' is NULL, i.e. no logFC filtering. +#' @param force_include_proteins character vector of protein identifiers to exempt +#' from all filtering steps. These proteins will be retained regardless of p-value, +#' logFC, or other filtering criteria. Default is NULL, i.e. no exemptions. #' #' @return list of 2 data.frames, nodes and edges #' @@ -45,8 +51,10 @@ getSubnetworkFromIndra <- function(input, paper_count_cutoff = 1, evidence_count_cutoff = 1, correlation_cutoff = 0.3, - sources_filter = NULL) { - input <- .filterGetSubnetworkFromIndraInput(input, pvalueCutoff) + sources_filter = NULL, + logfc_cutoff = NULL, + force_include_proteins = NULL) { + input <- .filterGetSubnetworkFromIndraInput(input, pvalueCutoff, logfc_cutoff, force_include_proteins) .validateGetSubnetworkFromIndraInput(input, protein_level_data, sources_filter) res <- .callIndraCogexApi(input$HgncId) res <- .filterIndraResponse(res, statement_types, evidence_count_cutoff, sources_filter) diff --git a/R/utils_getSubnetworkFromIndra.R b/R/utils_getSubnetworkFromIndra.R index 318c7b6..cc13156 100644 --- a/R/utils_getSubnetworkFromIndra.R +++ b/R/utils_getSubnetworkFromIndra.R @@ -86,19 +86,48 @@ #' Filter groupComparison result input based on user-defined cutoffs #' @param input groupComparison result #' @param pvalueCutoff p-value cutoff +#' @param logfc_cutoff logFC cutoff +#' @param force_include_proteins list of proteins to exempt from filtering #' @return filtered groupComparison result #' @keywords 
#' Filter groupComparison result input based on user-defined cutoffs
#'
#' A row passes the standard filters when its `adj.pvalue` is not missing and
#' is below `pvalueCutoff`, its absolute `log2FC` is above `logfc_cutoff`, and
#' its `issue` value is `NA`. Rows whose `Protein` is listed in
#' `force_include_proteins` are kept even when they fail those filters.
#' The original row order is preserved and rows are never collapsed by
#' protein, so the result has the same layout whether or not exemptions are
#' supplied.
#'
#' @param input groupComparison result. The columns `Protein`, `adj.pvalue`
#'   and `issue` are read, plus `log2FC` when `logfc_cutoff` is given.
#' @param pvalueCutoff adjusted p-value cutoff (rows must be strictly below
#'   it), or NULL to skip the p-value cutoff. Rows with a missing `adj.pvalue`
#'   are dropped either way.
#' @param logfc_cutoff single positive number; rows must have an absolute
#'   `log2FC` strictly greater than it. Default is NULL, i.e. no logFC
#'   filtering.
#' @param force_include_proteins character vector of proteins to exempt from
#'   filtering. Default is NULL, i.e. no exemptions. A warning is raised for
#'   entries that do not occur in `input$Protein`.
#' @return filtered groupComparison result with `Protein` converted to
#'   character
#' @keywords internal
#' @noRd
.filterGetSubnetworkFromIndraInput <- function(input,
                                               pvalueCutoff,
                                               logfc_cutoff = NULL,
                                               force_include_proteins = NULL) {
  # Validate all arguments before doing any work, so that a bad argument never
  # surfaces as an error from the middle of the filtering steps.
  if (!is.null(force_include_proteins) &&
      !is.character(force_include_proteins)) {
    stop("force_include_proteins must be a character vector")
  }
  if (!is.null(logfc_cutoff)) {
    # The is.na() test matters: NA_real_ is numeric and of length one, and
    # `NA <= 0` inside if() would fail with an unrelated error message.
    cutoff_is_valid <- is.numeric(logfc_cutoff) &&
      length(logfc_cutoff) == 1 &&
      !is.na(logfc_cutoff) &&
      logfc_cutoff > 0
    if (!cutoff_is_valid) {
      stop("logfc_cutoff must be a single positive numeric value")
    }
  }

  # Rows that are exempt from every filter
  exempt <- rep(FALSE, nrow(input))
  if (!is.null(force_include_proteins)) {
    missing_prots <- setdiff(force_include_proteins, input$Protein)
    if (length(missing_prots) > 0) {
      warning(
        "force_include_proteins not found: ",
        paste(missing_prots, collapse = ", ")
      )
    }
    exempt <- input$Protein %in% force_include_proteins
  }

  # Rows that pass the standard filters
  keep <- !is.na(input$adj.pvalue)
  if (!is.null(pvalueCutoff)) {
    keep <- keep & input$adj.pvalue < pvalueCutoff
  }
  if (!is.null(logfc_cutoff)) {
    keep <- keep & !is.na(input$log2FC) & abs(input$log2FC) > logfc_cutoff
  }
  keep <- keep & is.na(input$issue)

  # Combine by row index rather than rbind() + duplicated(): a row that is
  # both exempt and passing appears once, the input order is kept, and
  # which() drops any NA left in the masks.
  selected_rows <- sort(union(which(keep), which(exempt)))
  input <- input[selected_rows, ]

  input$Protein <- as.character(input$Protein)
  input
}
NULL + sources_filter = NULL, + logfc_cutoff = NULL, + force_include_proteins = NULL ) } \arguments{ @@ -44,6 +46,14 @@ cutoff for edges with correlation less than a specified cutoff. Default is \item{sources_filter}{filtering only on specific sources. Default is no filter, i.e. NULL. Otherwise, should be a list, e.g. c('reach', 'medscan').} + +\item{logfc_cutoff}{absolute log fold change cutoff for filtering proteins. +Only proteins with |logFC| greater than this value will be retained. Default +is NULL, i.e. no logFC filtering.} + +\item{force_include_proteins}{character vector of protein identifiers to exempt +from all filtering steps. These proteins will be retained regardless of p-value, +logFC, or other filtering criteria. Default is NULL, i.e. no exemptions.} } \value{ list of 2 data.frames, nodes and edges