Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions R/getPathwaysFromIndra.R
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ getPathwaysFromIndra <- function(annotated_df, main_target = 'MEN1_HUMAN', targe
edgeToMetadataMapping[[key]]$data$stmt_type <- c(edge$type)
edgeToMetadataMapping[[key]]$source_id <- source_id
edgeToMetadataMapping[[key]]$target_id <- obj
edgeToMetadataMapping[[key]] <- MSstatsBioNet:::.addAdditionalMetadataToIndraEdge(
edgeToMetadataMapping[[key]], annotated_df, namespace
edgeToMetadataMapping[[key]] <- .addAdditionalMetadataToIndraEdge(
edgeToMetadataMapping[[key]], annotated_df
)
}
}
Expand Down
10 changes: 7 additions & 3 deletions R/getSubnetworkFromIndra.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@
#' @param force_include_proteins character vector of protein identifiers to exempt
#' from all filtering steps. These proteins will be retained regardless of p-value,
#' logFC, or other filtering criteria. Default is NULL, i.e. no exemptions.
#' @param force_include_other character vector of identifiers to include in the
#' network, regardless of whether those IDs are present in the input data. Each
#' must be formatted as "namespace:identifier", e.g. "HGNC:1234" or "CHEBI:4911".
#'
#' @return list of 2 data.frames, nodes and edges
#'
Expand All @@ -53,10 +56,11 @@ getSubnetworkFromIndra <- function(input,
correlation_cutoff = 0.3,
sources_filter = NULL,
logfc_cutoff = NULL,
force_include_proteins = NULL) {
force_include_proteins = NULL,
force_include_other = NULL) {
input <- .filterGetSubnetworkFromIndraInput(input, pvalueCutoff, logfc_cutoff, force_include_proteins)
.validateGetSubnetworkFromIndraInput(input, protein_level_data, sources_filter)
res <- .callIndraCogexApi(input$HgncId)
.validateGetSubnetworkFromIndraInput(input, protein_level_data, sources_filter, force_include_other)
res <- .callIndraCogexApi(input$HgncId, force_include_other)
res <- .filterIndraResponse(res, statement_types, evidence_count_cutoff, sources_filter)
edges <- .constructEdgesDataFrame(res, input, protein_level_data)
edges <- .filterEdgesDataFrame(edges, paper_count_cutoff, correlation_cutoff)
Expand Down
73 changes: 44 additions & 29 deletions R/utils_getSubnetworkFromIndra.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@
#' @param input dataframe from MSstats groupComparison output
#' @param protein_level_data dataframe from MSstats dataProcess output
#' @param sources_filter sources filter
#' @param force_include_other character vector of identifiers to include in the
#' network.
#' @keywords internal
#' @noRd
.validateGetSubnetworkFromIndraInput <- function(input, protein_level_data, sources_filter) {
.validateGetSubnetworkFromIndraInput <- function(input, protein_level_data, sources_filter, force_include_other) {
if (!"HgncId" %in% colnames(input)) {
stop("Invalid Input Error: Input must contain a column named 'HgncId'.")
}
if (nrow(input) >= 400) {
num_proteins = nrow(input) + ifelse(!is.null(force_include_other), length(force_include_other), 0)
if (num_proteins >= 400) {
stop("Invalid Input Error: INDRA query must contain less than 400 proteins. Consider lowering your p-value cutoff")
}
if (nrow(input) == 0) {
Expand All @@ -28,16 +31,26 @@

#' Call INDRA Cogex API and return response
#' @param hgncIds list of hgnc ids
#' @param force_include_other list of identifiers to include in the network
#' @return list of INDRA statements
#' @importFrom jsonlite toJSON
#' @importFrom httr POST add_headers content
#' @keywords internal
#' @noRd
.callIndraCogexApi <- function(hgncIds) {
.callIndraCogexApi <- function(hgncIds, force_include_other) {
indraCogexUrl <-
"https://discovery.indra.bio/api/indra_subnetwork_relations"

groundings <- lapply(hgncIds, function(x) list("HGNC", x))
if (!is.null(force_include_other)) {
groundings <- c(groundings, lapply(force_include_other, function(x) {
parts <- unlist(strsplit(x, ":"))
if (length(parts) != 2) {
stop(paste0("Invalid identifier format: ", x, ". Expected format is 'namespace:identifier', e.g. 'HGNC:1234' or 'CHEBI:4911'."))
}
list(parts[1], parts[2])
}))
}
Comment on lines +40 to +53
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Missing HTTP-error handling & fragile string-split

  1. POST() result is consumed without checking status_code, so 4xx/5xx responses silently become cryptic list structures.
  2. strsplit(x, ":") interprets ":" as a regular expression, fails on NA input, and rejects any identifier that contains an additional ':' (the split then yields more than two parts).
    Use fixed = TRUE and trimws() for robustness.
res <- POST(...)

if (httr::http_error(res))
    stop("INDRA API request failed: ", httr::status_code(res), " - ", httr::http_status(res)$message)
...
parts <- strsplit(trimws(x), ":", fixed = TRUE)[[1]]
🤖 Prompt for AI Agents
In R/utils_getSubnetworkFromIndra.R around lines 40 to 53, the POST request
result is used without checking for HTTP errors, which can cause silent failures
on 4xx/5xx responses. Add a check after the POST call using httr::http_error to
detect errors and stop execution with a clear message including the status code
and status message. Also, improve the string splitting by using strsplit with
fixed = TRUE and apply trimws to the input string before splitting to handle NA
and extra colons robustly.

groundings <- list(nodes = groundings)
groundings <- jsonlite::toJSON(groundings, auto_unbox = TRUE)

Expand Down Expand Up @@ -133,40 +146,31 @@
#' Add additional metadata to an edge
#' @param edge object representation of an INDRA statement
#' @param input filtered groupComparison result
#' @param source_namespace namespace of the source for evidence URL
#' @return edge with additional metadata
#' @keywords internal
#' @noRd
.addAdditionalMetadataToIndraEdge <- function(edge, input, source_namespace = "@HGNC") {
.addAdditionalMetadataToIndraEdge <- function(edge, input) {
edge$evidence_list <- paste(
"https://db.indra.bio/statements/from_agents?subject=",
edge$source_id, source_namespace, "&object=",
edge$target_id, "@HGNC&format=html",
edge$source_id, "@", edge$source_ns, "&object=",
edge$target_id, "@", edge$target_ns, "&format=html",
sep = ""
)

# Convert back to uniprot IDs
if (source_namespace == "@HGNC") {
matched_rows_source <- input[which(input$HgncId == edge$source_id), ]
matched_rows_source <- input[which(input$HgncId == edge$source_id), ]
if (nrow(matched_rows_source) != 1) {
edge$source_uniprot_id <- edge$source_name
} else {
edge$source_uniprot_id <- matched_rows_source$Protein
}
matched_rows_target <- input[which(input$HgncId == edge$target_id), ]

if ((source_namespace == "@HGNC" && nrow(matched_rows_source) != 1) || nrow(matched_rows_target) != 1) {
stop(paste0(
"INDRA Exception: Unexpected number of matches for the following HGNC IDs in the input data: ",
edge$source_id,
" or ",
edge$target_id,
". Each ID must match exactly one entry in the input data, but 0 or multiple matches were found. Please check the input data for duplicates or missing entries."
))
}

if (source_namespace == "@HGNC") {
edge$source_uniprot_id <- matched_rows_source$Protein
matched_rows_target <- input[which(input$HgncId == edge$target_id), ]
if (nrow(matched_rows_target) != 1) {
edge$target_uniprot_id <- edge$target_name
} else {
edge$source_uniprot_id <- edge$source_id
edge$target_uniprot_id <- matched_rows_target$Protein
}
edge$target_uniprot_id <- matched_rows_target$Protein

return(edge)
}
Expand Down Expand Up @@ -258,14 +262,25 @@
#' @keywords internal
#' @noRd
.constructNodesDataFrame <- function(input, edges) {
# Get unique nodes from edges
node_ids <- unique(c(edges$source, edges$target))

# Create base nodes dataframe
nodes <- data.frame(
id = input$Protein,
logFC = input$log2FC,
adj.pvalue = input$adj.pvalue,
hgncName = if ("HgncName" %in% colnames(input) && is.character(input$HgncName)) input$HgncName else NA,
id = node_ids,
stringsAsFactors = FALSE
)
nodes <- nodes[which(nodes$id %in% edges$source | nodes$id %in% edges$target),]

# Add attributes from input where available
nodes$logFC <- input$log2FC[match(nodes$id, input$Protein)]
nodes$adj.pvalue <- input$adj.pvalue[match(nodes$id, input$Protein)]
nodes$hgncName <- if ("HgncName" %in% colnames(input) && is.character(input$HgncName)) {
hgnc_value <- input$HgncName[match(nodes$id, input$Protein)]
ifelse(is.na(hgnc_value), nodes$id, hgnc_value)
} else {
nodes$id
}

return(nodes)
}

Expand Down
7 changes: 6 additions & 1 deletion man/getSubnetworkFromIndra.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions tests/testthat/test-getSubnetworkFromIndra.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ test_that("getSubnetworkFromIndra works correctly", {
input <- data.table::fread(
system.file("extdata/groupComparisonModel.csv", package = "MSstatsBioNet")
)
local_mocked_bindings(.callIndraCogexApi = function(x) {
local_mocked_bindings(.callIndraCogexApi = function(x,y) {
return(readRDS(system.file("extdata/indraResponse.rds", package = "MSstatsBioNet")))
})
suppressWarnings(subnetwork <- getSubnetworkFromIndra(input, statement_types = c("Activation", "Phosphorylation")))
Expand All @@ -14,7 +14,7 @@ test_that("getSubnetworkFromIndra with different statement type works correctly"
input <- data.table::fread(
system.file("extdata/groupComparisonModel.csv", package = "MSstatsBioNet")
)
local_mocked_bindings(.callIndraCogexApi = function(x) {
local_mocked_bindings(.callIndraCogexApi = function(x,y) {
return(readRDS(system.file("extdata/indraResponse.rds", package = "MSstatsBioNet")))
})
suppressWarnings(
Expand Down