Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
0bec2fa
preliminary setup for ptm analysis
tonywu1999 Aug 21, 2025
3e616c8
Add preliminary PTM data and vignette to PTM feature branch
tonywu1999 Jun 8, 2025
64b27ed
Code is working with PTM analysis, but analysis is protein specific
tonywu1999 Jun 8, 2025
25507fa
take the max logFC for ranking
tonywu1999 Jun 8, 2025
3c5f4b7
Add ability to filter by phosphorylation interaction and include site…
tonywu1999 Jun 8, 2025
207fd72
Update vignette to include statement types
tonywu1999 Jun 8, 2025
35605f1
Switch if-else statement for modifications vs other stmt types
tonywu1999 Jun 9, 2025
6ed5888
add site information to the edges table for getSubnetworkFromIndra
tonywu1999 Sep 2, 2025
ba08baf
add site information in the nodes table
tonywu1999 Sep 2, 2025
d60e59c
add hover tooltip first attempt
tonywu1999 Sep 3, 2025
d911ceb
hover tool tip fully working now with site info
tonywu1999 Sep 3, 2025
c206517
finalize ptm overlap hover functionality
tonywu1999 Sep 3, 2025
f790748
consider scenario where global protein column does not exist
tonywu1999 Sep 11, 2025
c8f53d1
remove getpathwaysfromindra
tonywu1999 Sep 11, 2025
60196fd
modify PTM vignette
tonywu1999 Sep 11, 2025
4c8d92e
remove commented code
tonywu1999 Sep 11, 2025
0d45665
tidy up comments
tonywu1999 Sep 11, 2025
371972d
add visualization in vignette
tonywu1999 Sep 11, 2025
b2299e6
ensure protein column is a character and not a factor
tonywu1999 Sep 11, 2025
bb60657
reorder when protein is turned into a character
tonywu1999 Sep 11, 2025
8887c55
remove changed comments
tonywu1999 Sep 11, 2025
b737617
add multiple PTM sites to site column
tonywu1999 Sep 11, 2025
5c5d1db
make multiple PTMs compatible in hover tooltip
tonywu1999 Sep 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ Imports:
httr,
jsonlite,
r2r,
tidyr,
MASS
tidyr
Suggests:
data.table,
BiocStyle,
Expand Down
3 changes: 0 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,9 @@
export(annotateProteinInfoFromIndra)
export(exportNetworkToHTML)
export(generateCytoscapeConfig)
export(getPathwaysFromIndra)
export(getSubnetworkFromIndra)
export(previewNetworkInBrowser)
export(visualizeNetworks)
importFrom(MASS,fitdistr)
importFrom(RCy3,addAnnotationShape)
importFrom(RCy3,addAnnotationText)
importFrom(RCy3,createNetworkFromDataFrames)
Expand All @@ -20,7 +18,6 @@ importFrom(RCy3,mapVisualProperty)
importFrom(RCy3,setVisualStyle)
importFrom(grDevices,colorRamp)
importFrom(grDevices,rgb)
importFrom(httr,GET)
importFrom(httr,POST)
importFrom(httr,add_headers)
importFrom(httr,content)
Expand Down
15 changes: 12 additions & 3 deletions R/annotateProteinInfoFromIndra.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,27 @@ annotateProteinInfoFromIndra <- function(df, proteinIdType) {
#' It can be either "Uniprot" or "Uniprot_Mnemonic".
#' @return A data frame with populated Uniprot IDs.
.populateUniprotIdsInDataFrame <- function(df, proteinIdType) {
if ("GlobalProtein" %in% colnames(df)) {
protein_ids = unique(as.character(df$GlobalProtein))
} else {
df$Protein = as.character(df$Protein)
df$GlobalProtein = ifelse(grepl("_[A-Z][0-9]", df$Protein),
gsub("_[A-Z][0-9].*", "", df$Protein, perl = TRUE),
df$Protein)
protein_ids = unique(df$GlobalProtein)
}
df$UniprotId <- NA
if (proteinIdType == "Uniprot") {
df$UniprotId <- as.character(df$Protein)
df$UniprotId <- as.character(df$GlobalProtein)
}

if (proteinIdType == "Uniprot_Mnemonic") {
mnemonicProteins <- df$Protein
mnemonicProteins <- protein_ids
if (length(mnemonicProteins) > 0) {
uniprotMapping <- .callGetUniprotIdsFromUniprotMnemonicIdsApi(as.list(mnemonicProteins))
for (mnemonicId in names(uniprotMapping)) {
if (!is.null(uniprotMapping[[mnemonicId]])) {
df$UniprotId[df$Protein == mnemonicId] <- uniprotMapping[[mnemonicId]]
df$UniprotId[df$GlobalProtein == mnemonicId] <- uniprotMapping[[mnemonicId]]
}
}
}
Expand Down
176 changes: 0 additions & 176 deletions R/getPathwaysFromIndra.R

This file was deleted.

60 changes: 37 additions & 23 deletions R/utils_getSubnetworkFromIndra.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
if (!"HgncId" %in% colnames(input)) {
stop("Invalid Input Error: Input must contain a column named 'HgncId'.")
}
num_proteins = nrow(input) + ifelse(!is.null(force_include_other), length(force_include_other), 0)
num_proteins = length(unique(input$HgncId)) +
ifelse(!is.null(force_include_other), length(force_include_other), 0)
if (num_proteins >= 400) {
stop("Invalid Input Error: INDRA query must contain less than 400 proteins. Consider lowering your p-value cutoff")
}
Expand Down Expand Up @@ -41,6 +42,7 @@
indraCogexUrl <-
"https://discovery.indra.bio/api/indra_subnetwork_relations"

hgncIds = unique(hgncIds)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
groundings <- lapply(hgncIds, function(x) list("HGNC", x))
if (!is.null(force_include_other)) {
groundings <- c(groundings, lapply(force_include_other, function(x) {
Expand Down Expand Up @@ -105,6 +107,8 @@
#' @keywords internal
#' @noRd
.filterGetSubnetworkFromIndraInput <- function(input, pvalueCutoff, logfc_cutoff, force_include_proteins) {
input$Protein <- as.character(input$Protein)

# Extract exempt proteins before any filtering
exempt_proteins <- NULL
if (!is.null(force_include_proteins)) {
Expand Down Expand Up @@ -140,7 +144,16 @@
input <- combined_input[!duplicated(combined_input$Protein), ]
}

input$Protein <- as.character(input$Protein)
# Handle PTMs in Protein column
input$Site = ifelse(grepl("_[A-Z][0-9]", input$Protein),
gsub("^_", "",
gsub("^[^_]*_|_(?![A-Z][0-9])[^_]*", "", input$Protein, perl = TRUE)
),
NA_character_
)
if ("GlobalProtein" %in% colnames(input)) {
input$Protein = input$GlobalProtein
}
return(input)
}
#' Add additional metadata to an edge
Expand All @@ -159,17 +172,19 @@

# Convert back to uniprot IDs
matched_rows_source <- input[which(input$HgncId == edge$source_id), ]
if (nrow(matched_rows_source) != 1) {
uniprot_ids_source <- unique(matched_rows_source$Protein)
if (length(uniprot_ids_source) != 1) {
edge$source_uniprot_id <- edge$source_name
} else {
edge$source_uniprot_id <- matched_rows_source$Protein
edge$source_uniprot_id <- uniprot_ids_source
Comment thread
tonywu1999 marked this conversation as resolved.
}

matched_rows_target <- input[which(input$HgncId == edge$target_id), ]
if (nrow(matched_rows_target) != 1) {
uniprot_ids_target = unique(matched_rows_target$Protein)
if (length(uniprot_ids_target) != 1) {
edge$target_uniprot_id <- edge$target_name
} else {
edge$target_uniprot_id <- matched_rows_target$Protein
edge$target_uniprot_id <- uniprot_ids_target
}

return(edge)
Expand All @@ -179,6 +194,7 @@
#' Collapse duplicate INDRA statements into a mapping of edge to metadata
#' @param res INDRA response
#' @param input filtered groupComparison result
#' @importFrom jsonlite fromJSON
#' @importFrom r2r hashmap keys
#' @return processed edge to metadata mapping
#' @keywords internal
Expand All @@ -188,6 +204,13 @@

for (edge in res) {
key <- paste(edge$source_id, edge$target_id, edge$data$stmt_type, sep = "_")
json_object <- fromJSON(edge$data$stmt_json)
if (!is.null(json_object$residue) && !is.null(json_object$position)) {
edge$site = paste0(json_object$residue, json_object$position)
key <- paste(key, edge$site, sep = "_")
} else {
edge$site = NA_character_
}
Comment on lines +207 to +213
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Harden stmt_json parsing.

fromJSON() raises an error on malformed JSON; wrap the call in tryCatch() so that a single unparsable stmt_json does not abort the whole run, and treat that edge as having no site.

-        json_object <- fromJSON(edge$data$stmt_json)
-        if (!is.null(json_object$residue) && !is.null(json_object$position)) {
+        json_object <- tryCatch(fromJSON(edge$data$stmt_json), error = function(e) NULL)
+        if (!is.null(json_object) && !is.null(json_object$residue) && !is.null(json_object$position)) {
             edge$site = paste0(json_object$residue, json_object$position)
             key <- paste(key, edge$site, sep = "_")
         } else {
             edge$site = NA_character_
         }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
json_object <- fromJSON(edge$data$stmt_json)
if (!is.null(json_object$residue) && !is.null(json_object$position)) {
edge$site = paste0(json_object$residue, json_object$position)
key <- paste(key, edge$site, sep = "_")
} else {
edge$site = NA_character_
}
json_object <- tryCatch(fromJSON(edge$data$stmt_json), error = function(e) NULL)
if (!is.null(json_object) && !is.null(json_object$residue) && !is.null(json_object$position)) {
edge$site = paste0(json_object$residue, json_object$position)
key <- paste(key, edge$site, sep = "_")
} else {
edge$site = NA_character_
}
🤖 Prompt for AI Agents
In R/utils_getSubnetworkFromIndra.R around lines 207-213,
fromJSON(edge$data$stmt_json) can throw on malformed JSON and crash the run;
wrap the fromJSON call in tryCatch to safely handle parsing errors (returning
NULL or an empty list on error), and when an error occurs set edge$site <-
NA_character_ (and optionally log a warning including the edge id or key).
Ensure the subsequent logic that sets key and pastes site only runs when
json_object was successfully parsed and residue/position are present.

if (key %in% keys(edgeToMetadataMapping)) {
edgeToMetadataMapping[[key]]$data$evidence_count <-
edgeToMetadataMapping[[key]]$data$evidence_count +
Expand All @@ -209,6 +232,7 @@
#' @param input filtered groupComparison result
#' @param protein_level_data output of dataProcess
#' @importFrom r2r query keys
#' @importFrom jsonlite fromJSON
#' @return edge data.frame
#' @keywords internal
#' @noRd
Expand Down Expand Up @@ -236,6 +260,9 @@
sourceCounts = vapply(keys(res), function(x) {
query(res, x)$data$source_counts
}, ""),
site = vapply(keys(res), function(x) {
query(res, x)$site
}, ""),
stringsAsFactors = FALSE
)
# add correlation - maybe create a separate function
Expand All @@ -262,24 +289,11 @@
#' @keywords internal
#' @noRd
.constructNodesDataFrame <- function(input, edges) {
# Get unique nodes from edges
node_ids <- unique(c(edges$source, edges$target))
nodes = input[, c("Protein", "log2FC", "adj.pvalue", "HgncName", "Site")]
colnames(nodes) = c("id", "logFC", "adj.pvalue", "hgncName", "Site")

# Create base nodes dataframe
nodes <- data.frame(
id = node_ids,
stringsAsFactors = FALSE
)

# Add attributes from input where available
nodes$logFC <- input$log2FC[match(nodes$id, input$Protein)]
nodes$adj.pvalue <- input$adj.pvalue[match(nodes$id, input$Protein)]
nodes$hgncName <- if ("HgncName" %in% colnames(input) && is.character(input$HgncName)) {
hgnc_value <- input$HgncName[match(nodes$id, input$Protein)]
ifelse(is.na(hgnc_value), nodes$id, hgnc_value)
} else {
nodes$id
}
nodes = nodes[nodes$id %in% c(edges$source, edges$target), ]
nodes$hgncName = ifelse(is.na(nodes$hgncName), nodes$id, nodes$hgncName)

return(nodes)
}
Expand Down
Loading