diff --git a/DESCRIPTION b/DESCRIPTION index 84fff48..e869e86 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,8 +23,7 @@ Imports: httr, jsonlite, r2r, - tidyr, - MASS + tidyr Suggests: data.table, BiocStyle, diff --git a/NAMESPACE b/NAMESPACE index b2e4376..e84ce42 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,11 +3,9 @@ export(annotateProteinInfoFromIndra) export(exportNetworkToHTML) export(generateCytoscapeConfig) -export(getPathwaysFromIndra) export(getSubnetworkFromIndra) export(previewNetworkInBrowser) export(visualizeNetworks) -importFrom(MASS,fitdistr) importFrom(RCy3,addAnnotationShape) importFrom(RCy3,addAnnotationText) importFrom(RCy3,createNetworkFromDataFrames) @@ -20,7 +18,6 @@ importFrom(RCy3,mapVisualProperty) importFrom(RCy3,setVisualStyle) importFrom(grDevices,colorRamp) importFrom(grDevices,rgb) -importFrom(httr,GET) importFrom(httr,POST) importFrom(httr,add_headers) importFrom(httr,content) diff --git a/R/annotateProteinInfoFromIndra.R b/R/annotateProteinInfoFromIndra.R index b4470b7..cffe254 100644 --- a/R/annotateProteinInfoFromIndra.R +++ b/R/annotateProteinInfoFromIndra.R @@ -55,18 +55,27 @@ annotateProteinInfoFromIndra <- function(df, proteinIdType) { #' It can be either "Uniprot" or "Uniprot_Mnemonic". #' @return A data frame with populated Uniprot IDs. 
.populateUniprotIdsInDataFrame <- function(df, proteinIdType) { + if ("GlobalProtein" %in% colnames(df)) { + protein_ids = unique(as.character(df$GlobalProtein)) + } else { + df$Protein = as.character(df$Protein) + df$GlobalProtein = ifelse(grepl("_[A-Z][0-9]", df$Protein), + gsub("_[A-Z][0-9].*", "", df$Protein, perl = TRUE), + df$Protein) + protein_ids = unique(df$GlobalProtein) + } df$UniprotId <- NA if (proteinIdType == "Uniprot") { - df$UniprotId <- as.character(df$Protein) + df$UniprotId <- as.character(df$GlobalProtein) } if (proteinIdType == "Uniprot_Mnemonic") { - mnemonicProteins <- df$Protein + mnemonicProteins <- protein_ids if (length(mnemonicProteins) > 0) { uniprotMapping <- .callGetUniprotIdsFromUniprotMnemonicIdsApi(as.list(mnemonicProteins)) for (mnemonicId in names(uniprotMapping)) { if (!is.null(uniprotMapping[[mnemonicId]])) { - df$UniprotId[df$Protein == mnemonicId] <- uniprotMapping[[mnemonicId]] + df$UniprotId[df$GlobalProtein == mnemonicId] <- uniprotMapping[[mnemonicId]] } } } diff --git a/R/getPathwaysFromIndra.R b/R/getPathwaysFromIndra.R deleted file mode 100644 index 76c654e..0000000 --- a/R/getPathwaysFromIndra.R +++ /dev/null @@ -1,176 +0,0 @@ -#' Get pathways ranked on relevance from INDRA DB -#' -#' @importFrom httr GET content -#' @importFrom MASS fitdistr -#' @importFrom r2r hashmap keys query -#' -#' @param annotated_df output of \code{\link[MSstats]{groupComparison}} function's -#' comparisionResult table, which contains a list of proteins and their -#' corresponding p-values, logFCs, along with additional HGNC ID and HGNC -#' name columns -#' @param main_target A main target, e.g. main target of a drug or protein of -#' particular interest -#' @param target_type One of either 'Protein' or 'Drug'. 
Default is 'Protein' -#' -#' @return df of pathways -#' -#' @export -#' -#' @examples -#' annotated_df <- data.table::fread(system.file( -#' "extdata/groupComparisonModel.csv", -#' package = "MSstatsBioNet" -#' )) -#' pathways <- getPathwaysFromIndra(annotated_df, "P05067") -#' head(pathways) -#' -getPathwaysFromIndra <- function(annotated_df, main_target = 'MEN1_HUMAN', target_type = "Protein") { - annotated_df$Protein <- as.character(annotated_df$Protein) - log2fc_values <- annotated_df$log2FC - fit <- fitdistr(log2fc_values, "normal") - para <- fit$estimate - # pnorm(0, mean = para[1], sd = para[2]) - - # Fit a power-law-like curve - b = -0.08 - m = -1.33 - # 1 - 10^(m*log10(1000)+b) - # probability - - # Call INDRA - if (target_type == "Protein") { - main_target_row = annotated_df[annotated_df$Protein == main_target,] - source_id = as.character(main_target_row$HgncId) - namespace = "@HGNC" - id_field = "HGNC" - } else if (target_type == "Drug") { - source_id = main_target - namespace = "" - id_field = "TEXT" - } else { - stop("Invalid target type.") - } - url = paste('https://db.indra.bio/statements/from_agents?subject=', - source_id, namespace, sep = "") - response <- GET(url) - z = content(response) - - edgeToMetadataMapping <- hashmap() - - if (length(z$statements) == 0) { - return(data.frame()) - } - - for (index in seq(1, length(z$statements))) { - edge <- z$statements[[index]] - if (edge$type == "Complex") { - if (length(edge$members) == 2) { - if (identical(edge$members[[1]]$db_refs[[id_field]], source_id)) { - obj = edge$members[[2]]$db_refs$HGNC - namespaces = names(edge$members[[2]]$db_refs) - } else { - obj = edge$members[[1]]$db_refs$HGNC - namespaces = names(edge$members[[1]]$db_refs) - } - } else { - namespaces = c() - } - } else if (edge$type == "Phosphorylation") { - obj = edge$sub$db_refs$HGNC - namespaces = names(edge$sub$db_refs) - } else { - obj = edge$obj$db_refs$HGNC - namespaces = names(edge$obj$db_refs) - } - - # Filter out edges with no 
HGNC ID or not in the dataset - if (!("HGNC" %in% namespaces)) { - next - } else if (!(obj %in% annotated_df$HgncId)) { - next - } - - key <- paste(source_id, obj, sep = "_") - - if (key %in% keys(edgeToMetadataMapping)) { - edgeToMetadataMapping[[key]]$data$evidence_count <- - edgeToMetadataMapping[[key]]$data$evidence_count + - z$evidence_counts[[index]] - edgeToMetadataMapping[[key]]$data$stmt_type <- unique(c( - edgeToMetadataMapping[[key]]$data$stmt_type, - edge$type)) - } else { - # edge <- MSstatsBioNet:::.addAdditionalMetadataToIndraEdge(edge, annotated_df) - edgeToMetadataMapping[[key]] <- edge - edgeToMetadataMapping[[key]]$data$evidence_count <- - z$evidence_counts[[index]] - edgeToMetadataMapping[[key]]$data$stmt_type <- c(edge$type) - edgeToMetadataMapping[[key]]$source_id <- source_id - edgeToMetadataMapping[[key]]$target_id <- obj - edgeToMetadataMapping[[key]] <- .addAdditionalMetadataToIndraEdge( - edgeToMetadataMapping[[key]], annotated_df - ) - } - } - - # Calculate probabilities - for (key in keys(edgeToMetadataMapping)) { - edgeToMetadataMapping[[key]]$data$stmt_type <- - paste(unique(edgeToMetadataMapping[[key]]$data$stmt_type), - collapse = ", ") - prob_logFC = 0 - logFC = annotated_df[which(annotated_df$HgncId == edgeToMetadataMapping[[key]]$target_id),] - logFC = logFC$log2FC[[1]] - if (logFC > para[1]) { - prob_logFC = 1 - pnorm(logFC, mean = para[1], sd = para[2]) - } else { - prob_logFC = pnorm(logFC, mean = para[1], sd = para[2]) - } - evidence_prob = 10^(m*log10(edgeToMetadataMapping[[key]]$data$evidence_count)+b) - edgeToMetadataMapping[[key]]$data$total_prob = 1 - ((1 - prob_logFC) * (1 - evidence_prob)) - edgeToMetadataMapping[[key]]$data$logFC = logFC - } - - # Construct DF and sort - edges <- data.frame( - source = vapply(keys(edgeToMetadataMapping), function(x) { - query(edgeToMetadataMapping, x)$source_uniprot_id - }, ""), - target = vapply(keys(edgeToMetadataMapping), function(x) { - query(edgeToMetadataMapping, 
x)$target_uniprot_id - }, ""), - interaction = vapply(keys(edgeToMetadataMapping), function(x) { - query(edgeToMetadataMapping, x)$data$stmt_type - }, ""), - evidenceCount = vapply(keys(edgeToMetadataMapping), function(x) { - query(edgeToMetadataMapping, x)$data$evidence_count - }, 1), - logFC = vapply(keys(edgeToMetadataMapping), function(x) { - query(edgeToMetadataMapping, x)$data$logFC - }, 1), - prob = vapply(keys(edgeToMetadataMapping), function(x) { - query(edgeToMetadataMapping, x)$data$total_prob - }, 1), - evidenceLink = vapply(keys(edgeToMetadataMapping), function(x) { - query(edgeToMetadataMapping, x)$evidence_list - }, ""), - stringsAsFactors = FALSE - ) - - nodes <- .constructNodesDataFrame( - annotated_df, edges - ) - if (!(main_target %in% nodes$id)) { - # add a row with the main target - nodes <- rbind( - nodes, - data.frame( - id = main_target, - logFC = 0, - pvalue = 0, - hgncName = main_target - ) - ) - } - return(list(nodes = nodes, edges = edges)) -} \ No newline at end of file diff --git a/R/utils_getSubnetworkFromIndra.R b/R/utils_getSubnetworkFromIndra.R index e514d52..f3b8bce 100644 --- a/R/utils_getSubnetworkFromIndra.R +++ b/R/utils_getSubnetworkFromIndra.R @@ -10,7 +10,8 @@ if (!"HgncId" %in% colnames(input)) { stop("Invalid Input Error: Input must contain a column named 'HgncId'.") } - num_proteins = nrow(input) + ifelse(!is.null(force_include_other), length(force_include_other), 0) + num_proteins = length(unique(input$HgncId)) + + ifelse(!is.null(force_include_other), length(force_include_other), 0) if (num_proteins >= 400) { stop("Invalid Input Error: INDRA query must contain less than 400 proteins. 
Consider lowering your p-value cutoff") } @@ -41,6 +42,7 @@ indraCogexUrl <- "https://discovery.indra.bio/api/indra_subnetwork_relations" + hgncIds = unique(hgncIds) groundings <- lapply(hgncIds, function(x) list("HGNC", x)) if (!is.null(force_include_other)) { groundings <- c(groundings, lapply(force_include_other, function(x) { @@ -105,6 +107,8 @@ #' @keywords internal #' @noRd .filterGetSubnetworkFromIndraInput <- function(input, pvalueCutoff, logfc_cutoff, force_include_proteins) { + input$Protein <- as.character(input$Protein) + # Extract exempt proteins before any filtering exempt_proteins <- NULL if (!is.null(force_include_proteins)) { @@ -140,7 +144,16 @@ input <- combined_input[!duplicated(combined_input$Protein), ] } - input$Protein <- as.character(input$Protein) + # Handle PTMs in Protein column + input$Site = ifelse(grepl("_[A-Z][0-9]", input$Protein), + gsub("^_", "", + gsub("^[^_]*_|_(?![A-Z][0-9])[^_]*", "", input$Protein, perl = TRUE) + ), + NA_character_ + ) + if ("GlobalProtein" %in% colnames(input)) { + input$Protein = input$GlobalProtein + } return(input) } #' Add additional metadata to an edge @@ -159,17 +172,19 @@ # Convert back to uniprot IDs matched_rows_source <- input[which(input$HgncId == edge$source_id), ] - if (nrow(matched_rows_source) != 1) { + uniprot_ids_source <- unique(matched_rows_source$Protein) + if (length(uniprot_ids_source) != 1) { edge$source_uniprot_id <- edge$source_name } else { - edge$source_uniprot_id <- matched_rows_source$Protein + edge$source_uniprot_id <- uniprot_ids_source } matched_rows_target <- input[which(input$HgncId == edge$target_id), ] - if (nrow(matched_rows_target) != 1) { + uniprot_ids_target = unique(matched_rows_target$Protein) + if (length(uniprot_ids_target) != 1) { edge$target_uniprot_id <- edge$target_name } else { - edge$target_uniprot_id <- matched_rows_target$Protein + edge$target_uniprot_id <- uniprot_ids_target } return(edge) @@ -179,6 +194,7 @@ #' Collapse duplicate INDRA statements into a 
mapping of edge to metadata #' @param res INDRA response #' @param input filtered groupComparison result +#' @importFrom jsonlite fromJSON #' @importFrom r2r hashmap keys #' @return processed edge to metadata mapping #' @keywords internal @@ -188,6 +204,13 @@ for (edge in res) { key <- paste(edge$source_id, edge$target_id, edge$data$stmt_type, sep = "_") + json_object <- fromJSON(edge$data$stmt_json) + if (!is.null(json_object$residue) && !is.null(json_object$position)) { + edge$site = paste0(json_object$residue, json_object$position) + key <- paste(key, edge$site, sep = "_") + } else { + edge$site = NA_character_ + } if (key %in% keys(edgeToMetadataMapping)) { edgeToMetadataMapping[[key]]$data$evidence_count <- edgeToMetadataMapping[[key]]$data$evidence_count + @@ -209,6 +232,7 @@ #' @param input filtered groupComparison result #' @param protein_level_data output of dataProcess #' @importFrom r2r query keys +#' @importFrom jsonlite fromJSON #' @return edge data.frame #' @keywords internal #' @noRd @@ -236,6 +260,9 @@ sourceCounts = vapply(keys(res), function(x) { query(res, x)$data$source_counts }, ""), + site = vapply(keys(res), function(x) { + query(res, x)$site + }, ""), stringsAsFactors = FALSE ) # add correlation - maybe create a separate function @@ -262,24 +289,11 @@ #' @keywords internal #' @noRd .constructNodesDataFrame <- function(input, edges) { - # Get unique nodes from edges - node_ids <- unique(c(edges$source, edges$target)) + nodes = input[, c("Protein", "log2FC", "adj.pvalue", "HgncName", "Site")] + colnames(nodes) = c("id", "logFC", "adj.pvalue", "hgncName", "Site") - # Create base nodes dataframe - nodes <- data.frame( - id = node_ids, - stringsAsFactors = FALSE - ) - - # Add attributes from input where available - nodes$logFC <- input$log2FC[match(nodes$id, input$Protein)] - nodes$adj.pvalue <- input$adj.pvalue[match(nodes$id, input$Protein)] - nodes$hgncName <- if ("HgncName" %in% colnames(input) && is.character(input$HgncName)) { - hgnc_value 
<- input$HgncName[match(nodes$id, input$Protein)] - ifelse(is.na(hgnc_value), nodes$id, hgnc_value) - } else { - nodes$id - } + nodes = nodes[nodes$id %in% c(edges$source, edges$target), ] + nodes$hgncName = ifelse(is.na(nodes$hgncName), nodes$id, nodes$hgncName) return(nodes) } diff --git a/R/visualizeNetworksWithHTML.R b/R/visualizeNetworksWithHTML.R index cd4772b..d10eb33 100644 --- a/R/visualizeNetworksWithHTML.R +++ b/R/visualizeNetworksWithHTML.R @@ -1,8 +1,3 @@ -# ============================================================================= -# CYTOSCAPE VISUALIZATION PACKAGE FUNCTIONS -# These functions should go in your separate package -# ============================================================================= - #' Helper function to map logFC values to colors #' @param logFC_values Numeric vector of log fold change values #' @importFrom grDevices colorRamp rgb @@ -79,8 +74,75 @@ getRelationshipProperties <- function() { ) } +#' Calculate PTM site overlap between edge targets and nodes with aggregation +#' @param edges Data frame with edge information including 'target' and 'site' columns +#' @param nodes Data frame with node information including 'id' and 'Site' columns +#' @return Vector of overlap descriptions for each unique edge (after consolidation) +#' @noRd +calculatePTMOverlapAggregated <- function(edges, nodes) { + if (nrow(edges) == 0) return(character(0)) + + # Group edges by source-target-interaction to match consolidation logic + edges$edge_key <- paste(edges$source, edges$target, edges$interaction, sep = "-") + unique_edges <- unique(edges$edge_key) + + overlap_info <- character(length(unique_edges)) + names(overlap_info) <- unique_edges + + for (edge_key in unique_edges) { + # Get all edges with this source-target-interaction combination + matching_edges <- edges[edges$edge_key == edge_key, ] + all_overlap_sites <- c() + + # Process each matching edge to find PTM overlaps + for (i in 1:nrow(matching_edges)) { + edge <- 
matching_edges[i, ] + + # Check if edge has target and site information + if (!is.na(edge$target) && "site" %in% names(edge) && !is.na(edge$site)) { + # Find matching nodes with the same target ID + target_nodes <- nodes[nodes$id == edge$target, ] + + if (nrow(target_nodes) > 0 && "Site" %in% names(target_nodes)) { + edge_sites <- trimws(unlist(strsplit(as.character(edge$site), "[,;|]"))) + + # Check each target node row for site matches + for (j in 1:nrow(target_nodes)) { + if (!is.na(target_nodes$Site[j])) { + node_sites <- trimws(unlist(strsplit(as.character(target_nodes$Site[j]), "_"))) + + # Find overlapping sites for this edge-node combination + overlap_sites <- intersect(edge_sites, node_sites) + overlap_sites <- overlap_sites[overlap_sites != "" & !is.na(overlap_sites)] + + # Add to the aggregate list + all_overlap_sites <- c(all_overlap_sites, overlap_sites) + } + } + } + } + } + + # Remove duplicates and create tooltip text for this consolidated edge + unique_overlap_sites <- unique(all_overlap_sites) + unique_overlap_sites <- unique_overlap_sites[unique_overlap_sites != "" & !is.na(unique_overlap_sites)] + + if (length(unique_overlap_sites) > 0) { + if (length(unique_overlap_sites) == 1) { + overlap_info[edge_key] <- paste0("Overlapping PTM site: ", unique_overlap_sites[1]) + } else { + overlap_info[edge_key] <- paste0("Overlapping PTM sites: ", paste(unique_overlap_sites, collapse = ", ")) + } + } else { + overlap_info[edge_key] <- "" + } + } + + return(overlap_info) +} + # Consolidate bidirectional edges based on relationship type -consolidateEdges <- function(edges) { +consolidateEdges <- function(edges, nodes = NULL) { if (nrow(edges) == 0) return(edges) required_cols <- c("source", "target", "interaction") @@ -89,6 +151,13 @@ consolidateEdges <- function(edges) { stop("Missing required columns: ", paste(missing_cols, collapse = ", ")) } + # Calculate aggregated PTM overlap information if nodes are provided + ptm_overlap_map <- if (!is.null(nodes)) { 
+ calculatePTMOverlapAggregated(edges, nodes) + } else { + NULL + } + relationship_props <- getRelationshipProperties() consolidated_edges <- list() processed_pairs <- c() @@ -96,7 +165,6 @@ consolidateEdges <- function(edges) { for (i in 1:nrow(edges)) { edge <- edges[i, ] pair_key <- paste(sort(c(edge$source, edge$target)), edge$interaction, collapse = "-") - reverse_key <- paste(sort(c(edge$source, edge$target), decreasing = TRUE), edge$interaction, sep = "-") # Skip if we've already processed this pair if (pair_key %in% processed_pairs) next @@ -118,6 +186,14 @@ consolidateEdges <- function(edges) { consolidation_type <- relationship_props[[category]]$consolidate + # Get PTM overlap info for this edge combination + edge_key <- paste(edge$source, edge$target, edge$interaction, sep = "-") + ptm_overlap_text <- if (!is.null(ptm_overlap_map) && edge_key %in% names(ptm_overlap_map)) { + ptm_overlap_map[[edge_key]] + } else { + "" + } + if (nrow(reverse_edges) > 0 && consolidation_type %in% c("undirected", "bidirectional")) { # Create consolidated edge if (consolidation_type == "undirected") { @@ -128,6 +204,7 @@ consolidateEdges <- function(edges) { interaction = edge$interaction, edge_type = "undirected", category = category, + ptm_overlap = ptm_overlap_text, stringsAsFactors = FALSE ) } else { @@ -138,6 +215,7 @@ consolidateEdges <- function(edges) { interaction = paste(edge$interaction, "(bidirectional)"), edge_type = "bidirectional", category = category, + ptm_overlap = ptm_overlap_text, stringsAsFactors = FALSE ) } @@ -148,8 +226,8 @@ consolidateEdges <- function(edges) { consolidated_edge[[col]] <- edge[[col]] } - edge_key <- paste(edge$source, edge$target, consolidated_edge$interaction, sep = "-") - consolidated_edges[[edge_key]] <- consolidated_edge + edge_key_final <- paste(edge$source, edge$target, consolidated_edge$interaction, sep = "-") + consolidated_edges[[edge_key_final]] <- consolidated_edge # Mark both directions as processed processed_pairs <- 
c(processed_pairs, pair_key) @@ -159,9 +237,10 @@ consolidateEdges <- function(edges) { directed_edge <- edge directed_edge$edge_type <- "directed" directed_edge$category <- category + directed_edge$ptm_overlap = ptm_overlap_text - edge_key <- paste(edge$source, edge$target, edge$interaction, sep = "-") - consolidated_edges[[edge_key]] <- directed_edge + edge_key_final <- paste(edge$source, edge$target, edge$interaction, sep = "-") + consolidated_edges[[edge_key_final]] <- directed_edge } } @@ -242,11 +321,11 @@ createNodeElements <- function(nodes, displayLabelType = "id") { }) } -createEdgeElements <- function(edges) { +createEdgeElements <- function(edges, nodes = NULL) { if (nrow(edges) == 0) return(list()) # First consolidate edges - consolidated_edges <- consolidateEdges(edges) + consolidated_edges <- consolidateEdges(edges, nodes) edge_elements <- list() @@ -262,7 +341,10 @@ createEdgeElements <- function(edges) { evidence_link <- ifelse(is.na(evidence_link) | evidence_link == "NA", "", evidence_link) evidence_link <- escape_js_string(evidence_link) - # Create edge data with styling information + # Escape quotes in tooltip text for JavaScript safety + tooltip_text <- gsub("'", "\\\\'", row$ptm_overlap) + + # Create edge data with styling information and PTM overlap tooltip edge_data <- paste0("{ data: { source: '", row$source, "', target: '", row$target, "', id: '", edge_key, @@ -273,7 +355,8 @@ createEdgeElements <- function(edges) { "', color: '", style$color, "', line_style: '", style$style, "', arrow_shape: '", style$arrow, - "', width: ", style$width, " } }") + "', width: ", style$width, + ", tooltip: '", tooltip_text, "' } }") edge_elements[[edge_key]] <- edge_data } @@ -319,7 +402,7 @@ generateCytoscapeConfig <- function(nodes, edges, # Create elements node_elements <- createNodeElements(nodes, display_label_type) - edge_elements <- createEdgeElements(edges) + edge_elements <- createEdgeElements(edges, nodes) # Default layout options default_layout <- 
list( @@ -342,7 +425,7 @@ generateCytoscapeConfig <- function(nodes, edges, } } - # Define the style configuration (same as before) + # Define the style configuration style_config <- list( list( selector = "node", @@ -470,6 +553,48 @@ generateJavaScriptCode <- function(config) { layout: ", layout_js, " }); + // Create tooltip element + var tooltip = document.createElement('div'); + tooltip.style.cssText = ` + position: absolute; + background-color: rgba(0, 0, 0, 0.9); + color: white; + padding: 8px 12px; + border-radius: 4px; + font-size: 12px; + font-family: Arial, sans-serif; + white-space: nowrap; + pointer-events: none; + z-index: 9999; + box-shadow: 0 2px 8px rgba(0, 0, 0, 0.3); + display: none; + max-width: 300px; + word-wrap: break-word; + white-space: pre-wrap; + `; + document.body.appendChild(tooltip); + + // Only show tooltip if there's actual PTM overlap information + cy.on('mouseover', 'edge', function(evt) { + var edge = evt.target; + var tooltipText = edge.data('tooltip'); + if (tooltipText && tooltipText.trim() !== '' && tooltipText.trim() !== 'No overlapping PTM sites found') { + tooltip.innerHTML = tooltipText; + tooltip.style.display = 'block'; + } + }); + + cy.on('mousemove', 'edge', function(evt) { + if (tooltip.style.display === 'block') { + tooltip.style.left = evt.originalEvent.pageX + 10 + 'px'; + tooltip.style.top = evt.originalEvent.pageY - 30 + 'px'; + } + }); + + cy.on('mouseout', 'edge', function(evt) { + tooltip.style.display = 'none'; + }); + ", event_handlers_js) return(js_code) @@ -665,7 +790,7 @@ exportCytoscapeToHTML <- function(config, } #' - , config$container_id, ' { + , config$container_id, ' { width: ', width, '; height: ', height, '; border: 1px solid #ddd; @@ -768,6 +893,7 @@ exportCytoscapeToHTML <- function(config, Instructions: Click and drag to pan the network | Use mouse wheel to zoom in/out + | Hover over edges to see PTM site overlap information | Click on nodes or edges to select them ', if(include_controls) '| Use 
the buttons above for common navigation actions' else '', ' @@ -846,6 +972,10 @@ exportCytoscapeToHTML <- function(config, Complex +
+ PTM Site Info:
+ Hover over edges to see overlapping PTM sites between the edge target and node data +
`; legendDiv.innerHTML = legendHTML; @@ -934,7 +1064,7 @@ exportNetworkToHTML <- function(nodes, edges, previewNetworkInBrowser <- function(nodes, edges, displayLabelType = "id", ...) { - + # Generate configuration config <- generateCytoscapeConfig(nodes, edges, display_label_type = displayLabelType) diff --git a/inst/extdata/garrido-2024.csv b/inst/extdata/garrido-2024.csv new file mode 100644 index 0000000..27e174a --- /dev/null +++ b/inst/extdata/garrido-2024.csv @@ -0,0 +1,23 @@ +"Protein","Label","log2FC","SE","DF","pvalue","adj.pvalue","issue" +"P00533_S1039_S1042","t0 vs t1",-0.32003632837107,0.149943118877325,9,0.0615809510693193,0.280245896469974,NA +"P00533_S1064","t0 vs t1",0.356653113576368,0.0891534693290476,9,0.00310836383475933,0.0686359841548604,NA +"P00533_S991_S995","t0 vs t1",-0.122903662291889,0.10858297785538,9,0.286937654558352,0.579073736150236,NA +"P00533_T693","t0 vs t1",-0.0233443995453799,0.177244588005518,9,0.898113182376962,0.96083633837953,NA +"P00533_T693_S695","t0 vs t1",-0.165995656836019,0.150007538230776,9,0.297173768898177,0.588091082037082,NA +"P00533_Y1110","t0 vs t1",0.210632429258209,0.0927903082311584,9,0.0493644341170289,0.252589138618561,NA +"P28482_T185","t0 vs t1",0.634378738770068,0.182300631518675,9,0.00694020442220732,0.0999180588053667,NA +"P28482_T185_T190","t0 vs t1",0.890196707480898,0.165905887251245,9,0.000452927502292055,0.025493055208194,NA +"P28482_T185_Y187","t0 vs t1",0.77570963567424,0.200315650539845,9,0.0037747061619258,0.0748119597579435,NA +"P28482_Y187","t0 vs t1",1.13798944558515,0.182042606245047,9,0.000149415500661671,0.0138506354758592,NA +"P28482_Y187_T190","t0 vs t1",0.0839586116733722,0.114930246570557,9,0.483655330294773,0.739194565873987,NA +"Q13480_S266","t0 vs t1",0.147775796440306,0.106174162166453,9,0.19741006608831,0.483926492931687,NA +"Q13480_S418_S419","t0 vs t1",0.281122057791004,0.146958322587612,9,0.0880466739278739,0.328235537559088,NA +"Q13480_S419","t0 vs 
t1",0.320048191455162,0.114579108916761,9,0.0209422984694783,0.166454473266352,NA +"Q13480_S500_T503","t0 vs t1",-0.450413295801827,0.0980746173074297,9,0.00130426958782228,0.0449838460341635,NA +"Q13480_S634_S637","t0 vs t1",-0.616943295142088,0.129109930148088,9,0.00100346794179792,0.0394342642255919,NA +"Q13480_S650_S651_Y659","t0 vs t1",1.42523331569634,0.173080560297211,9,1.75594329256568e-05,0.00503640093293397,NA +"Q13480_S651_Y659","t0 vs t1",1.34708073620391,0.292405749303692,9,0.00127794033828973,0.0447363035684408,NA +"Q13480_T387_S389","t0 vs t1",-0.494395484918809,0.116320309251806,9,0.0021415442309728,0.0576989604963562,NA +"Q13480_Y265_S266","t0 vs t1",0.438767055281507,0.111182087237631,9,0.00337315512046587,0.0713587661347963,NA +"Q13480_Y627_T638","t0 vs t1",2.58471343768464,0.276077760810949,9,6.17542514318978e-06,0.0032951498209992,NA +"Q13480_Y659","t0 vs t1",2.93366028129369,0.180356840960513,9,5.57098141249871e-08,0.000346487188950357,NA diff --git a/man/getPathwaysFromIndra.Rd b/man/getPathwaysFromIndra.Rd deleted file mode 100644 index 9f37110..0000000 --- a/man/getPathwaysFromIndra.Rd +++ /dev/null @@ -1,38 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/getPathwaysFromIndra.R -\name{getPathwaysFromIndra} -\alias{getPathwaysFromIndra} -\title{Get pathways ranked on relevance from INDRA DB} -\usage{ -getPathwaysFromIndra( - annotated_df, - main_target = "MEN1_HUMAN", - target_type = "Protein" -) -} -\arguments{ -\item{annotated_df}{output of \code{\link[MSstats]{groupComparison}} function's -comparisionResult table, which contains a list of proteins and their -corresponding p-values, logFCs, along with additional HGNC ID and HGNC -name columns} - -\item{main_target}{A main target, e.g. main target of a drug or protein of -particular interest} - -\item{target_type}{One of either 'Protein' or 'Drug'. 
Default is 'Protein'} -} -\value{ -df of pathways -} -\description{ -Get pathways ranked on relevance from INDRA DB -} -\examples{ -annotated_df <- data.table::fread(system.file( - "extdata/groupComparisonModel.csv", - package = "MSstatsBioNet" -)) -pathways <- getPathwaysFromIndra(annotated_df, "P05067") -head(pathways) - -} diff --git a/vignettes/MSstatsBioNet.Rmd b/vignettes/MSstatsBioNet.Rmd index 8c9c1d5..d3fd498 100644 --- a/vignettes/MSstatsBioNet.Rmd +++ b/vignettes/MSstatsBioNet.Rmd @@ -106,7 +106,11 @@ subnetwork of proteins from the INDRA database based on differential abundance analysis results. ```{r} -subnetwork <- getSubnetworkFromIndra(annotated_df, pvalueCutoff = 0.05) +subnetwork <- getSubnetworkFromIndra( + annotated_df, + pvalueCutoff = 0.05, + statement_types = c("Complex", "IncreaseAmount", "DecreaseAmount", "Inhibition", "Activation", "Phosphorylation") +) head(subnetwork$nodes) head(subnetwork$edges) ``` @@ -116,16 +120,18 @@ This package is distributed under the [Artistic-2.0](https://opensource.org/lice ## Visualize Networks -The function `visualizeNetworks` then takes the output of +The function `previewNetworkInBrowser` then takes the output of `getSubnetworkFromIndra` and visualizes the subnetwork. The function requires -Cytoscape desktop to be open for the visualization to work. +a web browser to view the subnetwork. ```{r} -visualizeNetworks(subnetwork$nodes, subnetwork$edges) +previewNetworkInBrowser(subnetwork$nodes, subnetwork$edges, displayLabelType = "hgncName") ``` -In the network diagram displayed in Cytoscape, you should see two arrows -connecting two nodes, P16050 and P84243. These arrows represent the -interactions between these two proteins, notably activation and phosphorylation. + +In the network diagram displayed using Cytoscape.js, you should see three arrows +connecting two nodes, APP and APOD. These arrows represent the +interactions between these two proteins, notably Activation, IncreaseAmount +(e.g. 
via transcriptional regulation), and Complex (forming a protein complex). # Session info diff --git a/vignettes/PTM-Analysis.Rmd b/vignettes/PTM-Analysis.Rmd new file mode 100644 index 0000000..67bd7cc --- /dev/null +++ b/vignettes/PTM-Analysis.Rmd @@ -0,0 +1,83 @@ +--- +title: "PTM Analysis" +author: "Anthony Wu" +package: MSstatsBioNet +output: BiocStyle::html_document +vignette: > + %\VignetteIndexEntry{MSstatsBioNet: PTM Analysis} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + + +# Installation + +Run the code below to install MSstatsBioNet from Bioconductor. + +```{r eval = FALSE} +if (!require("BiocManager", quietly = TRUE)) { + install.packages("BiocManager") +} + +BiocManager::install("MSstatsBioNet") +``` + +## Dataset + +We will be taking a subset of the dataset found in this [paper](https://www.biorxiv.org/content/10.1101/2024.10.21.619348v1). The table is the output of the MSstatsPTM function `groupComparisonPTM` (filtered down to the columns that are actually needed). + +```{r} +input = data.table::fread(system.file( + "extdata/garrido-2024.csv", + package = "MSstatsBioNet" +)) +head(input) +``` + +## ID Conversion + +First, we need to convert the group comparison results to a format that can be +processed by INDRA. We can use the `annotateProteinInfoFromIndra` function +to obtain these mappings. + +In the example below, we convert UniProt IDs to their corresponding HGNC IDs. We +can also extract other information, such as the HGNC gene name and protein function. + +```{r} +library(MSstatsBioNet) +annotated_df = annotateProteinInfoFromIndra(input, "Uniprot") +head(annotated_df) +``` + +## Subnetwork Query + +The package provides a function `getSubnetworkFromIndra` that retrieves a +subnetwork of proteins from the INDRA database based on differential abundance +analysis results. This function may help find off-target subnetworks. 
+ +```{r} +subnetwork <- getSubnetworkFromIndra(annotated_df, pvalueCutoff = 0.05, statement_types = c("Phosphorylation"), logfc_cutoff = 1, force_include_proteins = c("P00533_Y1110")) +head(subnetwork$nodes) +head(subnetwork$edges) +``` + +## Network Visualization + +Visualize the subnetwork in your browser. + +```{r} +previewNetworkInBrowser(subnetwork$nodes, subnetwork$edges, displayLabelType = "hgncName") +``` + +# Session info + +```{r} +sessionInfo() +```