diff --git a/DESCRIPTION b/DESCRIPTION index b5731929..c5369611 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,9 +1,10 @@ Package: VISION Title: Functional interpretation of single cell RNA-seq latent manifolds -Version: 2.1.0 +Version: 3.0.0 Authors@R: c(person("Matt", "Jones", email = "mattjones315@gmail.com", role = c("aut", "cre")), person("David", "Detomaso", email = "david.detomaso@berkeley.edu", role = c("aut", "cre")), person("Tal", "Ashuach", email = "tal_ashuach@berkeley.edu", role = c("aut")), + person("Yanay", "Rosen", email = "yanayrosen@berkeley.edu", role = c("aut")), person("Nir", "Yosef", email = "niryosef@berkeley.edu", role = c("ctb", "cph"))) Author: Matt Jones [aut, cre], David Detomaso [aut, cre], Tal Ashuach [aut], Nir Yosef [ctb] Maintainer: Matt Jones @@ -40,7 +41,7 @@ Encoding: UTF-8 LazyData: true URL: https://yoseflab.github.io/VISION, https://github.com/yoseflab/VISION BugReports: https://github.com/YosefLab/VISION/issues -RoxygenNote: 7.1.0 +RoxygenNote: 7.1.1 Suggests: Biobase, BiocStyle, diff --git a/NAMESPACE b/NAMESPACE index 4d71be67..0f3f698f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,21 +1,37 @@ # Generated by roxygen2: do not edit by hand -export(Vision) export(PhyloVision) +export(Vision) +export(addHotspotToVision) export(addSignatures) export(addTSNE) export(addUMAP) +export(analyzeHotspotObjectVision) export(analyzeLocalCorrelations) export(analyzeLocalCorrelationsModules) export(annotateLatentComponents) export(applyMicroClustering) export(calcModuleScores) export(calcSignatureScores) -export(calcHotspotModules) +export(calc_mod_sig_enrichment) +export(calc_set_enrichment) +export(clusterModScores) export(clusterSigScores) export(computeLatentSpace) export(convertGeneIds) export(createGeneSignature) +export(depthBasedCladewiseTreeCluster) +export(depthBasedTreeCluster) +export(draw_hotspot_heatmap) +export(generateOverlapSignatures) +export(group_modules_enrichment) +export(hsCalculateModuleScores) +export(hsComputeAutoCorrelations) +export(hsComputeLocalCorrelations) +export(hsCreateKnnGraph) +export(hsInit) +export(lca_based_depth) +export(loadHotspotObject) export(poolMatrixCols) export(poolMatrixRows) export(poolMetaData) @@ -23,6 +39,11 @@ export(read_10x) export(read_10x_h5) export(read_10x_h5_v2) export(read_10x_h5_v3) +export(runHotspot) +export(saveHSBytestToPickle) +export(treeClusterMinCladeSize) +export(trivial_dist) +exportMethods(PhyloVision) exportMethods(Vision) exportMethods(addProjection) exportMethods(analyze) @@ -39,7 +60,6 @@ exportMethods(phyloAnalyze) exportMethods(saveAndViewResults) exportMethods(viewResults) import(Matrix) -import(dplyr) import(Rcpp) import(ape) import(loe) diff --git a/NEWS.md b/NEWS.md index fc8b5f31..6cfb87c6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +# VISION 3.0.0 + +Added support for Phylogenies as latent spaces in core VISION. + +Integrated [Hotspot](https://yoseflab.github.io/Hotspot/index.html) into VISION analysis and report UI. + +Deprecated support for trajectories and LC Annotator. + # VISION 2.1.0 Added parameter `sig_gene_threshold` with **changed default behavior** diff --git a/R/AllClasses.R b/R/AllClasses.R index 11f87891..935cd28b 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -10,6 +10,8 @@ setClassUnion("numericORNULL", members = c("numeric", "NULL")) setClassUnion("matrixORSparse", members = c("matrix", "dgCMatrix")) setClassUnion("matrixORNULL", members = c("matrix", "NULL")) setClassUnion("dataframeORNULL", members = c("data.frame", "NULL")) +setClassUnion("rawORNULL", members = c("raw", "NULL")) + # setClassUnion("treeorNull", members=c("phylo", "NULL")) # setClassUnion("pythonorNull", members = c("python.builtin.object", "NULL")) @@ -121,7 +123,7 @@ Vision <- setClass("Vision", Pools = "list", LatentSpace = "matrix", LatentTrajectory = "trajectoryORNULL", - Hotspot = "list", + Hotspot = "rawORNULL", ModuleSignatureEnrichment = "list", ModuleHotspotScores = "data.frame", Viewer = "list", @@ -148,7 +150,7 @@ Vision <- setClass("Vision", Pools = list(), LatentSpace = matrix(NA, 1, 1), LatentTrajectory = NULL, - Hotspot = list(), + Hotspot = NULL, ModuleSignatureEnrichment = list(), ModuleHotspotScores = data.frame(), Viewer = list(), diff --git a/R/AllGenerics.R b/R/AllGenerics.R index b76e8a82..69d7e677 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -4,6 +4,8 @@ setGeneric("Vision", function(data, ...) { standardGeneric("Vision") }) +#' @rdname PhyloVision-class +#' @export setGeneric("PhyloVision", function(tree, ...) { standardGeneric("PhyloVision") }) diff --git a/R/AnalysisFunctions.R b/R/AnalysisFunctions.R index d272dee9..bc8a7a67 100644 --- a/R/AnalysisFunctions.R +++ b/R/AnalysisFunctions.R @@ -18,7 +18,7 @@ clusterCells <- function(object, tree=FALSE) { } else { message("Using Tree to compute clusters...\n") # Get the MRCA matrix and convert the node indexes to depths - cl <- treeCluster3(object@tree) + cl <- maxSizeCladewiseTreeCluster(object@tree) } names(cl) <- paste('Cluster', seq(length(cl))) diff --git a/R/Microclusters.R b/R/Microclusters.R index 8c82bb11..f7d0870e 100644 --- a/R/Microclusters.R +++ b/R/Microclusters.R @@ -326,6 +326,7 @@ readjust_clusters <- function(clusters, data, cellsPerPartition=100) { return(clusters) } + #' Pools columns of a numeric matrix #' #' Uses the provided pools to merge columns of the supplied data matrix @@ -383,6 +384,7 @@ poolMatrixRows <- function(data, pools) { return(pooled_data) } + #' create "super-cells" by pooling together single cells #' @param expr expression data (genes x cells matrix) #' @param pools cluster association of each cell @@ -418,13 +420,15 @@ poolMatrixCols_Inner <- function(expr, pools) { #' Performs a binary search on a depth d such that -#' if depth(u, v) <= d then u and v are in the same cluster +#' if depth(LCA(u, v)) <= d then u and v are in the same cluster #' #' @param tree object of class phylo -#' @param reach number of clusters to attempt to generate +#' @param target number of clusters to attempt to generate #' @return List of clusters, each entry being a vector of indices representing #' samples in the cluster. -treeCluster <- function(tree, reach=10) { +#' +#' @export +depthBasedTreeCluster <- function(tree, target=10) { high <- length(tree$tip.label) low <- 0 while (T) { @@ -453,12 +457,12 @@ treeCluster <- function(tree, reach=10) { } } - if (num_clusters >= reach) { + if (num_clusters >= target) { if(low == d) { break } low <- d - } else if (num_clusters < reach) { + } else if (num_clusters < target) { if(high == d) { break } @@ -470,15 +474,17 @@ treeCluster <- function(tree, reach=10) { } -#' Performs a breadth first search to create a specific number of clusters -#' Clusters are split based on depth +#' Performs a breadth first search to create a specific number of clusters. +#' Clusters are split based on depth. #' #' @param tree object of class phylo -#' @param reach number of clusters to attempt to generate +#' @param target number of clusters to attempt to generate #' @return List of clusters, each entry being a vector of indices representing #' samples in the cluster. -treeCluster2 <- function(tree, reach=10) { - if (reach > length(tree$tip.label)) { +#' +#' @export +depthBasedCladewiseTreeCluster <- function(tree, target=10) { + if (target > length(tree$tip.label)) { stop("Number of clusters is too high.") } @@ -498,7 +504,7 @@ treeCluster2 <- function(tree, reach=10) { cluster_parents[[as.name(child)]] <- node_depths[child] } - if (length(cluster_parents) >= reach) { + if (length(cluster_parents) >= target) { break } } @@ -516,18 +522,17 @@ treeCluster2 <- function(tree, reach=10) { #' Performs a breadth first search to create a specific number of clusters -#' Clusters are split to prioritize cluster size +#' Clusters are split to prioritize max cluster size #' #' @param tree object of class phylo -#' @param reach number of clusters to attempt to generate +#' @param target number of clusters to attempt to generate #' @return List of clusters, each entry being a vector of tips representing #' samples in the cluster. -treeCluster3 <- function(tree, reach=10) { - if (reach > length(tree$tip.label)) { +maxSizeCladewiseTreeCluster <- function(tree, target=10) { + if (target > length(tree$tip.label)) { stop("Number of clusters is too high.") } - # node_depths <- node.depth(tree) root <- find_root(tree) cluster_parents <- c() cluster_parents[[as.name(root)]] <- get_max_cluster_size(tree, root) @@ -556,36 +561,38 @@ treeCluster3 <- function(tree, reach=10) { cl[[cluster]] <- all_children } - while (length(cl) > reach) { - cs <- c() - for (c in cl) { - cs <- append(cs, length(c)) - } - - smallest_i <- which.min(cs) - tip1 <- which(tree$tip.label == cl[[smallest_i]][1]) - dists <- c() - for (i in 1:length(cl)) { - tip2 <- which(tree$tip.label == cl[[i]][1]) - dists <- append(dists, trivial_dist(tree, tip1, tip2)) - } - - dists[smallest_i] <- dists[smallest_i] + max(dists) - closest_cluster_i <- which.min(dists) - cl[[min(c(closest_cluster_i, smallest_i))]] <- append(cl[[smallest_i]], cl[[closest_cluster_i]]) - cl[[max(c(closest_cluster_i, smallest_i))]] <- NULL + while (length(cl) > target) { + cs <- c() + for (c in cl) { + cs <- append(cs, length(c)) + } + + smallest_i <- which.min(cs) + tip1 <- which(tree$tip.label == cl[[smallest_i]][1]) + dists <- c() + for (i in 1:length(cl)) { + tip2 <- which(tree$tip.label == cl[[i]][1]) + dists <- append(dists, trivial_dist(tree, tip1, tip2)) + } + + dists[smallest_i] <- dists[smallest_i] + max(dists) + closest_cluster_i <- which.min(dists) + cl[[min(c(closest_cluster_i, smallest_i))]] <- append(cl[[smallest_i]], cl[[closest_cluster_i]]) + cl[[max(c(closest_cluster_i, smallest_i))]] <- NULL } return(cl) } - -#' Generate clusters for a tree of minimum size (unless children of root) +#' Generate clade-clusters for a tree of minimum size (unless children of root) +#' #' #' @param tree object of class phylo #' @param minSize minimum clade size for a clade to be expanded #' @return List of clusters, each entry being a vector of tips representing +#' WARNING: This won't work well for tree's with broad multifurcations +#' @export treeClusterMinCladeSize <- function(tree, minSize=30) { nodeLabels <- tree$node.label numC <- length(tree$tip.label) diff --git a/R/Projections.R b/R/Projections.R index 14b3316a..43f8e525 100644 --- a/R/Projections.R +++ b/R/Projections.R @@ -378,20 +378,28 @@ setMethod("computeKNNWeights", signature(object = "matrix"), ) -#' compute for each vector the weights to apply to it's K nearest neighbors +#' Compute for each vector the weights to apply to it's K nearest neighbors +#' #' @importFrom Matrix rowSums #' @importFrom Matrix sparseMatrix #' @importFrom matrixStats rowMaxs #' @param object tree to use for KNN #' @param K Number of neighbors to consider. +#' @param lcaKNN whether to use LCA based KNN (cluster by minimum size), if false defaults to cophenetic distance (random tie breaking). +#' WARNING: lcaKNN doesn't perform well with broad multifurcations #' @return a list of two items: #' indices: matrix, cells X neighbors #' Each row specifies indices of nearest neighbors #' weights: matrix, cells X neighbors #' Corresponding weights to nearest neighbors setMethod("computeKNNWeights", signature(object = "phylo"), - function(object, K = round(sqrt(length(object$tip.label)))) { - k <- find_knn_parallel_tree(object, K) + function(object, K = round(sqrt(length(object$tip.label))), lcaKNN=FALSE, minSize=20) { + if (lcaKNN) { + k <- lcaBasedTreeKNN(object, minSize = minSize) + } else { + k <- find_knn_parallel_tree(object, K) + } + nn <- k[[1]] d <- k[[2]] diff --git a/R/Utilities.R b/R/Utilities.R index ec57920b..c7eb65cf 100644 --- a/R/Utilities.R +++ b/R/Utilities.R @@ -806,7 +806,6 @@ knn_tree <- function(leaves, k, distances) { } - #' Parallel KNN for Trees #' #' Computes nearest-neighbor indices and distances @@ -852,6 +851,47 @@ find_knn_parallel_tree <- function(tree, K) { return(list(index=idx, dist=dists)) } + + +#' Add custom tree based neighbor and weights to a hotspot object +#' +#' @param tree object of class phylo +#' @param the hotspot object to add the nw to +#' @param minSize the minimum number of neighbors of the node +#' @return the hotspot object +lcaBasedTreeKNN <- function(tree, minSize=20) { + tips <- tree$tip.label + nTips <- length(tips) + neighbors <- data.frame(t(matrix(seq_len(nTips), ncol = nTips, nrow= nTips))) + rownames(neighbors) <- tips + + + weights <- data.frame(matrix(0, ncol = nTips, nrow= nTips)) + for (tip in seq_len(nTips)) { + my_neighbors <- minSizeCladeNeighbors(tree, tip, minSize) + + weights[tip, my_neighbors] <- 1 + } + + neighbors_no_diag <- data.frame(matrix(ncol = nTips -1, nrow= nTips)) + weights_no_diag <- data.frame(matrix(ncol = nTips -1, nrow= nTips)) + + for (tip in seq_len(nTips)) { + neighbors_no_diag[tip, ] <- neighbors[tip, -tip] + weights_no_diag[tip, ] <- weights[tip, -tip] + } + + rownames(neighbors_no_diag) <- tips + rownames(weights_no_diag) <- tips + + colnames(neighbors_no_diag) <- seq_len(nTips-1) - 1 + colnames(weights_no_diag) <- seq_len(nTips-1) - 1 + return(list("neighbors"=as.matrix(neighbors_no_diag), "weights"=as.matrix(weights_no_diag))) +} + + + + #' Generate an ultrametric tree #' #' @param tree an object of class phylo @@ -1012,38 +1052,74 @@ get_min_cluster_size <- function(tree, node) { #' Trivial distance function for arbitrary tree clustering #' -#' Trivial distance is defined as the difference in depths between the +#' Number of mutations along path from tip1 to LCA(tip1, tip2) +#' Ensures if on same clade, join. #' #' @param tree an object of class phylo #' @param tip1 the first leaf #' @param tip2 the second leaf #' @return the trivial distance between tip1, tip2 +#' +#' @export trivial_dist <- function(tree, tip1, tip2) { - depths <- node.depth(tree) - edges <- tree$edge - path1 <- c(tip1) - root <- find_root(tree) - parent <- tip1 - while (T) { - parent <- edges[, 1][edges[, 2] == parent] - path1 <- append(path1, parent) - if (parent == root) { - break - } + # node depths of tree + edges <- tree$edge + # Get the path from tip1 to root + path1 <- c(tip1) + root <- find_root(tree) + parent <- tip1 + while (T) { + parent <- edges[, 1][edges[, 2] == parent] + path1 <- append(path1, parent) + if (parent == root) { + break } - - path2 <- c(tip2) - parent <- tip2 - while (T) { - parent <- edges[, 1][edges[, 2] == parent] - path2 <- append(path2, parent) - if (parent == root || parent %in% path1) { - break - } + } + + mrca <- getMRCA(tree, c(tip1, tip2)) # MRCA of both + + + # Depths of the internal nodes that represent the parents of tip1, tip2 right before LCA + # ie the 'diverging' point or first split + path_length <- which(path1 == mrca) + + # Return the absolute difference between the depths + return(path_length) +} + + + + + +#' Depth of tip1 parent immediately after LCA(tip1, tip2) +#' +#' @param tree an object of class phylo +#' @param tip1 the first leaf +#' @param tip2 the second leaf +#' @return the trivial distance between tip1, tip2 +#' +#' @export +lca_based_depth <- function(tree, tip1, tip2) { + depths <- node.depth(tree) + edges <- tree$edge + # Get the path from tip1 to root + path1 <- c(tip1) + root <- find_root(tree) + parent <- tip1 + while (T) { + parent <- edges[, 1][edges[, 2] == parent] + path1 <- append(path1, parent) + if (parent == root) { + break } - - tip1_depth_prev <- depths[path1[which(path1 == path2[length(path2)]) - 1]] - tip2_depth_prev <- depths[path2[length(path2)] - 1] - - return(abs(tip1_depth_prev - tip2_depth_prev)) + } + + mrca <- getMRCA(tree, c(tip1, tip2)) # MRCA of both + + # Depths of the internal nodes that represent the parents of tip1, tip2 right before LCA + # ie the 'diverging' point or first split + lca_child_depth <- depths[path1[which(path1 == mrca) - 1]] + + # Return the absolute difference between the depths + return(lca_child_depth) } \ No newline at end of file diff --git a/R/methods-Module.R b/R/methods-Module.R index 37d5f6d6..e870adb1 100644 --- a/R/methods-Module.R +++ b/R/methods-Module.R @@ -1,5 +1,4 @@ -#' Initializes a new VISION object. -#' +#' Perform Hotspot analysis on Vision Object #' #' @param object Vision Object #' @param model model argument for Hotspot, one of \itemize{ @@ -10,127 +9,179 @@ #' } #' @param tree whether to use tree as latent space. If TRUE, object should have #' a tree slot. -#' @param number_top_genes hotspot argument for number of genes to consider +#' @param number_top_genes Hotspot argument for number of genes to consider #' @param num_umi optional dataframe containing umi counts in first column for #' barcodes -#' @param min_gene_threshold minimum number of genes in hotspot module +#' @param min_gene_threshold minimum number of genes in Hotspot module #' @param n_neighbors number of neighbors to consider in latent space #' @param autocorrelation_fdr threshold for significance for genes autocorr #' @param clustering_fdr threshold for significance for clustering modules -#' +#' @param logdata boolean, log the expression data, avoid for danb #' Populates the modData, HotspotModuleScores, ModuleSignatureEnrichment #' and HotspotObject slots of object, as well as recalculates signature scores #' for new modules. -calcHotspotModules <- function(object, model="normal", tree=FALSE, - number_top_genes=1000,num_umi=NULL, +#' @return the modified Vision object +#' +#' @export +runHotspot <- function(object, model="normal", tree=FALSE, + number_top_genes=1000, num_umi=NULL, min_gene_threshold=20, n_neighbors=NULL, - autocorrelation_fdr=0.05, clustering_fdr=0.5, nn_precomp=NULL, wt_precomp=NULL) { - - hotspot <- import("hotspot", convert=F) + autocorrelation_fdr=0.05, clustering_fdr=0.5, logdata=FALSE) { + + # Init Hotspot + hs <- hsInit(object, model, tree, num_umi, logdata) + # Init Hotspot KNN + hs <- hsCreateKnnGraph(hs, object, n_neighbors=n_neighbors) + # perform Hotspot analysis and store results in R + hs_genes <- hsComputeAutoCorrelations(hs, number_top_genes=number_top_genes, autocorrelation_fdr=autocorrelation_fdr) + # Compute localcorr + hs <- hsComputeLocalCorrelations(hs, hs_genes) + # Calculate Hotspot Module Scores for informative genes + hs <- hsCalculateModuleScores(hs, min_gene_threshold, clustering_fdr) + # Cluster Hotspot modules and perform Vision based analysis on HS Modules and + object <- analyzeHotspotObjectVision(object, hs, tree) - workers <- getOption("mc.cores") - if (is.null(workers)){ - workers <- 1 - } - if (model == "danb") { - # Don't take the log if danb - exprData = object@exprData - } else { - # take the log2 otherwise - exprData = matLog2(object@exprData) - } - - gene_subset = object@params$latentSpace$projectionGenes - - if (is.na(gene_subset)) { - gene_subset <- applyFilters(exprData, - object@params$latentSpace$projectionGenesMethod, - object@params$latentSpace$threshold, 2) - } - - exprData = as.data.frame(as.matrix(exprData)[gene_subset,]) + return(object) +} - # remove genes that do not have any standard deviation - sds = apply(exprData, 1, sd) - exprData = exprData[which(sds > 0), ] - # generate the Hotspot object in python, potentially using the tree - if (tree) { - message("Using Tree") - ete3 <- import("ete3", convert=F) - nwk <- write.tree(object@tree) - pyTree <- ete3$Tree(nwk, format = 8L) - if (is.null(num_umi)) { - hs <- hotspot$Hotspot(exprData, tree=pyTree, model=model) - } else { - py$umi_df <- r_to_py(num_umi) - py_run_string("umi_counts = umi_df.iloc[:, 0]") - hs <- hotspot$Hotspot(exprData, tree=pyTree, model=model, umi_counts=py$umi_counts) - } - +#' Init Hotspot object from Vision Object +#' +#' @param object the Vision Object +#' @param model the model for Hotspot (ie "normal", "danb"...) +#' @param tree boolean, whether to use the tree as ls +#' @param num_umi df of barcodes x num_umi +#' @param logdata boolean, log the expression data, avoid for danb +#' +#' @return the Hotspot object +#' +#' @export +hsInit <- function(object, model="normal", tree=F, num_umi=NULL, logdata=FALSE) { + hotspot <- import("hotspot", convert=F) + + workers <- getOption("mc.cores") + if (is.null(workers)){ + workers <- 1 + } + + if (!logdata) { + # Don't take the log + exprData = object@exprData + } else { + # take the log2 otherwise + exprData = matLog2(object@exprData) + } + + gene_subset <- object@params$latentSpace$projectionGenes + + if (any(is.na(gene_subset))) { + gene_subset <- applyFilters(exprData, + object@params$latentSpace$projectionGenesMethod, + object@params$latentSpace$threshold, 2) + } + + exprData = as.data.frame(as.matrix(exprData)[gene_subset,]) + + # remove genes that do not have any standard deviation + sds = apply(exprData, 1, sd) + exprData = exprData[which(sds > 0), ] + + # generate the Hotspot object in python, potentially using the tree + if (tree) { + message("Using Tree") + ete3 <- import("ete3", convert=F) + nwk <- write.tree(object@tree) + pyTree <- ete3$Tree(nwk, format = 8L) + if (is.null(num_umi)) { + hs <- hotspot$Hotspot(exprData, tree=pyTree, model=model) } else { - if (is.null(num_umi)) { - hs <- hotspot$Hotspot(exprData, latent=as.data.frame(object@LatentSpace), model=model) - } else { - py$umi_df <- r_to_py(num_umi) - py_run_string("umi_counts = umi_df.iloc[:, 0]") - hs <- hotspot$Hotspot(exprData, latent=as.data.frame(object@LatentSpace), model=model, umi_counts=py$umi_counts) - } + py$umi_df <- r_to_py(num_umi) + py_run_string("umi_counts = umi_df.iloc[:, 0]") + hs <- hotspot$Hotspot(exprData, tree=pyTree, model=model, umi_counts=py$umi_counts) } - # create knn graph, specify nn or use object default - if (!is.null(nn_precomp)) { - hs$neighbors <- nn_precomp - hs$weights <- wt_precomp - } else if (is.null(n_neighbors)) { - hs$create_knn_graph(F, n_neighbors = as.integer(object@params$numNeighbors)) + } else { + if (is.null(num_umi)) { + hs <- hotspot$Hotspot(exprData, latent=as.data.frame(object@LatentSpace), model=model) } else { - hs$create_knn_graph(F, n_neighbors = as.integer(n_neighbors)) + py$umi_df <- r_to_py(num_umi) + py_run_string("umi_counts = umi_df.iloc[:, 0]") + hs <- hotspot$Hotspot(exprData, latent=as.data.frame(object@LatentSpace), model=model, umi_counts=py$umi_counts) } + } - # perform hotspot analysis and store results in R - hs_results <- hs$compute_autocorrelations(jobs=as.integer(workers)) - hs_genes <- hs_results$loc[hs_results$FDR$le(autocorrelation_fdr)]$sort_values('Z', ascending=F)$head(as.integer(number_top_genes))$index - - hs <- hsComputeLocalCorrelations(hs, hs_genes, workers) - - hs <- createHotspotModulesCalcScores(hs, min_gene_threshold, clustering_fdr) - - object <- analyzeHotspotObject(object, hs, tree) - - return(object) + return(hs) +} + + +#' Init KNN graph in Hotspot object +#' +#' @return the Hotspot object with KNN initialized +#' +#' @export +hsCreateKnnGraph <- function(hs, object, n_neighbors=NULL) { + # create knn graph, specify nn or use object default + if (is.null(n_neighbors)) { + hs$create_knn_graph(F, n_neighbors = as.integer(object@params$numNeighbors)) + } else { + hs$create_knn_graph(F, n_neighbors = as.integer(n_neighbors)) + } + return(hs) } +#' Compute Hotspot auto correlations +#' +#' @param hs the Hotspot object +#' @param number_top_genes Hotspot argument for number of genes to consider +#' @param autocorrelation_fdr threshold for significance for genes autocorr +#' @return list of HS genes +#' +#' @export +hsComputeAutoCorrelations <- function(hs, number_top_genes=1000, autocorrelation_fdr=0.05) { + workers <- getOption("mc.cores") + if (is.null(workers)){ + workers <- 1 + } + + hs_results <- hs$compute_autocorrelations(jobs=as.integer(workers)) + hs_genes <- hs_results$loc[hs_results$FDR$le(autocorrelation_fdr)]$sort_values('Z', ascending=F)$head(as.integer(number_top_genes))$index + return(hs_genes) +} + -#' Interface function to compute local correlations for hotspot +#' Interface function to compute local correlations for Hotspot #' Warning: modifies the hs argument -#' @param hs the hotspot object -#' @param hs_genes hotspot genes -#' @param workers num core +#' @param hs the Hotspot object +#' @param hs_genes Hotspot genes #' @return the populated hs object #' #' @export -hsComputeLocalCorrelations <- function(hs, hs_genes, workers) { - hs$compute_local_correlations(hs_genes, jobs=as.integer(workers)) - return(hs) +hsComputeLocalCorrelations <- function(hs, hs_genes) { + workers <- getOption("mc.cores") + if (is.null(workers)){ + workers <- 1 + } + hs$compute_local_correlations(hs_genes, jobs=as.integer(workers)) + return(hs) } -#' Analyze a hotspot object using built in methods such +#' Analyze a Hotspot object using built in methods such #' such as local correlation, signature overlap, etc. -#' Necessary to run this function for hotpot functionality in viewer to work. +#' Necessary to run this function for Hotspot functionality in viewer to work. #' #' @param object the VISION object -#' @param hs the hotspot python object loaded by reticulate -#' @param tree whether to use tree as latent space. If TRUE, object should have +#' @param hs the Hotspot python object loaded by Reticulate +#' @param tree whether to use tree as latent space. If TRUE, object should have a tree #' #' @return the modified VISION object with the following slots filled: #' Populates the modData, HotspotModuleScores, ModuleSignatureEnrichment #' and HotspotObject slots of object, as well as recalculates signature scores #' for new modules. +#' #' @export -analyzeHotspotObject <- function(object, hs, tree=FALSE) { +analyzeHotspotObjectVision <- function(object, hs, tree=FALSE) { hs_module_scores <- hs$module_scores hs_modules <- hs$modules @@ -182,7 +233,7 @@ analyzeHotspotObject <- function(object, hs, tree=FALSE) { object@ModuleHotspotScores <- hs_module_scores object <- analyzeLocalCorrelationsModules(object, tree) - # save the hotspot object + # save the Hotspot object object <- addHotspotToVision(object, hs) return(object) @@ -191,13 +242,14 @@ analyzeHotspotObject <- function(object, hs, tree=FALSE) { #' Create Hotspot Modules and calculate module scores given a HS object #' with local correlations already calculated -#' @param hs the hotspot object, must have ran compute_local_correlations already +#' +#' @param hs the Hotspot object, must have ran compute_local_correlations already #' @param min_gene_threshold min genes per module #' @param clustering_fdr p value for clustering genes #' @return the modified hs object #' #' @export -createHotspotModulesCalcScores <- function(hs, min_gene_threshold=20, clustering_fdr=0.5, plot=F) { +hsCalculateModuleScores <- function(hs, min_gene_threshold=20, clustering_fdr=0.5, plot=F) { hs$create_modules(min_gene_threshold=as.integer(min_gene_threshold), fdr_threshold=clustering_fdr) hs$calculate_module_scores() @@ -210,19 +262,20 @@ createHotspotModulesCalcScores <- function(hs, min_gene_threshold=20, clustering #' Add HS python obj to vision OBJECT +#' #' @param object Vision object #' @param hs python hs object -#' @return vision object +#' @return Vision object with hs populated #' #' @export addHotspotToVision <- function(object, hs) { - # save the hotspot object + # save the Hotspot object pickle <- import("pickle", convert=F) py$hs <- hs py$pickle <- pickle py_run_string("hs_byte_array = bytearray(pickle.dumps(hs))") hs_pickled_r <- as.raw(py$hs_byte_array) - object@Hotspot <- list(hs_pickled_r) + object@Hotspot <- hs_pickled_r return(object) } @@ -242,6 +295,7 @@ addHotspotToVision <- function(object, hs) { #' the overall signature score. Default = TRUE. This is used for inspecting #' genes in a signature in the output report #' @return the VISION object, with the @ModScores and @ModGeneImportance slots populated +#' #' @export calcModuleScores <- function( object, mod_norm_method = NULL, mod_gene_importance = TRUE) { @@ -303,9 +357,11 @@ calcModuleScores <- function( #' different projection onto low-dimensional space are computed, and the #' consistency of the resulting space with the signature scores is computed #' to find signals that are captured successfully by the projections. +#' #' @param object the VISION object #' @param tree whether to use the tree object as latent space for neighbors #' @return the VISION object with values set for the analysis results +#' #' @export analyzeLocalCorrelationsModules <- function(object, tree=FALSE) { @@ -350,10 +406,12 @@ analyzeLocalCorrelationsModules <- function(object, tree=FALSE) { #' Computes the hypergeometric overlap test for modules and signatures -#' @param object the vision object. +#' +#' @param object the Vision object. #' @param skip_down whether to ignore down signatures in overlap #' @return list(statistic values, p values, clusters of signatures) #' +#' @export calc_mod_sig_enrichment <- function(object, skip_down=TRUE) { modules <- object@modData original_signatures <- object@sigData @@ -425,13 +483,15 @@ calc_mod_sig_enrichment <- function(object, skip_down=TRUE) { #' Calculate the hypergeometric enrichment for two sets from a population +#' Statisic = log (observed overlap / expected overlap) +#' P value = 1- hypergeometric(observed overlap -1, max(|set1|, |set2|), |genes| - |set1|, min(|set1|, |set2|)) #' -#' @param set1 -#' @param set2 +#' @param set1 the first gene set +#' @param set2 the second gene set #' @param genes the population #' @return c(statistic, p value) -#' Statisic = log (observed overlap / expected overlap) -#' P value = 1- hypergeometric(observed overlap -1, max(|set1|, |set2|), |genes| - |set1|, min(|set1|, |set2|)) +#' +#' @export calc_set_enrichment <- function(set1, set2, genes) { N <- length(genes) m <- max(length(set1), length(set2)) @@ -450,11 +510,16 @@ calc_set_enrichment <- function(set1, set2, genes) { return(c(stat, p_value)) } + #' Make the clusters for the modules by enrichment. -#' For now we just assign each signature to each cluster, could filter to only include once. +#' For now we just assign each signature to each cluster, could filter to only include once, +#' so that each one appears in the modules x sigs table. +#' +#' @param stats overlap stats from calc_set_enrichment +#' @param pvals overlap p values from calc_set_enrichment +#' @return assignments of each signature to each module #' -#' @param stats -#' @param pvals +#' @export group_modules_enrichment <- function(stats, pvals) { sigs <- rownames(stats) mods <- colnames(stats) @@ -467,9 +532,10 @@ group_modules_enrichment <- function(stats, pvals) { #' Generates signature objects for the overlap sets between modules and signatures -#' @param object the vision object +#' @param object the Vision object +#' @return Vision Object, populates the modData slot with overlap signatures. #' -#' Populates the modData slot with overlap signatures. +#' @export generateOverlapSignatures <- function(object) { message("Generating Module Signature Overlaps...\n") sigs <- rownames(object@ModuleSignatureEnrichment$statistics) @@ -505,6 +571,7 @@ generateOverlapSignatures <- function(object) { #' @param object the VISION object #' @param variables which columns of the meta-data to use for comparisons #' @return the VISION object with the @ClusterComparisons modules slot populated +#' #' @export clusterModScores <- function(object, variables = "All") { @@ -616,12 +683,14 @@ saveHSBytestToPickle <- function(path, bytes) { } -#' Add custom tree based neighbor and weights to a hotspot object +#' Add custom tree based neighbor and weights to a Hotspot object #' #' @param tree object of class phylo -#' @param the hotspot object to add the nw to +#' @param the Hotspot object to add the nw to #' @param minSize the minimum number of neighbors of the node -#' @return the hotspot object +#' @return the Hotspot object +#' +#' @export lcaBasedHotspotNeighbors <- function(tree, hotspot, minSize=20) { tips <- tree$tip.label nTips <- length(tips) @@ -650,4 +719,49 @@ lcaBasedHotspotNeighbors <- function(tree, hotspot, minSize=20) { colnames(neighbors_no_diag) <- seq_len(nTips-1) - 1 colnames(weights_no_diag) <- seq_len(nTips-1) - 1 return(list("neighbors"=neighbors_no_diag, "weights"=weights_no_diag)) -} +} + + +#' Draw Modules Heatmap (Gene x Gene) +#' +#' @param hs the Hotspot Object +#' @param palette palette +#' @export +draw_hotspot_heatmap <- function(hs, palette = paletteer_d("ggsci::default_nejm")) { + scipy_hierarchy <- import("scipy.cluster.hierarchy", convert=F) + np <- import("numpy") + linkage <- hs$linkage + py_dend = scipy_hierarchy$dendrogram(linkage) + lcz = hs$local_correlation_z + gene_order = colnames(lcz)[np$array(py_dend$leaves)+1] + col_mapping = c() + module_to_col = list() + hs$modules = as.character(hs$modules) + unique_mods = as.character(unique(hs$modules)) + example_genes = list() + col_mapping[["-1"]] = "#ffffff" + for (i in 1:length(unique_mods)) { + mod = unique_mods[[i]] + if (mod != -1) { + col_mapping[[mod]] = palette[[i]] + module_to_col[[mod]] = palette[[i]] + example_genes[[mod]] = sample(hs$modules[hs$modules == mod], 5) + } + } + print(module_to_col) + print(col_mapping[order(names(col_mapping))]) + print(col_mapping) + print(example_genes) + modules = data.frame("module" = hs$modules) + modules$module = as.character(modules$module) + ha = rowAnnotation(df = modules, + col = col_mapping, + simple_anno_size = unit(0.5, "in")) + ht = Heatmap(as.matrix(lcz), name = "mat", + show_row_names=F, show_column_names=F, show_row_dend=F, show_column_dend=F, + row_order = gene_order, column_order=gene_order, + right_annotation=ha, + column_names_gp = gpar(fontsize = c(1)), + width = unit(5, "in"), height = unit(5, "in")) + draw(ht) +} diff --git a/R/methods-Vision.R b/R/methods-Vision.R index a7a74e16..a097c27c 100644 --- a/R/methods-Vision.R +++ b/R/methods-Vision.R @@ -90,7 +90,7 @@ setMethod("Vision", signature(data = "matrixORSparse"), "rank_norm_columns"), pool="auto", cellsPerPartition=10, name=NULL, num_neighbors = NULL, latentSpace = NULL, latentSpaceName = NULL, latentTrajectory = NULL, - tree = NULL, modData = list(), hotspot= list(), pools=list()) { + tree = NULL, modData = list(), hotspot= NULL, pools=list()) { .Object <- new("Vision") @@ -407,6 +407,13 @@ setMethod("Vision", signature(data = "matrixORSparse"), } ) + +#' Initializes a new PhyloVision Object +#' +#' @param tree parsed ape tree +#' @param ... arguments passed to the base Vision constructor +#' @rdname PhyloVision-class +#' @export setMethod("PhyloVision", signature(tree="phylo"), function(tree, ...) { obj <- Vision(...) @@ -722,7 +729,7 @@ setMethod("analyze", signature(object="Vision"), if (hotspot) { message("Hotspot Analysis") - object <- calcHotspotModules(object, model="danb", tree) + object <- runHotspot(object, model="danb", tree) } @@ -731,6 +738,12 @@ setMethod("analyze", signature(object="Vision"), return(object) }) + +#' Analyze a PhyloVision object +#' +#' @param ... arguments passed to the base Vision constructor +#' @aliases phyloAnalyze +#' @export setMethod("phyloAnalyze", signature(object="PhyloVision"), function(object, hotspot=FALSE) { object <- analyze(object, tree=TRUE, hotspot=hotspot) diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 536364b6..6f105d34 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -81,7 +81,7 @@ VISION - 2.1.0 + 3.0.0 @@ -89,7 +89,7 @@