diff --git a/R/CalculatePercentage.R b/R/CalculatePercentage.R index 448e5b3..b233435 100644 --- a/R/CalculatePercentage.R +++ b/R/CalculatePercentage.R @@ -1,14 +1,16 @@ -#' Calculate the percentage of cells in activation status +#' CalculatePercentage #' #' This function calculates the percentage of cells in ON (scale > 0) and OFF (scale < 0) #' activation states within each group defined by `group_var`. If exactly two groups #' are provided, it also computes Cohen's d effect size between their activation values. +#' #' @name CalculatePercentage #' @importFrom dplyr bind_rows #' @importFrom effsize cohen.d #' @importFrom stats na.omit #' @param to.plot A data frame containing at least a `scale` column and a grouping column. #' @param group_var A string specifying the grouping variable (e.g., "genotype", "treatment"). +#' #' @return A data frame with the percentage of ON/OFF cells and Cohen's d (if applicable). #' @examples #' data(fake_to_plot) diff --git a/R/ComputeCellData.R b/R/ComputeCellData.R index 584def6..94565ac 100644 --- a/R/ComputeCellData.R +++ b/R/ComputeCellData.R @@ -1,3 +1,5 @@ +#' ComputeCellData +#' #' A function computes cell status for a given pathway in single-cell RNA-seq data, #' based on the distance between genes in a specified pathway. The distance is computed #' for each batch of cells, and classical multidimensional scaling (MDS) is used to @@ -14,7 +16,7 @@ #' #' @param x A `Seurat` object containing single-cell RNA sequencing data. #' @param pathway A `character` string specifying the pathway name. This should match a pathway used by `LoadPathway()`. -#' @param distance.method A `character` string specifying the distance metric to use. +#' @param distance.method A `character` string specifying the distance metric to use. Default is "manhattan". #' Options include: `"manhattan"`, `"euclidean"`, `"canberra"`, `"binary"`, `"minkowski"` #' @param batch.size An `integer` specifying the number of cells to process per batch. 
Default is 1000. #' @param scale.data A `logical` indicating whether to use scaled data (`scale.data = TRUE`) or normalized data. Default is `TRUE`. @@ -53,8 +55,15 @@ ComputeCellData <- function(x, pathway, distance.method, batch.size = batch.size shuffled_cell_id <- sample(cell_id) # Split shuffled indices into batches + # Check if batch.size is provided; if not, set default and message + if (missing(batch.size) || is.null(batch.size)) { + message("Parameter 'batch.size' is missing or NULL. Setting default batch size to 1000.") + batch.size <- 1000 + } + # Define batch size batch_size <- batch.size + batches <- split(shuffled_cell_id, ceiling(seq_along(shuffled_cell_id) / batch.size)) # Subset expression data into chunks based on sampled indices @@ -82,6 +91,12 @@ ComputeCellData <- function(x, pathway, distance.method, batch.size = batch.size next } + # Check if distance.method is provided; if not, set default and message + if (missing(distance.method) || is.null(distance.method)) { + message("Parameter 'distance.method' is missing or NULL. Setting default distance.method to 'manhattan'.") + distance.method <- "manhattan" + } + # Distance calculation message("Computing distance...") d <- dist(t(pathwaytempdata), method = distance.method) diff --git a/R/LoadPathway.R b/R/LoadPathway.R index 7595976..79f51d8 100644 --- a/R/LoadPathway.R +++ b/R/LoadPathway.R @@ -1,6 +1,7 @@ -## Pathway Data Extraction from Exceldataset +#' LoadPathway #' #' This function reads pathway data from the package's built-in Excel file. +#' #' @name LoadPathway #' @param pathway A `character` string specifying the pathway name. #' @return A data frame with pathway data. 
diff --git a/R/PathwayMaxMin.R b/R/PathwayMaxMin.R index 80e93dc..71a8bdc 100644 --- a/R/PathwayMaxMin.R +++ b/R/PathwayMaxMin.R @@ -1,4 +1,7 @@ +#' PathwayMaxMin +#' #' A function to obtain the hypothetical max and min activation status of selected pathway for a given scRNA seq data set +#' #' @name PathwayMaxMin #' @import Seurat #' @import tidyverse diff --git a/R/PlotPathway.R b/R/PlotPathway.R index c2ac45d..d72d7f1 100644 --- a/R/PlotPathway.R +++ b/R/PlotPathway.R @@ -1,3 +1,5 @@ +#' PlotPathway +#' #' A function to plot the Pathway activation status #' #' @name PlotPathway diff --git a/README.md b/README.md index a53803b..a58b1d4 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ library(PathwayEmbed) data(fake_test_object) # Compute pathway data -mds_results <- ComputeCellData(fake_test_object, pathway = "Wnt", distance.method = "manhattan") +mds_results <- ComputeCellData(fake_test_object, pathway = "Wnt", distance.method = "manhattan", batch.size = 100) # TODO: add a default batch size and an end message # Prepare data for plotting plot_data <- PreparePlotData(fake_test_object, mds_results, group = "genotype") diff --git a/inst/extdata/Pathway_Embedding.xlsx b/inst/extdata/Pathway_Embedding.xlsx index b2c8852..4ad26c8 100644 Binary files a/inst/extdata/Pathway_Embedding.xlsx and b/inst/extdata/Pathway_Embedding.xlsx differ diff --git a/man/CalculatePercentage.Rd b/man/CalculatePercentage.Rd index e8ee495..d0df29c 100644 --- a/man/CalculatePercentage.Rd +++ b/man/CalculatePercentage.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/CalculatePercentage.R \name{CalculatePercentage} \alias{CalculatePercentage} -\title{Calculate the percentage of cells in activation status} +\title{CalculatePercentage} \usage{ CalculatePercentage(to.plot, group_var) } diff --git a/man/ComputeCellData.Rd b/man/ComputeCellData.Rd index 2890bc1..c5e25d6 100644 --- a/man/ComputeCellData.Rd +++ b/man/ComputeCellData.Rd @@ -2,10 +2,7 @@ % Please edit documentation in 
R/ComputeCellData.R \name{ComputeCellData} \alias{ComputeCellData} -\title{A function computes cell status for a given pathway in single-cell RNA-seq data, -based on the distance between genes in a specified pathway. The distance is computed -for each batch of cells, and classical multidimensional scaling (MDS) is used to -visualize the pathway expression across cells.} +\title{ComputeCellData} \usage{ ComputeCellData( x, @@ -20,7 +17,7 @@ ComputeCellData( \item{pathway}{A \code{character} string specifying the pathway name. This should match a pathway used by \code{LoadPathway()}.} -\item{distance.method}{A \code{character} string specifying the distance metric to use. +\item{distance.method}{A \code{character} string specifying the distance metric to use. Default is "manhattan". Options include: \code{"manhattan"}, \code{"euclidean"}, \code{"canberra"}, \code{"binary"}, \code{"minkowski"}} \item{batch.size}{An \code{integer} specifying the number of cells to process per batch. Default is 1000.} diff --git a/man/LoadPathway.Rd b/man/LoadPathway.Rd index e10575b..53554a1 100644 --- a/man/LoadPathway.Rd +++ b/man/LoadPathway.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/LoadPathway.R \name{LoadPathway} \alias{LoadPathway} -\title{This function reads pathway data from the package's built-in Excel file.} +\title{LoadPathway} \usage{ LoadPathway(pathway) } diff --git a/man/PathwayMaxMin.Rd b/man/PathwayMaxMin.Rd index cee43c3..d473aa9 100644 --- a/man/PathwayMaxMin.Rd +++ b/man/PathwayMaxMin.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/PathwayMaxMin.R \name{PathwayMaxMin} \alias{PathwayMaxMin} -\title{A function to obtain the hypothetical max and min activation status of selected pathway for a given scRNA seq data set} +\title{PathwayMaxMin} \usage{ PathwayMaxMin(x, pathway, scale.data = TRUE) } diff --git a/man/PlotPathway.Rd b/man/PlotPathway.Rd index 3e18af8..4f7ed94 100644 --- a/man/PlotPathway.Rd +++ b/man/PlotPathway.Rd @@ -2,7 +2,7 @@ % Please edit 
documentation in R/PlotPathway.R \name{PlotPathway} \alias{PlotPathway} -\title{A function to plot the Pathway activation status} +\title{PlotPathway} \usage{ PlotPathway(to.plot, pathway, group, color) }