Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions .github/workflows/R-CMD-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,10 @@ jobs:
config:
- { os: ubuntu-latest, r: '4.5' }
- { os: ubuntu-latest, r: '4.4' }
- { os: ubuntu-latest, r: '4.3' }
- { os: ubuntu-latest, r: '4.2' }
- { os: windows-latest, r: '4.3' }
- { os: windows-latest, r: '4.2' }
- { os: macos-latest, r: '4.3' }
- { os: macos-latest, r: '4.2' }
- { os: windows-latest, r: '4.5' }
- { os: windows-latest, r: '4.4' }
- { os: macos-latest, r: '4.5' }
- { os: macos-latest, r: '4.4' }
name: R ${{ matrix.config.r }} - ${{ matrix.config.os }}

steps:
Expand Down
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: splikit
Title: A toolkit for analysing RNA splicing in scRNA-seq data
Version: 1.0.2
Version: 1.0.3
Authors@R:
person("Arsham", "Mikaeili Namini", , "arsham.mikaeilinamini@mail.mcgill.ca", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-9453-6951"))
Expand All @@ -20,7 +20,8 @@ Imports:
methods,
stats,
Rcpp,
RcppArmadillo
RcppArmadillo,
RcppEigen
LinkingTo:
Rcpp,
RcppArmadillo,
Expand Down
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export(find_variable_genes)
export(get_pseudo_correlation)
export(get_rowVar)
export(get_silhouette_mean)
export(load_toy_M1_M2_object)
export(load_toy_SJ_object)
export(make_eventdata_plus)
export(make_gene_count)
Expand All @@ -13,7 +14,12 @@ export(make_m1)
export(make_m2)
export(make_velo_count)
import(Matrix)
import(Rcpp)
import(data.table)
importClassesFrom(Matrix,dgCMatrix)
importClassesFrom(Matrix,dgTMatrix)
importClassesFrom(Matrix,dsCMatrix)
importClassesFrom(Matrix,dsTMatrix)
importFrom(Matrix,Matrix)
importFrom(Matrix,readMM)
importFrom(Matrix,sparseMatrix)
Expand Down
49 changes: 34 additions & 15 deletions R/feature_selection.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,16 @@
#' @param ... Additional arguments to be passed.
#'
#' @return A \code{data.table} containing the events and their corresponding sum deviance values.
#'
#' @examples
#' # loading the toy dataset
#' toy_obj <- load_toy_M1_M2_object()
#'
#' # getting HVE (high variable events)
#' HVE <- find_variable_events(toy_obj$m1, toy_obj$m2)
#'
#' # printing the results
#' print(HVE[order(-sum_deviance)])
#' @export
find_variable_events <- function(m1_matrix, m2_matrix, min_row_sum = 50, verbose=TRUE, n_threads=1, ...) {

Expand Down Expand Up @@ -95,19 +105,27 @@ find_variable_events <- function(m1_matrix, m2_matrix, min_row_sum = 50, verbose
#' @return A \code{data.table} containing gene names (column \code{events}) and computed metrics.
#' For the deviance method, this includes \code{sum_deviance} and \code{variance} columns.
#'
#' @examples
#' library(Matrix)
#' # loading the toy dataset
#' toy_obj <- load_toy_M1_M2_object()
#'
#' # getting high variable genes
#' HVG_VST <- find_variable_genes(toy_obj$gene_expression, method = "vst") # vst method
#' HVG_DEV <- find_variable_genes(toy_obj$gene_expression, method = "sum_deviance") # sum_deviance method
#'
#' # printing the results
#' print(HVG_VST[order(-standardize_variance)])
#' print(HVG_DEV[order(-sum_deviance)])
#'
#' @useDynLib splikit, .registration = TRUE
#' @import Rcpp
#' @import Matrix
#' @importClassesFrom Matrix dgCMatrix dsCMatrix dgTMatrix dsTMatrix
#' @export
find_variable_genes <- function(gene_expression_matrix, method = c("vst", "sum_deviance"), ...) {

# Check required libraries
if (!requireNamespace("data.table", quietly = TRUE)) {
stop("The 'data.table' package is required but not installed.")
}
if (!requireNamespace("Rcpp", quietly = TRUE)) {
stop("The 'Rcpp' package is required but not installed.")
}
if (!requireNamespace("Matrix", quietly = TRUE)) {
stop("The 'Matrix' package is required but not installed.")
}
find_variable_genes <- function(gene_expression_matrix, method = "vst", ...) {
# addinng the vst method as the deafult
method <- match.arg(method, choices = c("vst", "sum_deviance"))

# Verify that gene_expression_matrix is a sparse Matrix
if (!inherits(gene_expression_matrix, "Matrix")) {
Expand Down Expand Up @@ -155,7 +173,8 @@ find_variable_genes <- function(gene_expression_matrix, method = c("vst", "sum_d

# Calculate deviances using the C++ function
deviance_values <- tryCatch({
calcNBDeviancesWithThetaEstimation(gene_expression_matrix_sub)
calcNBDeviancesWithThetaEstimation(as(gene_expression_matrix_sub, "dgCMatrix"))

}, error = function(e) {
stop("Error in calcNBDeviancesWithThetaEstimation function: ", e$message)
})
Expand All @@ -168,9 +187,9 @@ find_variable_genes <- function(gene_expression_matrix, method = c("vst", "sum_d

# Compute row variance using the previously defined function
row_var <- tryCatch({
multigedi_get_row_variance(sparse_matrix = gene_expression_matrix)
splikit::get_rowVar(M = gene_expression_matrix)
}, error = function(e) {
stop("Error in multigedi_get_row_variance: ", e$message)
stop("Error in splikit::get_rowVar: ", e$message)
})

row_var_cpp_dt <- data.table::data.table(events = rownames(gene_expression_matrix),
Expand Down
10 changes: 10 additions & 0 deletions R/star_solo_processing.R
Original file line number Diff line number Diff line change
Expand Up @@ -842,3 +842,13 @@ make_velo_count <- function(velocyto_dirs, sample_ids, whitelist_barcodes = NULL
}
}

#' Load the toy M1/M2 object
#'
#' Loads a toy object of M1 and M2 used for examples or testing.
#'
#' @return An R object from the toy_m1_m2_obj.rds file.
#' @export
load_toy_M1_M2_object <- function() {
file <- system.file("extdata", "toy_m1_m2_obj.rds", package = "splikit")
readRDS(file)
}
Binary file added inst/extdata/toy_m1_m2_obj.rds
Binary file not shown.
10 changes: 10 additions & 0 deletions man/find_variable_events.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 15 additions & 5 deletions man/find_variable_genes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions man/load_toy_M1_M2_object.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 0 additions & 4 deletions src/Makevars.linux
Original file line number Diff line number Diff line change
@@ -1,4 +0,0 @@
## src/Makevars (Linux)
CXX_STD = CXX14
PKG_CXXFLAGS += -O3 -march=native -pipe -fopenmp
PKG_LIBS += -fopenmp $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
41 changes: 15 additions & 26 deletions src/deviance_gene.cpp
Original file line number Diff line number Diff line change
@@ -1,25 +1,23 @@
// calcNBDeviancesWithThetaEstimation.cpp
#define ARMA_64BIT_WORD
#include <RcppArmadillo.h>
// [[Rcpp::depends(RcppArmadillo)]]

using namespace Rcpp;
// [[Rcpp::export]]

// [[Rcpp::export]]
arma::vec calcNBDeviancesWithThetaEstimation(const arma::sp_mat& gene_expression) {

int n_rows = gene_expression.n_rows;
int n_cols = gene_expression.n_cols; // number of cells
arma::vec dev(n_rows, arma::fill::zeros);

for (int i = 0; i < n_rows; i++){
double sum_y = 0.0;
double sum_y = 0.0;
double sum_y2 = 0.0;


for (int j = 0; j < n_cols; j++){
double y = gene_expression(i, j);
sum_y += y; // compute total counts
sum_y2 += y * y; // and total of squares for gene i.
sum_y += y; // compute total counts
sum_y2 += y * y; // and total of squares for gene i.
}

// If there are no counts, set deviance to 0.
Expand All @@ -28,36 +26,27 @@ arma::vec calcNBDeviancesWithThetaEstimation(const arma::sp_mat& gene_expression
continue;
}

double mu_hat = sum_y / n_cols; // intercept-only mean
double variance = (sum_y2 / n_cols) - (mu_hat * mu_hat); // empirical variance

double mu_hat = sum_y / n_cols; // Fitted mean under the intercept-only model (MLE).

// Estimate the variance as (mean of squares) - (square of the mean).
double variance = (sum_y2 / n_cols) - (mu_hat * mu_hat);

// Estimate theta using: theta = mu_hat^2 / (variance - mu_hat) if variance > mu_hat.
double theta_est;
if (variance > mu_hat) {
theta_est = (mu_hat * mu_hat) / (variance - mu_hat);
} else {
// If variance is not greater than mean, approximate Poisson behavior.
theta_est = 1e12; // large number to approximate theta -> infinity
}
// Estimate NB dispersion: theta = mu^2/(var − mu) if var > mu, else (approx. infiniti)
double theta_est = (variance > mu_hat)
? (mu_hat * mu_hat) / (variance - mu_hat)
: 1e12;

double dev_row = 0.0;
// Second pass: compute deviance contribution from each cell.
// Second pass: deviance contribution from each cell
for (int j = 0; j < n_cols; j++){
double y = gene_expression(i, j);
double term = 0.0;

// Add y * log(y/mu_hat); define 0*log(0) as 0.
if (y > 0)
term = y * std::log(y / mu_hat);

// Subtract (y + theta_est)*log((y + theta_est)/(mu_hat + theta_est)).
term -= (y + theta_est) * std::log((y + theta_est) / (mu_hat + theta_est));
term -= (y + theta_est)
* std::log((y + theta_est) / (mu_hat + theta_est));

// Multiply by 2 as per the deviance definition.
dev_row += 2 * term;
dev_row += 2.0 * term;
}

dev[i] = dev_row;
Expand Down
Loading