Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions R/differential_geometry.R
Original file line number Diff line number Diff line change
grassmann_map <- function(x, base_point){
  # Exponential map on the Grassmann manifold: move from `base_point`
  # along the tangent vector `x` and return the resulting representative
  # (an n x k matrix with orthonormal-ish columns; see note below).
  # Adapted from https://github.com/JuliaManifolds/Manifolds.jl/blob/master/src/manifolds/GrassmannStiefel.jl#L93
  if(ncol(base_point) == 0 || nrow(base_point) == 0){
    # Degenerate manifold: nothing to map, return the (empty) base point
    base_point
  }else if(anyNA(x)){
    # Propagate missingness: an undefined tangent vector yields an
    # all-NA result with the same shape as `x`
    matrix(NA, nrow = nrow(x), ncol = ncol(x))
  }else{
    svd <- svd(x)
    # exp_p(x) = p V cos(D) V' + U sin(D) V'  where x = U D V'
    z <- tcrossprod(base_point %*% svd$v %*% diag(cos(svd$d), nrow = length(svd$d)), svd$v) +
      tcrossprod(svd$u %*% diag(sin(svd$d), nrow = length(svd$d)), svd$v)
    # Calling `qr.Q(qr(z))` is problematic because it can flip the signs
    z
  }
}
Expand All @@ -25,11 +25,11 @@ grassmann_log <- function(p, q){
if(n == 0 || k == 0){
p
}else{
z <- t(q) %*% p
At <- t(q) - z %*% t(p)
z <- crossprod(q, p)
At <- t(q) - tcrossprod(z, p)
Bt <- lm.fit(z, At)$coefficients
svd <- svd(t(Bt), k, k)
svd$u %*% diag(atan(svd$d), nrow = k) %*% t(svd$v)
tcrossprod(svd$u %*% diag(atan(svd$d), nrow = k), svd$v)
}
}

Expand All @@ -38,7 +38,7 @@ project_grassmann <- function(x){
}

project_grassmann_tangent <- function(x, base_point){
  # Project `x` onto the tangent space of the Grassmann manifold at
  # `base_point`: x - P (P' x), where P = base_point is assumed to have
  # orthonormal columns. `crossprod` avoids forming t(base_point) explicitly.
  x - base_point %*% crossprod(base_point, x)
}


Expand Down
2 changes: 1 addition & 1 deletion R/find_de_neighborhoods.R
Original file line number Diff line number Diff line change
Expand Up @@ -673,7 +673,7 @@ pseudobulk_size_factors_for_neighborhoods <- function(counts, mask, col_data, gr
sf[! absent_sample] <- apply(Y, 2, function(cnts) {
exp(median((log(cnts) - log_geo_means)[is.finite(log_geo_means) & cnts > 0]))
})
if(any(! is.finite(sf))){
if(!all(is.finite(sf))){
# Something went wrong (maybe the data was too sparse), fall back to "normed_sum"
drop(aggregate_matrix(matrix(cell_col_sums * mask_row, nrow = 1), group_split, MatrixGenerics::rowSums2))
}else{
Expand Down
12 changes: 6 additions & 6 deletions R/lemur.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ lemur <- function(data, design = ~ 1, col_data = NULL,
# Create indicator vector which cells are used for training and which for testing
is_test_data <- rep(FALSE, ncol(data))
if(is.logical(test_fraction) && length(ncol(data))){
if(any(is.na(test_fraction))) stop("test_fraction must not contain 'NA's.")
if(anyNA(test_fraction)) stop("test_fraction must not contain 'NA's.")
is_test_data <- test_fraction
}else if(length(test_fraction) != 1){
stop("'test_fraction' must be a boolean vector of length 'ncol(data)' or a single number between 0 and 1.")
Expand Down Expand Up @@ -153,7 +153,7 @@ lemur_impl <- function(Y, design_matrix,
if(linear_coefficient_estimator == "zero"){
Y_clean <- Y
}else{
Y_clean <- Y - linear_coefficients %*% t(design_matrix)
Y_clean <- Y - tcrossprod(linear_coefficients, design_matrix)
}
if(!is.matrix(base_point)){
if(verbose) message("Find base point for differential embedding")
Expand All @@ -180,7 +180,7 @@ lemur_impl <- function(Y, design_matrix,
coefficients = coefficients, base_point = base_point)
}
if(verbose){
residuals <- Y - project_diffemb_into_data_space(embedding, design = design_matrix, coefficients = coefficients, base_point = base_point) - linear_coefficients %*% t(design_matrix)
residuals <- Y - project_diffemb_into_data_space(embedding, design = design_matrix, coefficients = coefficients, base_point = base_point) - tcrossprod(linear_coefficients, design_matrix)
error <- sum(residuals^2)
message("Final error: ", sprintf("%.3g", error))
}
Expand All @@ -194,7 +194,7 @@ lemur_impl <- function(Y, design_matrix,
}

# Make sure that axes are ordered by variance
if(prod(dim(embedding)) > 0 && all(!is.na(embedding))){
if(prod(dim(embedding)) > 0 && !anyNA(embedding)){
svd_emb <- svd(embedding)
rot <- svd_emb$u
base_point <- base_point %*% rot
Expand All @@ -221,7 +221,7 @@ find_base_point <- function(Y_clean, base_point, n_embedding){
stopifnot(ncol(base_point) == n_embedding)

# Check if it is orthogonal
orth <- t(base_point) %*% base_point
orth <- crossprod(base_point)
if(sum((orth - diag(nrow = n_embedding))^2) > 1e-8){
stop("The provided 'base_point' is not orthogonal")
}
Expand Down Expand Up @@ -254,7 +254,7 @@ project_data_on_diffemb <- function(Y_clean, design, coefficients, base_point){
mm_groups <- get_groups(design)
for(gr in unique(mm_groups)){
covars <- design[which(mm_groups == gr)[1], ]
res[,mm_groups == gr] <- t(grassmann_map(sum_tangent_vectors(coefficients, covars), base_point)) %*% Y_clean[,mm_groups == gr,drop=FALSE]
res[,mm_groups == gr] <- crossprod(grassmann_map(sum_tangent_vectors(coefficients, covars), base_point), Y_clean[,mm_groups == gr,drop=FALSE])
}
res
}
Expand Down
2 changes: 1 addition & 1 deletion R/lemur_fit.R
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ S4Vectors::setValidity2("lemur_fit", function(obj){
if(! is.null(col_names) && length(col_names) != length(unique(col_names))) msg <- c(msg, "`colnames` are not unique")
if(! is.null(row_names) && length(row_names) != length(unique(row_names))) msg <- c(msg, "`rownames` are not unique")
if(is.logical(row_mask)) msg <- c(msg, "`row_mask` is a logical. This is no longer allowed (changed in version 1.0.4 to fix a bug). Please rerun `lemur`.")
if(max(row_mask) > n_features_original || min(row_mask) < 0 || any(is.na(row_mask))) msg <- c(msg, "`row_mask` contains illegal index. This is a bug!")
if(max(row_mask) > n_features_original || min(row_mask) < 0 || anyNA(row_mask)) msg <- c(msg, "`row_mask` contains illegal index. This is a bug!")

if(is.null(msg)){
TRUE
Expand Down
2 changes: 1 addition & 1 deletion R/predict.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ predict_impl <- function(object, newdata = NULL, newdesign = NULL,
stop("The number of rows in 'newdesign' (", nrow(newdesign) ,") and 'alignment_design_matrix'(", nrow(alignment_design_matrix) ,") must be the same")
}
approx <- if(with_linear_model){
linear_coefficients[row_mask,,drop=FALSE] %*% t(newdesign)
tcrossprod(linear_coefficients[row_mask,,drop=FALSE], newdesign)
}else{
matrix(0, nrow = length(row_mask), ncol = nrow(newdesign))
}
Expand Down
2 changes: 1 addition & 1 deletion R/project_on_fit.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ project_on_lemur_fit <- function(fit, data, col_data = NULL, use_assay = "logcou
}

project_on_lemur_fit_impl <- function(Y, design_matrix, alignment_design_matrix, coefficients, linear_coefficients, alignment_coefficients, base_point){
Y_clean <- Y - linear_coefficients %*% t(design_matrix)
Y_clean <- Y - tcrossprod(linear_coefficients, design_matrix)
embedding <- project_data_on_diffemb(Y_clean, design = design_matrix, coefficients = coefficients, base_point = base_point)
embedding <- apply_linear_transformation(embedding, alignment_coefficients, alignment_design_matrix)
# TODO: subset to row_mask? And then potentially also remove the check in find_de_neighborhoods line 143.
Expand Down
12 changes: 6 additions & 6 deletions R/recursive_least_squares.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ recursive_least_squares <- function(y, X){
k <- ncol(X)
res <- matrix(NA, nrow = k, ncol = n)
gamma <- solve(crossprod(X[seq_len(k),]))
beta <- gamma %*% t(X[seq_len(k),]) %*% y[seq_len(k)]
beta <- gamma %*% crossprod(X[seq_len(k),], y[seq_len(k)])
res[,k] <- beta
for(idx in seq(k+1, n)){
yi <- y[idx]
xi <- t(X[idx,,drop=FALSE])
gamma <- gamma - (gamma %*% xi %*% t(xi) %*% gamma) / c(1 + t(xi) %*% gamma %*% xi)
beta <- beta - gamma %*% xi %*% (t(xi) %*% beta - yi)
gamma <- gamma - (gamma %*% xi %*% crossprod(xi, gamma)) / c(1 + crossprod(xi, gamma %*% xi))
beta <- beta - gamma %*% xi %*% (crossprod(xi, beta) - yi)
Comment on lines +29 to +30
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In cases like this, it may be worth investigating how best to associate the operations, because (AB)C can be more performant than A(BC).

res[,idx] <- beta
}
res
Expand Down Expand Up @@ -74,17 +74,17 @@ bulked_recursive_least_squares_contrast <- function(y, X, group, contrast, ridge
if(count[gi] == 1){
X_act[gi,] <- xi
n_obs <- n_obs + 1L
gamma <- gamma - (gamma %*% xi %*% t(xi) %*% gamma) / c(1 + t(xi) %*% gamma %*% xi)
gamma <- gamma - (gamma %*% xi %*% crossprod(xi, gamma)) / c(1 + crossprod(xi, gamma %*% xi))
# Below is a more efficient version of: beta <- gamma %*% t(X_act) %*% m
beta <- beta + gamma %*% xi %*% (m[gi] - t(xi) %*% beta)
beta <- beta + gamma %*% xi %*% (m[gi] - crossprod(xi, beta))
}else{
beta <- beta + gamma %*% (xi * delta_m)
}
# I can't find a recursive way to calculate the residual sum of squares
rss <- max(1e-6, sum((m - X_act %*% beta)^2))
# Avoid zero or negative numbers
covar <- rss / max(1e-8, n_obs - k) * gamma
se_sq <- contrast %*% covar %*% t(contrast)
se_sq <- tcrossprod(contrast %*% covar, contrast)
if(se_sq > 0){
t_stat[idx] <- sum(drop(contrast) * beta) / sqrt(se_sq)
}
Expand Down
2 changes: 1 addition & 1 deletion R/ridge_regression.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ ridge_regression <- function(Y, X, ridge_penalty = 0, weights = rep(1, nrow(X)))
stopifnot(length(ridge_penalty) == 1 || length(ridge_penalty) == ncol(X))
ridge_penalty <- diag(ridge_penalty, nrow = ncol(X))
}
ridge_penalty_sq <- sqrt(sum(weights)) * (t(ridge_penalty) %*% ridge_penalty)
ridge_penalty_sq <- sqrt(sum(weights)) * crossprod(ridge_penalty)
weights_sqrt <- sqrt(weights)

X_extended <- rbind(X * weights_sqrt, ridge_penalty_sq)
Expand Down
4 changes: 2 additions & 2 deletions R/test_de.R
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,8 @@ test_global <- function(fit,
}else{ # only linear test
resid_full <- as.matrix(residuals(fit, with_linear_model = TRUE, with_embedding = FALSE))
resid_red <- as.matrix(get_residuals_for_alt_fit(fit, reduced_design_mat = reduced_design_mat, with_linear_model = TRUE, with_embedding = FALSE))
pval <- multivar_wilks_ftest(RSS_full = resid_full %*% t(resid_full),
RSS_red = resid_red %*% t(resid_red),
pval <- multivar_wilks_ftest(RSS_full = tcrossprod(resid_full),
RSS_red = tcrossprod(resid_red),
n_features = nrow(fit), full_design, reduced_design_mat)
}
}else if(variance_est == "resampling"){
Expand Down
6 changes: 3 additions & 3 deletions R/util.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ stack_rows <- function(x){
#' @describeIn mply_dbl Each list element becomes a column in a matrix
stack_cols <- function(x){
  # Stack a list of equal-length vectors/matrices column-wise into a matrix.
  stopifnot(is.list(x))
  # NOTE(review): the proposed `list2DF(x)` is NOT equivalent here — it
  # returns a data.frame, not a matrix, changing the return class seen by
  # callers (and the function's documented contract of producing a matrix).
  # Keep the matrix-producing form.
  do.call(cbind, x)
}

#' Make a cube from a list of matrices
Expand Down Expand Up @@ -263,7 +263,7 @@ aggregate_matrix <- function(mat, group_split, aggr_fnc, col_sel = TRUE, ...){
lgl[idx] <- TRUE
lgl
})
if(all(col_sel == TRUE)){
if(all(col_sel)){
new_data_mat <- t(mply_dbl(group_split_lgl, \(split_sel){
aggr_fnc(mat, cols = split_sel, ...)
}, ncol = nrow(mat)))
Expand Down Expand Up @@ -332,7 +332,7 @@ pseudoinverse <- function(X){
not_null <- svd$d > max(tol * svd$d[1L], 0)
if(all(not_null)){
with(svd, v %*% (1/d * t(u)))
}else if(all(! not_null)){
}else if(!any(not_null)){
matrix(0, nrow = ncol(X), ncol = nrow(X))
}else{
with(svd, v[,not_null,drop=FALSE] %*% (1/d[not_null] * t(u[,not_null,drop=FALSE])))
Expand Down