diff --git a/._README.Rmd b/._README.Rmd deleted file mode 100644 index 20f533d..0000000 Binary files a/._README.Rmd and /dev/null differ diff --git a/DESCRIPTION b/DESCRIPTION index 0013c93..55d53f6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,18 +1,16 @@ Package: SelectSim Title: Selected Events Linked by Evolutionary Conditions across human Tumors -Version: 0.0.1.3 +Version: 0.0.1.5 Authors@R: c(person("Arvind", "Iyer", , "ayalurarvind@gamil.com", role = c("aut", "cre"),comment = c(ORCID = "0000-0002-8247-700X")), person("Marco", "Mina", , "marco.mina.85@gmail.com", role = c("aut")), + person("Miljan", "Petrovic", , "miljanpet93@gmail.com", role = c("aut")), person("Giovanni", "Ciriello", , "givoanni.cirello@unil.ch", role = c("aut"),comment = c(ORCID ="0000-0003-2021-8683"))) -Description: This R package implements the SelectSim methodology to infer evolutionary dependencies between functional alterations in cancer. +Description: This R package implements the SelectSim methodology to infer co-mutations between functional alterations in cancer. License: MIT + file LICENSE Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.0 -LinkingTo: - Rcpp, - RcppArmadillo +RoxygenNote: 7.3.2 Imports: doParallel, doRNG, @@ -23,7 +21,6 @@ Imports: ggridges, Matrix, Rcpp, - RcppArmadillo, reshape2, Rfast, tictoc @@ -33,7 +30,10 @@ Suggests: testthat (>= 3.0.0) Config/testthat/edition: 3 Depends: - R (>= 2.10) + R (>= 3.5) LazyData: true VignetteBuilder: knitr URL: https://csogroup.github.io/SelectSim/ +LinkingTo: + Rcpp, + RcppArmadillo diff --git a/INSTALLATION.md b/INSTALLATION.md index 3081c95..43e88e9 100644 --- a/INSTALLATION.md +++ b/INSTALLATION.md @@ -8,17 +8,20 @@ devtools::install_github("CSOgroup/SelectSim",dependencies = TRUE, build_vignettes = TRUE) ``` -## Installation with micromamba enviorment +## Installation with micromamba enviorment (prefered) `micromamba` is a tiny version of the mamba package manager (Like Conda). Refer [website](https://mamba.readthedocs.io/en/latest/installation/micromamba-installation.html) for it's installation guide. Steps to follow after installing micromamba in a terminal: -`micromamba create -n r_env r-essentials r-base`\ +`micromamba create -n r_env`\ `micromamba activate r_env`\ +`micromamba install conda-forge::r-base`\ +`micromamba install conda-forge::r-essentials`\ `micromamba install conda-forge::r-devtools`\ -`micromamba install conda-forge::armadillo`\ +`micromamba install conda-forge::r-pak`\ +`micromamba install conda-forge::cmake`\ `micromamba install conda-forge::r-rcppparallel`\ `micromamba install conda-forge::r-rfast` @@ -27,10 +30,15 @@ Steps to follow after installing micromamba in a terminal: # install.packages("devtools") devtools::install_github("CSOgroup/SelectSim",dependencies = TRUE, build_vignettes = TRUE) ``` +OR +``` r +library('pak') +pak::pkg_install('CSOgroup/SelectSim') +``` -Alternative way install with provided `enviorment.yml` file. +Alternative way install with provided [`enviorment.yml`](enviorment.yml) file. -`micromamba create -f env.yml`\ +`micromamba create -f enviorment.yml`\ `micromamba activate r_env` - After this run R and install `SelectSim` R package as follows ``` r diff --git a/LICENSE b/LICENSE index a411f6d..67f81dd 100644 --- a/LICENSE +++ b/LICENSE @@ -1,2 +1,2 @@ -YEAR: 2024 +YEAR: 2025 COPYRIGHT HOLDER: SelectSim authors diff --git a/LICENSE.md b/LICENSE.md index 661d370..3082a60 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ # MIT License -Copyright (c) 2024 SelectSim authors +Copyright (c) 2025 SelectSim authors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/NAMESPACE b/NAMESPACE index 166604b..33c442d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -35,7 +35,6 @@ export(new.AL.general) export(new.ALS) export(new.AMS) export(null_model_parallel) -export(null_model_parallel_debug) export(obs_exp_scatter) export(overlap_pair_extract) export(r.am.pairwise.alteration.overlap) @@ -44,11 +43,11 @@ export(retrieveOutliers) export(ridge_plot_ed) export(ridge_plot_ed_compare) export(selectX) -export(selectX_debug) export(stat_maf_column) export(stat_maf_gene) export(stat_maf_sample) export(templeate.obj.gen) +export(theme_Publication) export(w.r.am.pairwise.alteration.overlap) import(doParallel) import(doRNG) @@ -59,6 +58,8 @@ import(ggridges) import(graphics) import(parallel) importFrom(Matrix,Matrix) +importFrom(Matrix,sparseMatrix) +importFrom(Matrix,t) importFrom(Rcpp,sourceCpp) importFrom(Rfast,rowSort) importFrom(dplyr,"%>%") diff --git a/NEWS.md b/NEWS.md index 17c631b..8467031 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,4 +6,16 @@ * Added a `NEWS.md` file to track changes to the package. * Rename the pacakge SelectX to SelectSim by creating a new git folder. * Create CSO group repositroy and move the code there. -* Created the website for github \ No newline at end of file +* Created the website for github + +# SelectX 0.0.1.4 + +* Added Mijan in author's list +* Remove C/C++ code dependecny to avoid installation diffculties in different systems. + * Hence move to using `Matrix` library functions and removed `RCpp` functions and code. +* Update the website and vignette accordingly. + +# SelectX 0.0.1.5 + +* Fixed bug of outlier functions and added C/C++ code back. +* More description in functions \ No newline at end of file diff --git a/R/._selectX_run.R b/R/._selectX_run.R deleted file mode 100644 index faccdf2..0000000 Binary files a/R/._selectX_run.R and /dev/null differ diff --git a/R/.gitignore b/R/.gitignore index 73d8c77..3e57fa0 100644 --- a/R/.gitignore +++ b/R/.gitignore @@ -8,3 +8,4 @@ inst/doc .* !/.gitignore +._* \ No newline at end of file diff --git a/R/gam_utils.r b/R/gam_utils.r index e3dded4..8a7db3c 100644 --- a/R/gam_utils.r +++ b/R/gam_utils.r @@ -3,7 +3,7 @@ # Email : ayalurarvind@gmail.com # Project : SelectX # Desc : Functions to process the maf to gam -# Version : 0.1 +# Version : 0.1.5 # Updates : # Todo: ### diff --git a/R/selectX_create.r b/R/selectX_create.r index 969617d..ffe852d 100644 --- a/R/selectX_create.r +++ b/R/selectX_create.r @@ -2,12 +2,11 @@ # Author : Arvind Iyer # Email : arvind.iyer@unil.ch # Project : SelectSim -# Desc : The alteration landscape object, template object and selectX object creating functions -# Version : 0.1 +# Desc : Implementation of SelectSim algorithm +# Version : 0.1.5 # Updates : Re wrote the whole code # Todo: -# - Random seed to set to one value for parallel foreach? -# - c++ backend for faster computation +# - c++ backend for faster computation: Removed kept R native? ### #' Create an AL object @@ -30,24 +29,14 @@ new.AL.general <- function(am, # Assert if the AM are provided or not if(is.null(am$M)) stop('Problem:--> Input data is null') - - - # Assert if the data structre is in correct format - # if(is.list(am)){ - # if(names(am) != c('M','tmb')) - # stop('Problem:--> Input data (M) is not in correct structure\n Check the format which should be: - # list(M=list("missesne"=matrix()), # which has features on rows and sample on columns - # tmb=list("missesne"=data.frame()) # which has sample,mutation as column names and rownames as sample - # )') - # } # create the alteration landscape object al = list('am'=list()) # Create a full gam al$am[['full']] = matrix(0, nrow = nrow(am$M[[1]]), ncol = ncol(am$M[[1]])) # Keep the order of rownames and colnames as in first gam. - row.order = rownames(am$M[[1]]) - col.order = colnames(am$M[[1]]) + row.order = sort(rownames(am$M[[1]])) #fixed gene ordering issuses + col.order = colnames(am$M[[1]]) for ( i in names(am$M)){ al$am[[i]] <-as.matrix(am$M[[i]][row.order,col.order]) al$am[['full']] <- al$am[['full']]+as.matrix(am$M[[i]][row.order,col.order]) @@ -140,8 +129,9 @@ generateS = function(gam, upperBound = 1){ gene.freq = as.matrix(rowSums(gam)/ncol(gam), ncol = 1) - tmb.weight = t(as.matrix(sample.weights, ncol = 1)) - sim.gam = gene.freq %*% tmb.weight + tmb.weight = (as.matrix(sample.weights, nrow = 1)) + #sim.gam = gene.freq %*% tmb.weight + sim.gam = tcrossprod(gene.freq,tmb.weight) colnames(sim.gam) = colnames(gam) sim.gam[ sim.gam > 1 ] = upperBound return(sim.gam) @@ -261,6 +251,7 @@ generateW_block = function(al,lambda,tao) { #' @param W weight matrix #' @param n.cores Number of cores #' @param n.permut Number of simulation +#' @param seed Random seed #' @return Template matrix as list object #' #' Ordering of genes impacts the results as residual subraction is not correct. @@ -269,22 +260,28 @@ null_model_parallel <-function(al, temp_mat, W, n.cores=1, - n.permut) { + n.permut, + seed=42) { `%dopar%` <- foreach::`%dopar%` `%do%` <- foreach::`%do%` - - + # Set RNG kind and seed for reproducibility + RNGkind(kind = "Mersenne-Twister", normal.kind = "Inversion", sample.kind = "Rejection") + set.seed(seed) simulationStep = function(template, total_mut){ + S = template nvalues = nrow(S)*ncol(S) r = matrix( runif(nvalues, min = 0, max = 1), nrow = nrow(S), ncol = ncol(S)) test = S - r + rownames(r) <- rownames(S) + colnames(r) <- colnames(S) return(test) } combine <- function(residuals,residuals_sort,val,index){ return(residuals[index,]>=residuals_sort[index,][val[index]]) + #return(round(residuals[index, ], digits = 10) >= round(residuals_sort[index, ][val[index]], digits = 10)) } simulationFixedOnes = function(al,temp_mat, W,CORRECT_T = 0.05){ @@ -344,140 +341,37 @@ null_model_parallel <-function(al, if(n.cores>1){ - cl <- parallel::makeCluster(n.cores, outfile=paste("gen.random.am.log", sep='')) - doParallel::registerDoParallel(cl) - if( foreach::getDoParRegistered()) { - registerDoRNG(42) - randomMs <- foreach::foreach(i=1:n.permut) %dopar% simulationFixedOnes(al,temp_mat,W,CORRECT_T=0.05) + log_file <- paste0("gen.random.am_", Sys.getpid(), ".log") + cl <- parallel::makeCluster(n.cores, outfile=log_file) + doParallel::registerDoParallel(cl) + on.exit({ parallel::stopCluster(cl) - return (randomMs) - } else { - stop('Error in registering a parallel cluster for randomization.') - } - } - else{ - foreach::registerDoSEQ() - randomMs <- foreach(i=1:n.permut) %do% simulationFixedOnes(al,temp_mat,W,CORRECT_T=0.05) + foreach::registerDoSEQ() # Unregister the parallel backend + }) + registerDoRNG(seed) + # # Use chunking to reduce overhead + # chunk_size <- ceiling(n.permut / n.cores) + # randomMs <- foreach(chunk = seq(1, n.permut, by = chunk_size), .combine = c) %dopar% { + # lapply(seq_len(chunk_size), function(i) simulationFixedOnes(al, temp_mat, W, CORRECT_T = 0.05)) + # } + randomMs <- foreach::foreach(i=1:n.permut) %dopar% simulationFixedOnes(al,temp_mat,W,CORRECT_T=0.05) return (randomMs) } -} - - - -#' Generating the null_simulation matrix -#' -#' @import doParallel -#' @import parallel -#' @importFrom Rfast rowSort -#' @import doRNG -#' @param al Alteration landscape object -#' @param temp_mat template matrixes -#' @param W weight matrix -#' @param n.cores Number of cores -#' @param n.permut Number of simulation -#' @return Template matrix as list object -#' -#' Ordering of genes impacts the results as residual subraction is not correct. -#' @export -null_model_parallel_debug <-function(al, - temp_mat, - W, - n.cores=1, - n.permut) { - - `%dopar%` <- foreach::`%dopar%` - `%do%` <- foreach::`%do%` - - - simulationStep = function(template, total_mut){ - S = template - nvalues = nrow(S)*ncol(S) - r = matrix( runif(nvalues, min = 0, max = 1), nrow = nrow(S), ncol = ncol(S)) - rownames(r)<-rownames(S) - colnames(r)<-colnames(S) - test = S - r[rownames(S),colnames(S)] - return(test) - } - - combine <- function(residuals,residuals_sort,val,index){ - return(residuals[index,]>=residuals_sort[index,][val[index]]) - } - - simulationFixedOnes = function(al,temp_mat, W,CORRECT_T = 0.05){ - exp = matrix(0, nrow = nrow(W), ncol = ncol(W)) - gam_names = names(al$am) - gam_incidence = list() - k=1 - for(name in gam_names){ - if (name=='full'){ - } - else{ - gam_incidence[[k]]=sum(al$am[[name]]) - k=k+1 - } - } - residuals = list() - for(i in 1:length(temp_mat)){ - S = temp_mat[[i]] - test = simulationStep(template = S, total_mut = gam_incidence[[i]]) - residuals[[i]]<-test - } - total_mut = sum(al$am$full) - gene_freq = rowSums(al$am$full) - if(length(gam_names)>2){ - residual_mtx <- pmax(residuals[[1]],residuals[[2]]) - residual_mtx_sort <- Rfast::rowSort(residual_mtx, descending = TRUE) - temp<-list() - for(i in c(1:nrow(al$am$full))){ - temp[[i]]<-combine(residual_mtx,residual_mtx_sort,rowSums(al$am$full),i) - } - exp<-do.call(rbind,temp)*1 - rownames(exp) <- rownames(al$am$full) - tot = sum(exp) - exp.freq = rowSums(exp) - diff = abs(exp.freq - rowSums(al$am$full))/ncol(W) - return(exp) - } - else{ - residual_mtx <- residuals[[1]] - residual_mtx_sort <- Rfast::rowSort(residual_mtx, descending = TRUE) - #print(dim(residual_mtx)) - #print(dim(residual_mtx_sort)) - temp<-list() - for(i in c(1:nrow(al$am$full))){ - temp[[i]]<-combine(residual_mtx,residual_mtx_sort,rowSums(al$am$full),i) - } - exp<-do.call(rbind,temp)*1 - rownames(exp) <- rownames(al$am$full) - tot = sum(exp) - #print(tot) - #print(gam_incidence[[1]]) - exp.freq = rowSums(exp) - diff = abs(exp.freq - rowSums(al$am$full))/ncol(W) - return(exp) - } - } - - - if(n.cores>1){ - cl <- parallel::makeCluster(n.cores, outfile=paste("gen.random.am.log", sep='')) - doParallel::registerDoParallel(cl) - if( foreach::getDoParRegistered()) { - registerDoRNG(42) - randomMs <- foreach::foreach(i=1:n.permut) %dopar% simulationFixedOnes(al,temp_mat,W,CORRECT_T=0.05) - parallel::stopCluster(cl) - return (randomMs) - } else { - stop('Error in registering a parallel cluster for randomization.') - } - } else{ + # Use sequential execution + set.seed(seed) foreach::registerDoSEQ() + registerDoRNG(seed) randomMs <- foreach(i=1:n.permut) %do% simulationFixedOnes(al,temp_mat,W,CORRECT_T=0.05) return (randomMs) } } +### Updates +# Author: Miljan Petrovic +# Miljan found a bug in the previous implementation of fuction. +# Now fixed with an update. +### #' Removing the Outliers #' #' @param obj selectX object @@ -492,6 +386,8 @@ retrieveOutliers = function(obj, nSim=1000){ gnMut[,i] = rowSums(obj$null[[i]]) snMut[,i] = colSums(obj$null[[i]]) } + #gnMut <- sapply(obj$null, rowSums) + #snMut <- sapply(obj$null, colSums) rgn = gnMut rsn = snMut ogn = rowSums(obj$al$am$full) @@ -500,11 +396,23 @@ retrieveOutliers = function(obj, nSim=1000){ rsn = rsn-osn mean.gn = apply(abs(rgn), 2, mean) mean.sn = apply(abs(rsn), 2, mean) + # rgn <- abs(gnMut - ogn) + # rsn <- abs(snMut - osn) + # mean.gn <- colMeans(rgn) + # mean.sn <- colMeans(rsn) dev = mean.gn + mean.sn dev2 = sort(dev) - mincut = dev2[ round(0.05*length(dev2)) ] - maxcut = dev2[ round(0.95*length(dev2)) ] - outliers = dev < mincut | dev >= maxcut - - return(outliers) -} + # Miljan changed: + # mincut = dev2[ round(0.05*length(dev2)) ] + # maxcut = dev2[ round(0.95*length(dev2)) ] + # outliers = dev < mincut | dev >= maxcut + #print(dev2) + maxcut = dev2[ round(0.90*length(dev2)) ] + #maxcut = quantile(dev, probs = 0.9,type = 7) + outliers = dev >= maxcut + #print(outliers) + #outliers = rep( FALSE, length(dev)) + #print("Script uses corrected outlier removal & column indexing.") + #return(list('outliers'=outliers,'dev'=dev)) + return (outliers) +} \ No newline at end of file diff --git a/R/selectX_plot.R b/R/selectX_plot.R index 2643daf..66d080d 100644 --- a/R/selectX_plot.R +++ b/R/selectX_plot.R @@ -3,13 +3,50 @@ # Email : arvind.iyer@unil.ch # Project : SelectX # Desc : The file contains plot related functions -# Version : 0.1 +# Version : 0.1.5 # Updates : Re wrote the whole code # Todo: # - Oncoprint & other usefull plot functions ### +#' A nice theme for making good plots +#' +#' @param base_size size of fonts +#' @param base_family type of font +#' +#' @returns theme object +#' @export +theme_Publication = function (base_size = 14, base_family = "sans") { + + (theme( + text = element_text(family = base_family, size = base_size), + plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5), + panel.background = element_blank(), + plot.background = element_blank(), + panel.border = element_rect(colour = "black", fill = NA, size = 1), + axis.title = element_text(face = "bold", size = rel(1)), + axis.title.y = element_text(angle = 90, vjust = 2), + axis.title.x = element_text(vjust = -0.2), + axis.text = element_text(size = rel(0.8)), + axis.ticks = element_line(), + panel.grid.major = element_line(colour = "#f0f0f0"), + panel.grid.minor = element_blank(), + legend.key = element_blank(), + legend.position = "right", + legend.text = element_text(size = rel(0.8)), + legend.key.size = unit(4, "mm"), + legend.direction = "vertical", + legend.spacing = unit(0, "cm"), + legend.title = element_text(face = "italic"), + plot.margin = margin(5, 5, 5, 5, "mm"), + strip.background = element_rect(colour = "black", fill = "white"), + strip.text = element_text(face = "bold"))) + +} + + + #' Create an AL object #' #' Create an Alteration Landscape (AL) object which contains gams and mutation burden of samples of associated gams. @@ -42,11 +79,9 @@ obs_exp_scatter <- function(result,title){ scale_y_continuous(breaks = seq(0, max(result$log_overlap), 1))+ scale_x_continuous(breaks= seq(0, max(result$log_overlap), 1)) - plot <- plot + ggtitle(title)+theme_pubr()+ + plot <- plot + ggtitle(title)+theme_Publication(base_size=16)+ theme( legend.position = "top", - legend.title = element_blank(), - plot.title = element_text(hjust = 0.5), - text = element_text(size = 16)) + legend.title = element_blank()) return(plot) } diff --git a/R/selectX_run.R b/R/selectX_run.R index ff1fc08..c82f8c3 100644 --- a/R/selectX_run.R +++ b/R/selectX_run.R @@ -3,11 +3,8 @@ # Email : arvind.iyer@unil.ch # Project : SelectSim # Desc : Main file which call the selectX function to create alteration object with background model and funtion to generate the table. -# Version : 0.1 -# Updates : Re wrote the whole code +# Version : 0.1.5 # Todo: -# - Random seed to set to one value for parallel foreach? -# - Same ordering of colnames in input gam as the one used in null. # - Better Error message and running text # - Edge case: When sample size in less than 2 there is error in computation (need to fix a number to do this analysis) # - parallel::makeCluster(2, setup_strategy = "sequential") a possible fix to remove the erorr of not able to connect problem (https://github.com/rstudio/rstudio/issues/6692) @@ -15,7 +12,7 @@ ### ### -#' SelectX main function to create alteration object with background model +#' SelectX main function from SelectSim to create alteration object with background model #' #' @description #' `selectX()` takes a list object which consist of genome alteration matrix and tumor mutation burden. @@ -44,7 +41,7 @@ #' @param estimate_pairwise Compute pairwise p-value #' @param maxFDR FDR value #' @param seed a random seed -#' @return result a SelectX object with background model and other info along with result table +#' @return result a SelectSim object with background model and other info along with result table #' #' @export selectX <- function(M, #A list object consiting of GAM and TMB data @@ -77,10 +74,10 @@ selectX <- function(M, #A list object consiting of GAM and TMB data temp.data<-templeate.obj.gen(al) print(paste('-> Templetate object created')) toc() - print(paste('Step3-> Generating penalty matrix...')) + print(paste('Step3-> Generating sample weight matrix...')) tic('Time:') W<-generateW_block(al,lambda,tao) - print(paste('-> Penalty Matrix created')) + print(paste('-> Weight Matrix created')) toc() print(paste('Step4-> Generating null model...')) tic('Time:') @@ -100,7 +97,7 @@ selectX <- function(M, #A list object consiting of GAM and TMB data } toc() print(paste('-> Null model generated')) - print(paste('### SelectX object created ###')) + print(paste('### SelectSim object created ###')) print(paste('#### Computing EDs on the dataset ####')) tic('Time:') @@ -113,12 +110,12 @@ selectX <- function(M, #A list object consiting of GAM and TMB data obs.co = as.matrix(als$alteration.pairwise$overlap) wobs.co = as.matrix(als$alteration.pairwise$w_overlap) robs.co<-r.am.pairwise.alteration.overlap(null = obj$null, - n.permut = obj$nSim, - n.cores = 1) + n.permut = obj$nSim, + n.cores = 1) wrobs.co<-w.r.am.pairwise.alteration.overlap(null = obj$null, - W= obj$W$W, - n.permut = obj$nSim, - n.cores = 1) + W= obj$W$W, + n.permut = obj$nSim, + n.cores = 1) selectX_result <- interaction.table(al, als, @@ -137,8 +134,8 @@ selectX <- function(M, #A list object consiting of GAM and TMB data print(paste('#### EDs computed ####')) toc() if(save.object) { - saveRDS(obj, file=paste(folder,'selectX_object.rds',sep='')) - saveRDS(selectX_result, file=paste(folder,'selectX_results.rds',sep='')) + saveRDS(obj, file=paste(folder,'selectsim_object.rds',sep='')) + saveRDS(selectX_result, file=paste(folder,'selectsim_results.rds',sep='')) } return(list(obj=obj,result=selectX_result)) } @@ -166,194 +163,10 @@ selectX <- function(M, #A list object consiting of GAM and TMB data obs.co = as.matrix(als$alteration.pairwise$overlap) wobs.co = as.matrix(als$alteration.pairwise$w_overlap) robs.co<-r.am.pairwise.alteration.overlap(null = obj$null, - n.permut = obj$nSim, - n.cores = 1) - wrobs.co<-w.r.am.pairwise.alteration.overlap(null = obj$null, - W= obj$W$W, - n.permut = obj$nSim, - n.cores = 1) - - selectX_result <- interaction.table(al, - als, - obs.co, - wobs.co, - robs.co, - wrobs.co, - null=obj$null, - maxFDR=maxFDR, - n.cores=1, - estimate_pairwise=estimate_pairwise, - n.permut=obj$nSim) - obj$robs.co <- robs.co - obj$wrobs.co <- wrobs.co - toc() - - if(save.object) { - saveRDS(obj, file=paste(folder,'selectX_object.rds',sep='')) - saveRDS(selectX_result, file=paste(folder,'selectX_results.rds',sep='')) - } - return(list(obj=obj,result=selectX_result)) - } -} - - - -### -#' SelectX main function to create alteration object with background model -#' -#' @description -#' `selectX()` takes a list object which consist of genome alteration matrix and tumor mutation burden. -#' -#' @import foreach -#' @import doParallel -#' @import parallel -#' @importFrom tictoc tic -#' @importFrom tictoc toc -#' @importFrom stats as.dist -#' @importFrom stats density -#' @importFrom stats ecdf -#' @importFrom stats runif -#' @importFrom stats setNames -#' @param M a list data object which consist of gams and tmbs. -#' @param sample.class sample covariates as named list. -#' @param alteration.class alteration covariates as named list. -#' @param n.cores no of cores. -#' @param min.freq number of samples for features to be atleast mutated in. -#' @param n.permut number of simulations. -#' @param lambda lambda parameter. -#' @param tao tao parameter. -#' @param folder folder path to store the results. -#' @param save.object store the SelectX object. -#' @param verbose print the time and each steps. -#' @param estimate_pairwise Compute pairwise p-value -#' @param maxFDR FDR value -#' @param seed a random seed -#' @return result a SelectX object with background model and other info along with result table -#' -#' @export -selectX_debug <- function(M, #A list object consiting of GAM and TMB data - sample.class, # Sample Covariates as named list - alteration.class, #Alteration Covariates as named list - n.cores = 1, #number of cores - min.freq=10, # Min freq of gene to be mutated to do the analysis - n.permut = 1000, # number of simulations - lambda=0.3, #wieght factor - tao=1, #the fold change factor - save.object = FALSE, #save the object - folder='./', # folder - verbose = TRUE, #verbose to print the steps - estimate_pairwise=FALSE, #compute pairwise p-val - maxFDR=0.25, #max fdr - seed=42 # a random seed, - ){ - ## Set a global seed for computation - set.seed(seed) - if(verbose){ - tic('Total time taken:') - print(paste('#### Creating SelectX object ####')) - tic('Time:') - print(paste('Step1-> Parsing and Filtering GAM...')) - al = new.AL.general(M,alteration.class,sample.class,min.freq,verbose) - print(paste('-> Alteration Landscape object created')) - toc() - print(paste('Step2-> Generating Templetate object...')) - tic('Time:') - temp.data<-templeate.obj.gen(al) - print(paste('-> Templetate object created')) - toc() - print(paste('Step3-> Generating penalty matrix...')) - tic('Time:') - W<-generateW_block(al,lambda,tao) - print(paste('-> Penalty Matrix created')) - toc() - print(paste('Step4-> Generating null model...')) - tic('Time:') - sim <- null_model_parallel_debug(al,temp.data$temp_mat,W$W,n.cores,n.permut) - obj<- list('al'=al,'W'=W,'T'=temp.data,'null'=sim,'nSim'=n.permut) - print(paste('-> Removing the outliers matrix from null model...')) - outliers <- retrieveOutliers(obj = obj,nSim = n.permut) - if(length(which(outliers))==0){ - print(paste('Removed null-matrix:',length(which(outliers)),sep=" ")) - obj$nSim <- n.permut - } - else{ - print(paste(' Removed null-matrix:',length(which(outliers)),sep=" ")) - obj$null <- obj$null[which(!outliers)] - print(paste(' Updated the null-model and nSim variables...')) - obj$nSim <- length(which(!outliers)) - } - toc() - print(paste('-> Null model generated')) - print(paste('### SelectX object created ###')) - - print(paste('#### Computing EDs on the dataset ####')) - tic('Time:') - al<-obj$al - als = al.stats(obj) # Univariate stats - alp = al.pairwise.alteration.stats(obj, als, do.blocks=FALSE) - als[['alteration.pairwise']] = alp - als[['alteration.pairwise']]$sample.blocks = NULL - for(i in names(als$sample.blocks)) als$sample.blocks[[i]][['alteration.pairwise']] = alp$sample.blocks[[i]] - obs.co = as.matrix(als$alteration.pairwise$overlap) - wobs.co = as.matrix(als$alteration.pairwise$w_overlap) - robs.co<-r.am.pairwise.alteration.overlap(null = obj$null, - n.permut = obj$nSim, + n.permut = obj$nSim, n.cores = 1) - wrobs.co<-w.r.am.pairwise.alteration.overlap(null = obj$null, - W= obj$W$W, - n.permut = obj$nSim, - n.cores = 1) - - selectX_result <- interaction.table(al, - als, - obs.co, - wobs.co, - robs.co, - wrobs.co, - null=obj$null, - maxFDR=maxFDR, - n.cores=1, - estimate_pairwise=estimate_pairwise, - n.permut=obj$nSim) - obj$robs.co <- robs.co - obj$wrobs.co <- wrobs.co - toc() - print(paste('#### EDs computed ####')) - toc() - if(save.object) { - saveRDS(obj, file=paste(folder,'selectX_object.rds',sep='')) - saveRDS(selectX_result, file=paste(folder,'selectX_results.rds',sep='')) - } - return(list(obj=obj,result=selectX_result)) - } - else{ - tic('Total Time') - al = new.AL.general(M,alteration.class,sample.class,min.freq,verbose) - temp.data<-templeate.obj.gen(al) - W<-generateW_block(al,lambda,tao) - sim <- null_model_parallel_debug(al,temp.data$temp_mat,W$W,n.cores,n.permut) - obj<- list('al'=al,'W'=W,'T'=temp.data,'null'=sim,'nSim'=n.permut) - outliers <- retrieveOutliers(obj = obj,nSim = n.permut) - if(length(which(outliers))==0){ - obj$nSim <- n.permut - } - else{ - obj$null <- obj$null[which(!outliers)] - obj$nSim <- length(which(!outliers)) - } - al<-obj$al - als = al.stats(obj) # Univariate stats - alp = al.pairwise.alteration.stats(obj, als, do.blocks=FALSE) - als[['alteration.pairwise']] = alp - als[['alteration.pairwise']]$sample.blocks = NULL - for(i in names(als$sample.blocks)) als$sample.blocks[[i]][['alteration.pairwise']] = alp$sample.blocks[[i]] - obs.co = as.matrix(als$alteration.pairwise$overlap) - wobs.co = as.matrix(als$alteration.pairwise$w_overlap) - robs.co<-r.am.pairwise.alteration.overlap(null = obj$null, - n.permut = obj$nSim, - n.cores = 1) wrobs.co<-w.r.am.pairwise.alteration.overlap(null = obj$null, - W= obj$W$W, + W= obj$W$W, n.permut = obj$nSim, n.cores = 1) @@ -373,8 +186,8 @@ selectX_debug <- function(M, #A list object consiting of GAM and TMB data toc() if(save.object) { - saveRDS(obj, file=paste(folder,'selectX_object.rds',sep='')) - saveRDS(selectX_result, file=paste(folder,'selectX_results.rds',sep='')) + saveRDS(obj, file=paste(folder,'selectsim_object.rds',sep='')) + saveRDS(selectX_result, file=paste(folder,'selectsim_results.rds',sep='')) } return(list(obj=obj,result=selectX_result)) } diff --git a/R/selectX_stats.r b/R/selectX_stats.r index b57bb2f..74924e9 100644 --- a/R/selectX_stats.r +++ b/R/selectX_stats.r @@ -3,11 +3,9 @@ # Email : arvind.iyer@unil.ch # Project : SelectSim # Desc : The file which contains the function to generate the stats and table -# Version : 0.1 +# Version : 0.1.5 # Updates : Re wrote the whole code # Todo: -# - Random seed to set to one value for parallel foreach? -# - c++ backend for faster computation ### #' Create an alteration landscape object @@ -88,8 +86,12 @@ al.stats <- function(al) { return(als) } + +### #' Compute overlap stats #' +#' @importFrom Matrix sparseMatrix +#' @importFrom Matrix t #' @param am The alteration matrix #' @return overlap the overlap between the pairs #' @@ -101,16 +103,19 @@ am.pairwise.alteration.overlap <- function(am) { } #' Compute weight overlap stats #' +#' @importFrom Matrix sparseMatrix +#' @importFrom Matrix t #' @param am The alteration matrix #' @param W The weight matrix #' @return overlap the weight overlap between the pairs #' #' @export am.weight.pairwise.alteration.overlap <- function(am,W) { - A = am * 1 - col_order= colnames(A) - overlap = (W[,col_order]*A) %*% t(A) - return(overlap) + + A = am * 1 + col_order= colnames(A) + overlap = (W[,col_order]*A) %*% t(A) + return(overlap) } #' Compute weight overlap stats #' @@ -181,28 +186,7 @@ al.pairwise.alteration.stats <- function(al, als=NULL, do.blocks=FALSE) { #' #' @export r.am.pairwise.alteration.overlap <- function(null,n.permut,n.cores=1) { - # `%dopar%` <- foreach::`%dopar%` - # `%do%` <- foreach::`%do%` - # if(n.cores>1){ - # cl <- parallel::makeCluster(n.cores-1, outfile=paste("r.gen.random.am.log", sep='')) - # doParallel::registerDoParallel(cl) - # if(foreach::getDoParRegistered()) { - # random_overlap <- foreach::foreach(i=c(1:length(null))) %dopar%{ - # selectX::am.pairwise.alteration.overlap(am=null[[i]]) - # } - # parallel::stopCluster(cl) - # return (random_overlap) - # } else { - # stop('Error in registering a parallel cluster for randomization.') - # } - # } - # else{ - # foreach::registerDoSEQ() - # random_overlap <- foreach(i=c(1:length(null))) %do% selectX::am.pairwise.alteration.overlap(am=null[[i]]) - # return (random_overlap) - # } return(rcpp_overlap(v=null,t=1)) - #return(selectX:::rcpp_overlap(v=null,t=1)) } #' Compute weight overlap stats #' @@ -215,30 +199,7 @@ r.am.pairwise.alteration.overlap <- function(null,n.permut,n.cores=1) { #' @return overlap the weight overlap between the pairs #' #' @export -w.r.am.pairwise.alteration.overlap <- function(null,W,n.permut,n.cores=1) { - - - # `%dopar%` <- foreach::`%dopar%` - # `%do%` <- foreach::`%do%` - # if(n.cores>1){ - # cl <- parallel::makeCluster(n.cores-1, outfile=paste("r.gen.random.am.log", sep='')) - # doParallel::registerDoParallel(cl) - # if(foreach::getDoParRegistered()) { - # random_overlap <- foreach::foreach(i=c(1:length(null))) %dopar%{ - # selectX::am.weight.pairwise.alteration.overlap(am=null[[i]],W=W) - # } - # parallel::stopCluster(cl) - # return (random_overlap) - # } - # else { - # stop('Error in registering a parallel cluster for randomization.') - # } - # } - # else{ - # foreach::registerDoSEQ() - # random_overlap <- foreach(i=c(1:length(null))) %do% selectX::am.weight.pairwise.alteration.overlap(am=null[[i]],W=W) - # return (random_overlap) - # } +w.r.am.pairwise.alteration.overlap <- function(null,W,n.permut,n.cores=1) { return(rcpp_w_overlap(v=null,w=W,t=1)) } #' Compute weight overlap stats @@ -270,39 +231,11 @@ effectSize = function(obs, exp){ #' @export r.effectSize<- function(null_overlap,mean_mat,n.permut=1000,n.cores=1){ - # effectSizes <- function(x,arg){ - # es = (x - arg)*sin(pi/4) - # return(es) - # } - - # r.effect <- lapply(null_overlap,effectSizes,mean_mat) - # return(r.effect) - foreach::registerDoSEQ() i<-NULL r.effect <- foreach(i=1:n.permut) %do% as.vector(as.dist((null_overlap[[i]]-mean_mat)*sin(pi/4))) return(r.effect) - # i <- NULL - # `%do%` <- foreach::`%do%` - # `%dopar%` <- foreach::`%dopar%` - # if(n.cores>1){ - # cl <- parallel::makeCluster(n.cores, outfile=paste("r.gen.random.am.log", sep='')) - # doParallel::registerDoParallel(cl) - # if(foreach::getDoParRegistered()) { - # tryCatch(r.effect <- foreach(i=1:n.permut) %dopar% as.vector(as.dist((null_overlap[[i]]-mean_mat)*sin(pi/4))), error = function(e) print(e)) - # parallel::stopCluster(cl) - # return (r.effect) - # } else { - # stop('Error in registering a parallel cluster for randomization.') - # } - # } - # else{ - # foreach::registerDoSEQ() - # r.effect <- foreach(i=1:n.permut) %do% as.vector(as.dist((null_overlap[[i]]-mean_mat)*sin(pi/4))) - # return(r.effect) - # } - } #' Compute yule coefficent #' @@ -483,17 +416,9 @@ interaction.table <- function(al, results$max_overlap = as.vector(as.dist(A)) } results[,'freq_overlap'] = results[,'overlap'] / (results[,'max_overlap']) - #tic('compute overlaps') - results[,'r_overlap'] = as.vector(as.dist(add(r.obs)/length(r.obs))) - results[,'w_r_overlap'] = as.vector(as.dist(add(r.wobs)/length(r.wobs))) - #toc() - # tic('compute diffoverlap') - # exp.ES = (results[,'overlap'] - results[,'r_overlap']) - # exp.ES = as.numeric(exp.ES) - # results[,'diffoverlap']<-exp.ES - # toc() + results[,'r_overlap'] = as.vector(as.dist(add(r.obs)/length(r.obs))) + results[,'w_r_overlap'] = as.vector(as.dist(add(r.wobs)/length(r.wobs))) if(estimate_pairwise){ - #tic('compute p-value on overlap pairwise') obs.co = as.matrix(als$alteration.pairwise$overlap) results[,'pairwise_p']<-rep(1,nrow(results)) results[,'pairwise_p'] = estimate_pairwise_p(obs=obs.co, @@ -501,8 +426,6 @@ interaction.table <- function(al, results=results, nSim=n.permut ) - #toc() - } exp.ES = (results[,'w_overlap'] - results[,'w_r_overlap'])*sin(pi/4) exp.ES = as.numeric(exp.ES) @@ -541,12 +464,7 @@ interaction.table <- function(al, freq.cats = c(0, 0.02, 0.05, 0.10, 1) results$nFDR2 = rep(1, nrow(results)) for(i in 2:length(freq.cats)){ - #print(which(rownames(results)=='KRAS - TP53')) - #print(freq.cats[i-1]*samples) - #print(freq.cats[i]*samples) select = results[,'cum_freq'] > freq.cats[i-1]*2*samples & results[,'cum_freq'] < freq.cats[i]*2*samples - #print(paste(i,select[which(rownames(results)=='KRAS - TP53')],sep=":")) - #print(paste(i,sum(select),sep=":")) if (sum(select)>1){ results$nFDR2[select] = estimateFDR2(obs = abs(results$nES[select]), exp = abs(as.numeric(unlist(exp.rES[select,]))), @@ -559,9 +477,6 @@ interaction.table <- function(al, results = results[order(abs(results$nES), decreasing = T),] results$type = rep('ME', nrow(results)) results$type[ results$nES > 0 ] = 'CO' - #results$diff_type = rep('ME', nrow(results)) - #results$diff_type[ results$diffoverlap > 0 ] = 'CO' - results$FDR<-results$nFDR2<=maxFDR return (results) } diff --git a/data/luad_result.rda b/data/luad_result.rda index 77dd97b..32d554d 100644 Binary files a/data/luad_result.rda and b/data/luad_result.rda differ diff --git a/docs/404.html b/docs/404.html index b994a41..95d69c4 100644 --- a/docs/404.html +++ b/docs/404.html @@ -14,60 +14,52 @@ - - - + + + - Skip to contents - -