From a9dba8ceb255817423bbd3d59a2f42e85a5d2d70 Mon Sep 17 00:00:00 2001
From: FerriolCalvet <ferriolcalvet@gmail.com>
Date: Wed, 18 Dec 2024 03:32:16 +0100
Subject: [PATCH 1/3] update default test paths to irb cluster

---
 assets/input_double_bam.csv | 4 ++--
 conf/test.config            | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/assets/input_double_bam.csv b/assets/input_double_bam.csv
index 12ec9c2d..0eefcc4d 100644
--- a/assets/input_double_bam.csv
+++ b/assets/input_double_bam.csv
@@ -1,3 +1,3 @@
 sample,vcf,bam,pileup_bam,pileup_ind
-K_5_1_A_1,/workspace/datasets/transfer/ferriol_deepcsa/K_5_1_A_1.high.filtered.vcf,/workspace/datasets/transfer/ferriol_deepcsa/K_5_1_A_1.sorted.bam,/workspace/datasets/transfer/ferriol_deepcsa/allmolecules/K_5_1_A_1.sorted.bam,/workspace/datasets/transfer/ferriol_deepcsa/allmolecules/K_5_1_A_1.sorted.bam.csi
-K_6_1_A_1,/workspace/datasets/transfer/ferriol_deepcsa/K_6_1_A_1.high.filtered.vcf,/workspace/datasets/transfer/ferriol_deepcsa/K_6_1_A_1.sorted.bam,/workspace/datasets/transfer/ferriol_deepcsa/allmolecules/K_6_1_A_1.sorted.bam,/workspace/datasets/transfer/ferriol_deepcsa/allmolecules/K_6_1_A_1.sorted.bam.csi
+K_5_1_A_1,/data/bbg/datasets/transfer/ferriol_deepcsa/K_5_1_A_1.high.filtered.vcf,/data/bbg/datasets/transfer/ferriol_deepcsa/K_5_1_A_1.sorted.bam,/data/bbg/datasets/transfer/ferriol_deepcsa/allmolecules/K_5_1_A_1.sorted.bam,/data/bbg/datasets/transfer/ferriol_deepcsa/allmolecules/K_5_1_A_1.sorted.bam.csi
+K_6_1_A_1,/data/bbg/datasets/transfer/ferriol_deepcsa/K_6_1_A_1.high.filtered.vcf,/data/bbg/datasets/transfer/ferriol_deepcsa/K_6_1_A_1.sorted.bam,/data/bbg/datasets/transfer/ferriol_deepcsa/allmolecules/K_6_1_A_1.sorted.bam,/data/bbg/datasets/transfer/ferriol_deepcsa/allmolecules/K_6_1_A_1.sorted.bam.csi
diff --git a/conf/test.config b/conf/test.config
index 04b14c36..cafa23d4 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -27,7 +27,7 @@ params {
 
 
     omega_hotspots              = false
-    omega_hotspots_bedfile     = "/workspace/datasets/transfer/ferriol_deepcsa/kidney_panel.hotspots.bed4.bed"
+    omega_hotspots_bedfile     = "/data/bbg/datasets/transfer/ferriol_deepcsa/kidney_panel.hotspots.bed4.bed"
     hotspot_expansion           = 30
 
     oncodrivefml               = false

From f1073e7969dff00a800c891c15ea514eb6c8158b Mon Sep 17 00:00:00 2001
From: FerriolCalvet <ferriolcalvet@gmail.com>
Date: Wed, 18 Dec 2024 04:39:09 +0100
Subject: [PATCH 2/3] update omega_globalloc

- use synonymous mutation rates instead of relative mutrates
- use mutation rate for omegagloballoc and mutated reads rate for omegamultigloballoc
- add all_samples mutational profile to omega input
- rename scripts and modules according to the changes
---
 bin/omega_compute_relative_mutrate.py         | 40 ---------------
 bin/omega_select_mutrate.py                   | 49 +++++++++++++++++++
 conf/modules.config                           |  7 +++
 .../local/bbgtools/omega/estimator/main.nf    |  2 +-
 .../local/bbgtools/omega/preprocess/main.nf   | 19 +++++--
 .../main.nf                                   | 16 +++---
 subworkflows/local/omega/main.nf              | 20 ++++++--
 workflows/deepcsa.nf                          | 18 ++++---
 8 files changed, 108 insertions(+), 63 deletions(-)
 delete mode 100755 bin/omega_compute_relative_mutrate.py
 create mode 100755 bin/omega_select_mutrate.py
 rename modules/local/{relative_mutrate => select_mutrate}/main.nf (70%)

diff --git a/bin/omega_compute_relative_mutrate.py b/bin/omega_compute_relative_mutrate.py
deleted file mode 100755
index 09f3c672..00000000
--- a/bin/omega_compute_relative_mutrate.py
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/usr/local/bin/python
-
-
-import click
-import pandas as pd
-from read_utils import custom_na_values
-
-
-def compute_relative_mutrate(mutrate_file, output_file):
-    """
-    INFO
-    """
-
-    mutrate_df = pd.read_csv(mutrate_file, sep = "\t", header = 0, na_values = custom_na_values)
-
-    synonymous_mutrates_all_samples = mutrate_df[(mutrate_df["MUTTYPES"] == "SNV") & 
-                                                  (mutrate_df["GENE"] != "ALL_GENES")].reset_index(drop = True)
-
-    relative_synonymous_mutrates_all_samples = synonymous_mutrates_all_samples[['GENE', 'MUTRATE_MB_ADJUSTED']].set_index(['GENE']) \
-                                                    / synonymous_mutrates_all_samples["MUTRATE_MB_ADJUSTED"].sum()
-    relative_synonymous_mutrates_all_samples.columns = ["REL_MUTRATE"]
-
-    relative_synonymous_mutrates_all_samples.reset_index()[["GENE", "REL_MUTRATE"]].to_csv(f"{output_file}",
-                                                            header=["GENE", "SYNONYMOUS_MUTS"],
-                                                            index=False,
-                                                            sep="\t")
-
-
-@click.command()
-@click.option('--mutrates', type=click.Path(exists=True), help='Input mutation rate file')
-@click.option('--output', type=click.Path(), help='Output file')
-
-
-def main(mutrates, output):
-    click.echo(f"Computing the relative mutation rate...")
-    compute_relative_mutrate(mutrates, output)
-
-if __name__ == '__main__':
-    main()
-
diff --git a/bin/omega_select_mutrate.py b/bin/omega_select_mutrate.py
new file mode 100755
index 00000000..f5860081
--- /dev/null
+++ b/bin/omega_select_mutrate.py
@@ -0,0 +1,49 @@
+#!/usr/local/bin/python
+
+
+import click
+import pandas as pd
+from read_utils import custom_na_values
+
+
+def select_syn_mutrate(mutrate_file, output_file, mode):
+    """
+    Select one per-gene rate column from a mutation rate table and save it as TSV.
+
+    Rows with MUTTYPES == "SNV" and GENE != "ALL_GENES" are kept and written to
+    output_file as two columns, GENE and MUTRATE. mode picks the source column:
+    'mutations' -> MUTRATE_MB_ADJUSTED, 'mutated_reads' -> MUTREADSRATE_MB_ADJUSTED.
+    Any other mode raises ValueError.
+    """
+
+    rate_columns = {'mutations': 'MUTRATE_MB_ADJUSTED',
+                    'mutated_reads': 'MUTREADSRATE_MB_ADJUSTED'}
+    # main() already restricts mode via click.Choice; this guard covers direct callers.
+    # An uncaught ValueError still terminates the script with exit status 1.
+    if mode not in rate_columns:
+        raise ValueError(f"unknown mode {mode!r}, please enter either mutations or mutated_reads")
+
+    mutrate_df = pd.read_csv(mutrate_file, sep = "\t", header = 0, na_values = custom_na_values)
+
+    synonymous_mutrates_all_samples = mutrate_df[(mutrate_df["MUTTYPES"] == "SNV") &
+                                                    (mutrate_df["GENE"] != "ALL_GENES")].reset_index(drop = True)
+
+    # rename() returns a new frame, so the column selection is never relabelled in place
+    synonymous_mutrates_genes = synonymous_mutrates_all_samples[['GENE', rate_columns[mode]]]
+    synonymous_mutrates_genes = synonymous_mutrates_genes.rename(columns = {rate_columns[mode]: "MUTRATE"})
+    synonymous_mutrates_genes.to_csv(output_file,
+                                        header=True, index=False, sep="\t")
+
+
+@click.command()
+@click.option('--mutrates', type=click.Path(exists=True), required=True, help='Input mutation rate file')
+@click.option('--output', type=click.Path(), required=True, help='Output file')
+@click.option('--mode', type=click.Choice(['mutations', 'mutated_reads']), default='mutations', show_default=True, help='Rate to export')
+def main(mutrates, output, mode):
+    """Export the per-gene rate selected by --mode as a two-column GENE/MUTRATE TSV."""
+    click.echo("Selecting the gene synonymous mutation rates...")
+    select_syn_mutrate(mutrates, output, mode)
+
+if __name__ == '__main__':
+    main()
+
diff --git a/conf/modules.config b/conf/modules.config
index 5eec249d..18c6643b 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -489,6 +489,13 @@ process {
             ]
         }
 
+        withName: 'SYNMUTRATE' {
+            ext.mode     = 'mutations'
+        }
+        withName: 'SYNMUTREADSRATE' {
+            ext.mode     = 'mutated_reads'
+        }
+
     }
 
 
diff --git a/modules/local/bbgtools/omega/estimator/main.nf b/modules/local/bbgtools/omega/estimator/main.nf
index bac76fda..15612302 100644
--- a/modules/local/bbgtools/omega/estimator/main.nf
+++ b/modules/local/bbgtools/omega/estimator/main.nf
@@ -5,7 +5,7 @@ process OMEGA_ESTIMATOR {
     label 'process_high_memory'
 
 
-    container 'docker.io/ferriolcalvet/omega:latest'
+    container 'docker.io/ferriolcalvet/omega:20241217'
 
     input:
     tuple val(meta) , path(mutabilities_table), path(mutations_table), path(depths)
diff --git a/modules/local/bbgtools/omega/preprocess/main.nf b/modules/local/bbgtools/omega/preprocess/main.nf
index d46bde25..03cbda41 100644
--- a/modules/local/bbgtools/omega/preprocess/main.nf
+++ b/modules/local/bbgtools/omega/preprocess/main.nf
@@ -5,12 +5,13 @@ process OMEGA_PREPROCESS {
     label 'process_high_memory'
 
 
-    container 'docker.io/ferriolcalvet/omega:latest'
+    container 'docker.io/ferriolcalvet/omega:20241217'
 
     input:
     tuple val(meta) , path(mutations), path(depths), path(mutation_profile)
     tuple val(meta2), path (annotated_panel)
     tuple val(meta3), path (syn_muts_global)
+    tuple val(meta4), path (mut_profile_global, stageAs: 'global_mutprofile.tsv')
 
 
     output:
@@ -26,7 +27,7 @@ process OMEGA_PREPROCESS {
     def prefix = task.ext.prefix ?: "${meta.id}"
     // TODO revise this fix
     def sample_name = prefix.tokenize('.')[0]
-    def global_loc = task.ext.global_loc ? "--absent-synonymous infer_global_custom  --relative-synonymous-muts-file ${syn_muts_global}" : "--absent-synonymous ignore"
+    def global_loc = task.ext.global_loc ? "--absent-synonymous infer_global_custom  --mutational-profile-global-file global_mutprofile.tsv --synonymous-mutrates-file ${syn_muts_global}" : "--absent-synonymous ignore"
     prefix = task.ext.global_loc ? "${prefix}.gLoc" : "${prefix}"
     """
     omega preprocessing --preprocessing-mode compute_mutabilities \\
@@ -36,7 +37,7 @@ process OMEGA_PREPROCESS {
                         --table-observed-muts mutations_per_sample_gene_impact_context.${prefix}.tsv \\
                         --mutabilities-table mutability_per_sample_gene_context.${prefix}.tsv \\
                         --synonymous-muts-table syn_muts.${prefix}.tsv \\
-                        --mutational-profile ${mutation_profile} \\
+                        --mutational-profile-file ${mutation_profile} \\
                         --single-sample ${sample_name} \\
                         ${global_loc}
     # $args -c $task.cpus
@@ -59,3 +60,15 @@ process OMEGA_PREPROCESS {
     """
 }
 
+// omega preprocessing --preprocessing-mode compute_mutabilities
+//                      --depths-file all_samples.subset_depths.tsv.gz
+//                      --mutations-file all_samples.mutations.tsv
+//                      --input-vep-postprocessed-file consensus.exons_splice_sites.tsv
+//                      --table-observed-muts mutations_per_sample_gene_impact_context.all_samples2.global_loc.gLoc.tsv
+//                      --mutabilities-table mutability_per_sample_gene_context.all_samples2.global_loc.gLoc.tsv
+//                      --synonymous-muts-table syn_muts.all_samples2.global_loc.gLoc.tsv
+//                      --mutational-profile-file all_samples.all.profile.tsv
+//                      --mutational-profile-global-file P19_0033_BTR_01.all.profile.tsv
+//                      --single-sample all_samples
+//                      --absent-synonymous infer_global_custom
+//                      --synonymous-mutrates-file mutrates_per_gene.tsv
diff --git a/modules/local/relative_mutrate/main.nf b/modules/local/select_mutrate/main.nf
similarity index 70%
rename from modules/local/relative_mutrate/main.nf
rename to modules/local/select_mutrate/main.nf
index a8853c79..48549e90 100644
--- a/modules/local/relative_mutrate/main.nf
+++ b/modules/local/select_mutrate/main.nf
@@ -1,4 +1,4 @@
-process RELATIVE_MUTRATE {
+process SELECT_MUTRATES {
     tag "$meta.id"
     label 'process_single'
 
@@ -12,20 +12,21 @@ process RELATIVE_MUTRATE {
     tuple val(meta), path(mutation_rates)
 
     output:
-    tuple val(meta), path("*.rel_mutrates.tsv") , emit: mutrate
-    path  "versions.yml"                        , emit: versions
+    tuple val(meta), path("*.gene_mutrates.tsv") , emit: mutrate
+    path  "versions.yml"                         , emit: versions
 
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
-    def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
+    def mode = task.ext.mode ?: "mutations"
     """
-    omega_compute_relative_mutrate.py \\
+    omega_select_mutrate.py \\
                 --mutrates ${mutation_rates} \\
-                --output ${prefix}.rel_mutrates.tsv;
+                --output ${prefix}.gene_mutrates.tsv \\
+                --mode ${mode};
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
@@ -35,9 +36,8 @@ process RELATIVE_MUTRATE {
 
     stub:
     def prefix = task.ext.prefix ?: "all_samples"
-    def panel_version = task.ext.panel_version ?: "${meta2.id}"
     """
-    touch ${prefix}.rel_mutrates.tsv
+    touch ${prefix}.gene_mutrates.tsv
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/subworkflows/local/omega/main.nf b/subworkflows/local/omega/main.nf
index b6b6f4bb..88028e6c 100644
--- a/subworkflows/local/omega/main.nf
+++ b/subworkflows/local/omega/main.nf
@@ -57,6 +57,10 @@ workflow OMEGA_ANALYSIS{
     .join( profile )
     .set{ muts_n_depths_n_profile }
 
+    Channel.of([ [ id: "all_samples" ] ])
+    .join( profile ).first()
+    .set{ all_samples_mut_profile }
+
 
     if (params.omega_hotspots){
         EXPANDREGIONS(panel, hotspots_file)
@@ -68,8 +72,11 @@ workflow OMEGA_ANALYSIS{
         json_hotspots = bedfile
     }
 
-    // FIXME: here I am using bedfile as a dummy value channel
-    PREPROCESSING( muts_n_depths_n_profile, expanded_panel, bedfile)
+    // FIXME here I am using bedfile as a dummy value channel
+    PREPROCESSING( muts_n_depths_n_profile,
+                    expanded_panel,
+                    bedfile,
+                    all_samples_mut_profile)
     ch_versions = ch_versions.mix(PREPROCESSING.out.versions)
 
     PREPROCESSING.out.mutabs_n_mutations_tsv
@@ -98,14 +105,19 @@ workflow OMEGA_ANALYSIS{
 
     if (params.omega_globalloc) {
 
-        PREPROCESSINGGLOBALLOC( muts_n_depths_n_profile, expanded_panel, relative_mutationrates.first() )
+        PREPROCESSINGGLOBALLOC(muts_n_depths_n_profile,
+                                expanded_panel,
+                                relative_mutationrates.first(),
+                                all_samples_mut_profile)
         ch_versions = ch_versions.mix(PREPROCESSINGGLOBALLOC.out.versions)
 
         PREPROCESSINGGLOBALLOC.out.mutabs_n_mutations_tsv
         .join( depth )
         .set{ preprocess_globalloc_n_depths }
 
-        ESTIMATORGLOBALLOC( preprocess_globalloc_n_depths, expanded_panel, GROUPGENES.out.json_genes.first())
+        ESTIMATORGLOBALLOC(preprocess_globalloc_n_depths,
+                            expanded_panel,
+                            GROUPGENES.out.json_genes.first())
         ch_versions = ch_versions.mix(ESTIMATORGLOBALLOC.out.versions)
 
         global_loc_results = ESTIMATORGLOBALLOC.out.results
diff --git a/workflows/deepcsa.nf b/workflows/deepcsa.nf
index 870cecdc..22bed4b7 100644
--- a/workflows/deepcsa.nf
+++ b/workflows/deepcsa.nf
@@ -117,7 +117,8 @@ include { TABIX_BGZIPTABIX_QUERY    as DEPTHSNONPROTCONS    } from '../modules/n
 include { TABIX_BGZIPTABIX_QUERY    as DEPTHSINTRONSCONS    } from '../modules/nf-core/tabix/bgziptabixquery/main'
 include { TABIX_BGZIPTABIX_QUERY    as DEPTHSSYNONYMOUSCONS } from '../modules/nf-core/tabix/bgziptabixquery/main'
 
-include { RELATIVE_MUTRATE          as RELMUTRATE           } from '../modules/local/relative_mutrate/main'
+include { SELECT_MUTRATES           as SYNMUTRATE           } from '../modules/local/select_mutrate/main'
+include { SELECT_MUTRATES           as SYNMUTREADSRATE      } from '../modules/local/select_mutrate/main'
 
 
 
@@ -247,8 +248,11 @@ workflow DEEPCSA{
         .join( MUTRATESYNONYMOUS.out.mutrates )
         .set{ all_samples_syn_mutrate }
 
-        RELMUTRATE(all_samples_syn_mutrate)
-        ch_versions = ch_versions.mix(RELMUTRATE.out.versions)
+        SYNMUTRATE(all_samples_syn_mutrate)
+        ch_versions = ch_versions.mix(SYNMUTRATE.out.versions)
+
+        SYNMUTREADSRATE(all_samples_syn_mutrate)
+        ch_versions = ch_versions.mix(SYNMUTREADSRATE.out.versions)
 
 
         // Concatenate all outputs into a single file
@@ -442,7 +446,7 @@ workflow DEEPCSA{
                     CREATEPANELS.out.exons_consensus_panel,
                     custom_groups_table,
                     hotspots_bed_file,
-                    RELMUTRATE.out.mutrate
+                    SYNMUTRATE.out.mutrate
                     )
             positive_selection_results = positive_selection_results.join(OMEGA.out.results, remainder: true)
             positive_selection_results = positive_selection_results.join(OMEGA.out.results_global, remainder: true)
@@ -456,7 +460,7 @@ workflow DEEPCSA{
                         CREATEPANELS.out.exons_consensus_panel,
                         custom_groups_table,
                         hotspots_bed_file,
-                        RELMUTRATE.out.mutrate
+                        SYNMUTREADSRATE.out.mutrate
                         )
             positive_selection_results = positive_selection_results.join(OMEGAMULTI.out.results, remainder: true)
             positive_selection_results = positive_selection_results.join(OMEGAMULTI.out.results_global, remainder: true)
@@ -470,7 +474,7 @@ workflow DEEPCSA{
                             CREATEPANELS.out.exons_consensus_panel,
                             custom_groups_table,
                             hotspots_bed_file,
-                            RELMUTRATE.out.mutrate
+                            SYNMUTRATE.out.mutrate
                             )
             ch_versions = ch_versions.mix(OMEGANONPROT.out.versions)
 
@@ -481,7 +485,7 @@ workflow DEEPCSA{
                                 CREATEPANELS.out.exons_consensus_panel,
                                 custom_groups_table,
                                 hotspots_bed_file,
-                                RELMUTRATE.out.mutrate
+                                SYNMUTREADSRATE.out.mutrate
                                 )
             ch_versions = ch_versions.mix(OMEGANONPROTMULTI.out.versions)
         }

From edfa571ae890aba8c6696d1420b4089a62b2756c Mon Sep 17 00:00:00 2001
From: FerriolCalvet <ferriolcalvet@gmail.com>
Date: Wed, 18 Dec 2024 21:26:54 +0100
Subject: [PATCH 3/3] remove other_sample_SNP filter from somatic mutations

---
 conf/modules.config | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 18c6643b..c0598137 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -295,7 +295,7 @@ process {
                                 ['"VAF_AM" : "le ', "${params.germline_threshold}", '"'].join('').trim(),
                                 ['"vd_VAF" : "le ', "${params.germline_threshold}", '"'].join('').trim(),
                                 ['"DEPTH" : "ge ', "${params.mutation_depth_threshold}", '"'].join('').trim(),
-                                '"FILTER" : ["notcontains NM20", "notcontains n_rich", "notcontains cohort_n_rich", "notcontains cohort_n_rich_uni",  "notcontains no_pileup_support", "notcontains other_sample_SNP", "notcontains low_mappability" ]',
+                                '"FILTER" : ["notcontains NM20", "notcontains n_rich", "notcontains cohort_n_rich", "notcontains cohort_n_rich_uni",  "notcontains no_pileup_support", "notcontains low_mappability" ]',
                                 '"VAF_distorted_expanded" : false',
                                 ].join(',\t').trim()
                             }
@@ -315,7 +315,7 @@ process {
             ext.filters     = { [['"VAF" : "le ', "${params.germline_threshold}", '"'].join('').trim(),
                                 ['"vd_VAF" : "le ', "${params.germline_threshold}", '"'].join('').trim(),
                                 ['"DEPTH" : "ge ', "${params.mutation_depth_threshold}", '"'].join('').trim(),
-                                '"FILTER" : ["notcontains NM20", "notcontains n_rich", "notcontains cohort_n_rich", "notcontains cohort_n_rich_uni",  "notcontains no_pileup_support", "notcontains other_sample_SNP", "notcontains low_mappability" ]',
+                                '"FILTER" : ["notcontains NM20", "notcontains n_rich", "notcontains cohort_n_rich", "notcontains cohort_n_rich_uni",  "notcontains no_pileup_support",  "notcontains low_mappability" ]',
                                 '"VAF_distorted_expanded" : false',
                                 ].join(',\t').trim()
                             }