bbglab · FerriolCalvet · Oct 16, 2024 · Oct 8, 2024 · Oct 8, 2024 · Oct 8, 2024
diff --git a/bin/compute_mutrate.py b/bin/compute_mutrate.py
@@ -33,8 +33,8 @@ def mutrate_sample(maf_df, depths_df, depths_adj_df, sample_name, type_list = Fa
     # mutation rate metrics
     sample_features = {"N_MUTS" : n_muts,
                         "N_MUTATED" : n_mutated_reads,
-                        "DEPTH" : depths_df[f"{sample_name}"].sum(),
-                        "DEPTH_ADJUSTED": depths_adj_df[f"{sample_name}"].sum()
+                        "DEPTH" : depths_df.drop_duplicates(subset = ["CHROM", "POS"])[f"{sample_name}"].sum(),
+                        "DEPTH_ADJUSTED": depths_adj_df[f"{sample_name}"].sum() # they should be the same for all impacts not for subsets of impacts
                         }
     sample_features["MUTRATE_MB"] = ( sample_features["N_MUTS"] / sample_features["DEPTH"] * 1000000 ).astype(float)
     sample_features["MUTRATE_MB_ADJUSTED"] = ( sample_features["N_MUTS"] / sample_features["DEPTH_ADJUSTED"] * 1000000 ).astype(float)
@@ -116,22 +116,26 @@ def compute_mutrate(maf_path, depths_path, annot_panel_path, sample_name, panel_
     annot_panel_df = pd.read_csv(annot_panel_path, sep = "\t", na_values = custom_na_values)
 
     # Subset depths with panel
-    ## mode 1: each position counts one
+    ## mode 1: each position counts once per gene (careful: a position annotated in several genes appears once for each of them)
     depths_subset_df = depths_df.merge(annot_panel_df[["CHROM", "POS", "GENE"]].drop_duplicates(),
                                         on = ["CHROM", "POS"], how = "inner")
     ## mode 2 (adjusted): each position counts as many times it contributes to the panel
     depths_df[sample_name.split('.')[0]] = depths_df[sample_name.split('.')[0]] / 3   # the depth per position can contribute to three different mutations
     depths_subset_adj_df = depths_df.merge(annot_panel_df[["CHROM", "POS", "GENE"]], on = ["CHROM", "POS"], how = "inner")
 
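+    ## mode 3 (adjusted, sample level): each position counts as many times as it contributes to the panel, but only once per sample (panel rows are deduplicated on CHROM, POS, REF, ALT, so a position shared by several genes is not counted once per gene)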
+    depths_subset_adj_sample_df = depths_df.merge(annot_panel_df.drop_duplicates(subset = ["CHROM", "POS", "REF", "ALT"])[["CHROM", "POS"]],
+                                                    on = ["CHROM", "POS"], how = "inner")
+
     del depths_df
     del annot_panel_df
 
     # Compute mutation rates
     ## sample mutation rate
-    mutrate_sample_allmuts_df = mutrate_sample(maf_df, depths_subset_df, depths_subset_adj_df, sample_name.split('.')[0])
-    mutrate_sample_snvs_df = mutrate_sample(maf_df, depths_subset_df, depths_subset_adj_df, sample_name.split('.')[0], ["SNV"])
-    mutrate_sample_nonsnvs_df = mutrate_sample(maf_df, depths_subset_df, depths_subset_adj_df, sample_name.split('.')[0], ["INSERTION", "DELETION", "COMPLEX", "MNV"])
-    mutrate_sample_indels_df = mutrate_sample(maf_df, depths_subset_df, depths_subset_adj_df, sample_name.split('.')[0], ["INSERTION", "DELETION"])
+    mutrate_sample_allmuts_df = mutrate_sample(maf_df, depths_subset_df, depths_subset_adj_sample_df, sample_name.split('.')[0])
+    mutrate_sample_snvs_df = mutrate_sample(maf_df, depths_subset_df, depths_subset_adj_sample_df, sample_name.split('.')[0], ["SNV"])
+    mutrate_sample_nonsnvs_df = mutrate_sample(maf_df, depths_subset_df, depths_subset_adj_sample_df, sample_name.split('.')[0], ["INSERTION", "DELETION", "COMPLEX", "MNV"])
+    mutrate_sample_indels_df = mutrate_sample(maf_df, depths_subset_df, depths_subset_adj_sample_df, sample_name.split('.')[0], ["INSERTION", "DELETION"])
     ## per gene mutation rate
     mutrate_genes_allmuts_df = mutrate_gene(maf_df, depths_subset_df, depths_subset_adj_df, sample_name.split('.')[0])
     mutrate_genes_snvs_df = mutrate_gene(maf_df, depths_subset_df, depths_subset_adj_df, sample_name.split('.')[0], ["SNV"])
@@ -145,9 +149,12 @@ def compute_mutrate(maf_path, depths_path, annot_panel_path, sample_name, panel_
 
     # Save
     mutrate_df[["SAMPLE_ID", "GENE", "REGIONS", "MUTTYPES",
-                "N_MUTS", "N_MUTATED", "DEPTH",
-                "MUTRATE_MB", "MUTRATE_MB_ADJUSTED", "MUTRATE_KB", "MUTREADSRATE_KB_ADJUSTED",
-                "MUTREADSRATE_MB", "MUTREADSRATE_MB_ADJUSTED", "MUTREADSRATE_KB", "MUTREADSRATE_KB_ADJUSTED"]].to_csv(f"{sample_name.split('.')[0]}.{panel_v}.mutrates.tsv",
+                "DEPTH",
+                "N_MUTS", "N_MUTATED",
+                "MUTRATE_MB", "MUTRATE_MB_ADJUSTED",
+                "MUTRATE_KB", "MUTRATE_KB_ADJUSTED",
+                "MUTREADSRATE_MB", "MUTREADSRATE_MB_ADJUSTED",
+                "MUTREADSRATE_KB", "MUTREADSRATE_KB_ADJUSTED"]].to_csv(f"{sample_name.split('.')[0]}.{panel_v}.mutrates.tsv",
                                                             sep = "\t",
                                                             header = True,
                                                             index = False

diff --git a/conf/mice.config b/conf/mice.config
@@ -3,15 +3,15 @@ params {
     config_profile_description = 'Running deepCSA for mice data'
 
     // Input data
-    input  = '/workspace/nobackup2/prominent/ucsf_mice/deepUMIcaller/deepCSA_input.med.double_bam.csv'
+    input  = '/data/bbg/nobackup2/prominent/ucsf_mice/deepUMIcaller/deepCSA_input.med.csv'
 
     // Fasta references
-    fasta = '/workspace/datasets/genomes/mm39/GCA_000001635.9_genome/GCA_000001635.9_GRCm39_genomic.fna'
+    fasta = '/data/bbg/datasets/genomes/mm39/GCA_000001635.9_genome/GCA_000001635.9_GRCm39_genomic.fna'
 
-    features_table           = "/workspace/nobackup2/prominent/ucsf_mice/2024-08-07_clinical_features_summary.tsv"
+    features_table           = "/data/bbg/nobackup2/prominent/ucsf_mice/2024-10-13_clinical_features_summary.tsv"
     features_table_separator =  'tab'
     features_table_dict      = ['"unique_identifier" : "sample"',
-                                    '"groups_of_interest" : [ ["DMBA"], ["TPA"], ["treatment"], ["sex"] ]'
+                                    '"groups_of_interest" : [ ["DMBA"], ["TPA"], ["treatment"], ["sex"], ["timepoint"], ["timepoint", "treatment"], ["timepoint", "TPA"], ["timepoint", "DMBA"] ]'
                                     ].join(',\t').trim()
 
     use_custom_minimum_depth    = 5
@@ -24,15 +24,15 @@ params {
     vep_out_format      = "tab"
     vep_params          = "--no_stats --cache --offline --symbol --protein --canonical"
     vep_species         = "mus_musculus"
-    vep_cache           = "/workspace/datasets/vep/mus_musculus/111_GRCm39"
+    vep_cache           = "/data/bbg/datasets/vep/mus_musculus/111_GRCm39"
 
 
     // oncodrive3d
-    datasets3d                 = "/workspace/nobackup/scratch/oncodrive3d/datasets_mouse"
+    datasets3d                 = "/data/bbg/nobackup/scratch/oncodrive3d/datasets_mouse"
     // annotations3d              = "/workspace/nobackup/scratch/oncodrive3d/annotations_240506"
 
-    omega_hotspots              = false
-    omega_hotspots_bedfile      = "/workspace/datasets/transfer/ferriol_deepcsa/mouse_skin_panel.hotspots.bed4.bed"
+    omega_hotspots              = true
+    omega_hotspots_bedfile      = "/data/bbg/datasets/transfer/ferriol_deepcsa/mouse_skin_panel.hotspots.bed4.bed"
     hotspot_expansion           = 30
 
 
@@ -43,20 +43,20 @@ params {
     // o3d_plot                   = false
     // o3d_plot_chimerax          = false
 
-    omega                      = false
+    omega                      = true
     omega_globalloc            = false
     omega_vaf_distorsioned     = false
-    omega_plot                  = false
+    omega_plot                  = true
 
 
-    signatures                 = false
-    mutationrate               = false
+    signatures                 = true
+    mutationrate               = true
     mutated_epithelium         = false
     mutated_epithelium_vaf     = false
 
     indels                     = false
 
-    profileall                 = false
+    profileall                 = true
     profilenonprot             = false
     profileexons               = false
     profileintrons             = false

diff --git a/conf/modules.config b/conf/modules.config
@@ -158,6 +158,16 @@ process {
         ]
     }
 
+    withName: 'BBG_DEEPCSA:CREATEPANELS:CREATESAMPLEPANELSSYNONYMOUS' {
+        publishDir       = [
+            [
+                mode: params.publish_dir_mode,
+                path: { "${params.outdir}/createpanels/samplepanels/createsamplepanelssynonymous" },
+                pattern: "*{tsv,bed}"
+            ]
+        ]
+    }
+
 
     withName: 'SUBSETDEPTHS' {
         ext.prefix    = { "${meta.id}.subset_depths" }
@@ -170,6 +180,30 @@ process {
                 enabled : false
         ]
     }
+    if (params.store_depths){
+        withName: 'DEPTHS.*CONS' {
+            ext.prefix    = { "${meta.id}.subset_depths" }
+            ext.args      = ''
+            ext.args2     = '-s 1 -b 2 -e 2'
+            ext.args3     = '-h'
+            ext.extension = 'tsv'
+            ext.header    = '1'
+        }
+    } else {
+        withName: 'DEPTHS.*CONS' {
+            ext.prefix    = { "${meta.id}.subset_depths" }
+            ext.args      = ''
+            ext.args2     = '-s 1 -b 2 -e 2'
+            ext.args3     = '-h'
+            ext.extension = 'tsv'
+            ext.header    = '1'
+            publishDir       = [
+                enabled : false
+            ]
+        }
+
+    }
+
 
     withName: 'SUBSETMUTATIONS' {
         ext.prefix    = { "${meta.id}.subset_mutations" }
@@ -328,7 +362,7 @@ process {
     // }
 
 
-    withName: 'SUBSET_MUTPROFILE' {
+    withName: 'SUBSETMUTPROFILE' {
         ext.filters     = { ['"TYPE" : "SNV"'].join(',\t').trim()
                         }
         ext.output_fmt  = { ['"header": true',
@@ -343,7 +377,7 @@ process {
     }
 
     if (params.profilenonprot){
-        withName: '.*NONPROT:SUBSET_MUTPROFILE' {
+        withName: '.*NONPROT:SUBSETMUTPROFILE' {
             ext.filters     = { [ '"TYPE" : "SNV"',
                                     '"Protein_affecting": "non_protein_affecting"'].join(',\t').trim()
                             }
@@ -360,7 +394,7 @@ process {
     }
 
 
-    withName: 'SUBSET_MUTABILITY' {
+    withName: 'SUBSETMUTABILITY' {
         ext.filters     = { ['"TYPE" : "SNV"'].join(',\t').trim()
                         }
         ext.output_fmt  = { ['"header": true',
@@ -399,7 +433,7 @@ process {
     }
 
     if (params.mutationrate) {
-        withName: 'SUBSET_MUTRATE' {
+        withName: 'SUBSETMUTRATE' {
             ext.filters     = ''
 
             ext.output_fmt  = { ['"header": true',
@@ -413,7 +447,7 @@ process {
             ]
         }
 
-        withName: 'BBG_DEEPCSA:MUTRATEPROT:SUBSET_MUTRATE' {
+        withName: 'BBG_DEEPCSA:MUTRATEPROT:SUBSETMUTRATE' {
             ext.filters     = { ['"Protein_affecting": "protein_affecting"'].join(',\t').trim()
                             }
             ext.output_fmt  = { ['"header": true',
@@ -427,7 +461,7 @@ process {
             ]
         }
 
-        withName: '.*NONPROT:SUBSET_MUTRATE' {
+        withName: '.*NONPROT:SUBSETMUTRATE' {
             ext.filters     = { ['"Protein_affecting": "non_protein_affecting"'].join(',\t').trim()
                             }
             ext.output_fmt  = { ['"header": true',
@@ -441,7 +475,7 @@ process {
             ]
         }
 
-        withName: '.*SYNONYMOUS:SUBSET_MUTRATE' {
+        withName: '.*SYNONYMOUS:SUBSETMUTRATE' {
             ext.filters     = { ['"canonical_Consequence_broader": "synonymous"'].join(',\t').trim()
                             }
             ext.output_fmt  = { ['"header": true',
@@ -459,7 +493,7 @@ process {
 
 
     if (params.indels) {
-        withName: 'SUBSET_INDELS' {
+        withName: 'SUBSETINDELS' {
             // ext.filters     = ''
             ext.filters     = { ['"FILTER": "notcontains repetitive_variant"'].join(',\t').trim() }
 
@@ -478,7 +512,7 @@ process {
 
 
     if (params.mutated_epithelium) {
-        withName: 'SUBSET_MUTEPI' {
+        withName: 'SUBSETMUTEPI' {
             ext.filters     = { ['"Protein_affecting": "protein_affecting"',
                                     '"TYPE" : "SNV"',
                                     '"VAF" : "gt 0"',
@@ -504,7 +538,7 @@ process {
         }
 
         if (params.pileup_all_duplex){
-            withName: 'SUBSET_MUTEPI' {
+            withName: 'SUBSETMUTEPI' {
                 ext.filters     = { ['"Protein_affecting": "protein_affecting"',
                                         '"TYPE" : "SNV"',
                                         '"VAF" : "gt 0"',
@@ -532,7 +566,7 @@ process {
     }
 
     if (params.mutated_epithelium_vaf) {
-        withName: 'SUBSET_MUTEPIVAF' {
+        withName: 'SUBSETMUTEPIVAF' {
             ext.filters     = { ['"Protein_affecting": "protein_affecting"',
                                         '"TYPE" : "SNV"',
                                         '"VAF" : "gt 0"',
@@ -548,7 +582,7 @@ process {
             ]
         }
         if (params.all_duplex_counts){
-            withName: 'SUBSET_MUTEPIVAFAM' {
+            withName: 'SUBSETMUTEPIVAFAM' {
                 ext.filters     = { ['"Protein_affecting": "protein_affecting"',
                                         '"TYPE" : "SNV"',
                                         '"VAF_AM" : "gt 0"',
@@ -569,7 +603,7 @@ process {
     }
 
     if (params.oncodrivefml) {
-        withName: 'SUBSET_ONCODRIVEFML' {
+        withName: 'SUBSETONCODRIVEFML' {
             ext.filters     = { "" }
             ext.output_fmt  = { ['"header": true',
                                     '"columns": ["CHROM_ensembl", "POS_ensembl", "REF_ensembl", "ALT_ensembl", "SAMPLE_ID"]',
@@ -591,7 +625,7 @@ process {
     }
 
     if (params.oncodrive3d){
-        withName: 'SUBSET_ONCODRIVE3D' {
+        withName: 'SUBSETONCODRIVE3D' {
             ext.filters     = { ['"TYPE" : "SNV"'].join(',\t').trim()
     //                                '"canonical_Consequence": "contains missense_variant"'
                             }
@@ -663,7 +697,7 @@ process {
 
 
 
-    withName: 'SUBSET_ONCODRIVECLUSTL' {
+    withName: 'SUBSETONCODRIVECLUSTL' {
         ext.filters     = { "" }
 
 //        ext.filters     = { ['"TYPE" : "SNV"'].join(',\t').trim()

diff --git a/conf/tools/omega.config b/conf/tools/omega.config
@@ -12,7 +12,7 @@
 
 process {
 
-    withName: 'SUBSET_OMEGA' {
+    withName: 'SUBSETOMEGA' {
         ext.filters     = { ['"TYPE" : "SNV"'].join(',\t').trim()
                         }
         ext.output_fmt  = { ['"header": true',
@@ -27,7 +27,7 @@ process {
     }
 
 
-    withName: '.*MULTI:SUBSET_OMEGA' {
+    withName: '.*MULTI:SUBSETOMEGA' {
         ext.filters     = { ['"TYPE" : "SNV"'].join(',\t').trim()
                         }
         ext.output_fmt  = { ['"header": true',
@@ -163,7 +163,7 @@ process {
                 ]
             ]
         }
-        withName: 'SUBSET_OMEGA_REDUCED' {
+        withName: 'SUBSETOMEGA_REDUCED' {
             ext.filters     = { ['"TYPE" : "SNV"',
                                     '"VAF_distorted_reduced" : true',
                                     ].join(',\t').trim()
@@ -201,7 +201,7 @@ process {
                 ]
             ]
         }
-        withName: 'SUBSET_OMEGA_EXPANDED' {
+        withName: 'SUBSETOMEGA_EXPANDED' {
             ext.filters     = { ['"TYPE" : "SNV"',
                                     '"VAF_distorted_expanded" : true',
                                     ].join(',\t').trim()
@@ -239,7 +239,7 @@ process {
                 ]
             ]
         }
-        withName: 'SUBSET_OMEGA_OK' {
+        withName: 'SUBSETOMEGA_OK' {
             ext.filters     = { ['"TYPE" : "SNV"',
                                     '"VAF_distorted" : false',
                                     ].join(',\t').trim()

diff --git a/modules/local/signatures/sigprofiler/assignment/main.nf b/modules/local/signatures/sigprofiler/assignment/main.nf
@@ -29,8 +29,7 @@ process SIGPROFILERASSIGNMENT {
     //                    sample_reconstruction_plots=False, verbose=False)"
     """
     #python -c "from SigProfilerAssignment import Analyzer as Analyze; Analyze.cosmic_fit('${matrix}', 'output_${prefix}', input_type='matrix', context_type='96', signature_database='${reference_signatures}', genome_build='${assembly}', sample_reconstruction_plots= 'pdf', exclude_signature_subgroups= ${params.exclude_subgroups})"
-    python -c "from SigProfilerAssignment import Analyzer as Analyze; Analyze.cosmic_fit('${matrix}', 'output_${prefix}', input_type='matrix', context_type='96', genome_build='${assembly}', exclude_signature_subgroups=${params.exclude_subgroups})"
-    #python -c "from SigProfilerAssignment import Analyzer as Analyze; Analyze.cosmic_fit('${matrix}', 'output_${prefix}', input_type='matrix', context_type='96', genome_build='${assembly}', signature_database='${reference_signatures}', exclude_signature_subgroups=${params.exclude_subgroups})"
+    python -c "from SigProfilerAssignment import Analyzer as Analyze; Analyze.cosmic_fit('${matrix}', 'output_${prefix}', input_type='matrix', context_type='96', genome_build='${assembly}', signature_database='${reference_signatures}', exclude_signature_subgroups=${params.exclude_subgroups})"
 
     mv output_${prefix}/Assignment_Solution/Activities/Decomposed_MutationType_Probabilities.txt output_${prefix}/Assignment_Solution/Activities/Decomposed_MutationType_Probabilities.${prefix}.txt;
 

diff --git a/nextflow.config b/nextflow.config
@@ -58,6 +58,7 @@ params {
     all_duplex_counts          = false
     pileup_all_duplex          = false
     plot_depths                = false
+    store_depths                = false
 
 
     // depth and panel

diff --git a/subworkflows/local/indels/main.nf b/subworkflows/local/indels/main.nf
@@ -1,7 +1,6 @@
-include { TABIX_BGZIPTABIX_QUERY    as SUBSETDEPTHS             } from '../../../modules/nf-core/tabix/bgziptabixquery/main'
 include { TABIX_BGZIPTABIX_QUERY    as SUBSETMUTATIONS          } from '../../../modules/nf-core/tabix/bgziptabixquery/main'
 
-include { SUBSET_MAF                as SUBSET_INDELS           } from '../../../modules/local/subsetmaf/main'
+include { SUBSET_MAF                as SUBSETINDELS           } from '../../../modules/local/subsetmaf/main'
 
 include { INDELS_COMPARISON as INDELS } from '../../../modules/local/indels/main'
 
@@ -18,10 +17,10 @@ workflow INDELS_SELECTION {
     SUBSETMUTATIONS(mutations, bedfile)
     ch_versions = ch_versions.mix(SUBSETMUTATIONS.out.versions)
 
-    SUBSET_INDELS(SUBSETMUTATIONS.out.subset)
-    ch_versions = ch_versions.mix(SUBSET_INDELS.out.versions)
+    SUBSETINDELS(SUBSETMUTATIONS.out.subset)
+    ch_versions = ch_versions.mix(SUBSETINDELS.out.versions)
 
-    INDELS(SUBSET_INDELS.out.mutations)
+    INDELS(SUBSETINDELS.out.mutations)
     ch_versions = ch_versions.mix(INDELS.out.versions)
 
     emit: