From 36a539f7011dd7fdea4f7f7fae85d0201a8fde5b Mon Sep 17 00:00:00 2001
From: FerriolCalvet <ferriolcalvet@gmail.com>
Date: Tue, 7 Jan 2025 23:33:02 +0100
Subject: [PATCH] split not_in_panel into two filters

- add a new not_in_exons filter
- add a new not_covered filter (minimum depth not reached across all samples)
- the combination of both filters is equivalent to the previous not_in_panel filter
- tested on the test samples
---
 bin/panels_computedna2protein.py              |  3 ++-
 bin/plot_selectionfeatures.py                 | 18 +++++----------
 conf/modules.config                           |  7 ++++++
 conf/test.config                              |  4 ++--
 modules/local/filterbed/main.nf               |  5 ++---
 .../local/mutationpreprocessing/main.nf       |  7 +++++-
 workflows/deepcsa.nf                          | 22 ++++++++++---------
 7 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/bin/panels_computedna2protein.py b/bin/panels_computedna2protein.py
index 91b37014..252693c5 100755
--- a/bin/panels_computedna2protein.py
+++ b/bin/panels_computedna2protein.py
@@ -30,7 +30,8 @@ def get_normal_maf(path_maf):
     print("Initial shape of MAF DataFrame:", maf_df.shape)
 
     maf_df_filtered = maf_df.loc[
-        (~maf_df["FILTER.not_in_panel"]) &
+        (~maf_df["FILTER.not_in_exons"]) &
+        (~maf_df["FILTER.not_covered"]) &
         (maf_df["TYPE"].isin(["SNV", "INSERTION", "DELETION"]))
     ].reset_index(drop=True)
 
diff --git a/bin/plot_selectionfeatures.py b/bin/plot_selectionfeatures.py
index a73a3263..caf315c9 100755
--- a/bin/plot_selectionfeatures.py
+++ b/bin/plot_selectionfeatures.py
@@ -745,19 +745,13 @@ def preprocess_maf(maf_df):
 
     maf_df["CLEAN_SAMPLE_ID"] = maf_df["SAMPLE_ID"].apply(lambda x: "_".join(x.split("_")[1:3]))
 
-    # Reduce the number of samples and the number of mutations
-    samples_histo_findings = ['P19_0017_BDO_01', 'P19_0017_BTR_01',
-                          'P19_0032_BDO_01', 'P19_0032_BTR_01',
-                          'P19_0044_BDO_01', 'P19_0044_BTR_01']
     maf_df_f = maf_df.loc[(maf_df["VAF"] <= 0.35) &
-                          # (maf_df["FILTER.repetitive_variant"] == False) & # filter not well defined yet; may hide hotspots
-                          (~maf_df["FILTER.not_in_panel"]) &
-                          (~maf_df["FILTER.no_pileup_support"]) & # avoid variants w/o VAF recomputed
-                          (~maf_df["FILTER.n_rich"]) &
-                          (~maf_df["FILTER.low_mappability"]) &
-                          (~maf_df["FILTER.other_sample_SNP"]) &
-                          (~maf_df["SAMPLE_ID"].isin(samples_histo_findings))
-                         ].reset_index(drop = True)
+                            (~maf_df["FILTER.not_in_exons"]) &
+                            (~maf_df["FILTER.not_covered"]) &
+                            (~maf_df["FILTER.no_pileup_support"]) & # avoid variants w/o VAF recomputed
+                            (~maf_df["FILTER.n_rich"]) &
+                            (~maf_df["FILTER.low_mappability"])
+                        ].reset_index(drop = True)
 
     # SNV
     snvs_maf = maf_df_f[(maf_df_f["TYPE"] == 'SNV') &
diff --git a/conf/modules.config b/conf/modules.config
index 402253a4..cd16745b 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -168,6 +168,13 @@ process {
         ]
     }
 
+    withName: 'FILTEREXONS' {
+        ext.filtername      = 'not_in_exons'
+    }
+
+    withName: 'FILTERPANEL' {
+        ext.filtername      = 'not_covered'
+    }
 
     withName: 'SUBSETDEPTHS' {
         ext.prefix    = { "${meta.id}.subset_depths" }
diff --git a/conf/test.config b/conf/test.config
index 60c4c73d..0cb7eb88 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -50,8 +50,8 @@ params {
 
     mutated_epithelium         = false
     mutated_epithelium_vaf     = false
-    expected_mutation_rate      = true
-    dnds                        = true
+    expected_mutation_rate      = false
+    dnds                        = false
 
     indels                     = false
 
diff --git a/modules/local/filterbed/main.nf b/modules/local/filterbed/main.nf
index c39d7890..5c90fbea 100644
--- a/modules/local/filterbed/main.nf
+++ b/modules/local/filterbed/main.nf
@@ -26,10 +26,9 @@ process FILTERBED {
     task.ext.when == null || task.ext.when
 
     script:
-    def args = task.ext.args ?: ""
-    def prefix = task.ext.prefix ?: "${meta.id}"
+    def filtername = task.ext.filtername ?: "covered"
     """
-    filterbed.py ${maf} ${bedfile} not_in_panel;
+    filterbed.py ${maf} ${bedfile} ${filtername};
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/subworkflows/local/mutationpreprocessing/main.nf b/subworkflows/local/mutationpreprocessing/main.nf
index 9800482a..af8985f9 100644
--- a/subworkflows/local/mutationpreprocessing/main.nf
+++ b/subworkflows/local/mutationpreprocessing/main.nf
@@ -5,6 +5,7 @@ include { VCF_ANNOTATE_ENSEMBLVEP   as VCFANNOTATE      } from '../../nf-core/vc
 include { SUMMARIZE_ANNOTATION      as SUMANNOTATION    } from '../../../modules/local/summarize_annotation/main'
 include { VCF2MAF                   as VCF2MAF          } from '../../../modules/local/vcf2maf/main'
 include { FILTERBED                 as FILTERPANEL      } from '../../../modules/local/filterbed/main'
+include { FILTERBED                 as FILTEREXONS      } from '../../../modules/local/filterbed/main'
 include { MERGE_BATCH               as MERGEBATCH       } from '../../../modules/local/mergemafs/main'
 include { FILTER_BATCH              as FILTERBATCH      } from '../../../modules/local/filtermaf/main'
 include { WRITE_MAFS                as WRITEMAF         } from '../../../modules/local/writemaf/main'
@@ -20,6 +21,7 @@ workflow MUTATION_PREPROCESSING {
     vep_cache
     vep_extra_files
     bedfile
+    bedfile_exons
     groups
     sequence_information_df
 
@@ -46,7 +48,10 @@ workflow MUTATION_PREPROCESSING {
     VCF2MAF(vcfs, SUMANNOTATION.out.tab)
     ch_versions = ch_versions.mix(VCF2MAF.out.versions.first())
 
-    FILTERPANEL(VCF2MAF.out.maf, bedfile)
+    FILTEREXONS(VCF2MAF.out.maf, bedfile_exons)
+    ch_versions = ch_versions.mix(FILTEREXONS.out.versions.first())
+
+    FILTERPANEL(FILTEREXONS.out.maf, bedfile)
     ch_versions = ch_versions.mix(FILTERPANEL.out.versions.first())
 
     // Join all samples' MAFs and put them in a channel to be merged
diff --git a/workflows/deepcsa.nf b/workflows/deepcsa.nf
index 9b1be68e..64020cfc 100644
--- a/workflows/deepcsa.nf
+++ b/workflows/deepcsa.nf
@@ -169,17 +169,17 @@ workflow DEEPCSA{
     //
     // Separate input BAMs and VCFs
     //
-    INPUT_CHECK.out.mutations.
-    map{ it -> [it[0], it[1]]}.
-    set{ meta_vcfs_alone }
+    INPUT_CHECK.out.mutations
+    .map{ it -> [it[0], it[1]]}
+    .set{ meta_vcfs_alone }
 
-    INPUT_CHECK.out.mutations.
-    map{ it -> [it[0], it[2]]}.
-    set{ meta_bams_alone }
+    INPUT_CHECK.out.mutations
+    .map{ it -> [it[0], it[2]]}
+    .set{ meta_bams_alone }
 
-    INPUT_CHECK.out.mutations.
-    map{ it -> [it[0], it[3], it[4]]}.
-    set{ meta_pileupbamindex_alone }
+    INPUT_CHECK.out.mutations
+    .map{ it -> [it[0], it[3], it[4]]}
+    .set{ meta_pileupbamindex_alone }
 
 
 
@@ -221,7 +221,9 @@ workflow DEEPCSA{
     }
 
     // Mutation preprocessing
-    MUT_PREPROCESSING(meta_vcfs_alone, vep_cache, vep_extra_files, CREATEPANELS.out.exons_consensus_bed,
+    MUT_PREPROCESSING(meta_vcfs_alone, vep_cache, vep_extra_files,
+                        CREATEPANELS.out.all_consensus_bed,
+                        CREATEPANELS.out.exons_bed,
                         TABLE2GROUP.out.json_allgroups, seqinfo_df)
     ch_versions = ch_versions.mix(MUT_PREPROCESSING.out.versions)
     positive_selection_results = MUT_PREPROCESSING.out.somatic_mafs