From 36a539f7011dd7fdea4f7f7fae85d0201a8fde5b Mon Sep 17 00:00:00 2001 From: FerriolCalvet Date: Tue, 7 Jan 2025 23:33:02 +0100 Subject: [PATCH] split not_in_panel into two filters - new not_in_exons filter - new not_covered filter (not minimum depth across all samples) - the combination of both is the same as previous not_in_panel - tested in test samples --- bin/panels_computedna2protein.py | 3 ++- bin/plot_selectionfeatures.py | 18 +++++---------- conf/modules.config | 7 ++++++ conf/test.config | 4 ++-- modules/local/filterbed/main.nf | 5 ++--- .../local/mutationpreprocessing/main.nf | 7 +++++- workflows/deepcsa.nf | 22 ++++++++++--------- 7 files changed, 37 insertions(+), 29 deletions(-) diff --git a/bin/panels_computedna2protein.py b/bin/panels_computedna2protein.py index 91b37014..252693c5 100755 --- a/bin/panels_computedna2protein.py +++ b/bin/panels_computedna2protein.py @@ -30,7 +30,8 @@ def get_normal_maf(path_maf): print("Initial shape of MAF DataFrame:", maf_df.shape) maf_df_filtered = maf_df.loc[ - (~maf_df["FILTER.not_in_panel"]) & + (~maf_df["FILTER.not_in_exons"]) & + (~maf_df["FILTER.not_covered"]) & (maf_df["TYPE"].isin(["SNV", "INSERTION", "DELETION"])) ].reset_index(drop=True) diff --git a/bin/plot_selectionfeatures.py b/bin/plot_selectionfeatures.py index a73a3263..caf315c9 100755 --- a/bin/plot_selectionfeatures.py +++ b/bin/plot_selectionfeatures.py @@ -745,19 +745,13 @@ def preprocess_maf(maf_df): maf_df["CLEAN_SAMPLE_ID"] = maf_df["SAMPLE_ID"].apply(lambda x: "_".join(x.split("_")[1:3])) - # Reduce the number of samples and the number of mutations - samples_histo_findings = ['P19_0017_BDO_01', 'P19_0017_BTR_01', - 'P19_0032_BDO_01', 'P19_0032_BTR_01', - 'P19_0044_BDO_01', 'P19_0044_BTR_01'] maf_df_f = maf_df.loc[(maf_df["VAF"] <= 0.35) & - # (maf_df["FILTER.repetitive_variant"] == False) & # filter not well defined yet; may hide hotspots - (~maf_df["FILTER.not_in_panel"]) & - (~maf_df["FILTER.no_pileup_support"]) & # avoid variants w/o VAF 
recomputed - (~maf_df["FILTER.n_rich"]) & - (~maf_df["FILTER.low_mappability"]) & - (~maf_df["FILTER.other_sample_SNP"]) & - (~maf_df["SAMPLE_ID"].isin(samples_histo_findings)) - ].reset_index(drop = True) + (~maf_df["FILTER.not_in_exons"]) & + (~maf_df["FILTER.not_covered"]) & + (~maf_df["FILTER.no_pileup_support"]) & # avoid variants w/o VAF recomputed + (~maf_df["FILTER.n_rich"]) & + (~maf_df["FILTER.low_mappability"]) + ].reset_index(drop = True) # SNV snvs_maf = maf_df_f[(maf_df_f["TYPE"] == 'SNV') & diff --git a/conf/modules.config b/conf/modules.config index 402253a4..cd16745b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -168,6 +168,13 @@ process { ] } + withName: 'FILTEREXONS' { + ext.filtername = 'not_in_exons' + } + + withName: 'FILTERPANEL' { + ext.filtername = 'not_covered' + } withName: 'SUBSETDEPTHS' { ext.prefix = { "${meta.id}.subset_depths" } diff --git a/conf/test.config b/conf/test.config index 60c4c73d..0cb7eb88 100644 --- a/conf/test.config +++ b/conf/test.config @@ -50,8 +50,8 @@ params { mutated_epithelium = false mutated_epithelium_vaf = false - expected_mutation_rate = true - dnds = true + expected_mutation_rate = false + dnds = false indels = false diff --git a/modules/local/filterbed/main.nf b/modules/local/filterbed/main.nf index c39d7890..5c90fbea 100644 --- a/modules/local/filterbed/main.nf +++ b/modules/local/filterbed/main.nf @@ -26,10 +26,9 @@ process FILTERBED { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: "" - def prefix = task.ext.prefix ?: "${meta.id}" + def filtername = task.ext.filtername ?: "not_covered" """ - filterbed.py ${maf} ${bedfile} not_in_panel; + filterbed.py ${maf} ${bedfile} ${filtername}; cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/subworkflows/local/mutationpreprocessing/main.nf b/subworkflows/local/mutationpreprocessing/main.nf index 9800482a..af8985f9 100644 --- a/subworkflows/local/mutationpreprocessing/main.nf +++ 
b/subworkflows/local/mutationpreprocessing/main.nf @@ -5,6 +5,7 @@ include { VCF_ANNOTATE_ENSEMBLVEP as VCFANNOTATE } from '../../nf-core/vc include { SUMMARIZE_ANNOTATION as SUMANNOTATION } from '../../../modules/local/summarize_annotation/main' include { VCF2MAF as VCF2MAF } from '../../../modules/local/vcf2maf/main' include { FILTERBED as FILTERPANEL } from '../../../modules/local/filterbed/main' +include { FILTERBED as FILTEREXONS } from '../../../modules/local/filterbed/main' include { MERGE_BATCH as MERGEBATCH } from '../../../modules/local/mergemafs/main' include { FILTER_BATCH as FILTERBATCH } from '../../../modules/local/filtermaf/main' include { WRITE_MAFS as WRITEMAF } from '../../../modules/local/writemaf/main' @@ -20,6 +21,7 @@ workflow MUTATION_PREPROCESSING { vep_cache vep_extra_files bedfile + bedfile_exons groups sequence_information_df @@ -46,7 +48,10 @@ workflow MUTATION_PREPROCESSING { VCF2MAF(vcfs, SUMANNOTATION.out.tab) ch_versions = ch_versions.mix(VCF2MAF.out.versions.first()) - FILTERPANEL(VCF2MAF.out.maf, bedfile) + FILTEREXONS(VCF2MAF.out.maf, bedfile_exons) + ch_versions = ch_versions.mix(FILTEREXONS.out.versions.first()) + + FILTERPANEL(FILTEREXONS.out.maf, bedfile) ch_versions = ch_versions.mix(FILTERPANEL.out.versions.first()) // Join all samples' MAFs and put them in a channel to be merged diff --git a/workflows/deepcsa.nf b/workflows/deepcsa.nf index 9b1be68e..64020cfc 100644 --- a/workflows/deepcsa.nf +++ b/workflows/deepcsa.nf @@ -169,17 +169,17 @@ workflow DEEPCSA{ // // Separate input BAMs and VCFs // - INPUT_CHECK.out.mutations. - map{ it -> [it[0], it[1]]}. - set{ meta_vcfs_alone } + INPUT_CHECK.out.mutations + .map{ it -> [it[0], it[1]]} + .set{ meta_vcfs_alone } - INPUT_CHECK.out.mutations. - map{ it -> [it[0], it[2]]}. - set{ meta_bams_alone } + INPUT_CHECK.out.mutations + .map{ it -> [it[0], it[2]]} + .set{ meta_bams_alone } - INPUT_CHECK.out.mutations. - map{ it -> [it[0], it[3], it[4]]}. 
- set{ meta_pileupbamindex_alone } + INPUT_CHECK.out.mutations + .map{ it -> [it[0], it[3], it[4]]} + .set{ meta_pileupbamindex_alone } @@ -221,7 +221,9 @@ workflow DEEPCSA{ } // Mutation preprocessing - MUT_PREPROCESSING(meta_vcfs_alone, vep_cache, vep_extra_files, CREATEPANELS.out.exons_consensus_bed, + MUT_PREPROCESSING(meta_vcfs_alone, vep_cache, vep_extra_files, + CREATEPANELS.out.all_consensus_bed, + CREATEPANELS.out.exons_bed, TABLE2GROUP.out.json_allgroups, seqinfo_df) ch_versions = ch_versions.mix(MUT_PREPROCESSING.out.versions) positive_selection_results = MUT_PREPROCESSING.out.somatic_mafs