From 9ff2fd76678fdef0c8348ba28a54bd3390fd6890 Mon Sep 17 00:00:00 2001 From: FerriolCalvet Date: Sun, 24 Aug 2025 20:48:23 +0200 Subject: [PATCH 1/2] samples with no mutations are not analyzed individually - same happens with groups that have fewer than the chosen number of muts - set minimum to 0 - NOT tested --- bin/subset_maf.py | 11 ++++++++--- conf/modules.config | 5 +++-- modules/local/subsetmaf/main.nf | 6 ++++-- nextflow.config | 3 ++- nextflow_schema.json | 6 ++++++ 5 files changed, 23 insertions(+), 8 deletions(-) diff --git a/bin/subset_maf.py b/bin/subset_maf.py index 98ae6eb2..c3b2d4ae 100755 --- a/bin/subset_maf.py +++ b/bin/subset_maf.py @@ -9,7 +9,7 @@ from read_utils import custom_na_values -def subset_mutation_dataframe(sample_name, mutations_file, mutations_file_out, json_filters, requested_fields): +def subset_mutation_dataframe(sample_name, mutations_file, mutations_file_out, json_filters, requested_fields, minimum_mutations): """ INFO """ @@ -43,6 +43,10 @@ def subset_mutation_dataframe(sample_name, mutations_file, mutations_file_out, j with open(requested_fields, 'r') as file: output_format = json.load(file) + if annotated_maf.shape[0] < minimum_mutations: + print(f"{mutations_file_out} file will not be written since it has not reached the minimum number of mutations required for per sample analysis") + return False + if output_format: header_ = output_format.get("header", False) columns = output_format.get("columns", annotated_maf.columns.values) @@ -73,11 +77,12 @@ def subset_mutation_dataframe(sample_name, mutations_file, mutations_file_out, j @click.option('--out_maf', type=click.Path(), help='Output MAF file') @click.option('--json_filters', type=click.Path(exists=True), help='Input mutation filtering criteria file') @click.option('--req_fields', type=click.Path(exists=True), help='Column names to output') +@click.option('--min_mutations', type=int, default=-1, help='Minimum number of mutations for sample to be outputted.') # 
@click.option('--plot', is_flag=True, help='Generate plot and save as PDF') -def main(sample_name, mut_file, out_maf, json_filters, req_fields): # , plot): +def main(sample_name, mut_file, out_maf, json_filters, req_fields, min_mutations): # , plot): click.echo(f"Subsetting MAF file...") - subset_mutation_dataframe(sample_name, mut_file, out_maf, json_filters, req_fields) + subset_mutation_dataframe(sample_name, mut_file, out_maf, json_filters, req_fields, min_mutations) if __name__ == '__main__': main() diff --git a/conf/modules.config b/conf/modules.config index ea420932..a3ef5ebb 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -234,8 +234,9 @@ process { } withName: 'SOMATICMUTATIONS' { - ext.output_prefix = '.somatic' - ext.output_fmt = { ['"header": true'].join(',\t').trim() } + ext.output_prefix = '.somatic' + ext.output_fmt = { ['"header": true'].join(',\t').trim() } + ext.minimum_mutations = params.min_muts_per_samples publishDir = [ [ diff --git a/modules/local/subsetmaf/main.nf b/modules/local/subsetmaf/main.nf index d903669f..fce33d77 100644 --- a/modules/local/subsetmaf/main.nf +++ b/modules/local/subsetmaf/main.nf @@ -9,8 +9,8 @@ process SUBSET_MAF { tuple val(meta), path(mut_files) output: - tuple val(meta), path("*.mutations.tsv") , emit: mutations - path "versions.yml" , topic: versions + tuple val(meta), path("*.mutations.tsv") , optional : true , emit: mutations + path "versions.yml" , topic: versions script: @@ -20,6 +20,7 @@ process SUBSET_MAF { def output_prefix = task.ext.output_prefix ?: "" def filters = task.ext.filters ?: "" def output_format = task.ext.output_fmt ?: "" + def min_muts = task.ext.minimum_mutations ? 
"--min_mutations ${task.ext.minimum_mutations}" : "" """ cat > mutations_subset.conf << EOF { @@ -39,6 +40,7 @@ process SUBSET_MAF { --out_maf ${prefix}${output_prefix}.mutations.tsv \\ --json_filters mutations_subset.conf \\ --req_fields output_formats.conf \\ + ${min_muts} \\ ${args} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/nextflow.config b/nextflow.config index b097fc55..89997c6c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -85,7 +85,7 @@ params { regressions = false - filter_criteria = ["notcontains NM20", "notcontains p8", "notcontains n_rich", "notcontains cohort_n_rich_threshold", "notcontains cohort_n_rich", "notcontains no_pileup_support", "notcontains low_mappability", "notcontains not_covered" ] + filter_criteria = ["notcontains NM20", "notcontains n_rich", "notcontains cohort_n_rich_threshold", "notcontains cohort_n_rich", "notcontains no_pileup_support", "notcontains low_mappability", "notcontains not_covered" ] filter_criteria_somatic = [] no_filter = false @@ -94,6 +94,7 @@ params { sample_panel_min_depth = 40 consensus_panel_min_depth = 500 consensus_compliance = 0.8 + min_muts_per_samples = 0 selected_genes = '' panel_with_canonical = true diff --git a/nextflow_schema.json b/nextflow_schema.json index a9222b4b..83eb1544 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -560,6 +560,12 @@ "fa_icon": "fas fa-book", "help_text": "" }, + "min_muts_per_sample": { + "type": "number", + "description": "Threshold on minimum number of mutations that a sample needs to have for doing a per-sample analysis of that specific case", + "fa_icon": "fas fa-book", + "help_text": "" + }, "selected_genes": { "type": "string", "description": "Comma-separeted list of genes to focus the analysis", From d62a9c55ed6d497eb23b1b72c1ce9af5e32cbbc0 Mon Sep 17 00:00:00 2001 From: FerriolCalvet Date: Tue, 26 Aug 2025 14:08:07 +0200 Subject: [PATCH 2/2] fix typo in parameter name --- conf/modules.config | 2 +- nextflow.config | 2 +- 
2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index a3ef5ebb..ffe66f98 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -236,7 +236,7 @@ process { withName: 'SOMATICMUTATIONS' { ext.output_prefix = '.somatic' ext.output_fmt = { ['"header": true'].join(',\t').trim() } - ext.minimum_mutations = params.min_muts_per_samples + ext.minimum_mutations = params.min_muts_per_sample publishDir = [ [ diff --git a/nextflow.config b/nextflow.config index 89997c6c..df0353cd 100644 --- a/nextflow.config +++ b/nextflow.config @@ -94,7 +94,7 @@ params { sample_panel_min_depth = 40 consensus_panel_min_depth = 500 consensus_compliance = 0.8 - min_muts_per_samples = 0 + min_muts_per_sample = 0 selected_genes = '' panel_with_canonical = true