diff --git a/bin/subset_maf.py b/bin/subset_maf.py index 98ae6eb2..c3b2d4ae 100755 --- a/bin/subset_maf.py +++ b/bin/subset_maf.py @@ -9,7 +9,7 @@ from read_utils import custom_na_values -def subset_mutation_dataframe(sample_name, mutations_file, mutations_file_out, json_filters, requested_fields): +def subset_mutation_dataframe(sample_name, mutations_file, mutations_file_out, json_filters, requested_fields, minimum_mutations): """ INFO """ @@ -43,6 +43,10 @@ def subset_mutation_dataframe(sample_name, mutations_file, mutations_file_out, j with open(requested_fields, 'r') as file: output_format = json.load(file) + if annotated_maf.shape[0] < minimum_mutations: + print(f"{mutations_file_out} file will not be written since it has not reached the minimum number of mutations required for per sample analysis") + return False + if output_format: header_ = output_format.get("header", False) columns = output_format.get("columns", annotated_maf.columns.values) @@ -73,11 +77,12 @@ def subset_mutation_dataframe(sample_name, mutations_file, mutations_file_out, j @click.option('--out_maf', type=click.Path(), help='Output MAF file') @click.option('--json_filters', type=click.Path(exists=True), help='Input mutation filtering criteria file') @click.option('--req_fields', type=click.Path(exists=True), help='Column names to output') +@click.option('--min_mutations', type=int, default=-1, help='Minimum number of mutations for sample to be outputted.') # @click.option('--plot', is_flag=True, help='Generate plot and save as PDF') -def main(sample_name, mut_file, out_maf, json_filters, req_fields): # , plot): +def main(sample_name, mut_file, out_maf, json_filters, req_fields, min_mutations): # , plot): click.echo(f"Subsetting MAF file...") - subset_mutation_dataframe(sample_name, mut_file, out_maf, json_filters, req_fields) + subset_mutation_dataframe(sample_name, mut_file, out_maf, json_filters, req_fields, min_mutations) if __name__ == '__main__': main() diff --git 
a/conf/modules.config b/conf/modules.config index ea420932..ffe66f98 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -234,8 +234,9 @@ process { } withName: 'SOMATICMUTATIONS' { - ext.output_prefix = '.somatic' - ext.output_fmt = { ['"header": true'].join(',\t').trim() } + ext.output_prefix = '.somatic' + ext.output_fmt = { ['"header": true'].join(',\t').trim() } + ext.minimum_mutations = params.min_muts_per_sample publishDir = [ [ diff --git a/modules/local/subsetmaf/main.nf b/modules/local/subsetmaf/main.nf index d903669f..fce33d77 100644 --- a/modules/local/subsetmaf/main.nf +++ b/modules/local/subsetmaf/main.nf @@ -9,8 +9,8 @@ process SUBSET_MAF { tuple val(meta), path(mut_files) output: - tuple val(meta), path("*.mutations.tsv") , emit: mutations - path "versions.yml" , topic: versions + tuple val(meta), path("*.mutations.tsv") , optional : true , emit: mutations + path "versions.yml" , topic: versions script: @@ -20,6 +20,7 @@ process SUBSET_MAF { def output_prefix = task.ext.output_prefix ?: "" def filters = task.ext.filters ?: "" def output_format = task.ext.output_fmt ?: "" + def min_muts = task.ext.minimum_mutations ? 
"--min_mutations ${task.ext.minimum_mutations}" : "" """ cat > mutations_subset.conf << EOF { @@ -39,6 +40,7 @@ process SUBSET_MAF { --out_maf ${prefix}${output_prefix}.mutations.tsv \\ --json_filters mutations_subset.conf \\ --req_fields output_formats.conf \\ + ${min_muts} \\ ${args} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/nextflow.config b/nextflow.config index b097fc55..df0353cd 100644 --- a/nextflow.config +++ b/nextflow.config @@ -85,7 +85,7 @@ params { regressions = false - filter_criteria = ["notcontains NM20", "notcontains p8", "notcontains n_rich", "notcontains cohort_n_rich_threshold", "notcontains cohort_n_rich", "notcontains no_pileup_support", "notcontains low_mappability", "notcontains not_covered" ] + filter_criteria = ["notcontains NM20", "notcontains n_rich", "notcontains cohort_n_rich_threshold", "notcontains cohort_n_rich", "notcontains no_pileup_support", "notcontains low_mappability", "notcontains not_covered" ] filter_criteria_somatic = [] no_filter = false @@ -94,6 +94,7 @@ params { sample_panel_min_depth = 40 consensus_panel_min_depth = 500 consensus_compliance = 0.8 + min_muts_per_sample = 0 selected_genes = '' panel_with_canonical = true diff --git a/nextflow_schema.json b/nextflow_schema.json index a9222b4b..83eb1544 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -560,6 +560,12 @@ "fa_icon": "fas fa-book", "help_text": "" }, + "min_muts_per_sample": { + "type": "number", + "description": "Minimum number of mutations a sample must have for a per-sample analysis to be run on it", + "fa_icon": "fas fa-book", + "help_text": "" + }, "selected_genes": { "type": "string", "description": "Comma-separeted list of genes to focus the analysis",