Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions bin/subset_maf.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from read_utils import custom_na_values


def subset_mutation_dataframe(sample_name, mutations_file, mutations_file_out, json_filters, requested_fields):
def subset_mutation_dataframe(sample_name, mutations_file, mutations_file_out, json_filters, requested_fields, minimum_mutations):
"""
INFO
"""
Expand Down Expand Up @@ -43,6 +43,10 @@ def subset_mutation_dataframe(sample_name, mutations_file, mutations_file_out, j
with open(requested_fields, 'r') as file:
output_format = json.load(file)

if annotated_maf.shape[0] < minimum_mutations:
print(f"{mutations_file_out} file will not be written since it has not reached the minimum number of mutations required for per sample analysis")
return False

if output_format:
header_ = output_format.get("header", False)
columns = output_format.get("columns", annotated_maf.columns.values)
Expand Down Expand Up @@ -73,11 +77,12 @@ def subset_mutation_dataframe(sample_name, mutations_file, mutations_file_out, j
@click.option('--out_maf', type=click.Path(), help='Output MAF file')
@click.option('--json_filters', type=click.Path(exists=True), help='Input mutation filtering criteria file')
@click.option('--req_fields', type=click.Path(exists=True), help='Column names to output')
@click.option('--min_mutations', type=int, default=-1, help='Minimum number of mutations for sample to be outputted.')
# @click.option('--plot', is_flag=True, help='Generate plot and save as PDF')

def main(sample_name, mut_file, out_maf, json_filters, req_fields): # , plot):
def main(sample_name, mut_file, out_maf, json_filters, req_fields, min_mutations): # , plot):
click.echo(f"Subsetting MAF file...")
subset_mutation_dataframe(sample_name, mut_file, out_maf, json_filters, req_fields)
subset_mutation_dataframe(sample_name, mut_file, out_maf, json_filters, req_fields, min_mutations)

if __name__ == '__main__':
main()
Expand Down
5 changes: 3 additions & 2 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -234,8 +234,9 @@ process {
}

withName: 'SOMATICMUTATIONS' {
ext.output_prefix = '.somatic'
ext.output_fmt = { ['"header": true'].join(',\t').trim() }
ext.output_prefix = '.somatic'
ext.output_fmt = { ['"header": true'].join(',\t').trim() }
ext.minimum_mutations = params.min_muts_per_sample

publishDir = [
[
Expand Down
6 changes: 4 additions & 2 deletions modules/local/subsetmaf/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ process SUBSET_MAF {
tuple val(meta), path(mut_files)

output:
tuple val(meta), path("*.mutations.tsv") , emit: mutations
path "versions.yml" , topic: versions
tuple val(meta), path("*.mutations.tsv") , optional : true , emit: mutations
path "versions.yml" , topic: versions


script:
Expand All @@ -20,6 +20,7 @@ process SUBSET_MAF {
def output_prefix = task.ext.output_prefix ?: ""
def filters = task.ext.filters ?: ""
def output_format = task.ext.output_fmt ?: ""
def min_muts = task.ext.minimum_mutations ? "--min_mutations ${task.ext.minimum_mutations}" : ""
"""
cat > mutations_subset.conf << EOF
{
Expand All @@ -39,6 +40,7 @@ process SUBSET_MAF {
--out_maf ${prefix}${output_prefix}.mutations.tsv \\
--json_filters mutations_subset.conf \\
--req_fields output_formats.conf \\
${min_muts} \\
${args}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
3 changes: 2 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ params {

regressions = false

filter_criteria = ["notcontains NM20", "notcontains p8", "notcontains n_rich", "notcontains cohort_n_rich_threshold", "notcontains cohort_n_rich", "notcontains no_pileup_support", "notcontains low_mappability", "notcontains not_covered" ]
filter_criteria = ["notcontains NM20", "notcontains n_rich", "notcontains cohort_n_rich_threshold", "notcontains cohort_n_rich", "notcontains no_pileup_support", "notcontains low_mappability", "notcontains not_covered" ]
filter_criteria_somatic = []
no_filter = false

Expand All @@ -94,6 +94,7 @@ params {
sample_panel_min_depth = 40
consensus_panel_min_depth = 500
consensus_compliance = 0.8
min_muts_per_sample = 0
selected_genes = ''
panel_with_canonical = true

Expand Down
6 changes: 6 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,12 @@
"fa_icon": "fas fa-book",
"help_text": ""
},
"min_muts_per_sample": {
"type": "number",
"description": "Minimum number of mutations a sample must have to be included in the per-sample analysis",
"fa_icon": "fas fa-book",
"help_text": ""
},
"selected_genes": {
"type": "string",
"description": "Comma-separated list of genes to focus the analysis",
Expand Down