From c6db504f3e34e1d6d0bc06ef67e40f437657a676 Mon Sep 17 00:00:00 2001 From: FerriolCalvet Date: Sun, 24 Aug 2025 23:40:49 +0200 Subject: [PATCH 1/2] first pass update of sigprofiler DBS and ID signatures. --- .../useful_scripts/deepcsa_maf2samplevcfs.py | 0 bin/deepcsa_maf2samplevcfs.py | 221 ++++++++++++++++++ conf/modules.config | 28 ++- modules/local/maf2vcf.nf | 43 ++++ modules/local/sigprofiler_matrix_generator.nf | 43 ++++ subworkflows/local/signatures/main.nf | 28 ++- workflows/deepcsa.nf | 10 +- 7 files changed, 362 insertions(+), 11 deletions(-) mode change 100644 => 100755 assets/useful_scripts/deepcsa_maf2samplevcfs.py create mode 100755 bin/deepcsa_maf2samplevcfs.py create mode 100644 modules/local/maf2vcf.nf create mode 100644 modules/local/sigprofiler_matrix_generator.nf diff --git a/assets/useful_scripts/deepcsa_maf2samplevcfs.py b/assets/useful_scripts/deepcsa_maf2samplevcfs.py old mode 100644 new mode 100755 diff --git a/bin/deepcsa_maf2samplevcfs.py b/bin/deepcsa_maf2samplevcfs.py new file mode 100755 index 00000000..7c5637c4 --- /dev/null +++ b/bin/deepcsa_maf2samplevcfs.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python + +####### +# This script converts a mutations file (TSV format) to one or multiple VCF-formatted files. +####### + +####### +# Usage: +####### +## If your sample names are NOT in a column called SAMPLE_ID, +## you can use the --sample-name-column option to specify it. 
+ +# if the maf is from deepCSA, use this one +# usage: python deepcsa_maf2samplevcfs.py --mutations-file all_samples.somatic.mutations.tsv --output-dir ./test/ --maf-from-deepcsa + +# if the maf file is not from deepCSA, use this one +# usage: python deepcsa_maf2samplevcfs.py --mutations-file all_samples.somatic.mutations.tsv --output-dir ./test/ + + + +####### +# Mandatory columns in input mutations: +####### + +# if the maf is from deepCSA, it must contain the following columns, as they were originally generated +# ['CHROM', 'POS', 'REF', 'ALT', 'FILTER', 'INFO', 'FORMAT', 'SAMPLE'] + +# if the maf file is not from deepCSA, then it MUST contain the following columns +# ['CHROM', 'POS', 'REF', 'ALT', 'DEPTH', 'ALT_DEPTH'] +# where: +# DEPTH indicates the total number of duplex reads sequenced at the position where the mutation occurs +# ALT_DEPTH indicates the total number of duplex reads supporting the variant at the same position + + + + +import click +import pandas as pd + + +def build_vcf_like_dataframe(mutations_dataframe, samplee): + """ + Build a VCF-like dataframe from the mutations dataframe. + input needs to have: + ['CHROM', 'POS', 'REF', 'ALT', 'DEPTH', 'ALT_DEPTH'] + output needs to have: + ['CHROM', 'POS', 'REF', 'ALT', 'FILTER', 'INFO', 'FORMAT', 'SAMPLE'] + """ + for col in ['CHROM', 'POS', 'REF', 'ALT', 'DEPTH', 'ALT_DEPTH']: + if col not in mutations_dataframe.columns: + raise ValueError(f"Column {col} is missing from the mutations dataframe.") + + # fill FILTER and INFO columns with default values + if "FILTER" not in mutations_dataframe.columns: + print("WARNING: FILTER column is missing from the mutations dataframe. Setting it to 'PASS' for all mutations") + mutations_dataframe["FILTER"] = "PASS" + if "INFO" not in mutations_dataframe.columns: + print(f"WARNING: INFO column is missing from the mutations dataframe. 
Setting it to 'SAMPLE={samplee};'") + mutations_dataframe["INFO"] = f"SAMPLE={samplee};" + + # Create a new dataframe with the required columns + vcf_like_df = mutations_dataframe[['CHROM', 'POS', 'REF', 'ALT', 'FILTER', 'INFO', 'DEPTH', 'ALT_DEPTH']].copy() + vcf_like_df["FORMAT"] = "GT:DP:VD:AD:AF:RD:ALD:CDP:CAD:NDP:CDPAM:CADAM:NDPAM" + vcf_like_df["SAMPLE"] = vcf_like_df[['DEPTH', 'ALT_DEPTH']].apply( + lambda x: "{GT}:{DP}:{VD}:{AD}:{AF}:{RD}:{ALD}:{CDP}:{CAD}:{NDP}:{CDPAM}:{CADAM}:{NDPAM}".format( + GT="0/1", + DP=x['DEPTH'], + VD=x['ALT_DEPTH'], + AD=f"{x['DEPTH'] - x['ALT_DEPTH']},{x['ALT_DEPTH']}", + AF=round(x['ALT_DEPTH'] / x['DEPTH'] , 5), + RD=f"{(x['DEPTH'] - x['ALT_DEPTH'])//2},{(x['DEPTH'] - x['ALT_DEPTH'])//2 if (x['DEPTH'] - x['ALT_DEPTH']) % 2 == 0 else (x['DEPTH'] - x['ALT_DEPTH'])//2 + 1}", + ALD=f"{x['ALT_DEPTH']//2},{x['ALT_DEPTH']//2 if x['ALT_DEPTH'] % 2 == 0 else x['ALT_DEPTH']//2 + 1}", + CDP=x['DEPTH'], + CAD=f"{x['DEPTH'] - x['ALT_DEPTH']},{x['ALT_DEPTH']}", + NDP="0", + CDPAM=x['DEPTH'], + CADAM=f"{x['DEPTH'] - x['ALT_DEPTH']},{x['ALT_DEPTH']}", + NDPAM="0" + ), + axis=1 + ) + return vcf_like_df + +filters_to_remove = ["not_in_exons", "not_covered"] +def remove_deepcsa_filters(old_filt, filters_to_removee): + """ + Remove deepCSA filters from the FILTER field of the VCF file. 
+ """ + filter_result = sorted([ x for x in old_filt.split(";") if x not in filters_to_removee ]) + return ";".join(filter_result) if filter_result != [] else "PASS" + + +vardict_vcf_header = '''##fileformat=VCFv4.2 +##source=artificially_generated_vcf_VarDict_v1.8.2-like +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER==14)"> +##FILTER= +##FILTER== 20, thus likely false positive"> +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= 1 indicates MSI"> +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +''' + + +@click.command() +@click.option('--mutations-file', required=True, type=click.Path(exists=True), help="Path to the mutations file (TSV format).") +@click.option('--output-dir', required=True, type=click.Path(), help="Directory to save the output VCF files.") +@click.option('--maf-from-deepcsa', is_flag=True, default=False, help="Flag to indicate if the MAF file is from deepCSA.") +@click.option('--sample-name-column', default="SAMPLE_ID", type=str, help="Column name for sample names in the mutations file.") +def main(mutations_file, output_dir, maf_from_deepcsa, sample_name_column): + """ + Convert a mutations file to one or multiple VCF-formatted files. 
+ """ + + mutations = pd.read_table(mutations_file) + mutations[sample_name_column] = mutations[sample_name_column].astype(str) + for sample in mutations[sample_name_column].unique(): + + # filter the dataframe for the current sample + sample_mutations = mutations[mutations[sample_name_column] == sample] + + if maf_from_deepcsa: + vcf_info_sample = sample_mutations[['CHROM', 'POS', 'REF', 'ALT', 'FILTER', 'INFO', 'FORMAT', 'SAMPLE']].copy() + else: + vcf_info_sample = build_vcf_like_dataframe(sample_mutations, sample) + # mandatory columns should be: [['CHROM', 'POS', 'REF', 'ALT', 'FILTER', 'DEPTH', 'ALT_DEPTH']] + # Ns can be assumed 0 and AM can be assumed to be the same as duplex + + # clean FILTER field of all deepCSA annotations + if len(filters_to_remove) > 0: + vcf_info_sample["FILTER"] = vcf_info_sample["FILTER"].apply(remove_deepcsa_filters, filters_to_removee = filters_to_remove) + + # add other necessary columns + vcf_info_sample["ID"] = '.' + vcf_info_sample["QUAL"] = 100 + + # rename sample column + vcf_info_sample.rename(columns={"SAMPLE": sample}, inplace=True) + + cols = ["CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT", sample] + + sample_file = f"{output_dir}/{sample}.vcf" + + with open(sample_file, "w") as f: + f.write(vardict_vcf_header) + f.write("#" + "\t".join(cols) + "\n") + + # write the data + vcf_info_sample.to_csv(f, sep="\t", columns=cols, index=False, header=False) + + print(f"VCF file for {sample} : {sample_file}") + +if __name__ == "__main__": + main() diff --git a/conf/modules.config b/conf/modules.config index ea420932..ff1f2177 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -139,9 +139,23 @@ process { // } withName: 'VCF2MAF' { - ext.args = "--level ${params.confidence_level}" + ext.args = "--level ${params.confidence_level}" } + withName: 'MAF2VCF' { + ext.args = "--output-dir . 
--maf-from-deepcsa --sample-name-column SAMPLE_ID" + publishDir = [ + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/signatures/sigprofilermatrixgenerator/vcfs" }, + pattern: "**{vcf}" + ] + ] + } + + + + withName: 'POSTPROCESSVEPPANEL' { ext.canonical_only = params.panel_with_canonical publishDir = [ @@ -486,6 +500,16 @@ process { ] } + withName: 'SIGPROMATRIXGENERATOR' { + + publishDir = [ + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/signatures/sigprofilermatrixgenerator" }, + pattern: "**{txt,pdf}" + ] + ] + } withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' { publishDir = [ @@ -540,7 +564,7 @@ process { --missing_values mean_samples" } - withName: 'SITESFROMPOSITIONS|SUMMARIZE_ANNOTATION|SIGPROFILERASSIGNMENT|ONCODRIVECLUSTL|POSTPROCESSVEPPANEL' { + withName: 'SITESFROMPOSITIONS|SUMMARIZE_ANNOTATION|SIGPROFILERASSIGNMENT|SIGPROMATRIXGENERATOR|ONCODRIVECLUSTL|POSTPROCESSVEPPANEL' { ext.assembly = params.vep_genome == 'GRCh38' ? 'hg38' : params.vep_genome == 'GRCm38' ? 
'mm10' : diff --git a/modules/local/maf2vcf.nf b/modules/local/maf2vcf.nf new file mode 100644 index 00000000..fe5dbd3c --- /dev/null +++ b/modules/local/maf2vcf.nf @@ -0,0 +1,43 @@ +process MAF_2_VCF { + + tag "${meta.id}" + label 'process_low' + + container "docker.io/bbglab/deepcsa-core:0.0.1-alpha" + + + input: + tuple val(meta), path (maf_file) + + output: + path "*.vcf" , emit: vcf_files + path "versions.yml" , topic: versions + + + script: + def prefix = task.ext.prefix ?: '' + prefix = "${meta.id}${prefix}" + def args = task.ext.args ?: "" + """ + deepcsa_maf2samplevcfs.py \\ + --mutations-file ${maf_file} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "" + prefix = "${meta.id}${prefix}" + """ + touch ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/sigprofiler_matrix_generator.nf b/modules/local/sigprofiler_matrix_generator.nf new file mode 100644 index 00000000..0b16761c --- /dev/null +++ b/modules/local/sigprofiler_matrix_generator.nf @@ -0,0 +1,43 @@ +process SIGPROFILER_MATRIX_GENERATOR { + + tag "${meta.id}" + label 'process_low' + + // container "docker.io/bbglab/sigprofilermatrixgenerator:latest" + container "docker.io/ferriolcalvet/sigprofilermatrixgenerator:ucsd_dockerfile" + + input: + tuple val(meta), path (vcf_files) + + output: + path "output_*/" , emit: sigprofiler_output + path "versions.yml" , topic: versions + + + script: + def prefix = task.ext.prefix ?: '' + prefix = "${meta.id}${prefix}" + def ref_genome = task.ext.assembly ? 
"-r ${task.ext.assembly}" : "" + """ + SigProfilerMatrixGenerator -i ${vcf_files} \\ + -o output_${prefix} \\ + ${ref_genome} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "" + prefix = "${meta.id}${prefix}" + """ + touch ${prefix}.decomposed_probabilities.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/subworkflows/local/signatures/main.nf b/subworkflows/local/signatures/main.nf index 5f483c60..cde346cf 100644 --- a/subworkflows/local/signatures/main.nf +++ b/subworkflows/local/signatures/main.nf @@ -1,12 +1,16 @@ -include { MATRIX_CONCAT as MATRIXCONCATWGS } from '../../../modules/local/sig_matrix_concat/main' +include { MATRIX_CONCAT as MATRIXCONCATWGS } from '../../../modules/local/sig_matrix_concat/main' include { SIGPROFILERASSIGNMENT } from '../../../modules/local/signatures/sigprofiler/assignment/main' -include { SIGNATURES_PROBABILITIES as SIGPROBS } from '../../../modules/local/combine_sbs/main' -// include { MSIGHDP } from '../../../modules/local/signatures/msighdp/main' -include { HDP_EXTRACTION as HDPEXTRACTION } from '../signatures_hdp/main' +include { SIGNATURES_PROBABILITIES as SIGPROBS } from '../../../modules/local/combine_sbs/main' +// include { MSIGHDP } from '../../../modules/local/signatures/msighdp/main' +include { HDP_EXTRACTION as HDPEXTRACTION } from '../signatures_hdp/main' + +include { MAF_2_VCF as MAF2VCF } from '../../../modules/local/maf2vcf.nf' +include { SIGPROFILER_MATRIX_GENERATOR as SIGPROMATRIXGENERATOR } from '../../../modules/local/sigprofiler_matrix_generator.nf' workflow SIGNATURES { take: + somatic_mutations matrix_wgs reference_signatures samples @@ -44,8 +48,22 @@ workflow SIGNATURES { // MATRIXCONCAT.out.wgs_tsv.flatten().map{ it -> [ [id : it.name.tokenize('.')[0]] , it] }.set{ 
named_matrices } // MSIGHDP(matrix) + Channel.of([ [ id: "all_samples" ] ]) + .join( somatic_mutations ) + .set{ maf2vcf_inputs } + + + MAF2VCF(maf2vcf_inputs) + vcf_files = MAF2VCF.out.vcf_files.flatten().collect() + + + SIGPROMATRIXGENERATOR( + vcf_files, + ) + + emit: plots = SIGPROFILERASSIGNMENT.out.plots // channel: [ val(meta), file(depths) ] // plots_extraction = MSIGHDP.out.plots // channel: [ val(meta), file(depths) ] mutation_probs = signature_probs_samples -} +} \ No newline at end of file diff --git a/workflows/deepcsa.nf b/workflows/deepcsa.nf index a84f4125..ba242b96 100644 --- a/workflows/deepcsa.nf +++ b/workflows/deepcsa.nf @@ -491,11 +491,13 @@ workflow DEEPCSA{ } + + if (params.signatures){ // Signature Analysis if (params.profileall){ - SIGNATURESALL(MUTPROFILEALL.out.wgs_sigprofiler, cosmic_ref, TABLE2GROUP.out.json_samples) + SIGNATURESALL(somatic_mutations, MUTPROFILEALL.out.wgs_sigprofiler, cosmic_ref, TABLE2GROUP.out.json_samples) somatic_mutations .join(SIGNATURESALL.out.mutation_probs) @@ -505,13 +507,13 @@ workflow DEEPCSA{ } if (params.profilenonprot){ - SIGNATURESNONPROT(MUTPROFILENONPROT.out.wgs_sigprofiler, cosmic_ref, TABLE2GROUP.out.json_samples) + SIGNATURESNONPROT(somatic_mutations,MUTPROFILENONPROT.out.wgs_sigprofiler, cosmic_ref, TABLE2GROUP.out.json_samples) } if (params.profileexons){ - SIGNATURESEXONS(MUTPROFILEEXONS.out.wgs_sigprofiler, cosmic_ref, TABLE2GROUP.out.json_samples) + SIGNATURESEXONS(somatic_mutations, MUTPROFILEEXONS.out.wgs_sigprofiler, cosmic_ref, TABLE2GROUP.out.json_samples) } if (params.profileintrons){ - SIGNATURESINTRONS(MUTPROFILEINTRONS.out.wgs_sigprofiler, cosmic_ref, TABLE2GROUP.out.json_samples) + SIGNATURESINTRONS(somatic_mutations,MUTPROFILEINTRONS.out.wgs_sigprofiler, cosmic_ref, TABLE2GROUP.out.json_samples) } } From 3a1726aed8a5575e9a47c5d46dec40ac3b38d3d2 Mon Sep 17 00:00:00 2001 From: FerriolCalvet Date: Tue, 26 Aug 2025 00:01:15 +0200 Subject: [PATCH 2/2] update sigprofiler matrix generator 
- clean unused files - reorder module imports - not tested --- conf/modules.config | 15 ++++- modules/local/{maf2vcf.nf => maf2vcf/main.nf} | 0 modules/local/signatures/msighdp/main.nf | 49 --------------- .../sigprofiler/matrixgenerator/main.nf | 47 +++++++++++++++ modules/local/sigprofiler_matrix_generator.nf | 43 -------------- subworkflows/local/signatures/main.nf | 24 -------- subworkflows/local/signatures_hdp/main.nf | 14 ----- workflows/deepcsa.nf | 59 +++++++++++-------- 8 files changed, 94 insertions(+), 157 deletions(-) rename modules/local/{maf2vcf.nf => maf2vcf/main.nf} (100%) delete mode 100644 modules/local/signatures/msighdp/main.nf create mode 100644 modules/local/signatures/sigprofiler/matrixgenerator/main.nf delete mode 100644 modules/local/sigprofiler_matrix_generator.nf diff --git a/conf/modules.config b/conf/modules.config index ff1f2177..e68e4409 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -501,12 +501,23 @@ process { } withName: 'SIGPROMATRIXGENERATOR' { + ext.args = "--plot \ + --tsb_stat \ + --seqInfo \ + --cushion 100" + ext.genome_assembly = + params.vep_genome == 'GRCh38' ? 'GRCh38' : + params.vep_genome == 'GRCh37' ? 'GRCh37' : + params.vep_genome == 'GRCm38' ? 'mm10' : + params.vep_genome == 'GRCm39' ? 'mm39' : + null + publishDir = [ [ mode: params.publish_dir_mode, path: { "${params.outdir}/signatures/sigprofilermatrixgenerator" }, - pattern: "**{txt,pdf}" + pattern: "**{txt,pdf,all}" ] ] } @@ -564,7 +575,7 @@ process { --missing_values mean_samples" } - withName: 'SITESFROMPOSITIONS|SUMMARIZE_ANNOTATION|SIGPROFILERASSIGNMENT|SIGPROMATRIXGENERATOR|ONCODRIVECLUSTL|POSTPROCESSVEPPANEL' { + withName: 'SITESFROMPOSITIONS|SUMMARIZE_ANNOTATION|SIGPROFILERASSIGNMENT|ONCODRIVECLUSTL|POSTPROCESSVEPPANEL' { ext.assembly = params.vep_genome == 'GRCh38' ? 'hg38' : params.vep_genome == 'GRCm38' ? 
'mm10' : diff --git a/modules/local/maf2vcf.nf b/modules/local/maf2vcf/main.nf similarity index 100% rename from modules/local/maf2vcf.nf rename to modules/local/maf2vcf/main.nf diff --git a/modules/local/signatures/msighdp/main.nf b/modules/local/signatures/msighdp/main.nf deleted file mode 100644 index 53e1711a..00000000 --- a/modules/local/signatures/msighdp/main.nf +++ /dev/null @@ -1,49 +0,0 @@ -process MSIGHDP { - tag "$meta.id" - label 'process_medium' - - container 'docker.io/ferriolcalvet/msighdp:latest' - - input: - tuple val(meta), path(matrix) - - output: - tuple val(meta), path("**.pdf") , emit: plots - tuple val(meta), path("**.csv") , emit: stats - path "versions.yml" , topic: versions - - - script: - def prefix = task.ext.prefix ?: "" - prefix = "${meta.id}${prefix}" - def k_guess = task.ext.k_guess ?: "12" - """ - msighdp_run.R \\ - 123 \\ - ${matrix} \\ - output.${prefix} \\ - ${k_guess} \\ - ${task.cpus} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - R : \$(Rscript --version | sed -e 's/.*version //g') - Rscript : \$(Rscript --version | sed -e 's/.*version //g') - mSigHdp : 2.1.2 - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "" - prefix = "${meta.id}${prefix}" - """ - touch ${prefix}.pdf - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - R : \$(Rscript --version | sed -e 's/.*version //g') - Rscript : \$(Rscript --version | sed -e 's/.*version //g') - mSigHdp : 2.1.2 - END_VERSIONS - """ -} diff --git a/modules/local/signatures/sigprofiler/matrixgenerator/main.nf b/modules/local/signatures/sigprofiler/matrixgenerator/main.nf new file mode 100644 index 00000000..5ab951e1 --- /dev/null +++ b/modules/local/signatures/sigprofiler/matrixgenerator/main.nf @@ -0,0 +1,47 @@ +process SIGPROFILER_MATRIXGENERATOR { + tag "${task.ext.prefix}" + label 'process_single' + + container 'docker.io/ferriolcalvet/sigprofilermatrixgenerator:1.3.5' + + input: + path (vcf) + + output: + 
path("input_mutations/output/plots/*"), optional : true, emit: output_plots + path("input_mutations/output/ID/*") , optional : true, emit: matrices_ID + path("input_mutations/output/DBS/*") , optional : true, emit: matrices_DBS + path("input_mutations/output/SBS/*") , optional : true, emit: matrices_SBS + path("input_mutations/output/TSB/*") , optional : true, emit: transcription_bias + path "versions.yml" , topic: versions + + + script: + def prefix = task.ext.prefix ?: 'samples' + def args = task.ext.args ?: "" + def genome = task.ext.genome_assembly ?: "GRCh38" + """ + mkdir input_mutations + cp *.vcf input_mutations/. + + SigProfilerMatrixGenerator matrix_generator \\ + ${prefix} \\ + ${genome} \\ + input_mutations/ \\ + ${args} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + SigProfilerMatrixGenerator: 1.3.5 + END_VERSIONS + """ + + stub: + """ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + SigProfilerMatrixGenerator: 1.3.5 + END_VERSIONS + """ +} \ No newline at end of file diff --git a/modules/local/sigprofiler_matrix_generator.nf b/modules/local/sigprofiler_matrix_generator.nf deleted file mode 100644 index 0b16761c..00000000 --- a/modules/local/sigprofiler_matrix_generator.nf +++ /dev/null @@ -1,43 +0,0 @@ -process SIGPROFILER_MATRIX_GENERATOR { - - tag "${meta.id}" - label 'process_low' - - // container "docker.io/bbglab/sigprofilermatrixgenerator:latest" - container "docker.io/ferriolcalvet/sigprofilermatrixgenerator:ucsd_dockerfile" - - input: - tuple val(meta), path (vcf_files) - - output: - path "output_*/" , emit: sigprofiler_output - path "versions.yml" , topic: versions - - - script: - def prefix = task.ext.prefix ?: '' - prefix = "${meta.id}${prefix}" - def ref_genome = task.ext.assembly ? 
"-r ${task.ext.assembly}" : "" - """ - SigProfilerMatrixGenerator -i ${vcf_files} \\ - -o output_${prefix} \\ - ${ref_genome} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "" - prefix = "${meta.id}${prefix}" - """ - touch ${prefix}.decomposed_probabilities.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/subworkflows/local/signatures/main.nf b/subworkflows/local/signatures/main.nf index cde346cf..08de6b62 100644 --- a/subworkflows/local/signatures/main.nf +++ b/subworkflows/local/signatures/main.nf @@ -1,16 +1,11 @@ include { MATRIX_CONCAT as MATRIXCONCATWGS } from '../../../modules/local/sig_matrix_concat/main' include { SIGPROFILERASSIGNMENT } from '../../../modules/local/signatures/sigprofiler/assignment/main' include { SIGNATURES_PROBABILITIES as SIGPROBS } from '../../../modules/local/combine_sbs/main' -// include { MSIGHDP } from '../../../modules/local/signatures/msighdp/main' include { HDP_EXTRACTION as HDPEXTRACTION } from '../signatures_hdp/main' -include { MAF_2_VCF as MAF2VCF } from '../../../modules/local/maf2vcf.nf' -include { SIGPROFILER_MATRIX_GENERATOR as SIGPROMATRIXGENERATOR } from '../../../modules/local/sigprofiler_matrix_generator.nf' - workflow SIGNATURES { take: - somatic_mutations matrix_wgs reference_signatures samples @@ -43,27 +38,8 @@ workflow SIGNATURES { HDPEXTRACTION(named_matrices_wgs_hdp, reference_signatures) - // matrix.map{ it -> it[1] }.collect().map{ it -> [[ id:"all_samples" ], it]}.set{ all_matrices } - // MATRIXCONCAT(all_matrices, samples) - // MATRIXCONCAT.out.wgs_tsv.flatten().map{ it -> [ [id : it.name.tokenize('.')[0]] , it] }.set{ named_matrices } - // MSIGHDP(matrix) - - Channel.of([ [ id: "all_samples" ] ]) - .join( somatic_mutations ) - .set{ maf2vcf_inputs } - - - 
MAF2VCF(maf2vcf_inputs) - vcf_files = MAF2VCF.out.vcf_files.flatten().collect() - - - SIGPROMATRIXGENERATOR( - vcf_files, - ) - emit: plots = SIGPROFILERASSIGNMENT.out.plots // channel: [ val(meta), file(depths) ] - // plots_extraction = MSIGHDP.out.plots // channel: [ val(meta), file(depths) ] mutation_probs = signature_probs_samples } \ No newline at end of file diff --git a/subworkflows/local/signatures_hdp/main.nf b/subworkflows/local/signatures_hdp/main.nf index 5ec77fa0..7ba2cf1a 100644 --- a/subworkflows/local/signatures_hdp/main.nf +++ b/subworkflows/local/signatures_hdp/main.nf @@ -1,10 +1,7 @@ include { PREPARE_INPUT } from '../../../modules/local/signatures/hdp/prepareinput/main' include { RUN_HDP_CHAIN_SAMPLING } from '../../../modules/local/signatures/hdp/chainsampling/main' -// include { NORMALIZE_SIGNATURES } from '../../../modules/local/signatures/hdp/normalize_sigs/main' include { PROCESS_HDP_RESULTS } from '../../../modules/local/signatures/hdp/process_results/main' -// include { COMPARE_SIGNATURES as COMPARENORMALIZEDSIGNATURES } from '../../../modules/local/signatures/hdp/compare_sigs/main' include { COMPARE_SIGNATURES as COMPARESIGNATURES } from '../../../modules/local/signatures/hdp/compare_sigs/main' -// include { COMPARE_SIGNATURES as COMPARECANCERSIGNATURES } from '../../../modules/local/signatures/hdp/compare_sigs/main' @@ -23,13 +20,9 @@ workflow HDP_EXTRACTION { PREPARE_INPUT(samples_matrix) - // Create a channel with iter values from 1 to 15 iter_ch = Channel.of(1..15) - - // Combine the input data channel with the iter channel combined_input_ch = PREPARE_INPUT.out.input_data.combine(iter_ch) - // Run the process with the combined input RUN_HDP_CHAIN_SAMPLING(combined_input_ch) // Collect all iteration results @@ -37,15 +30,8 @@ workflow HDP_EXTRACTION { PROCESS_HDP_RESULTS(PREPARE_INPUT.out.input_data, all_iterations_ch) - // if (params.norm_file != "NA") { - // normalized_results_ch = NORMALIZE_SIGNATURES(processed_results_ch) - // 
compared_normalized_results_ch = COMPARENORMALIZEDSIGNATURES(normalized_results_ch, reference_signatures) - // } - COMPARESIGNATURES(PROCESS_HDP_RESULTS.out.processed_results, reference_signatures) - // final_results_ch = COMPARECANCERSIGNATURES(compared_results_ch, reference_signatures) - // emit: // plots = SIGPROFILERASSIGNMENT.out.plots // channel: [ val(meta), file(depths) ] diff --git a/workflows/deepcsa.nf b/workflows/deepcsa.nf index ba242b96..8839da08 100644 --- a/workflows/deepcsa.nf +++ b/workflows/deepcsa.nf @@ -1,8 +1,4 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - PRINT PARAMS SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ + include { paramsSummaryMap } from 'plugin/nf-schema' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' @@ -11,15 +7,8 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_d /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS + IMPORT LOCAL SUBWORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Consisting of a mix of local and nf-core/modules. 
*/ // SUBWORKFLOW @@ -86,26 +75,36 @@ include { TABIX_BGZIPTABIX_QUERY as DEPTHSNONPROTCONS } from '../modules/n include { TABIX_BGZIPTABIX_QUERY as DEPTHSINTRONSCONS } from '../modules/nf-core/tabix/bgziptabixquery/main' include { TABIX_BGZIPTABIX_QUERY as DEPTHSSYNONYMOUSCONS } from '../modules/nf-core/tabix/bgziptabixquery/main' -include { SELECT_MUTDENSITIES as SYNMUTDENSITY } from '../modules/local/select_mutdensity/main' -include { SELECT_MUTDENSITIES as SYNMUTREADSRATE } from '../modules/local/select_mutdensity/main' - -include { DNA_2_PROTEIN_MAPPING as DNA2PROTEINMAPPING } from '../modules/local/dna2protein/main' - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT NF-CORE MODULES/SUBWORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Installed directly from nf-core/modules. */ // MODULE include { MULTIQC } from '../modules/nf-core/multiqc/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT CUSTOM MODULES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { TABLE_2_GROUP as TABLE2GROUP } from '../modules/local/table2groups/main' include { ANNOTATE_DEPTHS as ANNOTATEDEPTHS } from '../modules/local/annotatedepth/main' include { DOWNSAMPLE_DEPTHS as DOWNSAMPLEDEPTHS } from '../modules/local/downsample/depths/main' -include { TABLE_2_GROUP as TABLE2GROUP } from '../modules/local/table2groups/main' + +include { SELECT_MUTDENSITIES as SYNMUTDENSITY } from '../modules/local/select_mutdensity/main' +include { SELECT_MUTDENSITIES as SYNMUTREADSRATE } from '../modules/local/select_mutdensity/main' + +include { DNA_2_PROTEIN_MAPPING as DNA2PROTEINMAPPING } from '../modules/local/dna2protein/main' + +include { MAF_2_VCF as MAF2VCF } from 
'../modules/local/maf2vcf/main' +include { SIGPROFILER_MATRIXGENERATOR as SIGPROMATRIXGENERATOR } from '../modules/local/signatures/sigprofiler/matrixgenerator/main' + include { MUTATIONS_2_SIGNATURES as MUTS2SIGS } from '../modules/local/mutations2sbs/main' /* @@ -495,25 +494,35 @@ workflow DEEPCSA{ if (params.signatures){ + Channel.of([ [ id: "all_samples" ] ]) + .join( somatic_mutations ) + .set{ maf2vcf_inputs } + + MAF2VCF(maf2vcf_inputs) + vcf_files = MAF2VCF.out.vcf_files.flatten().collect() + + SIGPROMATRIXGENERATOR( + vcf_files, + ) + // Signature Analysis if (params.profileall){ - SIGNATURESALL(somatic_mutations, MUTPROFILEALL.out.wgs_sigprofiler, cosmic_ref, TABLE2GROUP.out.json_samples) + SIGNATURESALL(MUTPROFILEALL.out.wgs_sigprofiler, cosmic_ref, TABLE2GROUP.out.json_samples) somatic_mutations .join(SIGNATURESALL.out.mutation_probs) .set{mutations_n_sbs} MUTS2SIGS(mutations_n_sbs) - } if (params.profilenonprot){ - SIGNATURESNONPROT(somatic_mutations,MUTPROFILENONPROT.out.wgs_sigprofiler, cosmic_ref, TABLE2GROUP.out.json_samples) + SIGNATURESNONPROT(MUTPROFILENONPROT.out.wgs_sigprofiler, cosmic_ref, TABLE2GROUP.out.json_samples) } if (params.profileexons){ - SIGNATURESEXONS(somatic_mutations, MUTPROFILEEXONS.out.wgs_sigprofiler, cosmic_ref, TABLE2GROUP.out.json_samples) + SIGNATURESEXONS(MUTPROFILEEXONS.out.wgs_sigprofiler, cosmic_ref, TABLE2GROUP.out.json_samples) } if (params.profileintrons){ - SIGNATURESINTRONS(somatic_mutations,MUTPROFILEINTRONS.out.wgs_sigprofiler, cosmic_ref, TABLE2GROUP.out.json_samples) + SIGNATURESINTRONS(MUTPROFILEINTRONS.out.wgs_sigprofiler, cosmic_ref, TABLE2GROUP.out.json_samples) } }