From a9dba8ceb255817423bbd3d59a2f42e85a5d2d70 Mon Sep 17 00:00:00 2001 From: FerriolCalvet Date: Wed, 18 Dec 2024 03:32:16 +0100 Subject: [PATCH 1/3] update default test paths to irb cluster --- assets/input_double_bam.csv | 4 ++-- conf/test.config | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/assets/input_double_bam.csv b/assets/input_double_bam.csv index 12ec9c2d..0eefcc4d 100644 --- a/assets/input_double_bam.csv +++ b/assets/input_double_bam.csv @@ -1,3 +1,3 @@ sample,vcf,bam,pileup_bam,pileup_ind -K_5_1_A_1,/workspace/datasets/transfer/ferriol_deepcsa/K_5_1_A_1.high.filtered.vcf,/workspace/datasets/transfer/ferriol_deepcsa/K_5_1_A_1.sorted.bam,/workspace/datasets/transfer/ferriol_deepcsa/allmolecules/K_5_1_A_1.sorted.bam,/workspace/datasets/transfer/ferriol_deepcsa/allmolecules/K_5_1_A_1.sorted.bam.csi -K_6_1_A_1,/workspace/datasets/transfer/ferriol_deepcsa/K_6_1_A_1.high.filtered.vcf,/workspace/datasets/transfer/ferriol_deepcsa/K_6_1_A_1.sorted.bam,/workspace/datasets/transfer/ferriol_deepcsa/allmolecules/K_6_1_A_1.sorted.bam,/workspace/datasets/transfer/ferriol_deepcsa/allmolecules/K_6_1_A_1.sorted.bam.csi +K_5_1_A_1,/data/bbg/datasets/transfer/ferriol_deepcsa/K_5_1_A_1.high.filtered.vcf,/data/bbg/datasets/transfer/ferriol_deepcsa/K_5_1_A_1.sorted.bam,/data/bbg/datasets/transfer/ferriol_deepcsa/allmolecules/K_5_1_A_1.sorted.bam,/data/bbg/datasets/transfer/ferriol_deepcsa/allmolecules/K_5_1_A_1.sorted.bam.csi +K_6_1_A_1,/data/bbg/datasets/transfer/ferriol_deepcsa/K_6_1_A_1.high.filtered.vcf,/data/bbg/datasets/transfer/ferriol_deepcsa/K_6_1_A_1.sorted.bam,/data/bbg/datasets/transfer/ferriol_deepcsa/allmolecules/K_6_1_A_1.sorted.bam,/data/bbg/datasets/transfer/ferriol_deepcsa/allmolecules/K_6_1_A_1.sorted.bam.csi diff --git a/conf/test.config b/conf/test.config index 04b14c36..cafa23d4 100644 --- a/conf/test.config +++ b/conf/test.config @@ -27,7 +27,7 @@ params { omega_hotspots = false - omega_hotspots_bedfile = 
"/workspace/datasets/transfer/ferriol_deepcsa/kidney_panel.hotspots.bed4.bed" + omega_hotspots_bedfile = "/data/bbg/datasets/transfer/ferriol_deepcsa/kidney_panel.hotspots.bed4.bed" hotspot_expansion = 30 oncodrivefml = false From f1073e7969dff00a800c891c15ea514eb6c8158b Mon Sep 17 00:00:00 2001 From: FerriolCalvet Date: Wed, 18 Dec 2024 04:39:09 +0100 Subject: [PATCH 2/3] update omega_globalloc - use synonymous mutation rates instead of relative mutrates - use mutation rate for omegagloballoc and mutated reads rate for omegamultigloballoc - add all_samples mutational profile to omega input - rename scripts and modules according to the changes --- bin/omega_compute_relative_mutrate.py | 40 --------------- bin/omega_select_mutrate.py | 49 +++++++++++++++++++ conf/modules.config | 7 +++ .../local/bbgtools/omega/estimator/main.nf | 2 +- .../local/bbgtools/omega/preprocess/main.nf | 19 +++++-- .../main.nf | 16 +++--- subworkflows/local/omega/main.nf | 20 ++++++-- workflows/deepcsa.nf | 18 ++++--- 8 files changed, 108 insertions(+), 63 deletions(-) delete mode 100755 bin/omega_compute_relative_mutrate.py create mode 100755 bin/omega_select_mutrate.py rename modules/local/{relative_mutrate => select_mutrate}/main.nf (70%) diff --git a/bin/omega_compute_relative_mutrate.py b/bin/omega_compute_relative_mutrate.py deleted file mode 100755 index 09f3c672..00000000 --- a/bin/omega_compute_relative_mutrate.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/local/bin/python - - -import click -import pandas as pd -from read_utils import custom_na_values - - -def compute_relative_mutrate(mutrate_file, output_file): - """ - INFO - """ - - mutrate_df = pd.read_csv(mutrate_file, sep = "\t", header = 0, na_values = custom_na_values) - - synonymous_mutrates_all_samples = mutrate_df[(mutrate_df["MUTTYPES"] == "SNV") & - (mutrate_df["GENE"] != "ALL_GENES")].reset_index(drop = True) - - relative_synonymous_mutrates_all_samples = synonymous_mutrates_all_samples[['GENE', 
'MUTRATE_MB_ADJUSTED']].set_index(['GENE']) \ - / synonymous_mutrates_all_samples["MUTRATE_MB_ADJUSTED"].sum() - relative_synonymous_mutrates_all_samples.columns = ["REL_MUTRATE"] - - relative_synonymous_mutrates_all_samples.reset_index()[["GENE", "REL_MUTRATE"]].to_csv(f"{output_file}", - header=["GENE", "SYNONYMOUS_MUTS"], - index=False, - sep="\t") - - -@click.command() -@click.option('--mutrates', type=click.Path(exists=True), help='Input mutation rate file') -@click.option('--output', type=click.Path(), help='Output file') - - -def main(mutrates, output): - click.echo(f"Computing the relative mutation rate...") - compute_relative_mutrate(mutrates, output) - -if __name__ == '__main__': - main() - diff --git a/bin/omega_select_mutrate.py b/bin/omega_select_mutrate.py new file mode 100755 index 00000000..f5860081 --- /dev/null +++ b/bin/omega_select_mutrate.py @@ -0,0 +1,49 @@ +#!/usr/local/bin/python + + +import click +import pandas as pd +from read_utils import custom_na_values + + +def select_syn_mutrate(mutrate_file, output_file, mode): + """ + INFO + """ + + mutrate_df = pd.read_csv(mutrate_file, sep = "\t", header = 0, na_values = custom_na_values) + + synonymous_mutrates_all_samples = mutrate_df[(mutrate_df["MUTTYPES"] == "SNV") & + (mutrate_df["GENE"] != "ALL_GENES")].reset_index(drop = True) + + if mode == 'mutations': + synonymous_mutrates_genes = synonymous_mutrates_all_samples[['GENE', 'MUTRATE_MB_ADJUSTED']] + elif mode == 'mutated_reads': + synonymous_mutrates_genes = synonymous_mutrates_all_samples[['GENE', 'MUTREADSRATE_MB_ADJUSTED']] + + ## FIXME not sure if this is really needed since when called through main() + # the input would have already been forced to be either of the two options + # it might still be useful in case this was not called from main() + else: + print('unknown mode, please enter either mutations or mutated_reads') + exit(1) ## FIXME not sure if this is the right code to exit with + + synonymous_mutrates_genes.columns = 
["GENE", "MUTRATE"] + synonymous_mutrates_genes.to_csv(f"{output_file}", + header=True, + index=False, + sep="\t") + + +@click.command() +@click.option('--mutrates', type=click.Path(exists=True), help='Input mutation rate file') +@click.option('--output', type=click.Path(), help='Output file') +@click.option('--mode', type=click.Choice(['mutations', 'mutated_reads']), default='mutations') + +def main(mutrates, output, mode): + click.echo("Selecting the gene synonymous mutation rates...") + select_syn_mutrate(mutrates, output, mode) + +if __name__ == '__main__': + main() + diff --git a/conf/modules.config b/conf/modules.config index 5eec249d..18c6643b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -489,6 +489,13 @@ process { ] } + withName: 'SYNMUTRATE' { + ext.mode = 'mutations' + } + withName: 'SYNMUTREADSRATE' { + ext.mode = 'mutated_reads' + } + } diff --git a/modules/local/bbgtools/omega/estimator/main.nf b/modules/local/bbgtools/omega/estimator/main.nf index bac76fda..15612302 100644 --- a/modules/local/bbgtools/omega/estimator/main.nf +++ b/modules/local/bbgtools/omega/estimator/main.nf @@ -5,7 +5,7 @@ process OMEGA_ESTIMATOR { label 'process_high_memory' - container 'docker.io/ferriolcalvet/omega:latest' + container 'docker.io/ferriolcalvet/omega:20241217' input: tuple val(meta) , path(mutabilities_table), path(mutations_table), path(depths) diff --git a/modules/local/bbgtools/omega/preprocess/main.nf b/modules/local/bbgtools/omega/preprocess/main.nf index d46bde25..03cbda41 100644 --- a/modules/local/bbgtools/omega/preprocess/main.nf +++ b/modules/local/bbgtools/omega/preprocess/main.nf @@ -5,12 +5,13 @@ process OMEGA_PREPROCESS { label 'process_high_memory' - container 'docker.io/ferriolcalvet/omega:latest' + container 'docker.io/ferriolcalvet/omega:20241217' input: tuple val(meta) , path(mutations), path(depths), path(mutation_profile) tuple val(meta2), path (annotated_panel) tuple val(meta3), path (syn_muts_global) + tuple val(meta4), path 
(mut_profile_global, stageAs: 'global_mutprofile.tsv') output: @@ -26,7 +27,7 @@ process OMEGA_PREPROCESS { def prefix = task.ext.prefix ?: "${meta.id}" // TODO revise this fix def sample_name = prefix.tokenize('.')[0] - def global_loc = task.ext.global_loc ? "--absent-synonymous infer_global_custom --relative-synonymous-muts-file ${syn_muts_global}" : "--absent-synonymous ignore" + def global_loc = task.ext.global_loc ? "--absent-synonymous infer_global_custom --mutational-profile-global-file global_mutprofile.tsv --synonymous-mutrates-file ${syn_muts_global}" : "--absent-synonymous ignore" prefix = task.ext.global_loc ? "${prefix}.gLoc" : "${prefix}" """ omega preprocessing --preprocessing-mode compute_mutabilities \\ @@ -36,7 +37,7 @@ process OMEGA_PREPROCESS { --table-observed-muts mutations_per_sample_gene_impact_context.${prefix}.tsv \\ --mutabilities-table mutability_per_sample_gene_context.${prefix}.tsv \\ --synonymous-muts-table syn_muts.${prefix}.tsv \\ - --mutational-profile ${mutation_profile} \\ + --mutational-profile-file ${mutation_profile} \\ --single-sample ${sample_name} \\ ${global_loc} # $args -c $task.cpus @@ -59,3 +60,15 @@ process OMEGA_PREPROCESS { """ } +// omega preprocessing --preprocessing-mode compute_mutabilities +// --depths-file all_samples.subset_depths.tsv.gz +// --mutations-file all_samples.mutations.tsv +// --input-vep-postprocessed-file consensus.exons_splice_sites.tsv +// --table-observed-muts mutations_per_sample_gene_impact_context.all_samples2.global_loc.gLoc.tsv +// --mutabilities-table mutability_per_sample_gene_context.all_samples2.global_loc.gLoc.tsv +// --synonymous-muts-table syn_muts.all_samples2.global_loc.gLoc.tsv +// --mutational-profile-file all_samples.all.profile.tsv +// --mutational-profile-global-file P19_0033_BTR_01.all.profile.tsv +// --single-sample all_samples +// --absent-synonymous infer_global_custom +// --synonymous-mutrates-file mutrates_per_gene.tsv diff --git a/modules/local/relative_mutrate/main.nf 
b/modules/local/select_mutrate/main.nf similarity index 70% rename from modules/local/relative_mutrate/main.nf rename to modules/local/select_mutrate/main.nf index a8853c79..48549e90 100644 --- a/modules/local/relative_mutrate/main.nf +++ b/modules/local/select_mutrate/main.nf @@ -1,4 +1,4 @@ -process RELATIVE_MUTRATE { +process SELECT_MUTRATES { tag "$meta.id" label 'process_single' @@ -12,20 +12,21 @@ process RELATIVE_MUTRATE { tuple val(meta), path(mutation_rates) output: - tuple val(meta), path("*.rel_mutrates.tsv") , emit: mutrate - path "versions.yml" , emit: versions + tuple val(meta), path("*.gene_mutrates.tsv") , emit: mutrate + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def mode = task.ext.mode ?: "mutations" """ - omega_compute_relative_mutrate.py \\ + omega_select_mutrate.py \\ --mutrates ${mutation_rates} \\ - --output ${prefix}.rel_mutrates.tsv; + --output ${prefix}.gene_mutrates.tsv \\ + --mode ${mode}; cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -35,9 +36,8 @@ process RELATIVE_MUTRATE { stub: def prefix = task.ext.prefix ?: "all_samples" - def panel_version = task.ext.panel_version ?: "${meta2.id}" """ - touch ${prefix}.rel_mutrates.tsv + touch ${prefix}.gene_mutrates.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/subworkflows/local/omega/main.nf b/subworkflows/local/omega/main.nf index b6b6f4bb..88028e6c 100644 --- a/subworkflows/local/omega/main.nf +++ b/subworkflows/local/omega/main.nf @@ -57,6 +57,10 @@ workflow OMEGA_ANALYSIS{ .join( profile ) .set{ muts_n_depths_n_profile } + Channel.of([ [ id: "all_samples" ] ]) + .join( profile ).first() + .set{ all_samples_mut_profile } + if (params.omega_hotspots){ EXPANDREGIONS(panel, hotspots_file) @@ -68,8 +72,11 @@ workflow OMEGA_ANALYSIS{ json_hotspots = bedfile } - // FIXME: here I am using bedfile as a dummy value channel - PREPROCESSING( 
muts_n_depths_n_profile, expanded_panel, bedfile) + // FIXME here I am using bedfile as a dummy value channel + PREPROCESSING( muts_n_depths_n_profile, + expanded_panel, + bedfile, + all_samples_mut_profile) ch_versions = ch_versions.mix(PREPROCESSING.out.versions) PREPROCESSING.out.mutabs_n_mutations_tsv @@ -98,14 +105,19 @@ workflow OMEGA_ANALYSIS{ if (params.omega_globalloc) { - PREPROCESSINGGLOBALLOC( muts_n_depths_n_profile, expanded_panel, relative_mutationrates.first() ) + PREPROCESSINGGLOBALLOC(muts_n_depths_n_profile, + expanded_panel, + relative_mutationrates.first(), + all_samples_mut_profile) ch_versions = ch_versions.mix(PREPROCESSINGGLOBALLOC.out.versions) PREPROCESSINGGLOBALLOC.out.mutabs_n_mutations_tsv .join( depth ) .set{ preprocess_globalloc_n_depths } - ESTIMATORGLOBALLOC( preprocess_globalloc_n_depths, expanded_panel, GROUPGENES.out.json_genes.first()) + ESTIMATORGLOBALLOC(preprocess_globalloc_n_depths, + expanded_panel, + GROUPGENES.out.json_genes.first()) ch_versions = ch_versions.mix(ESTIMATORGLOBALLOC.out.versions) global_loc_results = ESTIMATORGLOBALLOC.out.results diff --git a/workflows/deepcsa.nf b/workflows/deepcsa.nf index 870cecdc..22bed4b7 100644 --- a/workflows/deepcsa.nf +++ b/workflows/deepcsa.nf @@ -117,7 +117,8 @@ include { TABIX_BGZIPTABIX_QUERY as DEPTHSNONPROTCONS } from '../modules/n include { TABIX_BGZIPTABIX_QUERY as DEPTHSINTRONSCONS } from '../modules/nf-core/tabix/bgziptabixquery/main' include { TABIX_BGZIPTABIX_QUERY as DEPTHSSYNONYMOUSCONS } from '../modules/nf-core/tabix/bgziptabixquery/main' -include { RELATIVE_MUTRATE as RELMUTRATE } from '../modules/local/relative_mutrate/main' +include { SELECT_MUTRATES as SYNMUTRATE } from '../modules/local/select_mutrate/main' +include { SELECT_MUTRATES as SYNMUTREADSRATE } from '../modules/local/select_mutrate/main' @@ -247,8 +248,11 @@ workflow DEEPCSA{ .join( MUTRATESYNONYMOUS.out.mutrates ) .set{ all_samples_syn_mutrate } - RELMUTRATE(all_samples_syn_mutrate) - ch_versions 
= ch_versions.mix(RELMUTRATE.out.versions) + SYNMUTRATE(all_samples_syn_mutrate) + ch_versions = ch_versions.mix(SYNMUTRATE.out.versions) + + SYNMUTREADSRATE(all_samples_syn_mutrate) + ch_versions = ch_versions.mix(SYNMUTREADSRATE.out.versions) // Concatenate all outputs into a single file @@ -442,7 +446,7 @@ workflow DEEPCSA{ CREATEPANELS.out.exons_consensus_panel, custom_groups_table, hotspots_bed_file, - RELMUTRATE.out.mutrate + SYNMUTRATE.out.mutrate ) positive_selection_results = positive_selection_results.join(OMEGA.out.results, remainder: true) positive_selection_results = positive_selection_results.join(OMEGA.out.results_global, remainder: true) @@ -456,7 +460,7 @@ workflow DEEPCSA{ CREATEPANELS.out.exons_consensus_panel, custom_groups_table, hotspots_bed_file, - RELMUTRATE.out.mutrate + SYNMUTREADSRATE.out.mutrate ) positive_selection_results = positive_selection_results.join(OMEGAMULTI.out.results, remainder: true) positive_selection_results = positive_selection_results.join(OMEGAMULTI.out.results_global, remainder: true) @@ -470,7 +474,7 @@ workflow DEEPCSA{ CREATEPANELS.out.exons_consensus_panel, custom_groups_table, hotspots_bed_file, - RELMUTRATE.out.mutrate + SYNMUTRATE.out.mutrate ) ch_versions = ch_versions.mix(OMEGANONPROT.out.versions) @@ -481,7 +485,7 @@ workflow DEEPCSA{ CREATEPANELS.out.exons_consensus_panel, custom_groups_table, hotspots_bed_file, - RELMUTRATE.out.mutrate + SYNMUTREADSRATE.out.mutrate ) ch_versions = ch_versions.mix(OMEGANONPROTMULTI.out.versions) } From edfa571ae890aba8c6696d1420b4089a62b2756c Mon Sep 17 00:00:00 2001 From: FerriolCalvet Date: Wed, 18 Dec 2024 21:26:54 +0100 Subject: [PATCH 3/3] remove other_sample_SNP filter from somatic mutations --- conf/modules.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 18c6643b..c0598137 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -295,7 +295,7 @@ process { ['"VAF_AM" : "le ', 
"${params.germline_threshold}", '"'].join('').trim(), ['"vd_VAF" : "le ', "${params.germline_threshold}", '"'].join('').trim(), ['"DEPTH" : "ge ', "${params.mutation_depth_threshold}", '"'].join('').trim(), - '"FILTER" : ["notcontains NM20", "notcontains n_rich", "notcontains cohort_n_rich", "notcontains cohort_n_rich_uni", "notcontains no_pileup_support", "notcontains other_sample_SNP", "notcontains low_mappability" ]', + '"FILTER" : ["notcontains NM20", "notcontains n_rich", "notcontains cohort_n_rich", "notcontains cohort_n_rich_uni", "notcontains no_pileup_support", "notcontains low_mappability" ]', '"VAF_distorted_expanded" : false', ].join(',\t').trim() } @@ -315,7 +315,7 @@ process { ext.filters = { [['"VAF" : "le ', "${params.germline_threshold}", '"'].join('').trim(), ['"vd_VAF" : "le ', "${params.germline_threshold}", '"'].join('').trim(), ['"DEPTH" : "ge ', "${params.mutation_depth_threshold}", '"'].join('').trim(), - '"FILTER" : ["notcontains NM20", "notcontains n_rich", "notcontains cohort_n_rich", "notcontains cohort_n_rich_uni", "notcontains no_pileup_support", "notcontains other_sample_SNP", "notcontains low_mappability" ]', + '"FILTER" : ["notcontains NM20", "notcontains n_rich", "notcontains cohort_n_rich", "notcontains cohort_n_rich_uni", "notcontains no_pileup_support", "notcontains low_mappability" ]', '"VAF_distorted_expanded" : false', ].join(',\t').trim() }