Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 24 additions & 9 deletions bin/mut_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def compute_mutation_matrix(sample_name, mutations_file, mutation_matrix, method
sep = "\t")


def compute_mutation_profile(sample_name, mutation_matrix_file, trinucleotide_counts_file, json_output, plot,
def compute_mutation_profile(sample_name, mutation_matrix_file, trinucleotide_counts_file, plot,
wgs = False, wgs_trinucleotide_counts = False, sigprofiler = False):
"""
Compute mutational profile from the input data
Expand All @@ -115,6 +115,14 @@ def compute_mutation_profile(sample_name, mutation_matrix_file, trinucleotide_co
mutation_matrix = mutation_matrix.set_index("CONTEXT_MUT")
total_mutations = np.sum(mutation_matrix[sample_name])

# proportion of SBS mutations per trinucleotide in panel
mutation_matrix[sample_name] = mutation_matrix[sample_name] / total_mutations
mutation_matrix.to_csv(f"{sample_name}.proportion_mutations.tsv",
header = True,
index = True,
sep = "\t")


# Load the trinucleotide counts
trinucleotide_counts = pd.read_csv(trinucleotide_counts_file, sep = "\t", header = 0)
trinucleotide_counts = trinucleotide_counts.set_index("CONTEXT")
Expand Down Expand Up @@ -149,7 +157,7 @@ def compute_mutation_profile(sample_name, mutation_matrix_file, trinucleotide_co
mut_probability.index.name = "CONTEXT_MUT"
mut_probability = mut_probability.reset_index()

mut_probability.to_csv(json_output,
mut_probability.to_csv(f"{sample_name}.profile.tsv",
header = True,
index = False,
sep = "\t")
Expand All @@ -163,7 +171,7 @@ def compute_mutation_profile(sample_name, mutation_matrix_file, trinucleotide_co
output_f = f'{sample_name}.profile.pdf')

# plot the profile as a percentage of SBS mutations seen in our sequenced panel
plot_profile(dict(zip(mutation_matrix.index, [x[0].item() for x in (mutation_matrix / mutation_matrix.sum()).values])),
plot_profile(dict(zip(mutation_matrix.index, mutation_matrix[sample_name])),
title=f'{sample_name} ({round(total_mutations)} muts)',
output_f = f'{sample_name}.profile.percentage.pdf')

Expand All @@ -185,22 +193,30 @@ def compute_mutation_profile(sample_name, mutation_matrix_file, trinucleotide_co
profile_trinuc_clean = profile_trinuc_clean.reindex(contexts_formatted)
profile_trinuc_clean.columns = [sample_name]

profile_trinuc_clean.to_csv(f"{json_output}.matrix.WGS",
profile_trinuc_clean.to_csv(f"{sample_name}.matrix.WGS.tsv",
header = True,
index = True,
sep = "\t")

profile_trinuc_clean_proportion = profile_trinuc_clean.copy()
profile_trinuc_clean_proportion[sample_name] = profile_trinuc_clean_proportion[sample_name] / profile_trinuc_clean_proportion[sample_name].sum()
profile_trinuc_clean_proportion.to_csv(f"{sample_name}.proportion_mutations.WGS.tsv",
header = True,
index = True,
sep = "\t")


# plot the profile as a percentage of SBS mutations seen after sequencing one WGS
# if mutations were occuring with the same probabilities as they occur in our sequenced panel
plot_profile(dict(zip(profile_trinuc_clean.index.values, (profile_trinuc_clean[sample_name] / profile_trinuc_clean[sample_name].sum()).values)),
plot_profile(dict(zip(profile_trinuc_clean_proportion.index.values, profile_trinuc_clean_proportion[sample_name].values)),
title=f'{sample_name} ({round(total_mutations)} muts)',
output_f = f'{sample_name}.profile.percentage_WGS.pdf')

if sigprofiler:
profile_trinuc_clean.index = contexts_formatted_sigprofiler
profile_trinuc_clean.index.name = "CONTEXT_MUT"
profile_trinuc_clean = profile_trinuc_clean.reset_index().sort_values(by = "CONTEXT_MUT")
profile_trinuc_clean.to_csv(f"{json_output}.matrix.WGS.sigprofiler",
profile_trinuc_clean.to_csv(f"{sample_name}.matrix.WGS.sigprofiler.tsv",
header = True,
index = False,
sep = "\t")
Expand All @@ -220,7 +236,6 @@ def compute_mutation_profile(sample_name, mutation_matrix_file, trinucleotide_co

@click.option('--mutation_matrix', type=click.Path(exists=True), help='Mutation matrix file (for profile mode)')
@click.option('--trinucleotide_counts', type=click.Path(exists=True), help='Trinucleotide counts file (for profile mode)')
@click.option('--out_profile', type=click.Path(), help='JSON output file (for profile mode)')
@click.option('--plot', is_flag=True, help='Generate plot and save as PDF')
@click.option('--wgs', is_flag=True, help='Store matrix of mutation counts at WGS level')
@click.option('--wgs_trinucleotide_counts', type=click.Path(exists=True), help='Trinucleotide counts file of the WGS (for profile mode if WGS active)')
Expand All @@ -229,7 +244,7 @@ def compute_mutation_profile(sample_name, mutation_matrix_file, trinucleotide_co
@click.option('--sigprofiler', is_flag=True, help='Store the index column using the SigProfiler format')

def main(mode, sample_name, mut_file, out_matrix, method, pseud, sigprofiler, per_sample, mutation_matrix,
trinucleotide_counts, out_profile, plot, wgs, wgs_trinucleotide_counts):
trinucleotide_counts, plot, wgs, wgs_trinucleotide_counts):

if mode == 'matrix':
click.echo(f"Running in matrix mode...")
Expand All @@ -239,7 +254,7 @@ def main(mode, sample_name, mut_file, out_matrix, method, pseud, sigprofiler, pe

elif mode == 'profile':
click.echo(f"Running in profile mode...")
compute_mutation_profile(sample_name, mutation_matrix, trinucleotide_counts, out_profile, plot, wgs, wgs_trinucleotide_counts, sigprofiler)
compute_mutation_profile(sample_name, mutation_matrix, trinucleotide_counts, plot, wgs, wgs_trinucleotide_counts, sigprofiler)
click.echo("Profile computation completed.")

else:
Expand Down
16 changes: 9 additions & 7 deletions modules/local/compute_profile/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,26 +11,28 @@ process COMPUTE_PROFILE {
path( wgs_trinucleotides )

output:
tuple val(meta), path("*.profile.tsv") , emit: profile
tuple val(meta), path("*.pdf") , optional:true , emit: plots
tuple val(meta), path("*.matrix.WGS") , optional:true , emit: wgs
tuple val(meta), path("*.matrix.WGS.sigprofiler") , optional:true , emit: wgs_sigprofiler
path "versions.yml" , topic: versions
tuple val(meta), path("*.profile.tsv") , emit: profile
tuple val(meta), path("*.proportion_mutations.tsv") , optional:true , emit: panel_proportions
tuple val(meta), path("*.proportion_mutations.WGS.tsv") , optional:true , emit: wgs_proportions
tuple val(meta), path("*.matrix.WGS.tsv") , optional:true , emit: wgs
tuple val(meta), path("*.matrix.WGS.sigprofiler.tsv") , optional:true , emit: wgs_sigprofiler

tuple val(meta), path("*.pdf") , optional:true , emit: plots

path "versions.yml" , topic: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ""
def prefix = task.ext.prefix ?: "${meta.id}"
def filters = task.ext.filters ?: ""
def wgs_trinuc = wgs_trinucleotides ? "--wgs --wgs_trinucleotide_counts ${wgs_trinucleotides}" : ""
"""
mut_profile.py profile \\
--sample_name ${prefix} \\
--mutation_matrix ${matrix} \\
--trinucleotide_counts ${trinucleotide} \\
--out_profile ${prefix}.profile.tsv \\
${wgs_trinuc} \\
${args}
cat <<-END_VERSIONS > versions.yml
Expand Down