Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 17 additions & 10 deletions bin/compute_mutrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ def mutrate_sample(maf_df, depths_df, depths_adj_df, sample_name, type_list = Fa
# mutation rate metrics
sample_features = {"N_MUTS" : n_muts,
"N_MUTATED" : n_mutated_reads,
"DEPTH" : depths_df[f"{sample_name}"].sum(),
"DEPTH_ADJUSTED": depths_adj_df[f"{sample_name}"].sum()
"DEPTH" : depths_df.drop_duplicates(subset = ["CHROM", "POS"])[f"{sample_name}"].sum(),
"DEPTH_ADJUSTED": depths_adj_df[f"{sample_name}"].sum() # they should be the same for all impacts not for subsets of impacts
}
sample_features["MUTRATE_MB"] = ( sample_features["N_MUTS"] / sample_features["DEPTH"] * 1000000 ).astype(float)
sample_features["MUTRATE_MB_ADJUSTED"] = ( sample_features["N_MUTS"] / sample_features["DEPTH_ADJUSTED"] * 1000000 ).astype(float)
Expand Down Expand Up @@ -116,22 +116,26 @@ def compute_mutrate(maf_path, depths_path, annot_panel_path, sample_name, panel_
annot_panel_df = pd.read_csv(annot_panel_path, sep = "\t", na_values = custom_na_values)

# Subset depths with panel
## mode 1: each position counts one
## mode 1: each position counts once per gene (note that the same position may appear in more than one gene)
depths_subset_df = depths_df.merge(annot_panel_df[["CHROM", "POS", "GENE"]].drop_duplicates(),
on = ["CHROM", "POS"], how = "inner")
## mode 2 (adjusted): each position counts as many times as it contributes to the panel
depths_df[sample_name.split('.')[0]] = depths_df[sample_name.split('.')[0]] / 3 # the depth per position can contribute to three different mutations
depths_subset_adj_df = depths_df.merge(annot_panel_df[["CHROM", "POS", "GENE"]], on = ["CHROM", "POS"], how = "inner")

## mode 3 (adjusted): each position counts as many times as it contributes to the panel, but ONLY ONCE PER SAMPLE
depths_subset_adj_sample_df = depths_df.merge(annot_panel_df.drop_duplicates(subset = ["CHROM", "POS", "REF", "ALT"])[["CHROM", "POS"]],
on = ["CHROM", "POS"], how = "inner")

del depths_df
del annot_panel_df

# Compute mutation rates
## sample mutation rate
mutrate_sample_allmuts_df = mutrate_sample(maf_df, depths_subset_df, depths_subset_adj_df, sample_name.split('.')[0])
mutrate_sample_snvs_df = mutrate_sample(maf_df, depths_subset_df, depths_subset_adj_df, sample_name.split('.')[0], ["SNV"])
mutrate_sample_nonsnvs_df = mutrate_sample(maf_df, depths_subset_df, depths_subset_adj_df, sample_name.split('.')[0], ["INSERTION", "DELETION", "COMPLEX", "MNV"])
mutrate_sample_indels_df = mutrate_sample(maf_df, depths_subset_df, depths_subset_adj_df, sample_name.split('.')[0], ["INSERTION", "DELETION"])
mutrate_sample_allmuts_df = mutrate_sample(maf_df, depths_subset_df, depths_subset_adj_sample_df, sample_name.split('.')[0])
mutrate_sample_snvs_df = mutrate_sample(maf_df, depths_subset_df, depths_subset_adj_sample_df, sample_name.split('.')[0], ["SNV"])
mutrate_sample_nonsnvs_df = mutrate_sample(maf_df, depths_subset_df, depths_subset_adj_sample_df, sample_name.split('.')[0], ["INSERTION", "DELETION", "COMPLEX", "MNV"])
mutrate_sample_indels_df = mutrate_sample(maf_df, depths_subset_df, depths_subset_adj_sample_df, sample_name.split('.')[0], ["INSERTION", "DELETION"])
## per gene mutation rate
mutrate_genes_allmuts_df = mutrate_gene(maf_df, depths_subset_df, depths_subset_adj_df, sample_name.split('.')[0])
mutrate_genes_snvs_df = mutrate_gene(maf_df, depths_subset_df, depths_subset_adj_df, sample_name.split('.')[0], ["SNV"])
Expand All @@ -145,9 +149,12 @@ def compute_mutrate(maf_path, depths_path, annot_panel_path, sample_name, panel_

# Save
mutrate_df[["SAMPLE_ID", "GENE", "REGIONS", "MUTTYPES",
"N_MUTS", "N_MUTATED", "DEPTH",
"MUTRATE_MB", "MUTRATE_MB_ADJUSTED", "MUTRATE_KB", "MUTREADSRATE_KB_ADJUSTED",
"MUTREADSRATE_MB", "MUTREADSRATE_MB_ADJUSTED", "MUTREADSRATE_KB", "MUTREADSRATE_KB_ADJUSTED"]].to_csv(f"{sample_name.split('.')[0]}.{panel_v}.mutrates.tsv",
"DEPTH",
"N_MUTS", "N_MUTATED",
"MUTRATE_MB", "MUTRATE_MB_ADJUSTED",
"MUTRATE_KB", "MUTRATE_KB_ADJUSTED",
"MUTREADSRATE_MB", "MUTREADSRATE_MB_ADJUSTED",
"MUTREADSRATE_KB", "MUTREADSRATE_KB_ADJUSTED"]].to_csv(f"{sample_name.split('.')[0]}.{panel_v}.mutrates.tsv",
sep = "\t",
header = True,
index = False
Expand Down
26 changes: 13 additions & 13 deletions conf/mice.config
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@ params {
config_profile_description = 'Running deepCSA for mice data'

// Input data
input = '/workspace/nobackup2/prominent/ucsf_mice/deepUMIcaller/deepCSA_input.med.double_bam.csv'
input = '/data/bbg/nobackup2/prominent/ucsf_mice/deepUMIcaller/deepCSA_input.med.csv'

// Fasta references
fasta = '/workspace/datasets/genomes/mm39/GCA_000001635.9_genome/GCA_000001635.9_GRCm39_genomic.fna'
fasta = '/data/bbg/datasets/genomes/mm39/GCA_000001635.9_genome/GCA_000001635.9_GRCm39_genomic.fna'

features_table = "/workspace/nobackup2/prominent/ucsf_mice/2024-08-07_clinical_features_summary.tsv"
features_table = "/data/bbg/nobackup2/prominent/ucsf_mice/2024-10-13_clinical_features_summary.tsv"
features_table_separator = 'tab'
features_table_dict = ['"unique_identifier" : "sample"',
'"groups_of_interest" : [ ["DMBA"], ["TPA"], ["treatment"], ["sex"] ]'
'"groups_of_interest" : [ ["DMBA"], ["TPA"], ["treatment"], ["sex"], ["timepoint"], ["timepoint", "treatment"], ["timepoint", "TPA"], ["timepoint", "DMBA"] ]'
].join(',\t').trim()

use_custom_minimum_depth = 5
Expand All @@ -24,15 +24,15 @@ params {
vep_out_format = "tab"
vep_params = "--no_stats --cache --offline --symbol --protein --canonical"
vep_species = "mus_musculus"
vep_cache = "/workspace/datasets/vep/mus_musculus/111_GRCm39"
vep_cache = "/data/bbg/datasets/vep/mus_musculus/111_GRCm39"


// oncodrive3d
datasets3d = "/workspace/nobackup/scratch/oncodrive3d/datasets_mouse"
datasets3d = "/data/bbg/nobackup/scratch/oncodrive3d/datasets_mouse"
// annotations3d = "/workspace/nobackup/scratch/oncodrive3d/annotations_240506"

omega_hotspots = false
omega_hotspots_bedfile = "/workspace/datasets/transfer/ferriol_deepcsa/mouse_skin_panel.hotspots.bed4.bed"
omega_hotspots = true
omega_hotspots_bedfile = "/data/bbg/datasets/transfer/ferriol_deepcsa/mouse_skin_panel.hotspots.bed4.bed"
hotspot_expansion = 30


Expand All @@ -43,20 +43,20 @@ params {
// o3d_plot = false
// o3d_plot_chimerax = false

omega = false
omega = true
omega_globalloc = false
omega_vaf_distorsioned = false
omega_plot = false
omega_plot = true


signatures = false
mutationrate = false
signatures = true
mutationrate = true
mutated_epithelium = false
mutated_epithelium_vaf = false

indels = false

profileall = false
profileall = true
profilenonprot = false
profileexons = false
profileintrons = false
Expand Down
64 changes: 49 additions & 15 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,16 @@ process {
]
}

withName: 'BBG_DEEPCSA:CREATEPANELS:CREATESAMPLEPANELSSYNONYMOUS' {
publishDir = [
[
mode: params.publish_dir_mode,
path: { "${params.outdir}/createpanels/samplepanels/createsamplepanelssynonymous" },
pattern: "*{tsv,bed}"
]
]
}


withName: 'SUBSETDEPTHS' {
ext.prefix = { "${meta.id}.subset_depths" }
Expand All @@ -170,6 +180,30 @@ process {
enabled : false
]
}
// Settings for every process whose name matches the pattern 'DEPTHS.*CONS'.
// Both branches assign identical ext.* options; they differ only in publishing:
//   - params.store_depths is true : publishDir is not overridden in this block
//   - params.store_depths is false: publishDir is explicitly disabled
// NOTE(review): the ext.* values are duplicated in both branches; keep them in
// sync when editing either one.
if (params.store_depths){
withName: 'DEPTHS.*CONS' {
// Output name prefix built from the sample id carried in meta.id.
ext.prefix = { "${meta.id}.subset_depths" }
ext.args = ''
// NOTE(review): presumably column options (sequence / begin / end) passed to
// the underlying indexing tool -- confirm against the module that reads args2.
ext.args2 = '-s 1 -b 2 -e 2'
ext.args3 = '-h'
ext.extension = 'tsv'
ext.header = '1'
}
} else {
withName: 'DEPTHS.*CONS' {
// Same ext.* options as the branch above.
ext.prefix = { "${meta.id}.subset_depths" }
ext.args = ''
ext.args2 = '-s 1 -b 2 -e 2'
ext.args3 = '-h'
ext.extension = 'tsv'
ext.header = '1'
// Depth files are not published when params.store_depths is false.
publishDir = [
enabled : false
]
}

}


withName: 'SUBSETMUTATIONS' {
ext.prefix = { "${meta.id}.subset_mutations" }
Expand Down Expand Up @@ -328,7 +362,7 @@ process {
// }


withName: 'SUBSET_MUTPROFILE' {
withName: 'SUBSETMUTPROFILE' {
ext.filters = { ['"TYPE" : "SNV"'].join(',\t').trim()
}
ext.output_fmt = { ['"header": true',
Expand All @@ -343,7 +377,7 @@ process {
}

if (params.profilenonprot){
withName: '.*NONPROT:SUBSET_MUTPROFILE' {
withName: '.*NONPROT:SUBSETMUTPROFILE' {
ext.filters = { [ '"TYPE" : "SNV"',
'"Protein_affecting": "non_protein_affecting"'].join(',\t').trim()
}
Expand All @@ -360,7 +394,7 @@ process {
}


withName: 'SUBSET_MUTABILITY' {
withName: 'SUBSETMUTABILITY' {
ext.filters = { ['"TYPE" : "SNV"'].join(',\t').trim()
}
ext.output_fmt = { ['"header": true',
Expand Down Expand Up @@ -399,7 +433,7 @@ process {
}

if (params.mutationrate) {
withName: 'SUBSET_MUTRATE' {
withName: 'SUBSETMUTRATE' {
ext.filters = ''

ext.output_fmt = { ['"header": true',
Expand All @@ -413,7 +447,7 @@ process {
]
}

withName: 'BBG_DEEPCSA:MUTRATEPROT:SUBSET_MUTRATE' {
withName: 'BBG_DEEPCSA:MUTRATEPROT:SUBSETMUTRATE' {
ext.filters = { ['"Protein_affecting": "protein_affecting"'].join(',\t').trim()
}
ext.output_fmt = { ['"header": true',
Expand All @@ -427,7 +461,7 @@ process {
]
}

withName: '.*NONPROT:SUBSET_MUTRATE' {
withName: '.*NONPROT:SUBSETMUTRATE' {
ext.filters = { ['"Protein_affecting": "non_protein_affecting"'].join(',\t').trim()
}
ext.output_fmt = { ['"header": true',
Expand All @@ -441,7 +475,7 @@ process {
]
}

withName: '.*SYNONYMOUS:SUBSET_MUTRATE' {
withName: '.*SYNONYMOUS:SUBSETMUTRATE' {
ext.filters = { ['"canonical_Consequence_broader": "synonymous"'].join(',\t').trim()
}
ext.output_fmt = { ['"header": true',
Expand All @@ -459,7 +493,7 @@ process {


if (params.indels) {
withName: 'SUBSET_INDELS' {
withName: 'SUBSETINDELS' {
// ext.filters = ''
ext.filters = { ['"FILTER": "notcontains repetitive_variant"'].join(',\t').trim() }

Expand All @@ -478,7 +512,7 @@ process {


if (params.mutated_epithelium) {
withName: 'SUBSET_MUTEPI' {
withName: 'SUBSETMUTEPI' {
ext.filters = { ['"Protein_affecting": "protein_affecting"',
'"TYPE" : "SNV"',
'"VAF" : "gt 0"',
Expand All @@ -504,7 +538,7 @@ process {
}

if (params.pileup_all_duplex){
withName: 'SUBSET_MUTEPI' {
withName: 'SUBSETMUTEPI' {
ext.filters = { ['"Protein_affecting": "protein_affecting"',
'"TYPE" : "SNV"',
'"VAF" : "gt 0"',
Expand Down Expand Up @@ -532,7 +566,7 @@ process {
}

if (params.mutated_epithelium_vaf) {
withName: 'SUBSET_MUTEPIVAF' {
withName: 'SUBSETMUTEPIVAF' {
ext.filters = { ['"Protein_affecting": "protein_affecting"',
'"TYPE" : "SNV"',
'"VAF" : "gt 0"',
Expand All @@ -548,7 +582,7 @@ process {
]
}
if (params.all_duplex_counts){
withName: 'SUBSET_MUTEPIVAFAM' {
withName: 'SUBSETMUTEPIVAFAM' {
ext.filters = { ['"Protein_affecting": "protein_affecting"',
'"TYPE" : "SNV"',
'"VAF_AM" : "gt 0"',
Expand All @@ -569,7 +603,7 @@ process {
}

if (params.oncodrivefml) {
withName: 'SUBSET_ONCODRIVEFML' {
withName: 'SUBSETONCODRIVEFML' {
ext.filters = { "" }
ext.output_fmt = { ['"header": true',
'"columns": ["CHROM_ensembl", "POS_ensembl", "REF_ensembl", "ALT_ensembl", "SAMPLE_ID"]',
Expand All @@ -591,7 +625,7 @@ process {
}

if (params.oncodrive3d){
withName: 'SUBSET_ONCODRIVE3D' {
withName: 'SUBSETONCODRIVE3D' {
ext.filters = { ['"TYPE" : "SNV"'].join(',\t').trim()
// '"canonical_Consequence": "contains missense_variant"'
}
Expand Down Expand Up @@ -663,7 +697,7 @@ process {



withName: 'SUBSET_ONCODRIVECLUSTL' {
withName: 'SUBSETONCODRIVECLUSTL' {
ext.filters = { "" }

// ext.filters = { ['"TYPE" : "SNV"'].join(',\t').trim()
Expand Down
10 changes: 5 additions & 5 deletions conf/tools/omega.config
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

process {

withName: 'SUBSET_OMEGA' {
withName: 'SUBSETOMEGA' {
ext.filters = { ['"TYPE" : "SNV"'].join(',\t').trim()
}
ext.output_fmt = { ['"header": true',
Expand All @@ -27,7 +27,7 @@ process {
}


withName: '.*MULTI:SUBSET_OMEGA' {
withName: '.*MULTI:SUBSETOMEGA' {
ext.filters = { ['"TYPE" : "SNV"'].join(',\t').trim()
}
ext.output_fmt = { ['"header": true',
Expand Down Expand Up @@ -163,7 +163,7 @@ process {
]
]
}
withName: 'SUBSET_OMEGA_REDUCED' {
withName: 'SUBSETOMEGA_REDUCED' {
ext.filters = { ['"TYPE" : "SNV"',
'"VAF_distorted_reduced" : true',
].join(',\t').trim()
Expand Down Expand Up @@ -201,7 +201,7 @@ process {
]
]
}
withName: 'SUBSET_OMEGA_EXPANDED' {
withName: 'SUBSETOMEGA_EXPANDED' {
ext.filters = { ['"TYPE" : "SNV"',
'"VAF_distorted_expanded" : true',
].join(',\t').trim()
Expand Down Expand Up @@ -239,7 +239,7 @@ process {
]
]
}
withName: 'SUBSET_OMEGA_OK' {
withName: 'SUBSETOMEGA_OK' {
ext.filters = { ['"TYPE" : "SNV"',
'"VAF_distorted" : false',
].join(',\t').trim()
Expand Down
3 changes: 1 addition & 2 deletions modules/local/signatures/sigprofiler/assignment/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ process SIGPROFILERASSIGNMENT {
// sample_reconstruction_plots=False, verbose=False)"
"""
#python -c "from SigProfilerAssignment import Analyzer as Analyze; Analyze.cosmic_fit('${matrix}', 'output_${prefix}', input_type='matrix', context_type='96', signature_database='${reference_signatures}', genome_build='${assembly}', sample_reconstruction_plots= 'pdf', exclude_signature_subgroups= ${params.exclude_subgroups})"
python -c "from SigProfilerAssignment import Analyzer as Analyze; Analyze.cosmic_fit('${matrix}', 'output_${prefix}', input_type='matrix', context_type='96', genome_build='${assembly}', exclude_signature_subgroups=${params.exclude_subgroups})"
#python -c "from SigProfilerAssignment import Analyzer as Analyze; Analyze.cosmic_fit('${matrix}', 'output_${prefix}', input_type='matrix', context_type='96', genome_build='${assembly}', signature_database='${reference_signatures}', exclude_signature_subgroups=${params.exclude_subgroups})"
python -c "from SigProfilerAssignment import Analyzer as Analyze; Analyze.cosmic_fit('${matrix}', 'output_${prefix}', input_type='matrix', context_type='96', genome_build='${assembly}', signature_database='${reference_signatures}', exclude_signature_subgroups=${params.exclude_subgroups})"

mv output_${prefix}/Assignment_Solution/Activities/Decomposed_MutationType_Probabilities.txt output_${prefix}/Assignment_Solution/Activities/Decomposed_MutationType_Probabilities.${prefix}.txt;

Expand Down
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ params {
all_duplex_counts = false
pileup_all_duplex = false
plot_depths = false
store_depths = false


// depth and panel
Expand Down
9 changes: 4 additions & 5 deletions subworkflows/local/indels/main.nf
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
include { TABIX_BGZIPTABIX_QUERY as SUBSETDEPTHS } from '../../../modules/nf-core/tabix/bgziptabixquery/main'
include { TABIX_BGZIPTABIX_QUERY as SUBSETMUTATIONS } from '../../../modules/nf-core/tabix/bgziptabixquery/main'

include { SUBSET_MAF as SUBSET_INDELS } from '../../../modules/local/subsetmaf/main'
include { SUBSET_MAF as SUBSETINDELS } from '../../../modules/local/subsetmaf/main'

include { INDELS_COMPARISON as INDELS } from '../../../modules/local/indels/main'

Expand All @@ -18,10 +17,10 @@ workflow INDELS_SELECTION {
SUBSETMUTATIONS(mutations, bedfile)
ch_versions = ch_versions.mix(SUBSETMUTATIONS.out.versions)

SUBSET_INDELS(SUBSETMUTATIONS.out.subset)
ch_versions = ch_versions.mix(SUBSET_INDELS.out.versions)
SUBSETINDELS(SUBSETMUTATIONS.out.subset)
ch_versions = ch_versions.mix(SUBSETINDELS.out.versions)

INDELS(SUBSET_INDELS.out.mutations)
INDELS(SUBSETINDELS.out.mutations)
ch_versions = ch_versions.mix(INDELS.out.versions)

emit:
Expand Down
Loading