Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions GATK/4.2.1.0/GenotypeGvcfs.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
 * Runs `gatk GenotypeGVCFs` on the supplied gVCF input, restricted to one interval file.
 *
 * Input : tuple of analysis_id, gVCF file(s), matching index file(s), interval file.
 * Output: tuple of analysis_id, genotyped VCF, VCF index (emitted as `vcf_file`).
 * Params: params.genome (reference), params.compress (force .vcf.gz output),
 *         params.optional (extra command line arguments, inserted verbatim).
 */
process GenotypeGVCFs {
    tag {"GATK GenotypeGVCFs ${analysis_id} - ${interval_file.baseName}"}
    label 'GATK_4_2_1_0'
    label 'GATK_4_2_1_0_GenotypeGVCFs'
    container = 'broadinstitute/gatk:4.2.1.0'
    shell = ['/bin/bash', '-euo', 'pipefail']

    input:
        tuple(val(analysis_id), path(gvcf_files), path(gvcf_idx_files), path(interval_file))

    output:
        tuple(
            val(analysis_id),
            path("${analysis_id}_${interval_file.baseName}${ext_vcf}"),
            path("${analysis_id}_${interval_file.baseName}${ext_vcf}${ext_vcf_index}"),
            emit: vcf_file
        )

    script:
        // Work on file names so the same code handles a single staged file and a list of files.
        def gvcf_names = gvcf_files.collect{"$it"}
        // NOTE(review): every file is passed as its own --variant argument; confirm that
        // GenotypeGVCFs accepts more than one --variant before feeding several gVCFs.
        def input_files = gvcf_names.join(" --variant ")
        // Write compressed output when requested, or when the input is already gzipped.
        // The previous check compared getExtension() with ".gz"; getExtension() returns the
        // extension without the leading dot, so that comparison could never succeed.
        def compressed = params.compress || gvcf_names.any { it.endsWith(".gz") }
        // ext_vcf / ext_vcf_index are intentionally not declared with `def`: the output
        // block above needs to see them.
        ext_vcf = compressed ? ".vcf.gz" : ".vcf"
        ext_vcf_index = compressed ? ".tbi" : ".idx"
        // Java heap is the task memory (GB) minus 4, so task.memory must be larger than 4 GB.
        """
        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" GenotypeGVCFs \
        --reference ${params.genome} \
        --variant $input_files \
        --output ${analysis_id}_${interval_file.baseName}${ext_vcf} \
        --intervals ${interval_file} \
        ${params.optional}
        """
}

/*
 * Per-sample variant of the genotyping step: runs `gatk GenotypeGVCFs` on the gVCF
 * input of one sample, restricted to one interval file.
 *
 * Input : tuple of sample_id, gVCF file(s), matching index file(s), interval file.
 * Output: tuple of sample_id, genotyped VCF, VCF index (emitted as `vcf_file`).
 * Params: params.genome (reference), params.compress (force .vcf.gz output),
 *         params.optional (extra command line arguments, inserted verbatim).
 */
process GenotypeGVCF {
    tag {"GATK GenotypeGVCF ${sample_id} - ${interval_file.baseName}"}
    label 'GATK_4_2_1_0'
    label 'GATK_4_2_1_0_GenotypeGVCF'
    container = 'broadinstitute/gatk:4.2.1.0'
    shell = ['/bin/bash', '-euo', 'pipefail']

    input:
        tuple(val(sample_id), path(gvcf_files), path(gvcf_idx_files), path(interval_file))

    output:
        tuple(
            val(sample_id),
            path("${sample_id}_${interval_file.baseName}${ext_vcf}"),
            path("${sample_id}_${interval_file.baseName}${ext_vcf}${ext_vcf_index}"),
            emit: vcf_file
        )

    script:
        // Work on file names so the same code handles a single staged file and a list of files.
        def gvcf_names = gvcf_files.collect{"$it"}
        // NOTE(review): every file is passed as its own --variant argument; confirm that
        // GenotypeGVCFs accepts more than one --variant before feeding several gVCFs.
        def input_files = gvcf_names.join(" --variant ")
        // Write compressed output when requested, or when the input is already gzipped.
        // The previous check compared getExtension() with ".gz"; getExtension() returns the
        // extension without the leading dot, so that comparison could never succeed.
        def compressed = params.compress || gvcf_names.any { it.endsWith(".gz") }
        // ext_vcf / ext_vcf_index are intentionally not declared with `def`: the output
        // block above needs to see them.
        ext_vcf = compressed ? ".vcf.gz" : ".vcf"
        ext_vcf_index = compressed ? ".tbi" : ".idx"
        // Java heap is the task memory (GB) minus 4, so task.memory must be larger than 4 GB.
        """
        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" GenotypeGVCFs \
        --reference ${params.genome} \
        --variant $input_files \
        --output ${sample_id}_${interval_file.baseName}${ext_vcf} \
        --intervals ${interval_file} \
        ${params.optional}
        """
}
71 changes: 71 additions & 0 deletions GATK/4.2.1.0/HaplotypeCaller.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
 * Runs `gatk HaplotypeCaller` on one or more BAM files, restricted to one interval file.
 *
 * Input : tuple of analysis_id, BAM file(s), BAM index file(s), interval file.
 * Output: tuple of analysis_id, VCF, VCF index (emitted as `vcf_file`).
 * Params: params.genome (reference), params.compress (write .vcf.gz instead of .vcf),
 *         params.optional (extra command line arguments, inserted verbatim).
 */
process HaplotypeCaller {
    tag {"GATK HaplotypeCaller ${analysis_id} - ${interval_file.baseName}"}
    label 'GATK_4_2_1_0'
    label 'GATK_4_2_1_0_HaplotypeCaller'
    container = 'broadinstitute/gatk:4.2.1.0'
    shell = ['/bin/bash', '-euo', 'pipefail']

    input:
        tuple(val(analysis_id), path(bam_files), path(bai_files), path(interval_file))

    output:
        tuple(
            val(analysis_id),
            path("${analysis_id}.${interval_file.baseName}${ext_vcf}"),
            path("${analysis_id}.${interval_file.baseName}${ext_vcf}${ext_vcf_index}"),
            emit: vcf_file
        )

    script:
        // Each BAM becomes its own --input argument.
        def input_files = bam_files.collect{"$it"}.join(" --input ")
        // Both extensions must switch together. The earlier braceless `if` guarded only the
        // first assignment, so the index extension was ".tbi" even for uncompressed output.
        // ext_vcf / ext_vcf_index are intentionally not declared with `def`: the output
        // block above needs to see them.
        ext_vcf = params.compress ? ".vcf.gz" : ".vcf"
        ext_vcf_index = params.compress ? ".tbi" : ".idx"
        // Java heap is the task memory (GB) minus 4, so task.memory must be larger than 4 GB.
        """
        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" HaplotypeCaller \
        --reference ${params.genome} \
        --input ${input_files} \
        --intervals ${interval_file} \
        --output ${analysis_id}.${interval_file.baseName}${ext_vcf} \
        ${params.optional}
        """
}

/*
 * Runs `gatk HaplotypeCaller` with --emit-ref-confidence on a single BAM file,
 * restricted to one interval file, producing a gVCF.
 *
 * Input : tuple of sample_id, BAM file, BAM index file, interval file.
 * Output: tuple of sample_id, gVCF, gVCF index, interval file (emitted as `vcf_file`).
 * Params: params.genome (reference), params.compress (write .g.vcf.gz instead of .g.vcf),
 *         params.emit_ref_confidence, params.optional (extra arguments, inserted verbatim).
 */
process HaplotypeCallerGVCF {
    tag {"GATK HaplotypeCallerGVCF ${sample_id} - ${interval_file.baseName}"}
    label 'GATK_4_2_1_0'
    label 'GATK_4_2_1_0_HaplotypeCallerGVCF'
    container = 'broadinstitute/gatk:4.2.1.0'
    shell = ['/bin/bash', '-euo', 'pipefail']
    // Value passed to --emit-ref-confidence in the command below.
    params.emit_ref_confidence = 'GVCF'

    input:
        tuple(val(sample_id), path(bam_file), path(bai_file), path(interval_file))

    output:
        tuple(
            val(sample_id),
            path("${sample_id}_${interval_file.baseName}${ext_gvcf}"),
            path("${sample_id}_${interval_file.baseName}${ext_gvcf}${ext_gvcf_index}"),
            path(interval_file),
            emit: vcf_file
        )

    script:
        // Both extensions must switch together. The earlier braceless `if` guarded only the
        // first assignment, so the index extension was ".tbi" even for uncompressed output.
        // ext_gvcf / ext_gvcf_index are intentionally not declared with `def`: the output
        // block above needs to see them.
        ext_gvcf = params.compress ? ".g.vcf.gz" : ".g.vcf"
        ext_gvcf_index = params.compress ? ".tbi" : ".idx"
        // Java heap is the task memory (GB) minus 4, so task.memory must be larger than 4 GB.
        """
        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" HaplotypeCaller \
        --reference ${params.genome} \
        --input ${bam_file} \
        --intervals ${interval_file} \
        --output ${sample_id}_${interval_file.baseName}${ext_gvcf} \
        --emit-ref-confidence ${params.emit_ref_confidence} \
        ${params.optional}
        """
}
50 changes: 50 additions & 0 deletions GATK/4.2.1.0/MergeVcfs.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
 * Merges several VCF files into one with `gatk MergeVcfs`.
 *
 * Input : tuple of output_name, VCF file(s), VCF index file(s).
 * Output: tuple of output_name, merged VCF, merged VCF index (emitted as `vcf_file`).
 * Params: params.compress (write .vcf.gz instead of .vcf).
 */
process MergeVcfs {
    tag {"GATK MergeVcfs ${output_name}"}
    label 'GATK_4_2_1_0'
    label 'GATK_4_2_1_0_MergeVcfs'
    container = 'broadinstitute/gatk:4.2.1.0'
    shell = ['/bin/bash', '-euo', 'pipefail']

    input:
        tuple(val(output_name), path(vcf_files), path(vcf_idx_files))

    output:
        tuple(
            val(output_name),
            path("${output_name}${ext_vcf}"),
            path("${output_name}${ext_vcf}${ext_vcf_index}"),
            emit: vcf_file
        )

    script:
        // Each VCF becomes its own --INPUT argument.
        def input_files = vcf_files.collect{"$it"}.join(" --INPUT ")
        // Both extensions must switch together. The earlier braceless `if` guarded only the
        // first assignment, so the index extension was ".tbi" even for uncompressed output.
        // ext_vcf / ext_vcf_index are intentionally not declared with `def`: the output
        // block above needs to see them.
        ext_vcf = params.compress ? ".vcf.gz" : ".vcf"
        ext_vcf_index = params.compress ? ".tbi" : ".idx"
        // Java heap is the task memory (GB) minus 4, so task.memory must be larger than 4 GB.
        """
        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${input_files} --OUTPUT ${output_name}${ext_vcf}
        """
}


/*
 * Merges several gVCF files into one with `gatk MergeVcfs`.
 *
 * Input : tuple of output_name, gVCF file(s), gVCF index file(s).
 * Output: tuple of output_name, merged gVCF, merged gVCF index (emitted as `vcf_file`).
 * Params: params.compress (write .g.vcf.gz instead of .g.vcf).
 */
process MergeGvcfs {
    tag {"GATK MergeGvcfs ${output_name}"}
    label 'GATK_4_2_1_0'
    label 'GATK_4_2_1_0_MergeGvcfs'
    container = 'broadinstitute/gatk:4.2.1.0'
    shell = ['/bin/bash', '-euo', 'pipefail']

    input:
        tuple(val(output_name), path(vcf_files), path(vcf_idx_files))

    output:
        tuple(
            val(output_name),
            path("${output_name}${ext_gvcf}"),
            path("${output_name}${ext_gvcf}${ext_gvcf_index}"),
            emit: vcf_file
        )

    script:
        // Each gVCF becomes its own --INPUT argument.
        def input_files = vcf_files.collect{"$it"}.join(" --INPUT ")
        // Both extensions must switch together. The earlier braceless `if` guarded only the
        // first assignment, so the index extension was ".tbi" even for uncompressed output.
        // ext_gvcf / ext_gvcf_index are intentionally not declared with `def`: the output
        // block above needs to see them.
        ext_gvcf = params.compress ? ".g.vcf.gz" : ".g.vcf"
        ext_gvcf_index = params.compress ? ".tbi" : ".idx"
        // Java heap is the task memory (GB) minus 4, so task.memory must be larger than 4 GB.
        """
        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${input_files} --OUTPUT ${output_name}${ext_gvcf}
        """
}
30 changes: 30 additions & 0 deletions GATK/4.2.1.0/SelectVariants.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
 * Extracts a single sample from a VCF with `gatk SelectVariants --sample-name`.
 *
 * Input : tuple of analysis_id, VCF file, VCF index file, sample_id.
 * Output: tuple of sample_id, per-sample VCF, per-sample VCF index (emitted as `vcf_file`).
 * Params: params.genome (reference), params.compress (force .vcf.gz output),
 *         params.optional (extra command line arguments, inserted verbatim).
 */
process SelectVariantsSample {
    tag {"GATK SelectVariantsSample ${analysis_id} - ${sample_id}"}
    label 'GATK_4_2_1_0'
    label 'GATK_4_2_1_0_SelectVariantsSample'
    container = 'broadinstitute/gatk:4.2.1.0'
    shell = ['/bin/bash', '-euo', 'pipefail']

    input:
        tuple(val(analysis_id), path(vcf_file), path(vcf_idx_file), val(sample_id))

    output:
        tuple(
            val(sample_id),
            path("${sample_id}_${vcf_file.baseName}${ext_vcf}"),
            path("${sample_id}_${vcf_file.baseName}${ext_vcf}${ext_vcf_index}"),
            emit: vcf_file
        )

    script:
        // Write compressed output when requested, or when the input is already gzipped.
        // getExtension() returns the extension without the leading dot, so it has to be
        // compared with "gz"; the former comparison with ".gz" could never succeed.
        def compressed = params.compress || vcf_file.getExtension() == "gz"
        // ext_vcf / ext_vcf_index are intentionally not declared with `def`: the output
        // block above needs to see them.
        // NOTE(review): baseName only drops the last extension, so a "x.vcf.gz" input
        // presumably yields "<sample>_x.vcf.vcf.gz" - confirm whether that name is wanted.
        ext_vcf = compressed ? ".vcf.gz" : ".vcf"
        ext_vcf_index = compressed ? ".tbi" : ".idx"
        // Java heap is the task memory (GB) minus 4, so task.memory must be larger than 4 GB.
        """
        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants \
        --reference ${params.genome} \
        --variant ${vcf_file} \
        --output ${sample_id}_${vcf_file.baseName}${ext_vcf} \
        --sample-name ${sample_id} \
        ${params.optional}
        """
}
26 changes: 26 additions & 0 deletions GATK/4.2.1.0/VariantFiltration.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
 * Filters SNPs and INDELs of one VCF with separate filter settings:
 *   1. split the input into a non-INDEL ("snp") and an INDEL VCF with SelectVariants,
 *   2. run VariantFiltration on each part,
 *   3. merge both filtered parts again with MergeVcfs.
 *
 * Input : tuple of analysis_id, VCF file, VCF index file.
 * Output: tuple of analysis_id, filtered VCF, filtered VCF index (emitted as `vcf_file`).
 * Params: params.genome (reference), params.compress (force .vcf.gz output),
 *         params.snp_filter and params.snp_cluster (arguments for the SNP filter run),
 *         params.indel_filter (arguments for the INDEL filter run).
 */
process VariantFiltrationSnpIndel {
    tag {"GATK VariantFiltrationSnpIndel ${analysis_id}"}
    label 'GATK_4_2_1_0'
    label 'GATK_4_2_1_0_VariantFiltrationSnpIndel'
    container = 'broadinstitute/gatk:4.2.1.0'
    shell = ['/bin/bash', '-euo', 'pipefail']

    input:
        tuple(val(analysis_id), path(vcf_file), path(vcf_idx_file))

    output:
        tuple(
            val(analysis_id),
            path("${vcf_file.baseName}.filter${ext_vcf}"),
            path("${vcf_file.baseName}.filter${ext_vcf}${ext_vcf_index}"),
            emit: vcf_file
        )

    script:
        // Write compressed output when requested, or when the input is already gzipped.
        // getExtension() returns the extension without the leading dot, so it has to be
        // compared with "gz"; the former comparison with ".gz" could never succeed.
        def compressed = params.compress || vcf_file.getExtension() == "gz"
        // ext_vcf / ext_vcf_index are intentionally not declared with `def`: the output
        // block above needs to see them.
        ext_vcf = compressed ? ".vcf.gz" : ".vcf"
        ext_vcf_index = compressed ? ".tbi" : ".idx"
        // Java heap is the task memory (GB) minus 4, so task.memory must be larger than 4 GB.
        """
        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants --reference ${params.genome} --variant $vcf_file --output ${vcf_file.baseName}.snp${ext_vcf} --select-type-to-exclude INDEL
        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants --reference ${params.genome} --variant $vcf_file --output ${vcf_file.baseName}.indel${ext_vcf} --select-type-to-include INDEL

        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" VariantFiltration --reference ${params.genome} --variant ${vcf_file.baseName}.snp${ext_vcf} --output ${vcf_file.baseName}.snp_filter${ext_vcf} ${params.snp_filter} ${params.snp_cluster}
        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" VariantFiltration --reference ${params.genome} --variant ${vcf_file.baseName}.indel${ext_vcf} --output ${vcf_file.baseName}.indel_filter${ext_vcf} ${params.indel_filter}

        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${vcf_file.baseName}.snp_filter${ext_vcf} --INPUT ${vcf_file.baseName}.indel_filter${ext_vcf} --OUTPUT ${vcf_file.baseName}.filter${ext_vcf}
        """
}