diff --git a/GATK/4.2.1.0/GenotypeGvcfs.nf b/GATK/4.2.1.0/GenotypeGvcfs.nf
new file mode 100644
index 00000000..5dfeba7e
--- /dev/null
+++ b/GATK/4.2.1.0/GenotypeGvcfs.nf
@@ -0,0 +1,71 @@
+// Runs GATK GenotypeGVCFs on the staged GVCF file(s) for one interval file; output is keyed by analysis_id.
+process GenotypeGVCFs {
+    tag {"GATK GenotypeGVCFs ${analysis_id} - ${interval_file.baseName}"}
+    label 'GATK_4_2_1_0'
+    label 'GATK_4_2_1_0_GenotypeGVCFs'
+    container = 'broadinstitute/gatk:4.2.1.0'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        tuple(val(analysis_id), path(gvcf_files), path(gvcf_idx_files), path(interval_file))
+
+    output:
+        tuple(
+            val(analysis_id),
+            path("${analysis_id}_${interval_file.baseName}${ext_vcf}"),
+            path("${analysis_id}_${interval_file.baseName}${ext_vcf}${ext_vcf_index}"),
+            emit: vcf_file
+        )
+
+    script:
+        def input_files = gvcf_files.collect{"$it"}.join(" --variant ")
+        // Check file names instead of getExtension(): it returns "gz" without a dot, and gvcf_files can hold several files.
+        def compressed = params.compress || gvcf_files.collect{ it.toString() }.any{ it.endsWith(".gz") }
+        // Assigned without 'def' so the output: block can interpolate them.
+        ext_vcf = compressed ? ".vcf.gz" : ".vcf"
+        ext_vcf_index = compressed ? ".tbi" : ".idx"
+        """
+        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" GenotypeGVCFs \
+        --reference ${params.genome} \
+        --variant $input_files \
+        --output ${analysis_id}_${interval_file.baseName}${ext_vcf} \
+        --intervals ${interval_file} \
+        ${params.optional}
+        """
+}
+
+// Same GATK GenotypeGVCFs call as GenotypeGVCFs, with input and output keyed by sample_id.
+process GenotypeGVCF {
+    tag {"GATK GenotypeGVCF ${sample_id} - ${interval_file.baseName}"}
+    label 'GATK_4_2_1_0'
+    label 'GATK_4_2_1_0_GenotypeGVCF'
+    container = 'broadinstitute/gatk:4.2.1.0'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        tuple(val(sample_id), path(gvcf_files), path(gvcf_idx_files), path(interval_file))
+
+    output:
+        tuple(
+            val(sample_id),
+            path("${sample_id}_${interval_file.baseName}${ext_vcf}"),
+            path("${sample_id}_${interval_file.baseName}${ext_vcf}${ext_vcf_index}"),
+            emit: vcf_file
+        )
+
+    script:
+        def input_files = gvcf_files.collect{"$it"}.join(" --variant ")
+        // Check file names instead of getExtension(): it returns "gz" without a dot, and gvcf_files can hold several files.
+        def compressed = params.compress || gvcf_files.collect{ it.toString() }.any{ it.endsWith(".gz") }
+        // Assigned without 'def' so the output: block can interpolate them.
+        ext_vcf = compressed ? ".vcf.gz" : ".vcf"
+        ext_vcf_index = compressed ? ".tbi" : ".idx"
+        """
+        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" GenotypeGVCFs \
+        --reference ${params.genome} \
+        --variant $input_files \
+        --output ${sample_id}_${interval_file.baseName}${ext_vcf} \
+        --intervals ${interval_file} \
+        ${params.optional}
+        """
+}
diff --git a/GATK/4.2.1.0/HaplotypeCaller.nf b/GATK/4.2.1.0/HaplotypeCaller.nf
new file mode 100644
index 00000000..0cad1bec
--- /dev/null
+++ b/GATK/4.2.1.0/HaplotypeCaller.nf
@@ -0,0 +1,70 @@
+// Runs GATK HaplotypeCaller on the staged BAM file(s) for one interval file and emits a VCF plus its index.
+process HaplotypeCaller {
+    tag {"GATK HaplotypeCaller ${analysis_id} - ${interval_file.baseName}"}
+    label 'GATK_4_2_1_0'
+    label 'GATK_4_2_1_0_HaplotypeCaller'
+    container = 'broadinstitute/gatk:4.2.1.0'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        tuple(val(analysis_id), path(bam_files), path(bai_files), path(interval_file))
+
+    output:
+        tuple(
+            val(analysis_id),
+            path("${analysis_id}.${interval_file.baseName}${ext_vcf}"),
+            path("${analysis_id}.${interval_file.baseName}${ext_vcf}${ext_vcf_index}"),
+            emit: vcf_file
+        )
+
+    script:
+        def input_files = bam_files.collect{"$it"}.join(" --input ")
+        // Both extensions follow params.compress; no 'def' so the output: block can interpolate them.
+        ext_vcf = params.compress ? ".vcf.gz" : ".vcf"
+        ext_vcf_index = params.compress ? ".tbi" : ".idx"
+        """
+        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" HaplotypeCaller \
+        --reference ${params.genome} \
+        --input ${input_files} \
+        --intervals ${interval_file} \
+        --output ${analysis_id}.${interval_file.baseName}${ext_vcf} \
+        ${params.optional}
+        """
+}
+
+// Runs GATK HaplotypeCaller with --emit-ref-confidence on one BAM and interval file; the interval file is passed through in the output.
+process HaplotypeCallerGVCF {
+    tag {"GATK HaplotypeCallerGVCF ${sample_id} - ${interval_file.baseName}"}
+    label 'GATK_4_2_1_0'
+    label 'GATK_4_2_1_0_HaplotypeCallerGVCF'
+    container = 'broadinstitute/gatk:4.2.1.0'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+    // Value handed to --emit-ref-confidence in the command below.
+    params.emit_ref_confidence = 'GVCF'
+
+    input:
+        tuple(val(sample_id), path(bam_file), path(bai_file), path(interval_file))
+
+    output:
+        tuple(
+            val(sample_id),
+            path("${sample_id}_${interval_file.baseName}${ext_gvcf}"),
+            path("${sample_id}_${interval_file.baseName}${ext_gvcf}${ext_gvcf_index}"),
+            path(interval_file),
+            emit: vcf_file
+        )
+
+    script:
+        // Both extensions follow params.compress; no 'def' so the output: block can interpolate them.
+        ext_gvcf = params.compress ? ".g.vcf.gz" : ".g.vcf"
+        ext_gvcf_index = params.compress ? ".tbi" : ".idx"
+        """
+        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" HaplotypeCaller \
+        --reference ${params.genome} \
+        --input ${bam_file} \
+        --intervals ${interval_file} \
+        --output ${sample_id}_${interval_file.baseName}${ext_gvcf} \
+        --emit-ref-confidence ${params.emit_ref_confidence} \
+        ${params.optional}
+        """
+}
diff --git a/GATK/4.2.1.0/MergeVcfs.nf b/GATK/4.2.1.0/MergeVcfs.nf
new file mode 100644
index 00000000..acb683c7
--- /dev/null
+++ b/GATK/4.2.1.0/MergeVcfs.nf
@@ -0,0 +1,48 @@
+// Merges the staged VCF files into ${output_name}.vcf[.gz] with GATK MergeVcfs.
+process MergeVcfs {
+    tag {"GATK MergeVcfs ${output_name}"}
+    label 'GATK_4_2_1_0'
+    label 'GATK_4_2_1_0_MergeVcfs'
+    container = 'broadinstitute/gatk:4.2.1.0'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        tuple(val(output_name), path(vcf_files), path(vcf_idx_files))
+
+    output:
+        tuple(val(output_name), path("${output_name}${ext_vcf}"), path("${output_name}${ext_vcf}${ext_vcf_index}"), emit: vcf_file)
+
+    script:
+        def input_files = vcf_files.collect{"$it"}.join(" --INPUT ")
+        // Both extensions follow params.compress; no 'def' so the output: block can interpolate them.
+        ext_vcf = params.compress ? ".vcf.gz" : ".vcf"
+        ext_vcf_index = params.compress ? ".tbi" : ".idx"
+        """
+        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${input_files} --OUTPUT ${output_name}${ext_vcf}
+        """
+}
+
+
+// Merges the staged GVCF files into ${output_name}.g.vcf[.gz] with GATK MergeVcfs.
+process MergeGvcfs {
+    tag {"GATK MergeGvcfs ${output_name}"}
+    label 'GATK_4_2_1_0'
+    label 'GATK_4_2_1_0_MergeGvcfs'
+    container = 'broadinstitute/gatk:4.2.1.0'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        tuple(val(output_name), path(vcf_files), path(vcf_idx_files))
+
+    output:
+        tuple(val(output_name), path("${output_name}${ext_gvcf}"), path("${output_name}${ext_gvcf}${ext_gvcf_index}"), emit: vcf_file)
+
+    script:
+        def input_files = vcf_files.collect{"$it"}.join(" --INPUT ")
+        // Both extensions follow params.compress; no 'def' so the output: block can interpolate them.
+        ext_gvcf = params.compress ? ".g.vcf.gz" : ".g.vcf"
+        ext_gvcf_index = params.compress ? ".tbi" : ".idx"
+        """
+        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${input_files} --OUTPUT ${output_name}${ext_gvcf}
+        """
+}
diff --git a/GATK/4.2.1.0/SelectVariants.nf b/GATK/4.2.1.0/SelectVariants.nf
new file mode 100644
index 00000000..20036edb
--- /dev/null
+++ b/GATK/4.2.1.0/SelectVariants.nf
@@ -0,0 +1,34 @@
+// Extracts one sample (--sample-name sample_id) from a VCF with GATK SelectVariants; output is keyed by sample_id.
+process SelectVariantsSample {
+    tag {"GATK SelectVariantsSample ${analysis_id} - ${sample_id}"}
+    label 'GATK_4_2_1_0'
+    label 'GATK_4_2_1_0_SelectVariantsSample'
+    container = 'broadinstitute/gatk:4.2.1.0'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        tuple(val(analysis_id), path(vcf_file), path(vcf_idx_file), val(sample_id))
+
+    output:
+        tuple(
+            val(sample_id),
+            path("${sample_id}_${vcf_file.baseName}${ext_vcf}"),
+            path("${sample_id}_${vcf_file.baseName}${ext_vcf}${ext_vcf_index}"),
+            emit: vcf_file
+        )
+
+    script:
+        // getExtension() returns the extension without a leading dot, so a gzipped input reports "gz".
+        def compressed = params.compress || vcf_file.getExtension() == "gz"
+        // Assigned without 'def' so the output: block can interpolate them.
+        ext_vcf = compressed ? ".vcf.gz" : ".vcf"
+        ext_vcf_index = compressed ? ".tbi" : ".idx"
+        """
+        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants \
+        --reference ${params.genome} \
+        --variant ${vcf_file} \
+        --output ${sample_id}_${vcf_file.baseName}${ext_vcf} \
+        --sample-name ${sample_id} \
+        ${params.optional}
+        """
+}
diff --git a/GATK/4.2.1.0/VariantFiltration.nf b/GATK/4.2.1.0/VariantFiltration.nf
new file mode 100644
index 00000000..90e75772
--- /dev/null
+++ b/GATK/4.2.1.0/VariantFiltration.nf
@@ -0,0 +1,30 @@
+// Splits a VCF into non-INDEL and INDEL records (SelectVariants), filters each set (VariantFiltration) and merges the results (MergeVcfs).
+process VariantFiltrationSnpIndel {
+    tag {"GATK VariantFiltrationSnpIndel ${analysis_id}"}
+    label 'GATK_4_2_1_0'
+    label 'GATK_4_2_1_0_VariantFiltrationSnpIndel'
+    container = 'broadinstitute/gatk:4.2.1.0'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        tuple(val(analysis_id), path(vcf_file), path(vcf_idx_file))
+
+    output:
+        tuple(val(analysis_id), path("${vcf_file.baseName}.filter${ext_vcf}"), path("${vcf_file.baseName}.filter${ext_vcf}${ext_vcf_index}"), emit: vcf_file)
+
+    script:
+        // getExtension() returns the extension without a leading dot, so a gzipped input reports "gz".
+        def compressed = params.compress || vcf_file.getExtension() == "gz"
+        // Assigned without 'def' so the output: block can interpolate them.
+        ext_vcf = compressed ? ".vcf.gz" : ".vcf"
+        ext_vcf_index = compressed ? ".tbi" : ".idx"
+        """
+        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants --reference ${params.genome} --variant $vcf_file --output ${vcf_file.baseName}.snp${ext_vcf} --select-type-to-exclude INDEL
+        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants --reference ${params.genome} --variant $vcf_file --output ${vcf_file.baseName}.indel${ext_vcf} --select-type-to-include INDEL
+
+        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" VariantFiltration --reference ${params.genome} --variant ${vcf_file.baseName}.snp${ext_vcf} --output ${vcf_file.baseName}.snp_filter${ext_vcf} ${params.snp_filter} ${params.snp_cluster}
+        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" VariantFiltration --reference ${params.genome} --variant ${vcf_file.baseName}.indel${ext_vcf} --output ${vcf_file.baseName}.indel_filter${ext_vcf} ${params.indel_filter}
+
+        gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${vcf_file.baseName}.snp_filter${ext_vcf} --INPUT ${vcf_file.baseName}.indel_filter${ext_vcf} --OUTPUT ${vcf_file.baseName}.filter${ext_vcf}
+        """
+}