From 9de36c6f388d496503cadcd98f2e37225a33a4c8 Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Tue, 11 May 2021 12:24:22 +0200 Subject: [PATCH 01/21] Add new gatk 4.2 modules. --- GATK/4.2.0.0/HaplotypeCaller.nf | 49 +++++++++++++++++++++++++++++++ GATK/4.2.0.0/MergeVcfs.nf | 19 ++++++++++++ GATK/4.2.0.0/SelectVariants.nf | 18 ++++++++++++ GATK/4.2.0.0/VariantFiltration.nf | 24 +++++++++++++++ 4 files changed, 110 insertions(+) create mode 100644 GATK/4.2.0.0/HaplotypeCaller.nf create mode 100644 GATK/4.2.0.0/MergeVcfs.nf create mode 100644 GATK/4.2.0.0/SelectVariants.nf create mode 100644 GATK/4.2.0.0/VariantFiltration.nf diff --git a/GATK/4.2.0.0/HaplotypeCaller.nf b/GATK/4.2.0.0/HaplotypeCaller.nf new file mode 100644 index 00000000..1b5394b9 --- /dev/null +++ b/GATK/4.2.0.0/HaplotypeCaller.nf @@ -0,0 +1,49 @@ +process HaplotypeCaller { + tag {"GATK HaplotypeCaller ${analysis_id} - ${interval_file.baseName}"} + label 'GATK_4_2_0_0' + label 'GATK_4_2_0_0_HaplotypeCaller' + container = 'broadinstitute/gatk:4.2.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(analysis_id, path(bam_files), path(bai_files), path(interval_file)) + + output: + tuple(val(analysis_id), path("${analysis_id}.${interval_file.baseName}.vcf"), path("${analysis_id}.${interval_file.baseName}.vcf.idx"), emit: vcf_file) + + script: + def input_files = bam_files.collect{"$it"}.join(" --input_file ") + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" HaplotypeCaller \ + --reference ${params.genome} \ + --input ${input_files} \ + --intervals ${interval_file} \ + --output ${analysis_id}.${interval_file.baseName}.vcf \ + ${params.optional} + """ +} + +process HaplotypeCallerGVCF { + tag {"GATK HaplotypeCallerGVCF ${sample_id} - ${interval_file.baseName}"} + label 'GATK_4_2_0_0_gf15c1c3ef' + label 'GATK_4_2_0_0_gf15c1c3ef_HaplotypeCallerGVCF' + container = 'broadinstitute/gatk:4.2.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(sample_id, path(bam_file), path(bai_file), path(interval_file)) + + output: + tuple(val(sample_id), path("${sample_id}_${interval_file.baseName}.g.vcf"), path("${sample_id}_${interval_file.baseName}.g.vcf.idx"), path(interval_file), emit: vcf_file) + + script: + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" HaplotypeCaller \ + --reference ${params.genome} \ + --input ${bam_file} \ + --intervals ${interval_file} \ + --output ${sample_id}_${interval_file.baseName}.g.vcf \ + --emit-ref-confidence GVCF \ + ${params.optional} + """ +} diff --git a/GATK/4.2.0.0/MergeVcfs.nf b/GATK/4.2.0.0/MergeVcfs.nf new file mode 100644 index 00000000..f82af26c --- /dev/null +++ b/GATK/4.2.0.0/MergeVcfs.nf @@ -0,0 +1,19 @@ +process MergeVcfs { + tag {"GATK MergeVcfs ${analysis_id}"} + label 'GATK_4_2_0_0' + label 'GATK_4_2_0_0_MergeVcfs' + container = 'broadinstitute/gatk:4.2.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(analysis_id, path(vcf_files), path(vcf_idx_files)) + + output: + tuple(analysis_id, path("${analysis_id}.vcf"), path("${analysis_id}.vcf.idx"), emit:vcf_file) + + script: + def input_files = vcf_files.collect{"$it"}.join(" --INPUT ") + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${input_files} --OUTPUT ${analysis_id}.vcf + """ +} \ No newline at end of file diff --git a/GATK/4.2.0.0/SelectVariants.nf b/GATK/4.2.0.0/SelectVariants.nf new file mode 100644 index 00000000..a4143abe --- /dev/null +++ b/GATK/4.2.0.0/SelectVariants.nf @@ -0,0 +1,18 @@ +process SelectVariantsSample { + tag {"GATK SelectVariantsSample ${analysis_id} - ${sample_id}"} + label 'GATK_4_2_0_0' + label 'GATK_4_2_0_0_SelectVariantsSample' + container = 'broadinstitute/gatk:4.2.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(analysis_id, path(vcf_file), path(vcf_idx_file), sample_id) + + output: + tuple(sample_id, path("${sample_id}_${vcf_file.baseName}.vcf"), path("${sample_id}_${vcf_file.baseName}.vcf.idx"), emit: vcf_file) + + script: + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants --reference ${params.genome} -variant ${vcf_file} --output ${sample_id}_${vcf_file.baseName}.vcf --sample-name ${sample_id} + """ +} diff --git a/GATK/4.2.0.0/VariantFiltration.nf b/GATK/4.2.0.0/VariantFiltration.nf new file mode 100644 index 00000000..4dbb4a7f --- /dev/null +++ b/GATK/4.2.0.0/VariantFiltration.nf @@ -0,0 +1,24 @@ +process VariantFiltrationSnpIndel { + tag {"GATK VariantFiltrationSnpIndel ${analysis_id}"} + label 'GATK_4_2_0_0' + label 'GATK_4_2_0_0_VariantFiltrationSnpIndel' + container = 'broadinstitute/gatk:4.2.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(analysis_id, path(vcf_file), path(vcf_idx_file)) + + output: + tuple(analysis_id, path("${vcf_file.baseName}.filter.vcf"), path("${vcf_file.baseName}.filter.vcf.idx"), emit: vcf_file) + + script: + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants --reference ${params.genome} --variant $vcf_file --output ${vcf_file.baseName}.snp.vcf --select-type-to-exclude INDEL + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants --reference ${params.genome} --variant $vcf_file --output ${vcf_file.baseName}.indel.vcf --select-type-to-include INDEL + + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" VariantFiltration --reference ${params.genome} --variant ${vcf_file.baseName}.snp.vcf --output ${vcf_file.baseName}.snp_filter.vcf ${params.snp_filter} ${params.snp_cluster} + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" VariantFiltration --reference ${params.genome} --variant ${vcf_file.baseName}.indel.vcf --output ${vcf_file.baseName}.indel_filter.vcf ${params.indel_filter} + + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${vcf_file.baseName}.snp_filter.vcf --INPUT ${vcf_file.baseName}.indel_filter.vcf --OUTPUT ${vcf_file.baseName}.filter.vcf + """ +} From 5ac81c9f87923910d431c43a7720423ebd63774f Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Tue, 11 May 2021 14:09:37 +0200 Subject: [PATCH 02/21] Fix multi sample input --- GATK/4.2.0.0/HaplotypeCaller.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GATK/4.2.0.0/HaplotypeCaller.nf b/GATK/4.2.0.0/HaplotypeCaller.nf index 1b5394b9..bf49b4bc 100644 --- a/GATK/4.2.0.0/HaplotypeCaller.nf +++ b/GATK/4.2.0.0/HaplotypeCaller.nf @@ -12,7 +12,7 @@ process HaplotypeCaller { tuple(val(analysis_id), path("${analysis_id}.${interval_file.baseName}.vcf"), path("${analysis_id}.${interval_file.baseName}.vcf.idx"), emit: vcf_file) script: - def input_files = bam_files.collect{"$it"}.join(" --input_file ") + def input_files = bam_files.collect{"$it"}.join(" --input ") """ gatk --java-options "-Xmx${task.memory.toGiga()-4}G" HaplotypeCaller \ --reference ${params.genome} \ From 5e7c374b1de75d0fedc31f3c5f44b20c1cee8c7e Mon Sep 17 00:00:00 2001 From: ellendejong Date: Thu, 1 Jul 2021 12:57:37 +0200 Subject: [PATCH 03/21] Add params.optional to selectVariantsSample GATK --- GATK/4.2.0.0/SelectVariants.nf | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/GATK/4.2.0.0/SelectVariants.nf b/GATK/4.2.0.0/SelectVariants.nf index a4143abe..3939d0e1 100644 --- a/GATK/4.2.0.0/SelectVariants.nf +++ b/GATK/4.2.0.0/SelectVariants.nf @@ -13,6 +13,11 @@ process SelectVariantsSample { script: """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants --reference ${params.genome} -variant ${vcf_file} --output ${sample_id}_${vcf_file.baseName}.vcf --sample-name ${sample_id} + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants \ + --reference ${params.genome} \ + -variant ${vcf_file} \ + --output ${sample_id}_${vcf_file.baseName}.vcf \ + --sample-name ${sample_id} \ + ${params.optional} """ } From 4dc1ad75360d00e24b085a488034f74cb89cb52b Mon Sep 17 00:00:00 2001 From: melferink Date: Fri, 16 Jul 2021 13:00:30 +0200 Subject: [PATCH 04/21] removed gf15c1c3ef, added default setting GVCF --- GATK/4.2.0.0/HaplotypeCaller.nf | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/GATK/4.2.0.0/HaplotypeCaller.nf b/GATK/4.2.0.0/HaplotypeCaller.nf index bf49b4bc..edb3ec96 100644 --- a/GATK/4.2.0.0/HaplotypeCaller.nf +++ b/GATK/4.2.0.0/HaplotypeCaller.nf @@ -25,11 +25,12 @@ process HaplotypeCaller { process HaplotypeCallerGVCF { tag {"GATK HaplotypeCallerGVCF ${sample_id} - ${interval_file.baseName}"} - label 'GATK_4_2_0_0_gf15c1c3ef' - label 'GATK_4_2_0_0_gf15c1c3ef_HaplotypeCallerGVCF' + label 'GATK_4_2_0_0' + label 'GATK_4_2_0_0_HaplotypeCallerGVCF' container = 'broadinstitute/gatk:4.2.0.0' shell = ['/bin/bash', '-euo', 'pipefail'] - + params.emit_ref_confidence = 'GVCF' + input: tuple(sample_id, path(bam_file), path(bai_file), path(interval_file)) @@ -43,7 +44,7 @@ process HaplotypeCallerGVCF { --input ${bam_file} \ --intervals ${interval_file} \ --output ${sample_id}_${interval_file.baseName}.g.vcf \ - --emit-ref-confidence GVCF \ + --emit-ref-confidence ${params.emit_ref_confidence} \ ${params.optional} """ } From 24dc86589fabddd35e87f4eb56960b528d3619ce Mon Sep 17 00:00:00 2001 From: melferink Date: Fri, 16 Jul 2021 13:01:36 +0200 Subject: [PATCH 05/21] renamed analysis_id to output_name. Added process MergeGvcfs --- GATK/4.2.0.0/MergeVcfs.nf | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/GATK/4.2.0.0/MergeVcfs.nf b/GATK/4.2.0.0/MergeVcfs.nf index f82af26c..cf57c628 100644 --- a/GATK/4.2.0.0/MergeVcfs.nf +++ b/GATK/4.2.0.0/MergeVcfs.nf @@ -1,19 +1,40 @@ process MergeVcfs { - tag {"GATK MergeVcfs ${analysis_id}"} + tag {"GATK MergeVcfs ${output_name}"} label 'GATK_4_2_0_0' label 'GATK_4_2_0_0_MergeVcfs' container = 'broadinstitute/gatk:4.2.0.0' shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple(analysis_id, path(vcf_files), path(vcf_idx_files)) + tuple(output_name, path(vcf_files), path(vcf_idx_files)) output: - tuple(analysis_id, path("${analysis_id}.vcf"), path("${analysis_id}.vcf.idx"), emit:vcf_file) + tuple(output_name, path("${output_name}.vcf"), path("${output_name}.vcf.idx"), emit:vcf_file) script: def input_files = vcf_files.collect{"$it"}.join(" --INPUT ") """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${input_files} --OUTPUT ${analysis_id}.vcf + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${input_files} --OUTPUT ${output_name}.vcf """ -} \ No newline at end of file +} + + +process MergeGvcfs { + tag {"GATK MergeGvcfs ${output_name}"} + label 'GATK_4_2_0_0' + label 'GATK_4_2_0_0_MergeGvcfs' + container = 'broadinstitute/gatk:4.2.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(output_name, path(vcf_files), path(vcf_idx_files)) + + output: + tuple(output_name, path("${output_name}.g.vcf"), path("${output_name}.g.vcf.idx"), emit:vcf_file) + + script: + def input_files = vcf_files.collect{"$it"}.join(" --INPUT ") + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${input_files} --OUTPUT ${output_name}.g.vcf + """ +} From dee280645a44dcece57d63fdca095c02846e809c Mon Sep 17 00:00:00 2001 From: melferink Date: Fri, 16 Jul 2021 13:01:58 +0200 Subject: [PATCH 06/21] first commit --- GATK/4.2.0.0/GenotypeGvcfs.nf | 49 +++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 GATK/4.2.0.0/GenotypeGvcfs.nf diff --git a/GATK/4.2.0.0/GenotypeGvcfs.nf b/GATK/4.2.0.0/GenotypeGvcfs.nf new file mode 100644 index 00000000..9e71f43a --- /dev/null +++ b/GATK/4.2.0.0/GenotypeGvcfs.nf @@ -0,0 +1,49 @@ +process GenotypeGVCFs { + tag {"GATK GenotypeGVCFs ${analysis_id} - ${interval_file.baseName}"} + label 'GATK_4_2_0_0' + label 'GATK_4_2_0_0_GenotypeGVCFs' + container = 'broadinstitute/gatk:4.2.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(analysis_id, path(gvcf_files), path(gvcf_idx_files), path(interval_file)) + + output: + tuple(analysis_id, path("${analysis_id}_${interval_file.baseName}.vcf"), path("${analysis_id}_${interval_file.baseName}.vcf.idx"), emit:vcf_file) + + script: + def input_files = gvcf_files.collect{"$it"}.join(" --variant ") + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" GenotypeGVCFs \ + --reference ${params.genome} \ + --variant $input_files \ + --output ${analysis_id}_${interval_file.baseName}.vcf \ + --intervals ${interval_file} \ + ${params.optional} + """ +} + +process GenotypeGVCF { + tag {"GATK GenotypeGVCF ${sample_id} - ${interval_file.baseName}"} + label 'GATK_4_2_0_0' + label 'GATK_4_2_0_0_GenotypeGVCF' + container = 'broadinstitute/gatk:4.2.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(sample_id, path(gvcf_files), path(gvcf_idx_files), path(interval_file)) + + output: + tuple(val(sample_id), path("${sample_id}_${interval_file.baseName}.vcf"), path("${sample_id}_${interval_file.baseName}.vcf.idx"), emit: vcf_file) + + script: + def input_files = gvcf_files.collect{"$it"}.join(" --variant ") + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" GenotypeGVCFs \ + --reference ${params.genome} \ + --variant $input_files \ + --output ${sample_id}_${interval_file.baseName}.vcf \ + --intervals ${interval_file} \ + ${params.optional} + """ +} From 8682db1721ac65591d05c2f96c780c412115fa59 Mon Sep 17 00:00:00 2001 From: melferink Date: Tue, 17 Aug 2021 10:34:23 +0200 Subject: [PATCH 07/21] first commit --- GATK/4.2.1.0/GenotypeGvcfs.nf | 49 +++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 GATK/4.2.1.0/GenotypeGvcfs.nf diff --git a/GATK/4.2.1.0/GenotypeGvcfs.nf b/GATK/4.2.1.0/GenotypeGvcfs.nf new file mode 100644 index 00000000..bc634087 --- /dev/null +++ b/GATK/4.2.1.0/GenotypeGvcfs.nf @@ -0,0 +1,49 @@ +process GenotypeGVCFs { + tag {"GATK GenotypeGVCFs ${analysis_id} - ${interval_file.baseName}"} + label 'GATK_4_2_1_0' + label 'GATK_4_2_1_0_GenotypeGVCFs' + container = 'broadinstitute/gatk:4.2.1.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(analysis_id, path(gvcf_files), path(gvcf_idx_files), path(interval_file)) + + output: + tuple(analysis_id, path("${analysis_id}_${interval_file.baseName}.vcf"), path("${analysis_id}_${interval_file.baseName}.vcf.idx"), emit:vcf_file) + + script: + def input_files = gvcf_files.collect{"$it"}.join(" --variant ") + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" GenotypeGVCFs \ + --reference ${params.genome} \ + --variant $input_files \ + --output ${analysis_id}_${interval_file.baseName}.vcf \ + --intervals ${interval_file} \ + ${params.optional} + """ +} + +process GenotypeGVCF { + tag {"GATK GenotypeGVCF ${sample_id} - ${interval_file.baseName}"} + label 'GATK_4_2_1_0' + label 'GATK_4_2_1_0_GenotypeGVCF' + container = 'broadinstitute/gatk:4.2.1.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(sample_id, path(gvcf_files), path(gvcf_idx_files), path(interval_file)) + + output: + tuple(val(sample_id), path("${sample_id}_${interval_file.baseName}.vcf"), path("${sample_id}_${interval_file.baseName}.vcf.idx"), emit: vcf_file) + + script: + def input_files = gvcf_files.collect{"$it"}.join(" --variant ") + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" GenotypeGVCFs \ + --reference ${params.genome} \ + --variant $input_files \ + --output ${sample_id}_${interval_file.baseName}.vcf \ + --intervals ${interval_file} \ + ${params.optional} + """ +} From f051ecb4f65fc159ab68b065e21b86c612c2e84b Mon Sep 17 00:00:00 2001 From: melferink Date: Tue, 17 Aug 2021 10:34:30 +0200 Subject: [PATCH 08/21] first commit --- GATK/4.2.1.0/HaplotypeCaller.nf | 50 +++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 GATK/4.2.1.0/HaplotypeCaller.nf diff --git a/GATK/4.2.1.0/HaplotypeCaller.nf b/GATK/4.2.1.0/HaplotypeCaller.nf new file mode 100644 index 00000000..e0708351 --- /dev/null +++ b/GATK/4.2.1.0/HaplotypeCaller.nf @@ -0,0 +1,50 @@ +process HaplotypeCaller { + tag {"GATK HaplotypeCaller ${analysis_id} - ${interval_file.baseName}"} + label 'GATK_4_2_1_0' + label 'GATK_4_2_1_0_HaplotypeCaller' + container = 'broadinstitute/gatk:4.2.1.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(analysis_id, path(bam_files), path(bai_files), path(interval_file)) + + output: + tuple(val(analysis_id), path("${analysis_id}.${interval_file.baseName}.vcf"), path("${analysis_id}.${interval_file.baseName}.vcf.idx"), emit: vcf_file) + + script: + def input_files = bam_files.collect{"$it"}.join(" --input ") + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" HaplotypeCaller \ + --reference ${params.genome} \ + --input ${input_files} \ + --intervals ${interval_file} \ + --output ${analysis_id}.${interval_file.baseName}.vcf \ + ${params.optional} + """ +} + +process HaplotypeCallerGVCF { + tag {"GATK HaplotypeCallerGVCF ${sample_id} - ${interval_file.baseName}"} + label 'GATK_4_2_1_0' + label 'GATK_4_2_1_0_HaplotypeCallerGVCF' + container = 'broadinstitute/gatk:4.2.1.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + params.emit_ref_confidence = 'GVCF' + + input: + tuple(sample_id, path(bam_file), path(bai_file), path(interval_file)) + + output: + tuple(val(sample_id), path("${sample_id}_${interval_file.baseName}.g.vcf"), path("${sample_id}_${interval_file.baseName}.g.vcf.idx"), path(interval_file), emit: vcf_file) + + script: + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" HaplotypeCaller \ + --reference ${params.genome} \ + --input ${bam_file} \ + --intervals ${interval_file} \ + --output ${sample_id}_${interval_file.baseName}.g.vcf \ + --emit-ref-confidence ${params.emit_ref_confidence} \ + ${params.optional} + """ +} From 34f3ff33259b144b37fda8f9bfa0114b4187710e Mon Sep 17 00:00:00 2001 From: melferink Date: Tue, 17 Aug 2021 10:34:37 +0200 Subject: [PATCH 09/21] first commit --- GATK/4.2.1.0/MergeVcfs.nf | 40 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 GATK/4.2.1.0/MergeVcfs.nf diff --git a/GATK/4.2.1.0/MergeVcfs.nf b/GATK/4.2.1.0/MergeVcfs.nf new file mode 100644 index 00000000..3d670838 --- /dev/null +++ b/GATK/4.2.1.0/MergeVcfs.nf @@ -0,0 +1,40 @@ +process MergeVcfs { + tag {"GATK MergeVcfs ${output_name}"} + label 'GATK_4_2_1_0' + label 'GATK_4_2_1_0_MergeVcfs' + container = 'broadinstitute/gatk:4.2.1.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(output_name, path(vcf_files), path(vcf_idx_files)) + + output: + tuple(output_name, path("${output_name}.vcf"), path("${output_name}.vcf.idx"), emit:vcf_file) + + script: + def input_files = vcf_files.collect{"$it"}.join(" --INPUT ") + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${input_files} --OUTPUT ${output_name}.vcf + """ +} + + +process MergeGvcfs { + tag {"GATK MergeGvcfs ${output_name}"} + label 'GATK_4_2_1_0' + label 'GATK_4_2_1_0_MergeGvcfs' + container = 'broadinstitute/gatk:4.2.1.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(output_name, path(vcf_files), path(vcf_idx_files)) + + output: + tuple(output_name, path("${output_name}.g.vcf"), path("${output_name}.g.vcf.idx"), emit:vcf_file) + + script: + def input_files = vcf_files.collect{"$it"}.join(" --INPUT ") + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${input_files} --OUTPUT ${output_name}.g.vcf + """ +} From d0a5767f295a4a3f1e807a5f984b0b8dd0aa0195 Mon Sep 17 00:00:00 2001 From: melferink Date: Tue, 17 Aug 2021 10:34:43 +0200 Subject: [PATCH 10/21] first commit --- GATK/4.2.1.0/SelectVariants.nf | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 GATK/4.2.1.0/SelectVariants.nf diff --git a/GATK/4.2.1.0/SelectVariants.nf b/GATK/4.2.1.0/SelectVariants.nf new file mode 100644 index 00000000..50629e58 --- /dev/null +++ b/GATK/4.2.1.0/SelectVariants.nf @@ -0,0 +1,23 @@ +process SelectVariantsSample { + tag {"GATK SelectVariantsSample ${analysis_id} - ${sample_id}"} + label 'GATK_4_2_1_0' + label 'GATK_4_2_1_0_SelectVariantsSample' + container = 'broadinstitute/gatk:4.2.1.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(analysis_id, path(vcf_file), path(vcf_idx_file), sample_id) + + output: + tuple(sample_id, path("${sample_id}_${vcf_file.baseName}.vcf"), path("${sample_id}_${vcf_file.baseName}.vcf.idx"), emit: vcf_file) + + script: + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants \ + --reference ${params.genome} \ + -variant ${vcf_file} \ + --output ${sample_id}_${vcf_file.baseName}.vcf \ + --sample-name ${sample_id} \ + ${params.optional} + """ +} From e09f49646de59223c1dc724c9619efa627363950 Mon Sep 17 00:00:00 2001 From: melferink Date: Tue, 17 Aug 2021 10:34:50 +0200 Subject: [PATCH 11/21] first commit --- GATK/4.2.1.0/VariantFiltration.nf | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 GATK/4.2.1.0/VariantFiltration.nf diff --git a/GATK/4.2.1.0/VariantFiltration.nf b/GATK/4.2.1.0/VariantFiltration.nf new file mode 100644 index 00000000..16a34902 --- /dev/null +++ b/GATK/4.2.1.0/VariantFiltration.nf @@ -0,0 +1,24 @@ +process VariantFiltrationSnpIndel { + tag {"GATK VariantFiltrationSnpIndel ${analysis_id}"} + label 'GATK_4_2_1_0' + label 'GATK_4_2_1_0_VariantFiltrationSnpIndel' + container = 'broadinstitute/gatk:4.2.1.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(analysis_id, path(vcf_file), path(vcf_idx_file)) + + output: + tuple(analysis_id, path("${vcf_file.baseName}.filter.vcf"), path("${vcf_file.baseName}.filter.vcf.idx"), emit: vcf_file) + + script: + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants --reference ${params.genome} --variant $vcf_file --output ${vcf_file.baseName}.snp.vcf --select-type-to-exclude INDEL + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants --reference ${params.genome} --variant $vcf_file --output ${vcf_file.baseName}.indel.vcf --select-type-to-include INDEL + + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" VariantFiltration --reference ${params.genome} --variant ${vcf_file.baseName}.snp.vcf --output ${vcf_file.baseName}.snp_filter.vcf ${params.snp_filter} ${params.snp_cluster} + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" VariantFiltration --reference ${params.genome} --variant ${vcf_file.baseName}.indel.vcf --output ${vcf_file.baseName}.indel_filter.vcf ${params.indel_filter} + + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${vcf_file.baseName}.snp_filter.vcf --INPUT ${vcf_file.baseName}.indel_filter.vcf --OUTPUT ${vcf_file.baseName}.filter.vcf + """ +} From 7331656182e968c0f64fd489f3c3be722857a595 Mon Sep 17 00:00:00 2001 From: melferink Date: Wed, 2 Mar 2022 10:06:49 +0100 Subject: [PATCH 12/21] added gzip --- GATK/4.2.1.0/GenotypeGvcfs.nf | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/GATK/4.2.1.0/GenotypeGvcfs.nf b/GATK/4.2.1.0/GenotypeGvcfs.nf index bc634087..93de328b 100644 --- a/GATK/4.2.1.0/GenotypeGvcfs.nf +++ b/GATK/4.2.1.0/GenotypeGvcfs.nf @@ -9,15 +9,17 @@ process GenotypeGVCFs { tuple(analysis_id, path(gvcf_files), path(gvcf_idx_files), path(interval_file)) output: - tuple(analysis_id, path("${analysis_id}_${interval_file.baseName}.vcf"), path("${analysis_id}_${interval_file.baseName}.vcf.idx"), emit:vcf_file) + tuple(analysis_id, path("${analysis_id}_${interval_file.baseName}${ext_vcf}"), path("${analysis_id}_${interval_file.baseName}${ext_vcf}${ext_vcf_index}"), emit:vcf_file) script: def input_files = gvcf_files.collect{"$it"}.join(" --variant ") + ext_vcf = params.compress || gvcf_files.getExtension() == ".gz" ? ".vcf.gz" : ".vcf" + ext_vcf_index = params.compress || gvcf_files.getExtension() == ".gz" ? ".tbi" : ".idx" """ gatk --java-options "-Xmx${task.memory.toGiga()-4}G" GenotypeGVCFs \ --reference ${params.genome} \ --variant $input_files \ - --output ${analysis_id}_${interval_file.baseName}.vcf \ + --output ${analysis_id}_${interval_file.baseName}${ext_vcf} \ --intervals ${interval_file} \ ${params.optional} """ @@ -29,20 +31,22 @@ process GenotypeGVCF { label 'GATK_4_2_1_0_GenotypeGVCF' container = 'broadinstitute/gatk:4.2.1.0' shell = ['/bin/bash', '-euo', 'pipefail'] - + input: tuple(sample_id, path(gvcf_files), path(gvcf_idx_files), path(interval_file)) output: - tuple(val(sample_id), path("${sample_id}_${interval_file.baseName}.vcf"), path("${sample_id}_${interval_file.baseName}.vcf.idx"), emit: vcf_file) + tuple(val(sample_id), path("${sample_id}_${interval_file.baseName}${ext_vcf}"), path("${sample_id}_${interval_file.baseName}${ext_vcf}${ext_vcf_index}"), emit: vcf_file) script: def input_files = gvcf_files.collect{"$it"}.join(" --variant ") + ext_vcf = params.compress || gvcf_files.getExtension() == ".gz" ? ".vcf.gz" : ".vcf" + ext_vcf_index = params.compress || gvcf_files.getExtension() == ".gz" ? ".tbi" : ".idx" """ gatk --java-options "-Xmx${task.memory.toGiga()-4}G" GenotypeGVCFs \ --reference ${params.genome} \ --variant $input_files \ - --output ${sample_id}_${interval_file.baseName}.vcf \ + --output ${sample_id}_${interval_file.baseName}${ext_vcf} \ --intervals ${interval_file} \ ${params.optional} """ From 1e8f45387505f083f1e1a3dad71119ff0c665dab Mon Sep 17 00:00:00 2001 From: melferink Date: Wed, 2 Mar 2022 10:10:47 +0100 Subject: [PATCH 13/21] added gzip --- GATK/4.2.1.0/HaplotypeCaller.nf | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/GATK/4.2.1.0/HaplotypeCaller.nf b/GATK/4.2.1.0/HaplotypeCaller.nf index e0708351..efed2c61 100644 --- a/GATK/4.2.1.0/HaplotypeCaller.nf +++ b/GATK/4.2.1.0/HaplotypeCaller.nf @@ -9,16 +9,21 @@ process HaplotypeCaller { tuple(analysis_id, path(bam_files), path(bai_files), path(interval_file)) output: - tuple(val(analysis_id), path("${analysis_id}.${interval_file.baseName}.vcf"), path("${analysis_id}.${interval_file.baseName}.vcf.idx"), emit: vcf_file) + tuple(val(analysis_id), path("${analysis_id}.${interval_file.baseName}${ext_vcf}"), path("${analysis_id}.${interval_file.baseName}${ext_vcf}${ext_vcf_index}"), emit: vcf_file) script: def input_files = bam_files.collect{"$it"}.join(" --input ") + ext_vcf = ".vcf" + ext_vcf_index = ".idx" + if( params.compress ) + ext_vcf = ".vcf.gz" + ext_vcf_index = ".tbi" """ gatk --java-options "-Xmx${task.memory.toGiga()-4}G" HaplotypeCaller \ --reference ${params.genome} \ --input ${input_files} \ --intervals ${interval_file} \ - --output ${analysis_id}.${interval_file.baseName}.vcf \ + --output ${analysis_id}.${interval_file.baseName}${ext_vcf} \ ${params.optional} """ } @@ -30,20 +35,25 @@ process HaplotypeCallerGVCF { container = 'broadinstitute/gatk:4.2.1.0' shell = ['/bin/bash', '-euo', 'pipefail'] params.emit_ref_confidence = 'GVCF' - + input: tuple(sample_id, path(bam_file), path(bai_file), path(interval_file)) output: - tuple(val(sample_id), path("${sample_id}_${interval_file.baseName}.g.vcf"), path("${sample_id}_${interval_file.baseName}.g.vcf.idx"), path(interval_file), emit: vcf_file) + tuple(val(sample_id), path("${sample_id}_${interval_file.baseName}${ext_gvcf}"), path("${sample_id}_${interval_file.baseName}${ext_gvcf}${ext_gvcf_index}"), path(interval_file), emit: vcf_file) script: + ext_gvcf = ".g.vcf" + ext_gvcf_index = ".idx" + if( params.compress ) + ext_gvcf = ".g.vcf.gz" + ext_gvcf_index = ".tbi" """ gatk --java-options "-Xmx${task.memory.toGiga()-4}G" HaplotypeCaller \ --reference ${params.genome} \ --input ${bam_file} \ --intervals ${interval_file} \ - --output ${sample_id}_${interval_file.baseName}.g.vcf \ + --output ${sample_id}_${interval_file.baseName}${ext_gvcf} \ --emit-ref-confidence ${params.emit_ref_confidence} \ ${params.optional} """ From 141ea0c11b337e3b506f3125247b5c4e20e98bc1 Mon Sep 17 00:00:00 2001 From: melferink Date: Wed, 2 Mar 2022 10:12:45 +0100 Subject: [PATCH 14/21] added gzip --- GATK/4.2.1.0/MergeVcfs.nf | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/GATK/4.2.1.0/MergeVcfs.nf b/GATK/4.2.1.0/MergeVcfs.nf index 3d670838..acb683c7 100644 --- a/GATK/4.2.1.0/MergeVcfs.nf +++ b/GATK/4.2.1.0/MergeVcfs.nf @@ -9,12 +9,17 @@ process MergeVcfs { tuple(output_name, path(vcf_files), path(vcf_idx_files)) output: - tuple(output_name, path("${output_name}.vcf"), path("${output_name}.vcf.idx"), emit:vcf_file) + tuple(output_name, path("${output_name}${ext_vcf}"), path("${output_name}${ext_vcf}${ext_vcf_index}"), emit:vcf_file) script: def input_files = vcf_files.collect{"$it"}.join(" --INPUT ") + ext_vcf = ".vcf" + ext_vcf_index = ".idx" + if( params.compress ) + ext_vcf = ".vcf.gz" + ext_vcf_index = ".tbi" """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${input_files} --OUTPUT ${output_name}.vcf + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${input_files} --OUTPUT ${output_name}${ext_vcf} """ } @@ -30,11 +35,16 @@ process MergeGvcfs { tuple(output_name, path(vcf_files), path(vcf_idx_files)) output: - tuple(output_name, path("${output_name}.g.vcf"), path("${output_name}.g.vcf.idx"), emit:vcf_file) + tuple(output_name, path("${output_name}${ext_gvcf}"), path("${output_name}${ext_gvcf}${ext_gvcf_index}"), emit:vcf_file) script: def input_files = vcf_files.collect{"$it"}.join(" --INPUT ") + ext_gvcf = ".g.vcf" + ext_gvcf_index = ".idx" + if( params.compress ) + ext_gvcf = ".g.vcf.gz" + ext_gvcf_index = ".tbi" """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${input_files} --OUTPUT ${output_name}.g.vcf + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${input_files} --OUTPUT ${output_name}${ext_gvcf} """ } From 10915fb0da3643fcd3106139604918cb7eedecc2 Mon Sep 17 00:00:00 2001 From: melferink Date: Wed, 2 Mar 2022 10:14:03 +0100 Subject: [PATCH 15/21] added gzip --- GATK/4.2.1.0/SelectVariants.nf | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/GATK/4.2.1.0/SelectVariants.nf b/GATK/4.2.1.0/SelectVariants.nf index 50629e58..c600c8e2 100644 --- a/GATK/4.2.1.0/SelectVariants.nf +++ b/GATK/4.2.1.0/SelectVariants.nf @@ -9,14 +9,16 @@ process SelectVariantsSample { tuple(analysis_id, path(vcf_file), path(vcf_idx_file), sample_id) output: - tuple(sample_id, path("${sample_id}_${vcf_file.baseName}.vcf"), path("${sample_id}_${vcf_file.baseName}.vcf.idx"), emit: vcf_file) + tuple(sample_id, path("${sample_id}_${vcf_file.baseName}${ext_vcf}"), path("${sample_id}_${vcf_file.baseName}${ext_vcf}${ext_vcf_index}"), emit: vcf_file) script: + ext_vcf = params.compress || vcf_file.getExtension() == ".gz" ? ".vcf.gz" : ".vcf" + ext_vcf_index = params.compress || vcf_file.getExtension() == ".gz" ? ".tbi" : ".idx" """ gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants \ --reference ${params.genome} \ -variant ${vcf_file} \ - --output ${sample_id}_${vcf_file.baseName}.vcf \ + --output ${sample_id}_${vcf_file.baseName}${ext_vcf} \ --sample-name ${sample_id} \ ${params.optional} """ From e347f9d28262d30541ae438e58d74613b064face Mon Sep 17 00:00:00 2001 From: melferink Date: Wed, 2 Mar 2022 10:15:30 +0100 Subject: [PATCH 16/21] added gzip --- GATK/4.2.1.0/VariantFiltration.nf | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/GATK/4.2.1.0/VariantFiltration.nf b/GATK/4.2.1.0/VariantFiltration.nf index 16a34902..90e75772 100644 --- a/GATK/4.2.1.0/VariantFiltration.nf +++ b/GATK/4.2.1.0/VariantFiltration.nf @@ -9,16 +9,18 @@ process VariantFiltrationSnpIndel { tuple(analysis_id, path(vcf_file), path(vcf_idx_file)) output: - tuple(analysis_id, path("${vcf_file.baseName}.filter.vcf"), path("${vcf_file.baseName}.filter.vcf.idx"), emit: vcf_file) + tuple(analysis_id, path("${vcf_file.baseName}.filter${ext_vcf}"), path("${vcf_file.baseName}.filter${ext_vcf}${ext_vcf_index}"), emit: vcf_file) script: + ext_vcf = params.compress || vcf_file.getExtension() == ".gz" ? ".vcf.gz" : ".vcf" + ext_vcf_index = params.compress || vcf_file.getExtension() == ".gz" ? ".tbi" : ".idx" """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants --reference ${params.genome} --variant $vcf_file --output ${vcf_file.baseName}.snp.vcf --select-type-to-exclude INDEL - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants --reference ${params.genome} --variant $vcf_file --output ${vcf_file.baseName}.indel.vcf --select-type-to-include INDEL + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants --reference ${params.genome} --variant $vcf_file --output ${vcf_file.baseName}.snp${ext_vcf} --select-type-to-exclude INDEL + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants --reference ${params.genome} --variant $vcf_file --output ${vcf_file.baseName}.indel${ext_vcf} --select-type-to-include INDEL - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" VariantFiltration --reference ${params.genome} --variant ${vcf_file.baseName}.snp.vcf --output ${vcf_file.baseName}.snp_filter.vcf ${params.snp_filter} ${params.snp_cluster} - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" VariantFiltration --reference ${params.genome} --variant ${vcf_file.baseName}.indel.vcf --output ${vcf_file.baseName}.indel_filter.vcf ${params.indel_filter} + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" VariantFiltration --reference ${params.genome} --variant ${vcf_file.baseName}.snp${ext_vcf} --output ${vcf_file.baseName}.snp_filter${ext_vcf} ${params.snp_filter} ${params.snp_cluster} + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" VariantFiltration --reference ${params.genome} --variant ${vcf_file.baseName}.indel${ext_vcf} --output ${vcf_file.baseName}.indel_filter${ext_vcf} ${params.indel_filter} - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${vcf_file.baseName}.snp_filter.vcf --INPUT ${vcf_file.baseName}.indel_filter.vcf --OUTPUT ${vcf_file.baseName}.filter.vcf + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${vcf_file.baseName}.snp_filter${ext_vcf} --INPUT ${vcf_file.baseName}.indel_filter${ext_vcf} --OUTPUT ${vcf_file.baseName}.filter${ext_vcf} """ } From d96f8a628f5e26efbd803c1cb7d66af44bddcd8e Mon Sep 17 00:00:00 2001 From: melferink Date: Wed, 23 Mar 2022 10:32:53 +0100 Subject: [PATCH 17/21] removed gatk 4.2.0.0 --- GATK/4.2.0.0/GenotypeGvcfs.nf | 49 ------------------------------ GATK/4.2.0.0/HaplotypeCaller.nf | 50 ------------------------------- GATK/4.2.0.0/MergeVcfs.nf | 40 ------------------------- GATK/4.2.0.0/SelectVariants.nf | 23 -------------- GATK/4.2.0.0/VariantFiltration.nf | 24 --------------- 5 files changed, 186 deletions(-) delete mode 100644 GATK/4.2.0.0/GenotypeGvcfs.nf delete mode 100644 GATK/4.2.0.0/HaplotypeCaller.nf delete mode 100644 GATK/4.2.0.0/MergeVcfs.nf delete mode 100644 GATK/4.2.0.0/SelectVariants.nf delete mode 100644 GATK/4.2.0.0/VariantFiltration.nf diff --git a/GATK/4.2.0.0/GenotypeGvcfs.nf b/GATK/4.2.0.0/GenotypeGvcfs.nf deleted file mode 100644 index 9e71f43a..00000000 --- a/GATK/4.2.0.0/GenotypeGvcfs.nf +++ /dev/null @@ -1,49 +0,0 @@ -process GenotypeGVCFs { - tag {"GATK GenotypeGVCFs ${analysis_id} - ${interval_file.baseName}"} - label 'GATK_4_2_0_0' - label 'GATK_4_2_0_0_GenotypeGVCFs' - container = 'broadinstitute/gatk:4.2.0.0' - shell = ['/bin/bash', '-euo', 'pipefail'] - - input: - tuple(analysis_id, path(gvcf_files), path(gvcf_idx_files), path(interval_file)) - - output: - tuple(analysis_id, path("${analysis_id}_${interval_file.baseName}.vcf"), path("${analysis_id}_${interval_file.baseName}.vcf.idx"), emit:vcf_file) - - script: - def input_files = gvcf_files.collect{"$it"}.join(" --variant ") - """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" GenotypeGVCFs \ - --reference ${params.genome} \ - --variant $input_files \ - --output ${analysis_id}_${interval_file.baseName}.vcf \ - --intervals ${interval_file} \ - ${params.optional} - """ -} - -process GenotypeGVCF { - tag {"GATK GenotypeGVCF ${sample_id} - ${interval_file.baseName}"} - label 'GATK_4_2_0_0' - label 'GATK_4_2_0_0_GenotypeGVCF' - container = 'broadinstitute/gatk:4.2.0.0' - shell = ['/bin/bash', '-euo', 'pipefail'] - - input: - tuple(sample_id, path(gvcf_files), path(gvcf_idx_files), path(interval_file)) - - output: - tuple(val(sample_id), path("${sample_id}_${interval_file.baseName}.vcf"), path("${sample_id}_${interval_file.baseName}.vcf.idx"), emit: vcf_file) - - script: - def input_files = gvcf_files.collect{"$it"}.join(" --variant ") - """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" GenotypeGVCFs \ - --reference ${params.genome} \ - --variant $input_files \ - --output ${sample_id}_${interval_file.baseName}.vcf \ - --intervals ${interval_file} \ - ${params.optional} - """ -} diff --git a/GATK/4.2.0.0/HaplotypeCaller.nf b/GATK/4.2.0.0/HaplotypeCaller.nf deleted file mode 100644 index edb3ec96..00000000 --- a/GATK/4.2.0.0/HaplotypeCaller.nf +++ /dev/null @@ -1,50 +0,0 @@ -process HaplotypeCaller { - tag {"GATK HaplotypeCaller ${analysis_id} - ${interval_file.baseName}"} - label 'GATK_4_2_0_0' - label 'GATK_4_2_0_0_HaplotypeCaller' - container = 'broadinstitute/gatk:4.2.0.0' - shell = ['/bin/bash', '-euo', 'pipefail'] - - input: - tuple(analysis_id, path(bam_files), path(bai_files), path(interval_file)) - - output: - tuple(val(analysis_id), path("${analysis_id}.${interval_file.baseName}.vcf"), path("${analysis_id}.${interval_file.baseName}.vcf.idx"), emit: vcf_file) - - script: - def input_files = bam_files.collect{"$it"}.join(" --input ") - """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" HaplotypeCaller \ - --reference ${params.genome} \ - --input ${input_files} \ - --intervals ${interval_file} \ - --output ${analysis_id}.${interval_file.baseName}.vcf \ - ${params.optional} - """ -} - -process HaplotypeCallerGVCF { - tag {"GATK HaplotypeCallerGVCF ${sample_id} - ${interval_file.baseName}"} - label 'GATK_4_2_0_0' - label 'GATK_4_2_0_0_HaplotypeCallerGVCF' - container = 'broadinstitute/gatk:4.2.0.0' - shell = ['/bin/bash', '-euo', 'pipefail'] - params.emit_ref_confidence = 'GVCF' - - input: - tuple(sample_id, path(bam_file), path(bai_file), path(interval_file)) - - output: - tuple(val(sample_id), path("${sample_id}_${interval_file.baseName}.g.vcf"), path("${sample_id}_${interval_file.baseName}.g.vcf.idx"), path(interval_file), emit: vcf_file) - - script: - """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" HaplotypeCaller \ - --reference ${params.genome} \ - --input ${bam_file} \ - --intervals ${interval_file} \ - --output ${sample_id}_${interval_file.baseName}.g.vcf \ - --emit-ref-confidence ${params.emit_ref_confidence} \ - ${params.optional} - """ -} diff --git a/GATK/4.2.0.0/MergeVcfs.nf b/GATK/4.2.0.0/MergeVcfs.nf deleted file mode 100644 index cf57c628..00000000 --- a/GATK/4.2.0.0/MergeVcfs.nf +++ /dev/null @@ -1,40 +0,0 @@ -process MergeVcfs { - tag {"GATK MergeVcfs ${output_name}"} - label 'GATK_4_2_0_0' - label 'GATK_4_2_0_0_MergeVcfs' - container = 'broadinstitute/gatk:4.2.0.0' - shell = ['/bin/bash', '-euo', 'pipefail'] - - input: - tuple(output_name, path(vcf_files), path(vcf_idx_files)) - - output: - tuple(output_name, path("${output_name}.vcf"), path("${output_name}.vcf.idx"), emit:vcf_file) - - script: - def input_files = vcf_files.collect{"$it"}.join(" --INPUT ") - """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${input_files} --OUTPUT ${output_name}.vcf - """ -} - - -process MergeGvcfs { - tag {"GATK MergeGvcfs ${output_name}"} - label 'GATK_4_2_0_0' - label 'GATK_4_2_0_0_MergeGvcfs' - container = 'broadinstitute/gatk:4.2.0.0' - shell = ['/bin/bash', '-euo', 'pipefail'] - - input: - tuple(output_name, path(vcf_files), path(vcf_idx_files)) - - output: - tuple(output_name, path("${output_name}.g.vcf"), path("${output_name}.g.vcf.idx"), emit:vcf_file) - - script: - def input_files = vcf_files.collect{"$it"}.join(" --INPUT ") - """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${input_files} --OUTPUT ${output_name}.g.vcf - """ -} diff --git a/GATK/4.2.0.0/SelectVariants.nf b/GATK/4.2.0.0/SelectVariants.nf deleted file mode 100644 index 3939d0e1..00000000 --- a/GATK/4.2.0.0/SelectVariants.nf +++ /dev/null @@ -1,23 +0,0 @@ -process SelectVariantsSample { - tag {"GATK SelectVariantsSample ${analysis_id} - ${sample_id}"} - label 'GATK_4_2_0_0' - label 'GATK_4_2_0_0_SelectVariantsSample' - container = 'broadinstitute/gatk:4.2.0.0' - shell = ['/bin/bash', '-euo', 'pipefail'] - - input: - tuple(analysis_id, path(vcf_file), path(vcf_idx_file), sample_id) - - output: - tuple(sample_id, path("${sample_id}_${vcf_file.baseName}.vcf"), path("${sample_id}_${vcf_file.baseName}.vcf.idx"), emit: vcf_file) - - script: - """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants \ - --reference ${params.genome} \ - -variant ${vcf_file} \ - --output ${sample_id}_${vcf_file.baseName}.vcf \ - --sample-name ${sample_id} \ - ${params.optional} - """ -} diff --git a/GATK/4.2.0.0/VariantFiltration.nf b/GATK/4.2.0.0/VariantFiltration.nf deleted file mode 100644 index 4dbb4a7f..00000000 --- a/GATK/4.2.0.0/VariantFiltration.nf +++ /dev/null @@ -1,24 +0,0 @@ -process VariantFiltrationSnpIndel { - tag {"GATK VariantFiltrationSnpIndel ${analysis_id}"} - label 'GATK_4_2_0_0' - label 'GATK_4_2_0_0_VariantFiltrationSnpIndel' - container = 'broadinstitute/gatk:4.2.0.0' - shell = ['/bin/bash', '-euo', 'pipefail'] - - input: - tuple(analysis_id, path(vcf_file), path(vcf_idx_file)) - - output: - tuple(analysis_id, path("${vcf_file.baseName}.filter.vcf"), path("${vcf_file.baseName}.filter.vcf.idx"), emit: vcf_file) - - script: - """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants --reference ${params.genome} --variant $vcf_file --output ${vcf_file.baseName}.snp.vcf --select-type-to-exclude INDEL - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants --reference ${params.genome} --variant $vcf_file --output ${vcf_file.baseName}.indel.vcf --select-type-to-include INDEL - - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" VariantFiltration --reference ${params.genome} --variant ${vcf_file.baseName}.snp.vcf --output ${vcf_file.baseName}.snp_filter.vcf ${params.snp_filter} ${params.snp_cluster} - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" VariantFiltration --reference ${params.genome} --variant ${vcf_file.baseName}.indel.vcf --output ${vcf_file.baseName}.indel_filter.vcf ${params.indel_filter} - - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${vcf_file.baseName}.snp_filter.vcf --INPUT ${vcf_file.baseName}.indel_filter.vcf --OUTPUT ${vcf_file.baseName}.filter.vcf - """ -} From 61ae1804b1b3cfc5969e5716e972a909691e9668 Mon Sep 17 00:00:00 2001 From: melferink Date: Fri, 15 Apr 2022 11:13:51 +0200 Subject: [PATCH 18/21] corrected typo --- GATK/4.2.1.0/SelectVariants.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GATK/4.2.1.0/SelectVariants.nf b/GATK/4.2.1.0/SelectVariants.nf index c600c8e2..39ce8d9e 100644 --- a/GATK/4.2.1.0/SelectVariants.nf +++ b/GATK/4.2.1.0/SelectVariants.nf @@ -17,7 +17,7 @@ process SelectVariantsSample { """ gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants \ --reference ${params.genome} \ - -variant ${vcf_file} \ + --variant ${vcf_file} \ --output ${sample_id}_${vcf_file.baseName}${ext_vcf} \ --sample-name ${sample_id} \ ${params.optional} From 892cbcd884ef0ade6089c17d331d3ffeb43b08c1 Mon Sep 17 00:00:00 2001 From: melferink Date: Fri, 15 Apr 2022 11:24:11 +0200 Subject: [PATCH 19/21] refactor --- GATK/4.2.1.0/GenotypeGvcfs.nf | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/GATK/4.2.1.0/GenotypeGvcfs.nf b/GATK/4.2.1.0/GenotypeGvcfs.nf index 93de328b..5dfeba7e 100644 --- a/GATK/4.2.1.0/GenotypeGvcfs.nf +++ b/GATK/4.2.1.0/GenotypeGvcfs.nf @@ -9,7 +9,12 @@ process GenotypeGVCFs { tuple(analysis_id, path(gvcf_files), path(gvcf_idx_files), path(interval_file)) output: - tuple(analysis_id, path("${analysis_id}_${interval_file.baseName}${ext_vcf}"), path("${analysis_id}_${interval_file.baseName}${ext_vcf}${ext_vcf_index}"), emit:vcf_file) + tuple( + analysis_id, + path("${analysis_id}_${interval_file.baseName}${ext_vcf}"), + path("${analysis_id}_${interval_file.baseName}${ext_vcf}${ext_vcf_index}"), + emit:vcf_file + ) script: def input_files = gvcf_files.collect{"$it"}.join(" --variant ") @@ -36,7 +41,12 @@ process GenotypeGVCF { tuple(sample_id, path(gvcf_files), path(gvcf_idx_files), path(interval_file)) output: - tuple(val(sample_id), path("${sample_id}_${interval_file.baseName}${ext_vcf}"), path("${sample_id}_${interval_file.baseName}${ext_vcf}${ext_vcf_index}"), emit: vcf_file) + tuple( + val(sample_id), + path("${sample_id}_${interval_file.baseName}${ext_vcf}"), + path("${sample_id}_${interval_file.baseName}${ext_vcf}${ext_vcf_index}"), + emit: vcf_file + ) script: def input_files = gvcf_files.collect{"$it"}.join(" --variant ") From b73c29ab991d9ff40ec1392fb8de327d2eca26eb Mon Sep 17 00:00:00 2001 From: melferink Date: Fri, 15 Apr 2022 11:24:18 +0200 Subject: [PATCH 20/21] refactor --- GATK/4.2.1.0/HaplotypeCaller.nf | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/GATK/4.2.1.0/HaplotypeCaller.nf b/GATK/4.2.1.0/HaplotypeCaller.nf index efed2c61..0cad1bec 100644 --- a/GATK/4.2.1.0/HaplotypeCaller.nf +++ b/GATK/4.2.1.0/HaplotypeCaller.nf @@ -9,7 +9,12 @@ process HaplotypeCaller { tuple(analysis_id, path(bam_files), path(bai_files), path(interval_file)) output: - tuple(val(analysis_id), path("${analysis_id}.${interval_file.baseName}${ext_vcf}"), path("${analysis_id}.${interval_file.baseName}${ext_vcf}${ext_vcf_index}"), emit: vcf_file) + tuple( + val(analysis_id), + path("${analysis_id}.${interval_file.baseName}${ext_vcf}"), + path("${analysis_id}.${interval_file.baseName}${ext_vcf}${ext_vcf_index}"), + emit: vcf_file + ) script: def input_files = bam_files.collect{"$it"}.join(" --input ") @@ -40,7 +45,13 @@ process HaplotypeCallerGVCF { tuple(sample_id, path(bam_file), path(bai_file), path(interval_file)) output: - tuple(val(sample_id), path("${sample_id}_${interval_file.baseName}${ext_gvcf}"), path("${sample_id}_${interval_file.baseName}${ext_gvcf}${ext_gvcf_index}"), path(interval_file), emit: vcf_file) + tuple( + val(sample_id), + path("${sample_id}_${interval_file.baseName}${ext_gvcf}"), + path("${sample_id}_${interval_file.baseName}${ext_gvcf}${ext_gvcf_index}"), + path(interval_file), + emit: vcf_file + ) script: ext_gvcf = ".g.vcf" From 15fe7b3a559e93b8e2466357dc06f9210d18dad2 Mon Sep 17 00:00:00 2001 From: melferink Date: Fri, 15 Apr 2022 11:24:28 +0200 Subject: [PATCH 21/21] refactor --- GATK/4.2.1.0/SelectVariants.nf | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/GATK/4.2.1.0/SelectVariants.nf b/GATK/4.2.1.0/SelectVariants.nf index 39ce8d9e..20036edb 100644 --- a/GATK/4.2.1.0/SelectVariants.nf +++ b/GATK/4.2.1.0/SelectVariants.nf @@ -9,7 +9,12 @@ process SelectVariantsSample { tuple(analysis_id, path(vcf_file), path(vcf_idx_file), sample_id) output: - tuple(sample_id, path("${sample_id}_${vcf_file.baseName}${ext_vcf}"), path("${sample_id}_${vcf_file.baseName}${ext_vcf}${ext_vcf_index}"), emit: vcf_file) + tuple( + sample_id, + path("${sample_id}_${vcf_file.baseName}${ext_vcf}"), + path("${sample_id}_${vcf_file.baseName}${ext_vcf}${ext_vcf_index}"), + emit: vcf_file + ) script: ext_vcf = params.compress || vcf_file.getExtension() == ".gz" ? ".vcf.gz" : ".vcf"