From 388637aa8408933c20aeb55cce5ac2073f23409d Mon Sep 17 00:00:00 2001 From: tilschaef Date: Mon, 25 May 2020 18:00:32 +0200 Subject: [PATCH 01/24] fixed output redirection --- RSeQC/3.0.1/RSeQC.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RSeQC/3.0.1/RSeQC.nf b/RSeQC/3.0.1/RSeQC.nf index 8b928a82..3ced0b2b 100644 --- a/RSeQC/3.0.1/RSeQC.nf +++ b/RSeQC/3.0.1/RSeQC.nf @@ -19,7 +19,7 @@ process RSeQC { read_distribution.py -i ${bam_file} -r ${genome_bed12} > ${bam_file.baseName}.read_distribution.txt infer_experiment.py -i ${bam_file} -r ${genome_bed12} > ${bam_file.baseName}.infer_experiment.txt junction_annotation.py -i ${bam_file} -o ${bam_file.baseName}.rseqc -r ${genome_bed12} - bam_stat.py -i ${bam_file} 2> ${bam_file.baseName}.bam_stat.txt + bam_stat.py -i ${bam_file} > ${bam_file.baseName}.bam_stat.txt junction_saturation.py -i ${bam_file} -o ${bam_file.baseName}.rseqc -r ${genome_bed12} 2> ${bam_file.baseName}.junction_annotation_log.txt read_duplication.py -i ${bam_file} -o ${bam_file.baseName}.read_duplication """ From 78788593b7c5e03927448dc37e0f98816055061d Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Tue, 26 May 2020 14:56:52 +0200 Subject: [PATCH 02/24] Update README.md --- README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index bdaa803f..ccd5c3a1 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ See `utils/template.nf` for a process template which uses the following guidelin - Use the original tool version numbering - Use CamelCase for tool, command and process names - Use lowercase with words separated by underscores for params, inputs, outputs and scripts. +- Use 4 spaces per indentation level. - All input and output identifiers should reflect their conceptual identity. Use informative names like unaligned_sequences, reference_genome, phylogeny, or aligned_sequences instead of foo_input, foo_file, result, input, output, and so forth. - Define two labels for each process, containing toolname, version and command separated by an underscore. - BWA_0.7.17 @@ -42,16 +43,16 @@ See `utils/template.nf` for a process template which uses the following guidelin - Use separate process input channels as much as possible. Use tuples for linked inputs only. ``` input: - tuple sample_id, rg_id, bam, bai - path genome_fasta + val(analysis_id) + tuple(sample_id, path(bam), path(bai)) ``` - Define named process output channels. This ensures that outputs can be referenced in external scope by their respective names. Indicate whether an output channel is optional. ``` output: - path "my_file.txt", emit: my_file - path "my_optional_file.txt", optional: my_optional_file, emit: my_optional_file - ...... + path("my_file.txt", emit: my_file) + path("my_optional_file.txt", optional: my_optional_file, emit: my_optional_file) ``` +- Use `params` for resource files, for example `genome.fasta`, `database.vcf`. ## GUIX 1. Creating squashfs immage From dda1e41de3676413218c264d0d81f42f3737e9ba Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Tue, 26 May 2020 14:59:03 +0200 Subject: [PATCH 03/24] Update template.nf --- Utils/template.nf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Utils/template.nf b/Utils/template.nf index fb059716..481a2b70 100644 --- a/Utils/template.nf +++ b/Utils/template.nf @@ -6,15 +6,16 @@ process Command { shell = ['/bin/bash', '-euo', 'pipefail'] input: + val(analysis_id) tuple(sample_id, path(input_file)) output: tuple(sample_id, path(output_file), emit: output_file) - path("*.{tsv,txt}", emit: my_output) + path("log.txt", emit: log) script: """ - tool command ${params.optional} + tool command ${params.optional} ${analysis_id} ${params.resource_file} ${input_file} """ } From 2820d2725aa26bba330ddbf160e23d07802c37d3 Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Tue, 26 May 2020 14:59:25 +0200 Subject: [PATCH 04/24] Update template.nf --- Utils/template.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/Utils/template.nf b/Utils/template.nf index 481a2b70..5ed8e204 100644 --- a/Utils/template.nf +++ b/Utils/template.nf @@ -13,7 +13,6 @@ process Command { tuple(sample_id, path(output_file), emit: output_file) path("log.txt", emit: log) - script: """ tool command ${params.optional} ${analysis_id} ${params.resource_file} ${input_file} From 1e85be9da95db3205cb1479d67519b100a115272 Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Wed, 27 May 2020 11:00:46 +0200 Subject: [PATCH 05/24] Refactor Dx (#32) * Refactor: - tabs after input/output/script - file -> path - Add emit to outputs * Update multi fastq input. * Add val * Update refactor * Fix name --- BWA/0.7.17/BWASW.nf | 10 ++++---- BWA/0.7.17/MEM.nf | 14 +++++------ FastQC/0.11.8/FastQC.nf | 10 ++++---- GATK/3.8-1-0-gf15c1c3ef/CombineVariants.nf | 12 +++++----- GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf | 22 +++++++++--------- GATK/3.8-1-0-gf15c1c3ef/IndelRealigner.nf | 23 +++++++++---------- .../RealignerTargetCreator.nf | 21 ++++++++--------- GATK/3.8-1-0-gf15c1c3ef/SelectVariants.nf | 10 ++++---- GATK/3.8-1-0-gf15c1c3ef/UnifiedGenotyper.nf | 10 ++++---- GATK/3.8-1-0-gf15c1c3ef/VariantFiltration.nf | 18 +++++++-------- MultiQC/1.8/MultiQC.nf | 12 +++++----- Picard/2.22.0/CollectHsMetrics.nf | 11 ++++----- Picard/2.22.0/CollectMultipleMetrics.nf | 11 ++++----- Picard/2.22.0/CreateSequenceDictionary.nf | 4 +--- Picard/2.22.0/EstimateLibraryComplexity.nf | 11 ++++----- Picard/2.22.0/IntervalListTools.nf | 21 ++++++++--------- Sambamba/0.7.0/Flagstat.nf | 10 ++++---- Sambamba/0.7.0/Markdup.nf | 21 ++++++++--------- Sambamba/0.7.0/Merge.nf | 10 ++++---- Sambamba/0.7.0/ViewSort.nf | 10 ++++---- Sambamba/0.7.0/ViewUnmapped.nf | 12 +++++----- Samtools/1.10/Flagstat.nf | 10 ++++---- Samtools/1.10/MPileup.nf | 20 ++++++++-------- Samtools/1.10/View.nf | 10 ++++---- 24 files changed, 157 insertions(+), 166 deletions(-) diff --git a/BWA/0.7.17/BWASW.nf b/BWA/0.7.17/BWASW.nf index f26f41c1..e9a78a7e 100644 --- a/BWA/0.7.17/BWASW.nf +++ b/BWA/0.7.17/BWASW.nf @@ -6,13 +6,13 @@ process BWASW { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, rg_id, file(fastq: "*") + tuple(sample_id, rg_id, path(fastq)) output: - tuple sample_id, rg_id, file("${fastq[0].simpleName}.sam") + tuple(sample_id, rg_id, path("${fastq[0].simpleName}.sam"), emit: sam_file) script: - """ - bwa bwasw -t ${task.cpus} ${params.optional} ${params.genome} ${fastq} > ${fastq[0].simpleName}.sam - """ + """ + bwa bwasw -t ${task.cpus} ${params.optional} ${params.genome} ${fastq} > ${fastq[0].simpleName}.sam + """ } diff --git a/BWA/0.7.17/MEM.nf b/BWA/0.7.17/MEM.nf index 59c48a0d..1daf5158 100644 --- a/BWA/0.7.17/MEM.nf +++ b/BWA/0.7.17/MEM.nf @@ -6,16 +6,16 @@ process MEM { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, rg_id, file(fastq: "*") + tuple(sample_id, rg_id, path(fastq)) output: - tuple sample_id, rg_id, file("${fastq[0].simpleName}.sam") + tuple(sample_id, rg_id, path("${fastq[0].simpleName}.sam"), emit: sam_file) script: - def barcode = rg_id.split('_')[1] - def readgroup = "\"@RG\\tID:${rg_id}\\tSM:${sample_id}\\tPL:ILLUMINA\\tLB:${sample_id}\\tPU:${barcode}\"" + def barcode = rg_id.split('_')[1] + def readgroup = "\"@RG\\tID:${rg_id}\\tSM:${sample_id}\\tPL:ILLUMINA\\tLB:${sample_id}\\tPU:${barcode}\"" - """ - bwa mem -t ${task.cpus} -R ${readgroup} ${params.optional} ${params.genome} ${fastq} > ${fastq[0].simpleName}.sam - """ + """ + bwa mem -t ${task.cpus} -R ${readgroup} ${params.optional} ${params.genome} ${fastq} > ${fastq[0].simpleName}.sam + """ } diff --git a/FastQC/0.11.8/FastQC.nf b/FastQC/0.11.8/FastQC.nf index e2405e0c..1d1f8279 100644 --- a/FastQC/0.11.8/FastQC.nf +++ b/FastQC/0.11.8/FastQC.nf @@ -5,13 +5,13 @@ process FastQC { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, rg_id, file(fastq: "*") + tuple(sample_id, rg_id, path(fastq)) output: - file "*_fastqc.{zip,html}" + path("*_fastqc.{zip,html}", emit: report) script: - """ - fastqc ${params.optional} -t ${task.cpus} ${fastq} - """ + """ + fastqc ${params.optional} -t ${task.cpus} ${fastq} + """ } diff --git a/GATK/3.8-1-0-gf15c1c3ef/CombineVariants.nf b/GATK/3.8-1-0-gf15c1c3ef/CombineVariants.nf index d9f6bdfe..a2f7a4f0 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/CombineVariants.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/CombineVariants.nf @@ -6,14 +6,14 @@ process CombineVariants { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple val(analysis_id), file(vcf_files), file(vcf_idx_files) + tuple(analysis_id, path(vcf_files), path(vcf_idx_files)) output: - tuple val(analysis_id), file("${analysis_id}.vcf"), file("${analysis_id}.vcf.idx") + tuple(analysis_id, path("${analysis_id}.vcf"), path("${analysis_id}.vcf.idx"), emit:vcf_file) script: - def input_files = vcf_files.collect{"$it"}.join(" -V ") - """ - java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T CombineVariants --reference_sequence ${params.genome} -V ${input_files} --out ${analysis_id}.vcf ${params.optional} - """ + def input_files = vcf_files.collect{"$it"}.join(" -V ") + """ + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T CombineVariants --reference_sequence ${params.genome} -V ${input_files} --out ${analysis_id}.vcf ${params.optional} + """ } diff --git a/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf b/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf index f04f9d2f..3c42714f 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf @@ -6,19 +6,19 @@ process HaplotypeCaller { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple val(analysis_id), file(bam_files), file(bai_files), file(interval_file) + tuple(analysis_id, path(bam_files), path(bai_files), path(interval_file)) output: - tuple val(analysis_id), file("${analysis_id}.${interval_file.baseName}.vcf"), file("${analysis_id}.${interval_file.baseName}.vcf.idx") + tuple(val(analysis_id), file("${analysis_id}.${interval_file.baseName}.vcf"), file("${analysis_id}.${interval_file.baseName}.vcf.idx"), emit: vcf_file) script: - def input_files = bam_files.collect{"$it"}.join(" --input_file ") - """ - java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T HaplotypeCaller \ - --reference_sequence ${params.genome} \ - --input_file ${input_files} \ - --intervals ${interval_file} \ - --out ${analysis_id}.${interval_file.baseName}.vcf \ - ${params.optional} - """ + def input_files = bam_files.collect{"$it"}.join(" --input_file ") + """ + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T HaplotypeCaller \ + --reference_sequence ${params.genome} \ + --input_file ${input_files} \ + --intervals ${interval_file} \ + --out ${analysis_id}.${interval_file.baseName}.vcf \ + ${params.optional} + """ } diff --git a/GATK/3.8-1-0-gf15c1c3ef/IndelRealigner.nf b/GATK/3.8-1-0-gf15c1c3ef/IndelRealigner.nf index c268b0ea..f5e46cc2 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/IndelRealigner.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/IndelRealigner.nf @@ -6,20 +6,19 @@ process IndelRealigner { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple val(sample_id), file(bam_file), file(bai_file), val(chr), file(target_intervals) + tuple(sample_id, path(bam_file), path(bai_file), chr, path(target_intervals)) output: - tuple val(sample_id), file("${bam_file.baseName}.realigned.${chr}.bam"), file("${bam_file.baseName}.realigned.${chr}.bai") + tuple(sample_id, path("${bam_file.baseName}.realigned.${chr}.bam"), path("${bam_file.baseName}.realigned.${chr}.bai"), emit: bam_file) script: - - """ - java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T IndelRealigner \ - --reference_sequence ${params.genome} \ - --input_file ${bam_file} \ - --intervals ${chr} \ - --targetIntervals ${bam_file.baseName}.target_intervals.${chr}.list \ - --out ${bam_file.baseName}.realigned.${chr}.bam \ - ${params.optional} - """ + """ + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T IndelRealigner \ + --reference_sequence ${params.genome} \ + --input_file ${bam_file} \ + --intervals ${chr} \ + --targetIntervals ${bam_file.baseName}.target_intervals.${chr}.list \ + --out ${bam_file.baseName}.realigned.${chr}.bam \ + ${params.optional} + """ } diff --git a/GATK/3.8-1-0-gf15c1c3ef/RealignerTargetCreator.nf b/GATK/3.8-1-0-gf15c1c3ef/RealignerTargetCreator.nf index 1f0df48b..c0780237 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/RealignerTargetCreator.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/RealignerTargetCreator.nf @@ -6,19 +6,18 @@ process RealignerTargetCreator { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple val(sample_id), file(bam_file), file(bai_file), val(chr) + tuple(sample_id, path(bam_file), path(bai_file), chr) output: - tuple val(sample_id), val(chr), file("${bam_file.baseName}.target_intervals.${chr}.list") + tuple(sample_id, chr, path("${bam_file.baseName}.target_intervals.${chr}.list"), emit: interval_list) script: - - """ - java -Xmx${task.memory.toGiga()-4}G -jar $params.gatk_path -T RealignerTargetCreator \ - --reference_sequence ${params.genome} \ - --input_file ${bam_file} \ - --intervals ${chr} \ - --out ${bam_file.baseName}.target_intervals.${chr}.list \ - ${params.optional} - """ + """ + java -Xmx${task.memory.toGiga()-4}G -jar $params.gatk_path -T RealignerTargetCreator \ + --reference_sequence ${params.genome} \ + --input_file ${bam_file} \ + --intervals ${chr} \ + --out ${bam_file.baseName}.target_intervals.${chr}.list \ + ${params.optional} + """ } diff --git a/GATK/3.8-1-0-gf15c1c3ef/SelectVariants.nf b/GATK/3.8-1-0-gf15c1c3ef/SelectVariants.nf index aed8c282..1f4a8d7f 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/SelectVariants.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/SelectVariants.nf @@ -6,13 +6,13 @@ process SelectVariantsSample { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple val(analysis_id), file(vcf_file), file(vcf_idx_file), val(sample_id) + tuple(analysis_id, path(vcf_file), path(vcf_idx_file), sample_id) output: - tuple val(sample_id), file("${sample_id}_${vcf_file.baseName}.vcf"), file("${sample_id}_${vcf_file.baseName}.vcf.idx") + tuple(sample_id, path("${sample_id}_${vcf_file.baseName}.vcf"), path("${sample_id}_${vcf_file.baseName}.vcf.idx"), emit: vcf_file) script: - """ - java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T SelectVariants --reference_sequence ${params.genome} -V ${vcf_file} --out ${sample_id}_${vcf_file.baseName}.vcf -sn ${sample_id} - """ + """ + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T SelectVariants --reference_sequence ${params.genome} -V ${vcf_file} --out ${sample_id}_${vcf_file.baseName}.vcf -sn ${sample_id} + """ } diff --git a/GATK/3.8-1-0-gf15c1c3ef/UnifiedGenotyper.nf b/GATK/3.8-1-0-gf15c1c3ef/UnifiedGenotyper.nf index a25e20e9..5a190faf 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/UnifiedGenotyper.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/UnifiedGenotyper.nf @@ -6,14 +6,14 @@ process UnifiedGenotyper { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple val(sample_id), file(bam_file), file(bai_file) + tuple(sample_id, path(bam_file), path(bai_file)) output: - tuple val(sample_id), file("${sample_id}.vcf") + tuple(sample_id, path("${sample_id}.vcf"), emit: vcf_file) script: - """ - java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T UnifiedGenotyper --reference_sequence ${params.genome} --input_file ${bam_file} --out ${sample_id}.vcf ${params.optional} - """ + """ + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T UnifiedGenotyper --reference_sequence ${params.genome} --input_file ${bam_file} --out ${sample_id}.vcf ${params.optional} + """ } diff --git a/GATK/3.8-1-0-gf15c1c3ef/VariantFiltration.nf b/GATK/3.8-1-0-gf15c1c3ef/VariantFiltration.nf index 0a2301da..bc79805b 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/VariantFiltration.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/VariantFiltration.nf @@ -6,19 +6,19 @@ process VariantFiltrationSnpIndel { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple val(analysis_id), file(vcf_file), file(vcf_idx_file) + tuple(analysis_id, path(vcf_file), path(vcf_idx_file)) output: - tuple val(analysis_id), file("${vcf_file.baseName}.filter.vcf"), file("${vcf_file.baseName}.filter.vcf.idx") + tuple(analysis_id, path("${vcf_file.baseName}.filter.vcf"), path("${vcf_file.baseName}.filter.vcf.idx"), emit: vcf_file) script: - """ - java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T SelectVariants --reference_sequence ${params.genome} -V $vcf_file --out ${vcf_file.baseName}.snp.vcf --selectTypeToExclude INDEL - java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T SelectVariants --reference_sequence ${params.genome} -V $vcf_file --out ${vcf_file.baseName}.indel.vcf --selectTypeToInclude INDEL + """ + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T SelectVariants --reference_sequence ${params.genome} -V $vcf_file --out ${vcf_file.baseName}.snp.vcf --selectTypeToExclude INDEL + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T SelectVariants --reference_sequence ${params.genome} -V $vcf_file --out ${vcf_file.baseName}.indel.vcf --selectTypeToInclude INDEL - java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T VariantFiltration --reference_sequence ${params.genome} -V ${vcf_file.baseName}.snp.vcf --out ${vcf_file.baseName}.snp_filter.vcf ${params.snp_filter} ${params.snp_cluster} - java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T VariantFiltration --reference_sequence ${params.genome} -V ${vcf_file.baseName}.indel.vcf --out ${vcf_file.baseName}.indel_filter.vcf ${params.indel_filter} + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T VariantFiltration --reference_sequence ${params.genome} -V ${vcf_file.baseName}.snp.vcf --out ${vcf_file.baseName}.snp_filter.vcf ${params.snp_filter} ${params.snp_cluster} + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T VariantFiltration --reference_sequence ${params.genome} -V ${vcf_file.baseName}.indel.vcf --out ${vcf_file.baseName}.indel_filter.vcf ${params.indel_filter} - java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T CombineVariants --reference_sequence ${params.genome} -V ${vcf_file.baseName}.snp_filter.vcf -V ${vcf_file.baseName}.indel_filter.vcf --out ${vcf_file.baseName}.filter.vcf --assumeIdenticalSamples - """ + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T CombineVariants --reference_sequence ${params.genome} -V ${vcf_file.baseName}.snp_filter.vcf -V ${vcf_file.baseName}.indel_filter.vcf --out ${vcf_file.baseName}.filter.vcf --assumeIdenticalSamples + """ } diff --git a/MultiQC/1.8/MultiQC.nf b/MultiQC/1.8/MultiQC.nf index 424252c9..dfd0074c 100644 --- a/MultiQC/1.8/MultiQC.nf +++ b/MultiQC/1.8/MultiQC.nf @@ -5,14 +5,14 @@ process MultiQC { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple val(analysis_id), file(qc_files: "*") + val(analysis_id) + path(qc_files) output: - file "${analysis_id}_multiqc_report.html" - file "${analysis_id}_multiqc_report_data" + tuple(path("${analysis_id}_multiqc_report.html"), path("${analysis_id}_multiqc_report_data"), emit: report) script: - """ - multiqc ${params.optional} --title ${analysis_id} . - """ + """ + multiqc ${params.optional} --title ${analysis_id} . + """ } diff --git a/Picard/2.22.0/CollectHsMetrics.nf b/Picard/2.22.0/CollectHsMetrics.nf index 8b9a2524..b9633e13 100644 --- a/Picard/2.22.0/CollectHsMetrics.nf +++ b/Picard/2.22.0/CollectHsMetrics.nf @@ -6,14 +6,13 @@ process CollectHsMetrics { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple val(sample_id), file(bam_file), file(bai_file) + tuple(sample_id, path(bam_file), path(bai_file)) output: - file("${sample_id}.HsMetrics.txt") + path("${sample_id}.HsMetrics.txt", emit: txt_file) script: - - """ - picard -Xmx${task.memory.toGiga()-4}G CollectHsMetrics TMP_DIR=\$TMPDIR R=${params.genome} INPUT=${bam_file} OUTPUT=${sample_id}.HsMetrics.txt BAIT_INTERVALS=${params.bait} TARGET_INTERVALS=${params.target} ${params.optional} - """ + """ + picard -Xmx${task.memory.toGiga()-4}G CollectHsMetrics TMP_DIR=\$TMPDIR R=${params.genome} INPUT=${bam_file} OUTPUT=${sample_id}.HsMetrics.txt BAIT_INTERVALS=${params.bait} TARGET_INTERVALS=${params.target} ${params.optional} + """ } diff --git a/Picard/2.22.0/CollectMultipleMetrics.nf b/Picard/2.22.0/CollectMultipleMetrics.nf index d166150f..45be1ea8 100644 --- a/Picard/2.22.0/CollectMultipleMetrics.nf +++ b/Picard/2.22.0/CollectMultipleMetrics.nf @@ -6,14 +6,13 @@ process CollectMultipleMetrics { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple val(sample_id), file(bam_file), file(bai_file) + tuple(sample_id, path(bam_file), path(bai_file)) output: - file("*.txt") + path("*.txt", emit: txt_files) script: - - """ - picard -Xmx${task.memory.toGiga()-4}G CollectMultipleMetrics TMP_DIR=\$TMPDIR R=${params.genome} INPUT=${bam_file} OUTPUT=${sample_id} EXT=.txt ${params.optional} - """ + """ + picard -Xmx${task.memory.toGiga()-4}G CollectMultipleMetrics TMP_DIR=\$TMPDIR R=${params.genome} INPUT=${bam_file} OUTPUT=${sample_id} EXT=.txt ${params.optional} + """ } diff --git a/Picard/2.22.0/CreateSequenceDictionary.nf b/Picard/2.22.0/CreateSequenceDictionary.nf index 0e43f4ed..df15d284 100644 --- a/Picard/2.22.0/CreateSequenceDictionary.nf +++ b/Picard/2.22.0/CreateSequenceDictionary.nf @@ -13,8 +13,6 @@ process CreateSequenceDictionary { script: """ - picard -Xmx${task.memory.toGiga()-4}G CreateSequenceDictionary \ - REFERENCE=${genome_fasta} \ - OUTPUT=${genome_fasta.baseName}.dict + picard -Xmx${task.memory.toGiga()-4}G CreateSequenceDictionary REFERENCE=${genome_fasta} OUTPUT=${genome_fasta.baseName}.dict """ } diff --git a/Picard/2.22.0/EstimateLibraryComplexity.nf b/Picard/2.22.0/EstimateLibraryComplexity.nf index c608907e..6100ff95 100644 --- a/Picard/2.22.0/EstimateLibraryComplexity.nf +++ b/Picard/2.22.0/EstimateLibraryComplexity.nf @@ -6,14 +6,13 @@ process EstimateLibraryComplexity { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple val(sample_id), file(bam_file), file(bai_file) + tuple(sample_id, path(bam_file), path(bai_file)) output: - file("${sample_id}.LibraryComplexity.txt") + path("${sample_id}.LibraryComplexity.txt", emit: txt_file) script: - - """ - picard -Xmx${task.memory.toGiga()-4}G EstimateLibraryComplexity TMP_DIR=\$TMPDIR INPUT=${bam_file} OUTPUT=${sample_id}.LibraryComplexity.txt ${params.optional} - """ + """ + picard -Xmx${task.memory.toGiga()-4}G EstimateLibraryComplexity TMP_DIR=\$TMPDIR INPUT=${bam_file} OUTPUT=${sample_id}.LibraryComplexity.txt ${params.optional} + """ } diff --git a/Picard/2.22.0/IntervalListTools.nf b/Picard/2.22.0/IntervalListTools.nf index 077d0d6a..d81bde50 100644 --- a/Picard/2.22.0/IntervalListTools.nf +++ b/Picard/2.22.0/IntervalListTools.nf @@ -6,20 +6,19 @@ process IntervalListTools { shell = ['/bin/bash', '-euo', 'pipefail'] input: - file(interval_list) + path(interval_list) output: - file("temp_*/*.interval_list") + path("temp_*/*.interval_list", emit: interval_list) script: + """ + picard -Xmx${task.memory.toGiga()-4}G IntervalListTools TMP_DIR=\$TMPDIR \ + INPUT=${interval_list} OUTPUT=. \ + SUBDIVISION_MODE=BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW \ + SCATTER_COUNT=${params.scatter_count} \ + UNIQUE=true \ - """ - picard -Xmx${task.memory.toGiga()-4}G IntervalListTools TMP_DIR=\$TMPDIR \ - INPUT=${interval_list} OUTPUT=. \ - SUBDIVISION_MODE=BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW \ - SCATTER_COUNT=${params.scatter_count} \ - UNIQUE=true \ - - for folder in temp*; do mv \$folder/scattered.interval_list \$folder/\$folder\\.interval_list; done - """ + for folder in temp*; do mv \$folder/scattered.interval_list \$folder/\$folder\\.interval_list; done + """ } diff --git a/Sambamba/0.7.0/Flagstat.nf b/Sambamba/0.7.0/Flagstat.nf index 89b84f9e..87b57cbb 100644 --- a/Sambamba/0.7.0/Flagstat.nf +++ b/Sambamba/0.7.0/Flagstat.nf @@ -6,13 +6,13 @@ process Flagstat { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, file(bam_file), file(bai_file) + tuple(sample_id, path(bam_file), path(bai_file)) output: - file("${bam_file.baseName}.flagstat") + path("${bam_file.baseName}.flagstat", emit: flagstat) script: - """ - sambamba flagstat -t ${task.cpus} ${bam_file} > ${bam_file.baseName}.flagstat - """ + """ + sambamba flagstat -t ${task.cpus} ${bam_file} > ${bam_file.baseName}.flagstat + """ } diff --git a/Sambamba/0.7.0/Markdup.nf b/Sambamba/0.7.0/Markdup.nf index afaf378c..d47b5986 100644 --- a/Sambamba/0.7.0/Markdup.nf +++ b/Sambamba/0.7.0/Markdup.nf @@ -6,15 +6,14 @@ process Markdup { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, rg_id, file(bam_file), file(bai_file) - + tuple(sample_id, rg_id, path(bam_file), path(bai_file)) output: - tuple sample_id, rg_id, file("${bam_file.baseName}.markdup.bam"), file("${bam_file.baseName}.markdup.bam.bai") + tuple(sample_id, rg_id, path("${bam_file.baseName}.markdup.bam"), path("${bam_file.baseName}.markdup.bam.bai"), emit: bam_file) script: - """ - sambamba markdup -t ${task.cpus} ${bam_file} ${bam_file.baseName}.markdup.bam - """ + """ + sambamba markdup -t ${task.cpus} ${bam_file} ${bam_file.baseName}.markdup.bam + """ } process MarkdupMerge { @@ -25,13 +24,13 @@ process MarkdupMerge { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, file(bam_files) + tuple(sample_id, path(bam_files)) output: - tuple sample_id, file("${sample_id}.markdup.bam"), file("${sample_id}.markdup.bam.bai") + tuple(sample_id, path("${sample_id}.markdup.bam"), path("${sample_id}.markdup.bam.bai"), emit: bam_file) script: - """ - sambamba markdup -t ${task.cpus} ${bam_files} ${sample_id}.markdup.bam - """ + """ + sambamba markdup -t ${task.cpus} ${bam_files} ${sample_id}.markdup.bam + """ } diff --git a/Sambamba/0.7.0/Merge.nf b/Sambamba/0.7.0/Merge.nf index a74673c5..6cf85c8c 100644 --- a/Sambamba/0.7.0/Merge.nf +++ b/Sambamba/0.7.0/Merge.nf @@ -6,13 +6,13 @@ process Merge { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, file(bam_files), file(bai_files) + tuple(sample_id, path(bam_files), path(bai_files)) output: - tuple sample_id, file("${sample_id}.bam"), file("${sample_id}.bam.bai") + tuple(sample_id, path("${sample_id}.bam"), path("${sample_id}.bam.bai"), emit: bam_file) script: - """ - sambamba merge -t ${task.cpus} ${sample_id}.bam ${bam_files} - """ + """ + sambamba merge -t ${task.cpus} ${sample_id}.bam ${bam_files} + """ } diff --git a/Sambamba/0.7.0/ViewSort.nf b/Sambamba/0.7.0/ViewSort.nf index a3c2e94d..174f6d7b 100644 --- a/Sambamba/0.7.0/ViewSort.nf +++ b/Sambamba/0.7.0/ViewSort.nf @@ -6,13 +6,13 @@ process ViewSort { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, rg_id, file(sam_file) + tuple(sample_id, rg_id, path(sam_file)) output: - tuple sample_id, rg_id, file("${sam_file.baseName}.sort.bam"), file("${sam_file.baseName}.sort.bam.bai") + tuple(sample_id, rg_id, path("${sam_file.baseName}.sort.bam"), path("${sam_file.baseName}.sort.bam.bai"), emit: bam_file) script: - """ - sambamba view -t ${task.cpus} -S -f bam ${sam_file} | sambamba sort -t ${task.cpus} -m ${task.memory.toGiga()}G -o ${sam_file.baseName}.sort.bam /dev/stdin - """ + """ + sambamba view -t ${task.cpus} -S -f bam ${sam_file} | sambamba sort -t ${task.cpus} -m ${task.memory.toGiga()}G -o ${sam_file.baseName}.sort.bam /dev/stdin + """ } diff --git a/Sambamba/0.7.0/ViewUnmapped.nf b/Sambamba/0.7.0/ViewUnmapped.nf index b27b474e..0aada4de 100644 --- a/Sambamba/0.7.0/ViewUnmapped.nf +++ b/Sambamba/0.7.0/ViewUnmapped.nf @@ -6,14 +6,14 @@ process ViewUnmapped { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, file(bam_file), file(bai_file) + tuple(sample_id, path(bam_file), path(bai_file)) output: - tuple sample_id, file("${bam_file.baseName}.unmapped.bam"), file("${bam_file.baseName}.unmapped.bam.bai") + tuple(sample_id, path("${bam_file.baseName}.unmapped.bam"), path("${bam_file.baseName}.unmapped.bam.bai"), emit: bam_file) script: - """ - sambamba view -t ${task.cpus} -f bam -F 'unmapped and mate_is_unmapped' ${bam_file} > ${bam_file.baseName}.unmapped.bam - sambamba index -t ${task.cpus} ${bam_file.baseName}.unmapped.bam - """ + """ + sambamba view -t ${task.cpus} -f bam -F 'unmapped and mate_is_unmapped' ${bam_file} > ${bam_file.baseName}.unmapped.bam + sambamba index -t ${task.cpus} ${bam_file.baseName}.unmapped.bam + """ } diff --git a/Samtools/1.10/Flagstat.nf b/Samtools/1.10/Flagstat.nf index a881c5d1..862ec040 100644 --- a/Samtools/1.10/Flagstat.nf +++ b/Samtools/1.10/Flagstat.nf @@ -6,13 +6,13 @@ process Flagstat { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, file(bam_file), file(bai_file) + tuple(sample_id, path(bam_file), path(bai_file)) output: - tuple sample_id, file("${bam_file.baseName}.flagstat") + path("${bam_file.baseName}.flagstat", emit: flagstat) script: - """ - samtools flagstat ${bam_file} > ${bam_file.baseName}.flagstat - """ + """ + samtools flagstat ${bam_file} > ${bam_file.baseName}.flagstat + """ } diff --git a/Samtools/1.10/MPileup.nf b/Samtools/1.10/MPileup.nf index 6597c503..f6183c88 100644 --- a/Samtools/1.10/MPileup.nf +++ b/Samtools/1.10/MPileup.nf @@ -7,15 +7,15 @@ process MPileup { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, file(bam_file), file(bai_file) + tuple(sample_id, path(bam_file), path(bai_file)) output: - tuple sample_id, file("${bam_file.baseName}.pileup") + tuple(sample_id, path("${bam_file.baseName}.pileup"), emit: pileup) script: - """ - samtools mpileup ${params.optional} -f ${params.genome} ${bam_file} > ${bam_file.baseName}.pileup - """ + """ + samtools mpileup ${params.optional} -f ${params.genome} ${bam_file} > ${bam_file.baseName}.pileup + """ } process MPileup_bcf { @@ -27,13 +27,13 @@ process MPileup_bcf { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, file(bam_file), file(bai_file) + tuple(sample_id, path(bam_file), path(bai_file)) output: - tuple sample_id, file("${bam_file.baseName}.bcf") + tuple(sample_id, path("${bam_file.baseName}.bcf"), emit: bcf) script: - """ - samtools mpileup ${params.optional} -u -f ${params.genome} ${bam_file} > ${bam_file.baseName}.bcf - """ + """ + samtools mpileup ${params.optional} -u -f ${params.genome} ${bam_file} > ${bam_file.baseName}.bcf + """ } diff --git a/Samtools/1.10/View.nf b/Samtools/1.10/View.nf index 435f31d7..97f9c1e8 100644 --- a/Samtools/1.10/View.nf +++ b/Samtools/1.10/View.nf @@ -6,13 +6,13 @@ process View { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, file(bam_file), file(bai_file) + tuple(sample_id, path(bam_file), path(bai_file)) output: - tuple sample_id, file("${bam_file.baseName}.sam") + tuple(sample_id, path("${bam_file.baseName}.sam"), emit: sam_file) script: - """ - samtools view ${params.optional} ${bam_file} ${params.region} > ${bam_file.baseName}.sam - """ + """ + samtools view ${params.optional} ${bam_file} ${params.region} > ${bam_file.baseName}.sam + """ } From e962793fdcadc9561bc3035bf6cebc5e67b35cd7 Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Wed, 27 May 2020 16:31:35 +0200 Subject: [PATCH 06/24] Add BaseRecalibrator, including PrintReads. --- GATK/3.8-1-0-gf15c1c3ef/BaseRecalibrator.nf | 32 +++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 GATK/3.8-1-0-gf15c1c3ef/BaseRecalibrator.nf diff --git a/GATK/3.8-1-0-gf15c1c3ef/BaseRecalibrator.nf b/GATK/3.8-1-0-gf15c1c3ef/BaseRecalibrator.nf new file mode 100644 index 00000000..419c0539 --- /dev/null +++ b/GATK/3.8-1-0-gf15c1c3ef/BaseRecalibrator.nf @@ -0,0 +1,32 @@ +process BaseRecalibrator { + tag {"GATK BaseRecalibrator ${sample_id} - ${chr}"} + label 'GATK_3_8_1_0_gf15c1c3ef' + label 'GATK_3_8_1_0_gf15c1c3ef_BaseRecalibrator' + container = 'quay.io/biocontainers/gatk:3.8--py27_1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(sample_id, path(bam_file), path(bai_file), chr) + + output: + tuple(sample_id, path("${bam_file.baseName}.bqsr.${chr}.bam"), path("${bam_file.baseName}.bqsr.${chr}.bai"), emit: bam_file) + + script: + """ + java -Xmx${task.memory.toGiga()-4}G -jar $params.gatk_path -T BaseRecalibrator \ + --num_cpu_threads_per_data_thread ${task.cpus} \ + --reference_sequence ${params.genome} \ + --input_file ${bam_file} \ + --intervals ${chr} \ + --out ${bam_file.baseName}.bqsr.${chr}.table \ + ${params.optional} + + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T PrintReads \ + --num_cpu_threads_per_data_thread ${task.cpus} \ + --reference_sequence ${params.genome} \ + --input_file ${bam_file} \ + --BQSR ${bam_file.baseName}.bqsr.${chr}.table \ + --intervals ${chr} \ + --out ${bam_file.baseName}.bqsr.${chr}.bam + """ +} From a7d04fb578ef9a646cc111f947c534bef7740ffa Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Thu, 4 Jun 2020 11:01:47 +0200 Subject: [PATCH 07/24] Add optional. --- Picard/2.22.0/IntervalListTools.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Picard/2.22.0/IntervalListTools.nf b/Picard/2.22.0/IntervalListTools.nf index d81bde50..838e5801 100644 --- a/Picard/2.22.0/IntervalListTools.nf +++ b/Picard/2.22.0/IntervalListTools.nf @@ -18,7 +18,8 @@ process IntervalListTools { SUBDIVISION_MODE=BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW \ SCATTER_COUNT=${params.scatter_count} \ UNIQUE=true \ - + ${params.optional} + for folder in temp*; do mv \$folder/scattered.interval_list \$folder/\$folder\\.interval_list; done """ } From 8db71b5c89b9a9dbd98bb9464fc03f69e3da63ef Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Thu, 4 Jun 2020 13:52:56 +0200 Subject: [PATCH 08/24] Add HaplotypeCallerGVCF --- GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf b/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf index 3c42714f..03d5f594 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf @@ -22,3 +22,29 @@ process HaplotypeCaller { ${params.optional} """ } + +process HaplotypeCallerGVCF { + tag {"GATK HaplotypeCallerGVCF ${sample_id} - ${interval_file.baseName}"} + label 'GATK_3_8_1_0_gf15c1c3ef' + label 'GATK_3_8_1_0_gf15c1c3ef_HaplotypeCaller' + container = 'quay.io/biocontainers/gatk:3.8--py27_1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(sample_id, path(bam_file), path(bai_file), path(interval_file)) + + output: + tuple(val(sample_id), file("${sample_id}.${interval_file.baseName}.g.vcf."), file("${sample_id}.${interval_file.baseName}.g.vcf.idx"), emit: vcf_file) + + script: + """ + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T HaplotypeCaller \ + --reference_sequence ${params.genome} \ + --input_file ${bam_file} \ + --intervals ${interval_file} \ + --out ${analysis_id}.${interval_file.baseName}.g.vcf \ + --emitRefConfidence GVCF \ + --GVCFGQBands 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,70,80,90,99 \ + ${params.optional} + """ +} \ No newline at end of file From ad2b8176acdd439ee9936b3ed3e22ce3af203c83 Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Thu, 4 Jun 2020 14:30:45 +0200 Subject: [PATCH 09/24] Add gvcf tools. --- GATK/3.8-1-0-gf15c1c3ef/CombineVariants.nf | 20 ++++++++++++++++++++ GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/GATK/3.8-1-0-gf15c1c3ef/CombineVariants.nf b/GATK/3.8-1-0-gf15c1c3ef/CombineVariants.nf index a2f7a4f0..5ed12ece 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/CombineVariants.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/CombineVariants.nf @@ -17,3 +17,23 @@ process CombineVariants { java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T CombineVariants --reference_sequence ${params.genome} -V ${input_files} --out ${analysis_id}.vcf ${params.optional} """ } + +process CombineVariantsGVCF { + tag {"GATK CombineVariantsGVCF ${sample_id}"} + label 'GATK_3_8_1_0_gf15c1c3ef' + label 'GATK_3_8_1_0_gf15c1c3ef_CombineVariantsGVCF' + container = 'quay.io/biocontainers/gatk:3.8--py27_1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(sample_id, path(vcf_files), path(vcf_idx_files)) + + output: + tuple(sample_id, path("${sample_id}.g.vcf"), path("${sample_id}.g.vcf.idx"), emit:vcf_file) + + script: + def input_files = vcf_files.collect{"$it"}.join(" -V ") + """ + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T CombineVariants --reference_sequence ${params.genome} -V ${input_files} --out ${sample_id}.g.vcf ${params.optional} + """ +} diff --git a/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf b/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf index 03d5f594..dafb3e67 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf @@ -26,7 +26,7 @@ process HaplotypeCaller { process HaplotypeCallerGVCF { tag {"GATK HaplotypeCallerGVCF ${sample_id} - ${interval_file.baseName}"} label 'GATK_3_8_1_0_gf15c1c3ef' - label 'GATK_3_8_1_0_gf15c1c3ef_HaplotypeCaller' + label 'GATK_3_8_1_0_gf15c1c3ef_HaplotypeCallerGVCF' container = 'quay.io/biocontainers/gatk:3.8--py27_1' shell = ['/bin/bash', '-euo', 'pipefail'] From 0513c77cf0614f8a8420b12e57b1d3c312c6b379 Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Fri, 5 Jun 2020 12:35:41 +0200 Subject: [PATCH 10/24] Add CatVariants and GenotypeGVCFs --- GATK/3.8-1-0-gf15c1c3ef/CatVariants.nf | 19 +++++++++++++++++++ GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 GATK/3.8-1-0-gf15c1c3ef/CatVariants.nf create mode 100644 GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf diff --git a/GATK/3.8-1-0-gf15c1c3ef/CatVariants.nf b/GATK/3.8-1-0-gf15c1c3ef/CatVariants.nf new file mode 100644 index 00000000..98ffe9a6 --- /dev/null +++ b/GATK/3.8-1-0-gf15c1c3ef/CatVariants.nf @@ -0,0 +1,19 @@ +process CatVariantsGVCF { + tag {"GATK CatVariantsGVCF ${sample_id}"} + label 'GATK_3_8_1_0_gf15c1c3ef' + label 'GATK_3_8_1_0_gf15c1c3ef_CatVariantsGVCF' + container = 'quay.io/biocontainers/gatk:3.8--py27_1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(sample_id, path(gvcf_files), path(gvcf_idx_files)) + + output: + tuple(sample_id, path("${sample_id}.g.vcf"), path("${sample_id}.g.vcf.idx"), emit:vcf_file) + + script: + def input_files = gvcf_files.collect{"$it"}.join(" -V ") + """ + java -Xmx${task.memory.toGiga()-4}G -cp ${params.gatk_path} org.broadinstitute.gatk.tools.CatVariants --reference ${params.genome} -V ${input_files} --out ${sample_id}.g.vcf ${params.optional} + """ +} \ No newline at end of file diff --git a/GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf b/GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf new file mode 100644 index 00000000..84c95e53 --- /dev/null +++ b/GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf @@ -0,0 +1,19 @@ +process GenotypeGVCFs { + tag {"GATK GenotypeGVCFs ${analysis_id}"} + label 'GATK_3_8_1_0_gf15c1c3ef' + label 'GATK_3_8_1_0_gf15c1c3ef_GenotypeGVCFs' + container = 'quay.io/biocontainers/gatk:3.8--py27_1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(analysis_id, path(gvcf_files), path(gvcf_idx_files)) + + output: + tuple(analysis_id, path("${analysis_id}.vcf"), path("${analysis_id}.vcf.idx"), emit:vcf_file) + + script: + def input_files = gvcf_files.collect{"$it"}.join(" -V ") + """ + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T GenotypeGVCFs --reference_sequence ${params.genome} -V ${input_files} --out ${analysis_id}.vcf ${params.optional} + """ +} \ No newline at end of file From 27cc98d285111ab0edf35f2f1f51cc4f2e8ac56a Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Fri, 5 Jun 2020 12:39:55 +0200 Subject: [PATCH 11/24] Remove GVCFGQBands --- GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf b/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf index dafb3e67..7330014c 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf @@ -44,7 +44,6 @@ process HaplotypeCallerGVCF { --intervals ${interval_file} \ --out ${analysis_id}.${interval_file.baseName}.g.vcf \ --emitRefConfidence GVCF \ - --GVCFGQBands 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,70,80,90,99 \ ${params.optional} """ } \ No newline at end of file From ceb588a82c785339fb7b311d754045cab871eb7d Mon Sep 17 00:00:00 2001 From: rernst Date: Tue, 9 Jun 2020 11:36:02 +0200 Subject: [PATCH 12/24] Fix modules --- GATK/3.8-1-0-gf15c1c3ef/CatVariants.nf | 4 ++-- GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf | 2 +- GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/GATK/3.8-1-0-gf15c1c3ef/CatVariants.nf b/GATK/3.8-1-0-gf15c1c3ef/CatVariants.nf index 98ffe9a6..fe5aa5e5 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/CatVariants.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/CatVariants.nf @@ -14,6 +14,6 @@ process CatVariantsGVCF { script: def input_files = gvcf_files.collect{"$it"}.join(" -V ") """ - java -Xmx${task.memory.toGiga()-4}G -cp ${params.gatk_path} org.broadinstitute.gatk.tools.CatVariants --reference ${params.genome} -V ${input_files} --out ${sample_id}.g.vcf ${params.optional} + java -Xmx${task.memory.toGiga()-4}G -cp ${params.gatk_path} org.broadinstitute.gatk.tools.CatVariants --reference ${params.genome} -V ${input_files} --outputFile ${sample_id}.g.vcf ${params.optional} """ -} \ No newline at end of file +} diff --git a/GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf b/GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf index 84c95e53..f352e728 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf @@ -16,4 +16,4 @@ process GenotypeGVCFs { """ java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T GenotypeGVCFs --reference_sequence ${params.genome} -V ${input_files} --out ${analysis_id}.vcf ${params.optional} """ -} \ No newline at end of file +} diff --git a/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf b/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf index 7330014c..157b8867 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf @@ -34,7 +34,7 @@ process HaplotypeCallerGVCF { tuple(sample_id, path(bam_file), path(bai_file), path(interval_file)) output: - tuple(val(sample_id), file("${sample_id}.${interval_file.baseName}.g.vcf."), file("${sample_id}.${interval_file.baseName}.g.vcf.idx"), emit: vcf_file) + tuple(val(sample_id), file("${sample_id}_${interval_file.baseName}.g.vcf"), file("${sample_id}_${interval_file.baseName}.g.vcf.idx"), emit: vcf_file) script: """ @@ -42,8 +42,8 @@ process HaplotypeCallerGVCF { --reference_sequence ${params.genome} \ --input_file ${bam_file} \ --intervals ${interval_file} \ - --out ${analysis_id}.${interval_file.baseName}.g.vcf \ + --out ${sample_id}_${interval_file.baseName}.g.vcf \ --emitRefConfidence GVCF \ ${params.optional} """ -} \ No newline at end of file +} From c494bd9f2181972ca0adc4b2b9f1e627a27faf1a Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Wed, 10 Jun 2020 10:51:45 +0200 Subject: [PATCH 13/24] Update gvcf --- GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf | 9 +++++++-- GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf b/GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf index f352e728..43260b35 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf @@ -6,7 +6,7 @@ process GenotypeGVCFs { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple(analysis_id, path(gvcf_files), path(gvcf_idx_files)) + tuple(analysis_id, path(gvcf_files), path(gvcf_idx_files), path(interval_file)) output: tuple(analysis_id, path("${analysis_id}.vcf"), path("${analysis_id}.vcf.idx"), emit:vcf_file) @@ -14,6 +14,11 @@ process GenotypeGVCFs { script: def input_files = gvcf_files.collect{"$it"}.join(" -V ") """ - java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T GenotypeGVCFs --reference_sequence ${params.genome} -V ${input_files} --out ${analysis_id}.vcf ${params.optional} + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T GenotypeGVCFs \ + --reference_sequence ${params.genome} \ + -V ${input_files} \ + --out ${analysis_id}.vcf \ + --intervals ${interval_file} \ + ${params.optional} \ """ } diff --git a/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf b/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf index 157b8867..c3f0bf28 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf @@ -9,7 +9,7 @@ process HaplotypeCaller { tuple(analysis_id, path(bam_files), path(bai_files), path(interval_file)) output: - tuple(val(analysis_id), file("${analysis_id}.${interval_file.baseName}.vcf"), file("${analysis_id}.${interval_file.baseName}.vcf.idx"), emit: vcf_file) + tuple(val(analysis_id), path("${analysis_id}.${interval_file.baseName}.vcf"), path("${analysis_id}.${interval_file.baseName}.vcf.idx"), emit: vcf_file) script: def input_files = bam_files.collect{"$it"}.join(" --input_file ") @@ -34,7 +34,7 @@ process HaplotypeCallerGVCF { tuple(sample_id, path(bam_file), path(bai_file), path(interval_file)) output: - tuple(val(sample_id), file("${sample_id}_${interval_file.baseName}.g.vcf"), file("${sample_id}_${interval_file.baseName}.g.vcf.idx"), emit: vcf_file) + tuple(val(sample_id), path("${sample_id}_${interval_file.baseName}.g.vcf"), path("${sample_id}_${interval_file.baseName}.g.vcf.idx"), path(interval_file), emit: vcf_file) script: """ From cda8347e5eca53d35a79dbc248a9687eac645a41 Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Wed, 10 Jun 2020 12:09:52 +0200 Subject: [PATCH 14/24] Unique names --- GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf b/GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf index 43260b35..1714b61f 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf @@ -9,7 +9,7 @@ process GenotypeGVCFs { tuple(analysis_id, path(gvcf_files), path(gvcf_idx_files), path(interval_file)) output: - tuple(analysis_id, path("${analysis_id}.vcf"), path("${analysis_id}.vcf.idx"), emit:vcf_file) + tuple(analysis_id, path("${analysis_id}_${interval_file.baseName}.vcf"), path("${analysis_id}_${interval_file.baseName}.vcf.idx"), emit:vcf_file) script: def input_files = gvcf_files.collect{"$it"}.join(" -V ") @@ -17,7 +17,7 @@ process GenotypeGVCFs { java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T GenotypeGVCFs \ --reference_sequence ${params.genome} \ -V ${input_files} \ - --out ${analysis_id}.vcf \ + --out ${analysis_id}_${interval_file.baseName}.vcf \ --intervals ${interval_file} \ ${params.optional} \ """ From faa31951be71c3c74f620def99550b201aa07d98 Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Wed, 10 Jun 2020 16:48:13 +0200 Subject: [PATCH 15/24] Add FREEC --- Control-FREEC/11.6/Control-FREEC.nf | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 Control-FREEC/11.6/Control-FREEC.nf diff --git a/Control-FREEC/11.6/Control-FREEC.nf b/Control-FREEC/11.6/Control-FREEC.nf new file mode 100644 index 00000000..80b1a777 --- /dev/null +++ b/Control-FREEC/11.6/Control-FREEC.nf @@ -0,0 +1,17 @@ +process Freec { + tag {"Control-FREEC ${sample_id}"} + label 'Control-FREEC_11_6' + container = 'quay.io/biocontainers/control-freec:11.6--he1b5a44_0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(sample_id, path(bam_file), path(bai_file)) + + output: + tuple(sample_id, path("*")) + + script: + """ + freec -conf ${params.config} -sample ${bam_file} + """ +} From 05ed063c0bb3cfa3a27c019264ab63c0117b5dcf Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Wed, 10 Jun 2020 16:52:53 +0200 Subject: [PATCH 16/24] Update Freec --- .../Control-FREEC.nf => ControlFREEC/11.6/ControlFREEC.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename Control-FREEC/11.6/Control-FREEC.nf => ControlFREEC/11.6/ControlFREEC.nf (83%) diff --git a/Control-FREEC/11.6/Control-FREEC.nf b/ControlFREEC/11.6/ControlFREEC.nf similarity index 83% rename from Control-FREEC/11.6/Control-FREEC.nf rename to ControlFREEC/11.6/ControlFREEC.nf index 80b1a777..d8e537ec 100644 --- a/Control-FREEC/11.6/Control-FREEC.nf +++ b/ControlFREEC/11.6/ControlFREEC.nf @@ -1,6 +1,6 @@ process Freec { - tag {"Control-FREEC ${sample_id}"} - label 'Control-FREEC_11_6' + tag {"Control_FREEC ${sample_id}"} + label 'Control_FREEC_11_6' container = 'quay.io/biocontainers/control-freec:11.6--he1b5a44_0' shell = ['/bin/bash', '-euo', 'pipefail'] From 674448207a85065cbac17e639991013cd64f6f75 Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Thu, 11 Jun 2020 15:13:11 +0200 Subject: [PATCH 17/24] Update freec --- ControlFREEC/11.5/AssessSignificance.nf | 18 ++++++++++++++++++ ControlFREEC/11.5/Freec.nf | 19 +++++++++++++++++++ ControlFREEC/11.5/MakeGraph.nf | 18 ++++++++++++++++++ ControlFREEC/11.6/ControlFREEC.nf | 17 ----------------- 4 files changed, 55 insertions(+), 17 deletions(-) create mode 100644 ControlFREEC/11.5/AssessSignificance.nf create mode 100644 ControlFREEC/11.5/Freec.nf create mode 100644 ControlFREEC/11.5/MakeGraph.nf delete mode 100644 ControlFREEC/11.6/ControlFREEC.nf diff --git a/ControlFREEC/11.5/AssessSignificance.nf b/ControlFREEC/11.5/AssessSignificance.nf new file mode 100644 index 00000000..da0f9392 --- /dev/null +++ b/ControlFREEC/11.5/AssessSignificance.nf @@ -0,0 +1,18 @@ +process AssessSignificance { + tag {"Control Freec AssessSignificance ${sample_id}"} + label 'ControlFreec_11_5' + label 'ControlFreec_11_5_AssessSignificance' + container = 'quay.io/biocontainers/control-freec:11.5--he1b5a44_1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(sample_id, path(ratio_file), path(cnv_file)) + + output: + tuple(sample_id, path("${cnv_file.name}.p.value.txt"), emit: cnv_pvalue) + + script: + """ + cat /usr/local/bin/assess_significance.R | R --slave --args ${cnv_file} ${ratio_file} + """ +} diff --git a/ControlFREEC/11.5/Freec.nf b/ControlFREEC/11.5/Freec.nf new file mode 100644 index 00000000..aff81654 --- /dev/null +++ b/ControlFREEC/11.5/Freec.nf @@ -0,0 +1,19 @@ +process Freec { + tag {"Control Freec ${sample_id}"} + label 'ControlFreec_11_5' + label 'ControlFreec_11_5_Freec' + container = 'quay.io/biocontainers/control-freec:11.5--he1b5a44_1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(sample_id, path(bam_file), path(bai_file)) + + output: + tuple(sample_id, path("${bam_file.name}_ratio.txt"), path("${bam_file.name}_CNVs"), emit: cnv) + tuple(sample_id, path("${bam_file.name}_sample.cpn"), path("${bam_file.name}_ratio.BedGraph"), path("${bam_file.name}_info.txt"), emit: other) + + script: + """ + freec -conf ${params.config} -sample ${bam_file} + """ +} diff --git a/ControlFREEC/11.5/MakeGraph.nf b/ControlFREEC/11.5/MakeGraph.nf new file mode 100644 index 00000000..bde902f2 --- /dev/null +++ b/ControlFREEC/11.5/MakeGraph.nf @@ -0,0 +1,18 @@ +process MakeGraph { + tag {"Control Freec MakeGraph ${sample_id}"} + label 'ControlFreec_11_5' + label 'ControlFreec_11_5_MakeGraph' + container = 'quay.io/biocontainers/control-freec:11.5--he1b5a44_1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(sample_id, path(ratio_file), path(cnv_file)) + + output: + tuple(sample_id, path("${ratio_file.name}.png"), path("${ratio_file.name}.log2.png"), emit: ratio_png) + + script: + """ + cat /usr/local/bin/makeGraph.R | R --slave --args ${params.ploidy} ${ratio_file} + """ +} diff --git a/ControlFREEC/11.6/ControlFREEC.nf b/ControlFREEC/11.6/ControlFREEC.nf deleted file mode 100644 index d8e537ec..00000000 --- a/ControlFREEC/11.6/ControlFREEC.nf +++ /dev/null @@ -1,17 +0,0 @@ -process Freec { - tag {"Control_FREEC ${sample_id}"} - label 'Control_FREEC_11_6' - container = 'quay.io/biocontainers/control-freec:11.6--he1b5a44_0' - shell = ['/bin/bash', '-euo', 'pipefail'] - - input: - tuple(sample_id, path(bam_file), path(bai_file)) - - output: - tuple(sample_id, path("*")) - - script: - """ - freec -conf ${params.config} -sample ${bam_file} - """ -} From d9678efd191f640a9a49bd40f4c1edecfef7ae93 Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Thu, 11 Jun 2020 16:40:21 +0200 Subject: [PATCH 18/24] Create config in process --- ControlFREEC/11.5/Freec.nf | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/ControlFREEC/11.5/Freec.nf b/ControlFREEC/11.5/Freec.nf index aff81654..07992d2b 100644 --- a/ControlFREEC/11.5/Freec.nf +++ b/ControlFREEC/11.5/Freec.nf @@ -13,7 +13,22 @@ process Freec { tuple(sample_id, path("${bam_file.name}_sample.cpn"), path("${bam_file.name}_ratio.BedGraph"), path("${bam_file.name}_info.txt"), emit: other) script: + def config = '${sample_id}.config' """ - freec -conf ${params.config} -sample ${bam_file} + touch ${config} + echo "[general]" >> ${config} + echo "chrLenFile = ${params.chr_len_file}" >> ${config} + echo "chrFiles = ${params.chr_files}" >> ${config} + echo "gemMappabilityFile = ${params.gem_mappability_file}" >> ${config} + echo "ploidy = ${params.ploidy}" >> ${config} + echo "window = ${params.window}" >> ${config} + echo "BedGraphOutput=TRUE" >> ${config} + echo "maxThreads=${task.cpus}" >> ${config} + + echo "[sample]" >> ${config} + echo "inputFormat = BAM" >> ${config} + echo "mateFile = ${bam_file}" >> ${config} + + freec -conf ${config} """ } From ab68a08ffe048b4bc317acbd4e73e35e0668fff7 Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Fri, 12 Jun 2020 11:26:55 +0200 Subject: [PATCH 19/24] Fix config def --- ControlFREEC/11.5/Freec.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ControlFREEC/11.5/Freec.nf b/ControlFREEC/11.5/Freec.nf index 07992d2b..5aee35fe 100644 --- a/ControlFREEC/11.5/Freec.nf +++ b/ControlFREEC/11.5/Freec.nf @@ -13,7 +13,7 @@ process Freec { tuple(sample_id, path("${bam_file.name}_sample.cpn"), path("${bam_file.name}_ratio.BedGraph"), path("${bam_file.name}_info.txt"), emit: other) script: - def config = '${sample_id}.config' + def config = "${sample_id}.config" """ touch ${config} echo "[general]" >> ${config} From c782c51e35a8e7b661a0050a80a69ad5f76fb20f Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Fri, 12 Jun 2020 14:06:38 +0200 Subject: [PATCH 20/24] Add CollectWgsMetrics --- Picard/2.22.0/CollectWgsMetrics.nf | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 Picard/2.22.0/CollectWgsMetrics.nf diff --git a/Picard/2.22.0/CollectWgsMetrics.nf b/Picard/2.22.0/CollectWgsMetrics.nf new file mode 100644 index 00000000..5002aecd --- /dev/null +++ b/Picard/2.22.0/CollectWgsMetrics.nf @@ -0,0 +1,18 @@ +process CollectWgsMetrics { + tag {"PICARD CollectWgsMetrics ${sample_id}"} + label 'PICARD_2_22_0' + label 'PICARD_2_22_0_CollectWgsMetrics' + container = 'quay.io/biocontainers/picard:2.22.0--0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(sample_id, path(bam_file), path(bai_file)) + + output: + path("${sample_id}.WGSMetrics.txt", emit: txt_file) + + script: + """ + picard -Xmx${task.memory.toGiga()-4}G CollectWgsMetrics TMP_DIR=\$TMPDIR R=${params.genome} INPUT=${bam_file} OUTPUT=${sample_id}.WGSMetrics.txt ${params.optional} + """ +} From 77356725d04fcbf1a418f3331ddd20eb910f19e5 Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Mon, 15 Jun 2020 10:16:47 +0200 Subject: [PATCH 21/24] Multiqc compatibility --- Picard/2.22.0/CollectWgsMetrics.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Picard/2.22.0/CollectWgsMetrics.nf b/Picard/2.22.0/CollectWgsMetrics.nf index 5002aecd..d1679793 100644 --- a/Picard/2.22.0/CollectWgsMetrics.nf +++ b/Picard/2.22.0/CollectWgsMetrics.nf @@ -9,7 +9,7 @@ process CollectWgsMetrics { tuple(sample_id, path(bam_file), path(bai_file)) output: - path("${sample_id}.WGSMetrics.txt", emit: txt_file) + path("${sample_id}.wgs_metrics.txt", emit: txt_file) script: """ From c2af63360ec6bf23d09a641ef0985c2c13159bd7 Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Mon, 15 Jun 2020 10:17:00 +0200 Subject: [PATCH 22/24] Add MultiQC 1.9 --- MultiQC/1.9/MultiQC.nf | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 MultiQC/1.9/MultiQC.nf diff --git a/MultiQC/1.9/MultiQC.nf b/MultiQC/1.9/MultiQC.nf new file mode 100644 index 00000000..7b455ff8 --- /dev/null +++ b/MultiQC/1.9/MultiQC.nf @@ -0,0 +1,18 @@ +process MultiQC { + tag {"MultiQC"} + label 'MultiQC_1_9' + container = 'quay.io/biocontainers/multiqc:1.9--pyh9f0ad1d_0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + val(analysis_id) + path(qc_files) + + output: + tuple(path("${analysis_id}_multiqc_report.html"), path("${analysis_id}_multiqc_report_data"), emit: report) + + script: + """ + multiqc ${params.optional} --title ${analysis_id} . + """ +} From 1d22468da0a544db28f518148c0cc7a3acade4c7 Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Mon, 15 Jun 2020 12:26:20 +0200 Subject: [PATCH 23/24] Update to .gz files --- GATK/3.8-1-0-gf15c1c3ef/CatVariants.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GATK/3.8-1-0-gf15c1c3ef/CatVariants.nf b/GATK/3.8-1-0-gf15c1c3ef/CatVariants.nf index fe5aa5e5..ca673c79 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/CatVariants.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/CatVariants.nf @@ -9,11 +9,11 @@ process CatVariantsGVCF { tuple(sample_id, path(gvcf_files), path(gvcf_idx_files)) output: - tuple(sample_id, path("${sample_id}.g.vcf"), path("${sample_id}.g.vcf.idx"), emit:vcf_file) + tuple(sample_id, path("${sample_id}.g.vcf.gz"), path("${sample_id}.g.vcf.gz.tbi"), emit:vcf_file) script: def input_files = gvcf_files.collect{"$it"}.join(" -V ") """ - java -Xmx${task.memory.toGiga()-4}G -cp ${params.gatk_path} org.broadinstitute.gatk.tools.CatVariants --reference ${params.genome} -V ${input_files} --outputFile ${sample_id}.g.vcf ${params.optional} + java -Xmx${task.memory.toGiga()-4}G -cp ${params.gatk_path} org.broadinstitute.gatk.tools.CatVariants --reference ${params.genome} -V ${input_files} --outputFile ${sample_id}.g.vcf.gz ${params.optional} """ } From fe1a6161fdaaa978775c3ff4e567bd6bab5b617d Mon Sep 17 00:00:00 2001 From: Robert Ernst Date: Mon, 15 Jun 2020 12:27:03 +0200 Subject: [PATCH 24/24] Rename output. --- Picard/2.22.0/CollectWgsMetrics.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Picard/2.22.0/CollectWgsMetrics.nf b/Picard/2.22.0/CollectWgsMetrics.nf index d1679793..66e51fb2 100644 --- a/Picard/2.22.0/CollectWgsMetrics.nf +++ b/Picard/2.22.0/CollectWgsMetrics.nf @@ -13,6 +13,6 @@ process CollectWgsMetrics { script: """ - picard -Xmx${task.memory.toGiga()-4}G CollectWgsMetrics TMP_DIR=\$TMPDIR R=${params.genome} INPUT=${bam_file} OUTPUT=${sample_id}.WGSMetrics.txt ${params.optional} + picard -Xmx${task.memory.toGiga()-4}G CollectWgsMetrics TMP_DIR=\$TMPDIR R=${params.genome} INPUT=${bam_file} OUTPUT=${sample_id}.wgs_metrics.txt ${params.optional} """ }