diff --git a/BWA/0.7.17/BWASW.nf b/BWA/0.7.17/BWASW.nf index f26f41c1..e9a78a7e 100644 --- a/BWA/0.7.17/BWASW.nf +++ b/BWA/0.7.17/BWASW.nf @@ -6,13 +6,13 @@ process BWASW { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, rg_id, file(fastq: "*") + tuple(sample_id, rg_id, path(fastq)) output: - tuple sample_id, rg_id, file("${fastq[0].simpleName}.sam") + tuple(sample_id, rg_id, path("${fastq[0].simpleName}.sam"), emit: sam_file) script: - """ - bwa bwasw -t ${task.cpus} ${params.optional} ${params.genome} ${fastq} > ${fastq[0].simpleName}.sam - """ + """ + bwa bwasw -t ${task.cpus} ${params.optional} ${params.genome} ${fastq} > ${fastq[0].simpleName}.sam + """ } diff --git a/BWA/0.7.17/MEM.nf b/BWA/0.7.17/MEM.nf index 59c48a0d..1daf5158 100644 --- a/BWA/0.7.17/MEM.nf +++ b/BWA/0.7.17/MEM.nf @@ -6,16 +6,16 @@ process MEM { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, rg_id, file(fastq: "*") + tuple(sample_id, rg_id, path(fastq)) output: - tuple sample_id, rg_id, file("${fastq[0].simpleName}.sam") + tuple(sample_id, rg_id, path("${fastq[0].simpleName}.sam"), emit: sam_file) script: - def barcode = rg_id.split('_')[1] - def readgroup = "\"@RG\\tID:${rg_id}\\tSM:${sample_id}\\tPL:ILLUMINA\\tLB:${sample_id}\\tPU:${barcode}\"" + def barcode = rg_id.split('_')[1] + def readgroup = "\"@RG\\tID:${rg_id}\\tSM:${sample_id}\\tPL:ILLUMINA\\tLB:${sample_id}\\tPU:${barcode}\"" - """ - bwa mem -t ${task.cpus} -R ${readgroup} ${params.optional} ${params.genome} ${fastq} > ${fastq[0].simpleName}.sam - """ + """ + bwa mem -t ${task.cpus} -R ${readgroup} ${params.optional} ${params.genome} ${fastq} > ${fastq[0].simpleName}.sam + """ } diff --git a/ControlFREEC/11.5/AssessSignificance.nf b/ControlFREEC/11.5/AssessSignificance.nf new file mode 100644 index 00000000..da0f9392 --- /dev/null +++ b/ControlFREEC/11.5/AssessSignificance.nf @@ -0,0 +1,18 @@ +process AssessSignificance { + tag {"Control Freec AssessSignificance ${sample_id}"} + label 
'ControlFreec_11_5' + label 'ControlFreec_11_5_AssessSignificance' + container = 'quay.io/biocontainers/control-freec:11.5--he1b5a44_1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(sample_id, path(ratio_file), path(cnv_file)) + + output: + tuple(sample_id, path("${cnv_file.name}.p.value.txt"), emit: cnv_pvalue) + + script: + """ + cat /usr/local/bin/assess_significance.R | R --slave --args ${cnv_file} ${ratio_file} + """ +} diff --git a/ControlFREEC/11.5/Freec.nf b/ControlFREEC/11.5/Freec.nf new file mode 100644 index 00000000..5aee35fe --- /dev/null +++ b/ControlFREEC/11.5/Freec.nf @@ -0,0 +1,34 @@ +process Freec { + tag {"Control Freec ${sample_id}"} + label 'ControlFreec_11_5' + label 'ControlFreec_11_5_Freec' + container = 'quay.io/biocontainers/control-freec:11.5--he1b5a44_1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(sample_id, path(bam_file), path(bai_file)) + + output: + tuple(sample_id, path("${bam_file.name}_ratio.txt"), path("${bam_file.name}_CNVs"), emit: cnv) + tuple(sample_id, path("${bam_file.name}_sample.cpn"), path("${bam_file.name}_ratio.BedGraph"), path("${bam_file.name}_info.txt"), emit: other) + + script: + def config = "${sample_id}.config" + """ + touch ${config} + echo "[general]" >> ${config} + echo "chrLenFile = ${params.chr_len_file}" >> ${config} + echo "chrFiles = ${params.chr_files}" >> ${config} + echo "gemMappabilityFile = ${params.gem_mappability_file}" >> ${config} + echo "ploidy = ${params.ploidy}" >> ${config} + echo "window = ${params.window}" >> ${config} + echo "BedGraphOutput=TRUE" >> ${config} + echo "maxThreads=${task.cpus}" >> ${config} + + echo "[sample]" >> ${config} + echo "inputFormat = BAM" >> ${config} + echo "mateFile = ${bam_file}" >> ${config} + + freec -conf ${config} + """ +} diff --git a/ControlFREEC/11.5/MakeGraph.nf b/ControlFREEC/11.5/MakeGraph.nf new file mode 100644 index 00000000..bde902f2 --- /dev/null +++ b/ControlFREEC/11.5/MakeGraph.nf @@ -0,0 +1,18 @@ +process 
MakeGraph { + tag {"Control Freec MakeGraph ${sample_id}"} + label 'ControlFreec_11_5' + label 'ControlFreec_11_5_MakeGraph' + container = 'quay.io/biocontainers/control-freec:11.5--he1b5a44_1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(sample_id, path(ratio_file), path(cnv_file)) + + output: + tuple(sample_id, path("${ratio_file.name}.png"), path("${ratio_file.name}.log2.png"), emit: ratio_png) + + script: + """ + cat /usr/local/bin/makeGraph.R | R --slave --args ${params.ploidy} ${ratio_file} + """ +} diff --git a/FastQC/0.11.8/FastQC.nf b/FastQC/0.11.8/FastQC.nf index e2405e0c..1d1f8279 100644 --- a/FastQC/0.11.8/FastQC.nf +++ b/FastQC/0.11.8/FastQC.nf @@ -5,13 +5,13 @@ process FastQC { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, rg_id, file(fastq: "*") + tuple(sample_id, rg_id, path(fastq)) output: - file "*_fastqc.{zip,html}" + path("*_fastqc.{zip,html}", emit: report) script: - """ - fastqc ${params.optional} -t ${task.cpus} ${fastq} - """ + """ + fastqc ${params.optional} -t ${task.cpus} ${fastq} + """ } diff --git a/GATK/3.8-1-0-gf15c1c3ef/BaseRecalibrator.nf b/GATK/3.8-1-0-gf15c1c3ef/BaseRecalibrator.nf new file mode 100644 index 00000000..419c0539 --- /dev/null +++ b/GATK/3.8-1-0-gf15c1c3ef/BaseRecalibrator.nf @@ -0,0 +1,32 @@ +process BaseRecalibrator { + tag {"GATK BaseRecalibrator ${sample_id} - ${chr}"} + label 'GATK_3_8_1_0_gf15c1c3ef' + label 'GATK_3_8_1_0_gf15c1c3ef_BaseRecalibrator' + container = 'quay.io/biocontainers/gatk:3.8--py27_1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(sample_id, path(bam_file), path(bai_file), chr) + + output: + tuple(sample_id, path("${bam_file.baseName}.bqsr.${chr}.bam"), path("${bam_file.baseName}.bqsr.${chr}.bai"), emit: bam_file) + + script: + """ + java -Xmx${task.memory.toGiga()-4}G -jar $params.gatk_path -T BaseRecalibrator \ + --num_cpu_threads_per_data_thread ${task.cpus} \ + --reference_sequence ${params.genome} \ + --input_file ${bam_file} \ + 
--intervals ${chr} \ + --out ${bam_file.baseName}.bqsr.${chr}.table \ + ${params.optional} + + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T PrintReads \ + --num_cpu_threads_per_data_thread ${task.cpus} \ + --reference_sequence ${params.genome} \ + --input_file ${bam_file} \ + --BQSR ${bam_file.baseName}.bqsr.${chr}.table \ + --intervals ${chr} \ + --out ${bam_file.baseName}.bqsr.${chr}.bam + """ +} diff --git a/GATK/3.8-1-0-gf15c1c3ef/CatVariants.nf b/GATK/3.8-1-0-gf15c1c3ef/CatVariants.nf new file mode 100644 index 00000000..ca673c79 --- /dev/null +++ b/GATK/3.8-1-0-gf15c1c3ef/CatVariants.nf @@ -0,0 +1,19 @@ +process CatVariantsGVCF { + tag {"GATK CatVariantsGVCF ${sample_id}"} + label 'GATK_3_8_1_0_gf15c1c3ef' + label 'GATK_3_8_1_0_gf15c1c3ef_CatVariantsGVCF' + container = 'quay.io/biocontainers/gatk:3.8--py27_1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(sample_id, path(gvcf_files), path(gvcf_idx_files)) + + output: + tuple(sample_id, path("${sample_id}.g.vcf.gz"), path("${sample_id}.g.vcf.gz.tbi"), emit:vcf_file) + + script: + def input_files = gvcf_files.collect{"$it"}.join(" -V ") + """ + java -Xmx${task.memory.toGiga()-4}G -cp ${params.gatk_path} org.broadinstitute.gatk.tools.CatVariants --reference ${params.genome} -V ${input_files} --outputFile ${sample_id}.g.vcf.gz ${params.optional} + """ +} diff --git a/GATK/3.8-1-0-gf15c1c3ef/CombineVariants.nf b/GATK/3.8-1-0-gf15c1c3ef/CombineVariants.nf index d9f6bdfe..5ed12ece 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/CombineVariants.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/CombineVariants.nf @@ -6,14 +6,34 @@ process CombineVariants { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple val(analysis_id), file(vcf_files), file(vcf_idx_files) + tuple(analysis_id, path(vcf_files), path(vcf_idx_files)) output: - tuple val(analysis_id), file("${analysis_id}.vcf"), file("${analysis_id}.vcf.idx") + tuple(analysis_id, path("${analysis_id}.vcf"), path("${analysis_id}.vcf.idx"), emit:vcf_file) 
script: - def input_files = vcf_files.collect{"$it"}.join(" -V ") - """ - java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T CombineVariants --reference_sequence ${params.genome} -V ${input_files} --out ${analysis_id}.vcf ${params.optional} - """ + def input_files = vcf_files.collect{"$it"}.join(" -V ") + """ + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T CombineVariants --reference_sequence ${params.genome} -V ${input_files} --out ${analysis_id}.vcf ${params.optional} + """ +} + +process CombineVariantsGVCF { + tag {"GATK CombineVariantsGVCF ${sample_id}"} + label 'GATK_3_8_1_0_gf15c1c3ef' + label 'GATK_3_8_1_0_gf15c1c3ef_CombineVariantsGVCF' + container = 'quay.io/biocontainers/gatk:3.8--py27_1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(sample_id, path(vcf_files), path(vcf_idx_files)) + + output: + tuple(sample_id, path("${sample_id}.g.vcf"), path("${sample_id}.g.vcf.idx"), emit:vcf_file) + + script: + def input_files = vcf_files.collect{"$it"}.join(" -V ") + """ + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T CombineVariants --reference_sequence ${params.genome} -V ${input_files} --out ${sample_id}.g.vcf ${params.optional} + """ } diff --git a/GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf b/GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf new file mode 100644 index 00000000..1714b61f --- /dev/null +++ b/GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf @@ -0,0 +1,24 @@ +process GenotypeGVCFs { + tag {"GATK GenotypeGVCFs ${analysis_id}"} + label 'GATK_3_8_1_0_gf15c1c3ef' + label 'GATK_3_8_1_0_gf15c1c3ef_GenotypeGVCFs' + container = 'quay.io/biocontainers/gatk:3.8--py27_1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(analysis_id, path(gvcf_files), path(gvcf_idx_files), path(interval_file)) + + output: + tuple(analysis_id, path("${analysis_id}_${interval_file.baseName}.vcf"), path("${analysis_id}_${interval_file.baseName}.vcf.idx"), emit:vcf_file) + + script: + def input_files = 
gvcf_files.collect{"$it"}.join(" -V ") + """ + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T GenotypeGVCFs \ + --reference_sequence ${params.genome} \ + -V ${input_files} \ + --out ${analysis_id}_${interval_file.baseName}.vcf \ + --intervals ${interval_file} \ + ${params.optional} \ + """ +} diff --git a/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf b/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf index f04f9d2f..c3f0bf28 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf @@ -6,19 +6,44 @@ process HaplotypeCaller { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple val(analysis_id), file(bam_files), file(bai_files), file(interval_file) + tuple(analysis_id, path(bam_files), path(bai_files), path(interval_file)) output: - tuple val(analysis_id), file("${analysis_id}.${interval_file.baseName}.vcf"), file("${analysis_id}.${interval_file.baseName}.vcf.idx") + tuple(val(analysis_id), path("${analysis_id}.${interval_file.baseName}.vcf"), path("${analysis_id}.${interval_file.baseName}.vcf.idx"), emit: vcf_file) script: - def input_files = bam_files.collect{"$it"}.join(" --input_file ") - """ - java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T HaplotypeCaller \ - --reference_sequence ${params.genome} \ - --input_file ${input_files} \ - --intervals ${interval_file} \ - --out ${analysis_id}.${interval_file.baseName}.vcf \ - ${params.optional} - """ + def input_files = bam_files.collect{"$it"}.join(" --input_file ") + """ + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T HaplotypeCaller \ + --reference_sequence ${params.genome} \ + --input_file ${input_files} \ + --intervals ${interval_file} \ + --out ${analysis_id}.${interval_file.baseName}.vcf \ + ${params.optional} + """ +} + +process HaplotypeCallerGVCF { + tag {"GATK HaplotypeCallerGVCF ${sample_id} - ${interval_file.baseName}"} + label 'GATK_3_8_1_0_gf15c1c3ef' + label 'GATK_3_8_1_0_gf15c1c3ef_HaplotypeCallerGVCF' + 
container = 'quay.io/biocontainers/gatk:3.8--py27_1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(sample_id, path(bam_file), path(bai_file), path(interval_file)) + + output: + tuple(val(sample_id), path("${sample_id}_${interval_file.baseName}.g.vcf"), path("${sample_id}_${interval_file.baseName}.g.vcf.idx"), path(interval_file), emit: vcf_file) + + script: + """ + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T HaplotypeCaller \ + --reference_sequence ${params.genome} \ + --input_file ${bam_file} \ + --intervals ${interval_file} \ + --out ${sample_id}_${interval_file.baseName}.g.vcf \ + --emitRefConfidence GVCF \ + ${params.optional} + """ } diff --git a/GATK/3.8-1-0-gf15c1c3ef/IndelRealigner.nf b/GATK/3.8-1-0-gf15c1c3ef/IndelRealigner.nf index c268b0ea..f5e46cc2 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/IndelRealigner.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/IndelRealigner.nf @@ -6,20 +6,19 @@ process IndelRealigner { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple val(sample_id), file(bam_file), file(bai_file), val(chr), file(target_intervals) + tuple(sample_id, path(bam_file), path(bai_file), chr, path(target_intervals)) output: - tuple val(sample_id), file("${bam_file.baseName}.realigned.${chr}.bam"), file("${bam_file.baseName}.realigned.${chr}.bai") + tuple(sample_id, path("${bam_file.baseName}.realigned.${chr}.bam"), path("${bam_file.baseName}.realigned.${chr}.bai"), emit: bam_file) script: - - """ - java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T IndelRealigner \ - --reference_sequence ${params.genome} \ - --input_file ${bam_file} \ - --intervals ${chr} \ - --targetIntervals ${bam_file.baseName}.target_intervals.${chr}.list \ - --out ${bam_file.baseName}.realigned.${chr}.bam \ - ${params.optional} - """ + """ + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T IndelRealigner \ + --reference_sequence ${params.genome} \ + --input_file ${bam_file} \ + --intervals ${chr} \ + --targetIntervals 
${bam_file.baseName}.target_intervals.${chr}.list \ + --out ${bam_file.baseName}.realigned.${chr}.bam \ + ${params.optional} + """ } diff --git a/GATK/3.8-1-0-gf15c1c3ef/RealignerTargetCreator.nf b/GATK/3.8-1-0-gf15c1c3ef/RealignerTargetCreator.nf index 1f0df48b..c0780237 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/RealignerTargetCreator.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/RealignerTargetCreator.nf @@ -6,19 +6,18 @@ process RealignerTargetCreator { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple val(sample_id), file(bam_file), file(bai_file), val(chr) + tuple(sample_id, path(bam_file), path(bai_file), chr) output: - tuple val(sample_id), val(chr), file("${bam_file.baseName}.target_intervals.${chr}.list") + tuple(sample_id, chr, path("${bam_file.baseName}.target_intervals.${chr}.list"), emit: interval_list) script: - - """ - java -Xmx${task.memory.toGiga()-4}G -jar $params.gatk_path -T RealignerTargetCreator \ - --reference_sequence ${params.genome} \ - --input_file ${bam_file} \ - --intervals ${chr} \ - --out ${bam_file.baseName}.target_intervals.${chr}.list \ - ${params.optional} - """ + """ + java -Xmx${task.memory.toGiga()-4}G -jar $params.gatk_path -T RealignerTargetCreator \ + --reference_sequence ${params.genome} \ + --input_file ${bam_file} \ + --intervals ${chr} \ + --out ${bam_file.baseName}.target_intervals.${chr}.list \ + ${params.optional} + """ } diff --git a/GATK/3.8-1-0-gf15c1c3ef/SelectVariants.nf b/GATK/3.8-1-0-gf15c1c3ef/SelectVariants.nf index aed8c282..1f4a8d7f 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/SelectVariants.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/SelectVariants.nf @@ -6,13 +6,13 @@ process SelectVariantsSample { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple val(analysis_id), file(vcf_file), file(vcf_idx_file), val(sample_id) + tuple(analysis_id, path(vcf_file), path(vcf_idx_file), sample_id) output: - tuple val(sample_id), file("${sample_id}_${vcf_file.baseName}.vcf"), file("${sample_id}_${vcf_file.baseName}.vcf.idx") + tuple(sample_id, 
path("${sample_id}_${vcf_file.baseName}.vcf"), path("${sample_id}_${vcf_file.baseName}.vcf.idx"), emit: vcf_file) script: - """ - java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T SelectVariants --reference_sequence ${params.genome} -V ${vcf_file} --out ${sample_id}_${vcf_file.baseName}.vcf -sn ${sample_id} - """ + """ + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T SelectVariants --reference_sequence ${params.genome} -V ${vcf_file} --out ${sample_id}_${vcf_file.baseName}.vcf -sn ${sample_id} + """ } diff --git a/GATK/3.8-1-0-gf15c1c3ef/UnifiedGenotyper.nf b/GATK/3.8-1-0-gf15c1c3ef/UnifiedGenotyper.nf index a25e20e9..5a190faf 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/UnifiedGenotyper.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/UnifiedGenotyper.nf @@ -6,14 +6,14 @@ process UnifiedGenotyper { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple val(sample_id), file(bam_file), file(bai_file) + tuple(sample_id, path(bam_file), path(bai_file)) output: - tuple val(sample_id), file("${sample_id}.vcf") + tuple(sample_id, path("${sample_id}.vcf"), emit: vcf_file) script: - """ - java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T UnifiedGenotyper --reference_sequence ${params.genome} --input_file ${bam_file} --out ${sample_id}.vcf ${params.optional} - """ + """ + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T UnifiedGenotyper --reference_sequence ${params.genome} --input_file ${bam_file} --out ${sample_id}.vcf ${params.optional} + """ } diff --git a/GATK/3.8-1-0-gf15c1c3ef/VariantFiltration.nf b/GATK/3.8-1-0-gf15c1c3ef/VariantFiltration.nf index 0a2301da..bc79805b 100644 --- a/GATK/3.8-1-0-gf15c1c3ef/VariantFiltration.nf +++ b/GATK/3.8-1-0-gf15c1c3ef/VariantFiltration.nf @@ -6,19 +6,19 @@ process VariantFiltrationSnpIndel { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple val(analysis_id), file(vcf_file), file(vcf_idx_file) + tuple(analysis_id, path(vcf_file), path(vcf_idx_file)) output: - tuple val(analysis_id), 
file("${vcf_file.baseName}.filter.vcf"), file("${vcf_file.baseName}.filter.vcf.idx") + tuple(analysis_id, path("${vcf_file.baseName}.filter.vcf"), path("${vcf_file.baseName}.filter.vcf.idx"), emit: vcf_file) script: - """ - java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T SelectVariants --reference_sequence ${params.genome} -V $vcf_file --out ${vcf_file.baseName}.snp.vcf --selectTypeToExclude INDEL - java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T SelectVariants --reference_sequence ${params.genome} -V $vcf_file --out ${vcf_file.baseName}.indel.vcf --selectTypeToInclude INDEL + """ + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T SelectVariants --reference_sequence ${params.genome} -V $vcf_file --out ${vcf_file.baseName}.snp.vcf --selectTypeToExclude INDEL + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T SelectVariants --reference_sequence ${params.genome} -V $vcf_file --out ${vcf_file.baseName}.indel.vcf --selectTypeToInclude INDEL - java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T VariantFiltration --reference_sequence ${params.genome} -V ${vcf_file.baseName}.snp.vcf --out ${vcf_file.baseName}.snp_filter.vcf ${params.snp_filter} ${params.snp_cluster} - java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T VariantFiltration --reference_sequence ${params.genome} -V ${vcf_file.baseName}.indel.vcf --out ${vcf_file.baseName}.indel_filter.vcf ${params.indel_filter} + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T VariantFiltration --reference_sequence ${params.genome} -V ${vcf_file.baseName}.snp.vcf --out ${vcf_file.baseName}.snp_filter.vcf ${params.snp_filter} ${params.snp_cluster} + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T VariantFiltration --reference_sequence ${params.genome} -V ${vcf_file.baseName}.indel.vcf --out ${vcf_file.baseName}.indel_filter.vcf ${params.indel_filter} - java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} 
-T CombineVariants --reference_sequence ${params.genome} -V ${vcf_file.baseName}.snp_filter.vcf -V ${vcf_file.baseName}.indel_filter.vcf --out ${vcf_file.baseName}.filter.vcf --assumeIdenticalSamples - """ + java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T CombineVariants --reference_sequence ${params.genome} -V ${vcf_file.baseName}.snp_filter.vcf -V ${vcf_file.baseName}.indel_filter.vcf --out ${vcf_file.baseName}.filter.vcf --assumeIdenticalSamples + """ } diff --git a/MultiQC/1.8/MultiQC.nf b/MultiQC/1.8/MultiQC.nf index 424252c9..dfd0074c 100644 --- a/MultiQC/1.8/MultiQC.nf +++ b/MultiQC/1.8/MultiQC.nf @@ -5,14 +5,14 @@ process MultiQC { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple val(analysis_id), file(qc_files: "*") + val(analysis_id) + path(qc_files) output: - file "${analysis_id}_multiqc_report.html" - file "${analysis_id}_multiqc_report_data" + tuple(path("${analysis_id}_multiqc_report.html"), path("${analysis_id}_multiqc_report_data"), emit: report) script: - """ - multiqc ${params.optional} --title ${analysis_id} . - """ + """ + multiqc ${params.optional} --title ${analysis_id} . + """ } diff --git a/MultiQC/1.9/MultiQC.nf b/MultiQC/1.9/MultiQC.nf new file mode 100644 index 00000000..7b455ff8 --- /dev/null +++ b/MultiQC/1.9/MultiQC.nf @@ -0,0 +1,18 @@ +process MultiQC { + tag {"MultiQC"} + label 'MultiQC_1_9' + container = 'quay.io/biocontainers/multiqc:1.9--pyh9f0ad1d_0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + val(analysis_id) + path(qc_files) + + output: + tuple(path("${analysis_id}_multiqc_report.html"), path("${analysis_id}_multiqc_report_data"), emit: report) + + script: + """ + multiqc ${params.optional} --title ${analysis_id} . 
+ """ +} diff --git a/Picard/2.22.0/CollectHsMetrics.nf b/Picard/2.22.0/CollectHsMetrics.nf index 8b9a2524..b9633e13 100644 --- a/Picard/2.22.0/CollectHsMetrics.nf +++ b/Picard/2.22.0/CollectHsMetrics.nf @@ -6,14 +6,13 @@ process CollectHsMetrics { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple val(sample_id), file(bam_file), file(bai_file) + tuple(sample_id, path(bam_file), path(bai_file)) output: - file("${sample_id}.HsMetrics.txt") + path("${sample_id}.HsMetrics.txt", emit: txt_file) script: - - """ - picard -Xmx${task.memory.toGiga()-4}G CollectHsMetrics TMP_DIR=\$TMPDIR R=${params.genome} INPUT=${bam_file} OUTPUT=${sample_id}.HsMetrics.txt BAIT_INTERVALS=${params.bait} TARGET_INTERVALS=${params.target} ${params.optional} - """ + """ + picard -Xmx${task.memory.toGiga()-4}G CollectHsMetrics TMP_DIR=\$TMPDIR R=${params.genome} INPUT=${bam_file} OUTPUT=${sample_id}.HsMetrics.txt BAIT_INTERVALS=${params.bait} TARGET_INTERVALS=${params.target} ${params.optional} + """ } diff --git a/Picard/2.22.0/CollectMultipleMetrics.nf b/Picard/2.22.0/CollectMultipleMetrics.nf index d166150f..45be1ea8 100644 --- a/Picard/2.22.0/CollectMultipleMetrics.nf +++ b/Picard/2.22.0/CollectMultipleMetrics.nf @@ -6,14 +6,13 @@ process CollectMultipleMetrics { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple val(sample_id), file(bam_file), file(bai_file) + tuple(sample_id, path(bam_file), path(bai_file)) output: - file("*.txt") + path("*.txt", emit: txt_files) script: - - """ - picard -Xmx${task.memory.toGiga()-4}G CollectMultipleMetrics TMP_DIR=\$TMPDIR R=${params.genome} INPUT=${bam_file} OUTPUT=${sample_id} EXT=.txt ${params.optional} - """ + """ + picard -Xmx${task.memory.toGiga()-4}G CollectMultipleMetrics TMP_DIR=\$TMPDIR R=${params.genome} INPUT=${bam_file} OUTPUT=${sample_id} EXT=.txt ${params.optional} + """ } diff --git a/Picard/2.22.0/CollectWgsMetrics.nf b/Picard/2.22.0/CollectWgsMetrics.nf new file mode 100644 index 00000000..66e51fb2 --- /dev/null +++ 
b/Picard/2.22.0/CollectWgsMetrics.nf @@ -0,0 +1,18 @@ +process CollectWgsMetrics { + tag {"PICARD CollectWgsMetrics ${sample_id}"} + label 'PICARD_2_22_0' + label 'PICARD_2_22_0_CollectWgsMetrics' + container = 'quay.io/biocontainers/picard:2.22.0--0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(sample_id, path(bam_file), path(bai_file)) + + output: + path("${sample_id}.wgs_metrics.txt", emit: txt_file) + + script: + """ + picard -Xmx${task.memory.toGiga()-4}G CollectWgsMetrics TMP_DIR=\$TMPDIR R=${params.genome} INPUT=${bam_file} OUTPUT=${sample_id}.wgs_metrics.txt ${params.optional} + """ +} diff --git a/Picard/2.22.0/CreateSequenceDictionary.nf b/Picard/2.22.0/CreateSequenceDictionary.nf index 0e43f4ed..df15d284 100644 --- a/Picard/2.22.0/CreateSequenceDictionary.nf +++ b/Picard/2.22.0/CreateSequenceDictionary.nf @@ -13,8 +13,6 @@ process CreateSequenceDictionary { script: """ - picard -Xmx${task.memory.toGiga()-4}G CreateSequenceDictionary \ - REFERENCE=${genome_fasta} \ - OUTPUT=${genome_fasta.baseName}.dict + picard -Xmx${task.memory.toGiga()-4}G CreateSequenceDictionary REFERENCE=${genome_fasta} OUTPUT=${genome_fasta.baseName}.dict """ } diff --git a/Picard/2.22.0/EstimateLibraryComplexity.nf b/Picard/2.22.0/EstimateLibraryComplexity.nf index c608907e..6100ff95 100644 --- a/Picard/2.22.0/EstimateLibraryComplexity.nf +++ b/Picard/2.22.0/EstimateLibraryComplexity.nf @@ -6,14 +6,13 @@ process EstimateLibraryComplexity { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple val(sample_id), file(bam_file), file(bai_file) + tuple(sample_id, path(bam_file), path(bai_file)) output: - file("${sample_id}.LibraryComplexity.txt") + path("${sample_id}.LibraryComplexity.txt", emit: txt_file) script: - - """ - picard -Xmx${task.memory.toGiga()-4}G EstimateLibraryComplexity TMP_DIR=\$TMPDIR INPUT=${bam_file} OUTPUT=${sample_id}.LibraryComplexity.txt ${params.optional} - """ + """ + picard -Xmx${task.memory.toGiga()-4}G EstimateLibraryComplexity 
TMP_DIR=\$TMPDIR INPUT=${bam_file} OUTPUT=${sample_id}.LibraryComplexity.txt ${params.optional} + """ } diff --git a/Picard/2.22.0/IntervalListTools.nf b/Picard/2.22.0/IntervalListTools.nf index 077d0d6a..838e5801 100644 --- a/Picard/2.22.0/IntervalListTools.nf +++ b/Picard/2.22.0/IntervalListTools.nf @@ -6,20 +6,20 @@ process IntervalListTools { shell = ['/bin/bash', '-euo', 'pipefail'] input: - file(interval_list) + path(interval_list) output: - file("temp_*/*.interval_list") + path("temp_*/*.interval_list", emit: interval_list) script: - - """ - picard -Xmx${task.memory.toGiga()-4}G IntervalListTools TMP_DIR=\$TMPDIR \ - INPUT=${interval_list} OUTPUT=. \ - SUBDIVISION_MODE=BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW \ - SCATTER_COUNT=${params.scatter_count} \ - UNIQUE=true \ - - for folder in temp*; do mv \$folder/scattered.interval_list \$folder/\$folder\\.interval_list; done - """ + """ + picard -Xmx${task.memory.toGiga()-4}G IntervalListTools TMP_DIR=\$TMPDIR \ + INPUT=${interval_list} OUTPUT=. \ + SUBDIVISION_MODE=BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW \ + SCATTER_COUNT=${params.scatter_count} \ + UNIQUE=true \ + ${params.optional} + + for folder in temp*; do mv \$folder/scattered.interval_list \$folder/\$folder\\.interval_list; done + """ } diff --git a/README.md b/README.md index bdaa803f..ccd5c3a1 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ See `utils/template.nf` for a process template which uses the following guidelin - Use the original tool version numbering - Use CamelCase for tool, command and process names - Use lowercase with words separated by underscores for params, inputs, outputs and scripts. +- Use 4 spaces per indentation level. - All input and output identifiers should reflect their conceptual identity. Use informative names like unaligned_sequences, reference_genome, phylogeny, or aligned_sequences instead of foo_input, foo_file, result, input, output, and so forth. 
- Define two labels for each process, containing toolname, version and command separated by an underscore. - BWA_0.7.17 @@ -42,16 +43,16 @@ See `utils/template.nf` for a process template which uses the following guidelin - Use separate process input channels as much as possible. Use tuples for linked inputs only. ``` input: - tuple sample_id, rg_id, bam, bai - path genome_fasta + val(analysis_id) + tuple(sample_id, path(bam), path(bai)) ``` - Define named process output channels. This ensures that outputs can be referenced in external scope by their respective names. Indicate whether an output channel is optional. ``` output: - path "my_file.txt", emit: my_file - path "my_optional_file.txt", optional: my_optional_file, emit: my_optional_file - ...... + path("my_file.txt", emit: my_file) + path("my_optional_file.txt", optional: true, emit: my_optional_file) ``` +- Use `params` for resource files, for example `genome.fasta`, `database.vcf`. ## GUIX 1. Creating squashfs immage diff --git a/RSeQC/3.0.1/RSeQC.nf b/RSeQC/3.0.1/RSeQC.nf index 8b928a82..3ced0b2b 100644 --- a/RSeQC/3.0.1/RSeQC.nf +++ b/RSeQC/3.0.1/RSeQC.nf @@ -19,7 +19,7 @@ process RSeQC { read_distribution.py -i ${bam_file} -r ${genome_bed12} > ${bam_file.baseName}.read_distribution.txt infer_experiment.py -i ${bam_file} -r ${genome_bed12} > ${bam_file.baseName}.infer_experiment.txt junction_annotation.py -i ${bam_file} -o ${bam_file.baseName}.rseqc -r ${genome_bed12} - bam_stat.py -i ${bam_file} 2> ${bam_file.baseName}.bam_stat.txt + bam_stat.py -i ${bam_file} > ${bam_file.baseName}.bam_stat.txt junction_saturation.py -i ${bam_file} -o ${bam_file.baseName}.rseqc -r ${genome_bed12} 2> ${bam_file.baseName}.junction_annotation_log.txt read_duplication.py -i ${bam_file} -o ${bam_file.baseName}.read_duplication """ diff --git a/Sambamba/0.7.0/Flagstat.nf b/Sambamba/0.7.0/Flagstat.nf index 89b84f9e..87b57cbb 100644 --- a/Sambamba/0.7.0/Flagstat.nf +++ b/Sambamba/0.7.0/Flagstat.nf @@ -6,13 +6,13 @@
process Flagstat { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, file(bam_file), file(bai_file) + tuple(sample_id, path(bam_file), path(bai_file)) output: - file("${bam_file.baseName}.flagstat") + path("${bam_file.baseName}.flagstat", emit: flagstat) script: - """ - sambamba flagstat -t ${task.cpus} ${bam_file} > ${bam_file.baseName}.flagstat - """ + """ + sambamba flagstat -t ${task.cpus} ${bam_file} > ${bam_file.baseName}.flagstat + """ } diff --git a/Sambamba/0.7.0/Markdup.nf b/Sambamba/0.7.0/Markdup.nf index afaf378c..d47b5986 100644 --- a/Sambamba/0.7.0/Markdup.nf +++ b/Sambamba/0.7.0/Markdup.nf @@ -6,15 +6,14 @@ process Markdup { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, rg_id, file(bam_file), file(bai_file) - + tuple(sample_id, rg_id, path(bam_file), path(bai_file)) output: - tuple sample_id, rg_id, file("${bam_file.baseName}.markdup.bam"), file("${bam_file.baseName}.markdup.bam.bai") + tuple(sample_id, rg_id, path("${bam_file.baseName}.markdup.bam"), path("${bam_file.baseName}.markdup.bam.bai"), emit: bam_file) script: - """ - sambamba markdup -t ${task.cpus} ${bam_file} ${bam_file.baseName}.markdup.bam - """ + """ + sambamba markdup -t ${task.cpus} ${bam_file} ${bam_file.baseName}.markdup.bam + """ } process MarkdupMerge { @@ -25,13 +24,13 @@ process MarkdupMerge { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, file(bam_files) + tuple(sample_id, path(bam_files)) output: - tuple sample_id, file("${sample_id}.markdup.bam"), file("${sample_id}.markdup.bam.bai") + tuple(sample_id, path("${sample_id}.markdup.bam"), path("${sample_id}.markdup.bam.bai"), emit: bam_file) script: - """ - sambamba markdup -t ${task.cpus} ${bam_files} ${sample_id}.markdup.bam - """ + """ + sambamba markdup -t ${task.cpus} ${bam_files} ${sample_id}.markdup.bam + """ } diff --git a/Sambamba/0.7.0/Merge.nf b/Sambamba/0.7.0/Merge.nf index a74673c5..6cf85c8c 100644 --- a/Sambamba/0.7.0/Merge.nf +++ 
b/Sambamba/0.7.0/Merge.nf @@ -6,13 +6,13 @@ process Merge { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, file(bam_files), file(bai_files) + tuple(sample_id, path(bam_files), path(bai_files)) output: - tuple sample_id, file("${sample_id}.bam"), file("${sample_id}.bam.bai") + tuple(sample_id, path("${sample_id}.bam"), path("${sample_id}.bam.bai"), emit: bam_file) script: - """ - sambamba merge -t ${task.cpus} ${sample_id}.bam ${bam_files} - """ + """ + sambamba merge -t ${task.cpus} ${sample_id}.bam ${bam_files} + """ } diff --git a/Sambamba/0.7.0/ViewSort.nf b/Sambamba/0.7.0/ViewSort.nf index a3c2e94d..174f6d7b 100644 --- a/Sambamba/0.7.0/ViewSort.nf +++ b/Sambamba/0.7.0/ViewSort.nf @@ -6,13 +6,13 @@ process ViewSort { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, rg_id, file(sam_file) + tuple(sample_id, rg_id, path(sam_file)) output: - tuple sample_id, rg_id, file("${sam_file.baseName}.sort.bam"), file("${sam_file.baseName}.sort.bam.bai") + tuple(sample_id, rg_id, path("${sam_file.baseName}.sort.bam"), path("${sam_file.baseName}.sort.bam.bai"), emit: bam_file) script: - """ - sambamba view -t ${task.cpus} -S -f bam ${sam_file} | sambamba sort -t ${task.cpus} -m ${task.memory.toGiga()}G -o ${sam_file.baseName}.sort.bam /dev/stdin - """ + """ + sambamba view -t ${task.cpus} -S -f bam ${sam_file} | sambamba sort -t ${task.cpus} -m ${task.memory.toGiga()}G -o ${sam_file.baseName}.sort.bam /dev/stdin + """ } diff --git a/Sambamba/0.7.0/ViewUnmapped.nf b/Sambamba/0.7.0/ViewUnmapped.nf index b27b474e..0aada4de 100644 --- a/Sambamba/0.7.0/ViewUnmapped.nf +++ b/Sambamba/0.7.0/ViewUnmapped.nf @@ -6,14 +6,14 @@ process ViewUnmapped { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, file(bam_file), file(bai_file) + tuple(sample_id, path(bam_file), path(bai_file)) output: - tuple sample_id, file("${bam_file.baseName}.unmapped.bam"), file("${bam_file.baseName}.unmapped.bam.bai") + tuple(sample_id, 
path("${bam_file.baseName}.unmapped.bam"), path("${bam_file.baseName}.unmapped.bam.bai"), emit: bam_file) script: - """ - sambamba view -t ${task.cpus} -f bam -F 'unmapped and mate_is_unmapped' ${bam_file} > ${bam_file.baseName}.unmapped.bam - sambamba index -t ${task.cpus} ${bam_file.baseName}.unmapped.bam - """ + """ + sambamba view -t ${task.cpus} -f bam -F 'unmapped and mate_is_unmapped' ${bam_file} > ${bam_file.baseName}.unmapped.bam + sambamba index -t ${task.cpus} ${bam_file.baseName}.unmapped.bam + """ } diff --git a/Samtools/1.10/Flagstat.nf b/Samtools/1.10/Flagstat.nf index a881c5d1..862ec040 100644 --- a/Samtools/1.10/Flagstat.nf +++ b/Samtools/1.10/Flagstat.nf @@ -6,13 +6,13 @@ process Flagstat { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, file(bam_file), file(bai_file) + tuple(sample_id, path(bam_file), path(bai_file)) output: - tuple sample_id, file("${bam_file.baseName}.flagstat") + path("${bam_file.baseName}.flagstat", emit: flagstat) script: - """ - samtools flagstat ${bam_file} > ${bam_file.baseName}.flagstat - """ + """ + samtools flagstat ${bam_file} > ${bam_file.baseName}.flagstat + """ } diff --git a/Samtools/1.10/MPileup.nf b/Samtools/1.10/MPileup.nf index 6597c503..f6183c88 100644 --- a/Samtools/1.10/MPileup.nf +++ b/Samtools/1.10/MPileup.nf @@ -7,15 +7,15 @@ process MPileup { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, file(bam_file), file(bai_file) + tuple(sample_id, path(bam_file), path(bai_file)) output: - tuple sample_id, file("${bam_file.baseName}.pileup") + tuple(sample_id, path("${bam_file.baseName}.pileup"), emit: pileup) script: - """ - samtools mpileup ${params.optional} -f ${params.genome} ${bam_file} > ${bam_file.baseName}.pileup - """ + """ + samtools mpileup ${params.optional} -f ${params.genome} ${bam_file} > ${bam_file.baseName}.pileup + """ } process MPileup_bcf { @@ -27,13 +27,13 @@ process MPileup_bcf { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, 
file(bam_file), file(bai_file) + tuple(sample_id, path(bam_file), path(bai_file)) output: - tuple sample_id, file("${bam_file.baseName}.bcf") + tuple(sample_id, path("${bam_file.baseName}.bcf"), emit: bcf) script: - """ - samtools mpileup ${params.optional} -u -f ${params.genome} ${bam_file} > ${bam_file.baseName}.bcf - """ + """ + samtools mpileup ${params.optional} -u -f ${params.genome} ${bam_file} > ${bam_file.baseName}.bcf + """ } diff --git a/Samtools/1.10/View.nf b/Samtools/1.10/View.nf index 435f31d7..97f9c1e8 100644 --- a/Samtools/1.10/View.nf +++ b/Samtools/1.10/View.nf @@ -6,13 +6,13 @@ process View { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple sample_id, file(bam_file), file(bai_file) + tuple(sample_id, path(bam_file), path(bai_file)) output: - tuple sample_id, file("${bam_file.baseName}.sam") + tuple(sample_id, path("${bam_file.baseName}.sam"), emit: sam_file) script: - """ - samtools view ${params.optional} ${bam_file} ${params.region} > ${bam_file.baseName}.sam - """ + """ + samtools view ${params.optional} ${bam_file} ${params.region} > ${bam_file.baseName}.sam + """ } diff --git a/Utils/template.nf b/Utils/template.nf index fb059716..5ed8e204 100644 --- a/Utils/template.nf +++ b/Utils/template.nf @@ -6,15 +6,15 @@ process Command { shell = ['/bin/bash', '-euo', 'pipefail'] input: + val(analysis_id) tuple(sample_id, path(input_file)) output: tuple(sample_id, path(output_file), emit: output_file) - path("*.{tsv,txt}", emit: my_output) + path("log.txt", emit: log) - script: """ - tool command ${params.optional} + tool command ${params.optional} ${analysis_id} ${params.resource_file} ${input_file} """ }