diff --git a/ControlFREEC/11.5/AssessSignificance.nf b/ControlFREEC/11.5/AssessSignificance.nf
new file mode 100644
index 00000000..d4ce7af5
--- /dev/null
+++ b/ControlFREEC/11.5/AssessSignificance.nf
@@ -0,0 +1,20 @@
+// Pipes /bin/assess_significance.R into R with the CNV file and the ratio file as arguments.
+// Emits `cnv_pvalue`: (sample_id, "<cnv_file>.p.value.txt").
+process AssessSignificance {
+    tag {"Control Freec AssessSignificance ${sample_id}"}
+    label 'ControlFreec_11_5'
+    label 'ControlFreec_11_5_AssessSignificance'
+    container = 'library://sawibo/default/bioinf-tools:freec11.5'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        tuple(val(sample_id), path(ratio_file), path(cnv_file))
+
+    output:
+        tuple(val(sample_id), path("${cnv_file.name}.p.value.txt"), emit: cnv_pvalue)
+
+    script:
+        """
+        cat /bin/assess_significance.R | R --slave --args ${cnv_file} ${ratio_file}
+        """
+}
diff --git a/ControlFREEC/11.5/Freec.nf b/ControlFREEC/11.5/Freec.nf
new file mode 100644
index 00000000..c9422fcc
--- /dev/null
+++ b/ControlFREEC/11.5/Freec.nf
@@ -0,0 +1,38 @@
+// Writes a Control-FREEC config (<sample_id>.config) from params and runs `freec -conf` on one BAM.
+// Emits `cnv`: (sample_id, ratio file, CNVs file); `other`: (sample_id, .cpn, BedGraph, info file).
+process Freec {
+    tag {"Control Freec ${sample_id}"}
+    label 'ControlFreec_11_5'
+    label 'ControlFreec_11_5_Freec'
+    //TODO: upload to singularity library
+    container = 'library://sawibo/default/bioinf-tools:freec11.5'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        tuple(val(sample_id), path(bam_file), path(bai_file))
+
+    output:
+        tuple(val(sample_id), path("${bam_file.name}_ratio.txt"), path("${bam_file.name}_CNVs"), emit: cnv)
+        tuple(val(sample_id), path("${bam_file.name}_sample.cpn"), path("${bam_file.name}_ratio.BedGraph"), path("${bam_file.name}_info.txt"), emit: other)
+
+    script:
+        // The first write uses '>' so an existing config is truncated instead of gaining duplicate sections.
+        def config = "${sample_id}.config"
+        """
+        echo "[general]" > ${config}
+        echo "chrLenFile = ${params.chr_len_file}" >> ${config}
+        echo "chrFiles = ${params.chr_files}" >> ${config}
+        echo "gemMappabilityFile = ${params.gem_mappability_file}" >> ${config}
+        echo "ploidy = ${params.ploidy}" >> ${config}
+        echo "window = ${params.window}" >> ${config}
+        echo "telocentromeric = ${params.telocentromeric}" >> ${config}
+        echo "BedGraphOutput=TRUE" >> ${config}
+        echo "maxThreads=${task.cpus}" >> ${config}
+
+        echo "[sample]" >> ${config}
+        echo "inputFormat = BAM" >> ${config}
+        echo "mateFile = ${bam_file}" >> ${config}
+
+        freec -conf ${config}
+        """
+}
diff --git a/ControlFREEC/11.5/MakeGraph.nf b/ControlFREEC/11.5/MakeGraph.nf
new file mode 100644
index 00000000..3375d31b
--- /dev/null
+++ b/ControlFREEC/11.5/MakeGraph.nf
@@ -0,0 +1,20 @@
+// Pipes /bin/makeGraph.R into R with params.ploidy and the ratio file as arguments (cnv_file is staged only).
+// Emits `ratio_png`: (sample_id, "<ratio_file>.png", "<ratio_file>.log2.png").
+process MakeGraph {
+    tag {"Control Freec MakeGraph ${sample_id}"}
+    label 'ControlFreec_11_5'
+    label 'ControlFreec_11_5_MakeGraph'
+    container = 'library://sawibo/default/bioinf-tools:freec11.5'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        tuple(val(sample_id), path(ratio_file), path(cnv_file))
+
+    output:
+        tuple(val(sample_id), path("${ratio_file.name}.png"), path("${ratio_file.name}.log2.png"), emit: ratio_png)
+
+    script:
+        """
+        cat /bin/makeGraph.R | R --slave --args ${params.ploidy} ${ratio_file}
+        """
+}
diff --git a/ControlFREEC/11.5/MakeKaryotype.nf b/ControlFREEC/11.5/MakeKaryotype.nf
new file mode 100644
index 00000000..a903e8d2
--- /dev/null
+++ b/ControlFREEC/11.5/MakeKaryotype.nf
@@ -0,0 +1,20 @@
+// Pipes /bin/makeKaryotype.R into R with params.ploidy, params.maxlevel, params.telocentromeric and the ratio file.
+// Emits `karyotype_pdf`: (sample_id, files matching "*_karyotype.pdf").
+process MakeKaryotype {
+    tag {"Control Freec MakeKaryotype ${sample_id}"}
+    label 'ControlFreec_11_5'
+    label 'ControlFreec_11_5_MakeKaryotype'
+    container = 'library://sawibo/default/bioinf-tools:freec11.5'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        tuple(val(sample_id), path(ratio_file), path(cnv_file))
+
+    output:
+        tuple(val(sample_id), path("*_karyotype.pdf"), emit: karyotype_pdf)
+
+    script:
+        """
+        cat /bin/makeKaryotype.R | R --slave --args ${params.ploidy} ${params.maxlevel} ${params.telocentromeric} ${ratio_file}
+        """
+}
diff --git a/GATK/3.8-1-0-gf15c1c3ef/BaseRecalibrator.nf b/GATK/3.8-1-0-gf15c1c3ef/BaseRecalibrator.nf
new file mode 100644
index 00000000..419c0539
--- /dev/null
+++ b/GATK/3.8-1-0-gf15c1c3ef/BaseRecalibrator.nf
@@ -0,0 +1,35 @@
+// Two GATK steps restricted to one interval (`chr`): BaseRecalibrator writes a .table file,
+// then PrintReads applies it via --BQSR. params.optional is passed to the first command only.
+// Emits `bam_file`: (sample_id, "<bam>.bqsr.<chr>.bam", "<bam>.bqsr.<chr>.bai").
+process BaseRecalibrator {
+    tag {"GATK BaseRecalibrator ${sample_id} - ${chr}"}
+    label 'GATK_3_8_1_0_gf15c1c3ef'
+    label 'GATK_3_8_1_0_gf15c1c3ef_BaseRecalibrator'
+    container = 'quay.io/biocontainers/gatk:3.8--py27_1'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        tuple(val(sample_id), path(bam_file), path(bai_file), val(chr))
+
+    output:
+        tuple(val(sample_id), path("${bam_file.baseName}.bqsr.${chr}.bam"), path("${bam_file.baseName}.bqsr.${chr}.bai"), emit: bam_file)
+
+    script:
+        """
+        java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T BaseRecalibrator \
+        --num_cpu_threads_per_data_thread ${task.cpus} \
+        --reference_sequence ${params.genome} \
+        --input_file ${bam_file} \
+        --intervals ${chr} \
+        --out ${bam_file.baseName}.bqsr.${chr}.table \
+        ${params.optional}
+
+        java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T PrintReads \
+        --num_cpu_threads_per_data_thread ${task.cpus} \
+        --reference_sequence ${params.genome} \
+        --input_file ${bam_file} \
+        --BQSR ${bam_file.baseName}.bqsr.${chr}.table \
+        --intervals ${chr} \
+        --out ${bam_file.baseName}.bqsr.${chr}.bam
+        """
+}
diff --git a/GATK/3.8-1-0-gf15c1c3ef/CatVariants.nf b/GATK/3.8-1-0-gf15c1c3ef/CatVariants.nf
new file mode 100644
index 00000000..ca673c79
--- /dev/null
+++ b/GATK/3.8-1-0-gf15c1c3ef/CatVariants.nf
@@ -0,0 +1,21 @@
+// Runs GATK CatVariants over all staged gVCF files (each passed with -V) into "<sample_id>.g.vcf.gz".
+// Emits `vcf_file`: (sample_id, "<sample_id>.g.vcf.gz", "<sample_id>.g.vcf.gz.tbi").
+process CatVariantsGVCF {
+    tag {"GATK CatVariantsGVCF ${sample_id}"}
+    label 'GATK_3_8_1_0_gf15c1c3ef'
+    label 'GATK_3_8_1_0_gf15c1c3ef_CatVariantsGVCF'
+    container = 'quay.io/biocontainers/gatk:3.8--py27_1'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        tuple(val(sample_id), path(gvcf_files), path(gvcf_idx_files))
+
+    output:
+        tuple(val(sample_id), path("${sample_id}.g.vcf.gz"), path("${sample_id}.g.vcf.gz.tbi"), emit: vcf_file)
+
+    script:
+        def input_files = gvcf_files.collect{"$it"}.join(" -V ")
+        """
+        java -Xmx${task.memory.toGiga()-4}G -cp ${params.gatk_path} org.broadinstitute.gatk.tools.CatVariants --reference ${params.genome} -V ${input_files} --outputFile ${sample_id}.g.vcf.gz ${params.optional}
+        """
+}
diff --git a/GATK/3.8-1-0-gf15c1c3ef/CombineVariants.nf b/GATK/3.8-1-0-gf15c1c3ef/CombineVariants.nf
index a2f7a4f0..5ed12ece 100644
--- a/GATK/3.8-1-0-gf15c1c3ef/CombineVariants.nf
+++ b/GATK/3.8-1-0-gf15c1c3ef/CombineVariants.nf
@@ -17,3 +17,25 @@ process CombineVariants {
         java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T CombineVariants --reference_sequence ${params.genome} -V ${input_files} --out ${analysis_id}.vcf ${params.optional}
         """
 }
+
+// Runs GATK CombineVariants over all staged VCF files (each passed with -V) into "<sample_id>.g.vcf".
+// Emits `vcf_file`: (sample_id, "<sample_id>.g.vcf", "<sample_id>.g.vcf.idx").
+process CombineVariantsGVCF {
+    tag {"GATK CombineVariantsGVCF ${sample_id}"}
+    label 'GATK_3_8_1_0_gf15c1c3ef'
+    label 'GATK_3_8_1_0_gf15c1c3ef_CombineVariantsGVCF'
+    container = 'quay.io/biocontainers/gatk:3.8--py27_1'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        tuple(val(sample_id), path(vcf_files), path(vcf_idx_files))
+
+    output:
+        tuple(val(sample_id), path("${sample_id}.g.vcf"), path("${sample_id}.g.vcf.idx"), emit: vcf_file)
+
+    script:
+        def input_files = vcf_files.collect{"$it"}.join(" -V ")
+        """
+        java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T CombineVariants --reference_sequence ${params.genome} -V ${input_files} --out ${sample_id}.g.vcf ${params.optional}
+        """
+}
diff --git a/GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf b/GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf
new file mode 100644
index 00000000..1714b61f
--- /dev/null
+++ b/GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf
@@ -0,0 +1,26 @@
+// Runs GATK GenotypeGVCFs over all staged gVCF files (each passed with -V), restricted to interval_file.
+// Emits `vcf_file`: (analysis_id, "<analysis_id>_<interval>.vcf", "<analysis_id>_<interval>.vcf.idx").
+process GenotypeGVCFs {
+    tag {"GATK GenotypeGVCFs ${analysis_id}"}
+    label 'GATK_3_8_1_0_gf15c1c3ef'
+    label 'GATK_3_8_1_0_gf15c1c3ef_GenotypeGVCFs'
+    container = 'quay.io/biocontainers/gatk:3.8--py27_1'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        tuple(val(analysis_id), path(gvcf_files), path(gvcf_idx_files), path(interval_file))
+
+    output:
+        tuple(val(analysis_id), path("${analysis_id}_${interval_file.baseName}.vcf"), path("${analysis_id}_${interval_file.baseName}.vcf.idx"), emit: vcf_file)
+
+    script:
+        def input_files = gvcf_files.collect{"$it"}.join(" -V ")
+        """
+        java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T GenotypeGVCFs \
+        --reference_sequence ${params.genome} \
+        -V ${input_files} \
+        --out ${analysis_id}_${interval_file.baseName}.vcf \
+        --intervals ${interval_file} \
+        ${params.optional}
+        """
+}
diff --git a/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf b/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf
index 3c42714f..c3f0bf28 100644
--- a/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf
+++ b/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf
@@ -9,7 +9,7 @@ process HaplotypeCaller {
         tuple(analysis_id, path(bam_files), path(bai_files), path(interval_file))
 
     output:
-        tuple(val(analysis_id), file("${analysis_id}.${interval_file.baseName}.vcf"), file("${analysis_id}.${interval_file.baseName}.vcf.idx"), emit: vcf_file)
+        tuple(val(analysis_id), path("${analysis_id}.${interval_file.baseName}.vcf"), path("${analysis_id}.${interval_file.baseName}.vcf.idx"), emit: vcf_file)
 
     script:
         def input_files = bam_files.collect{"$it"}.join(" --input_file ")
@@ -22,3 +22,30 @@ process HaplotypeCaller {
         ${params.optional}
         """
 }
+
+// Runs GATK HaplotypeCaller with --emitRefConfidence GVCF on one BAM, restricted to interval_file.
+// Emits `vcf_file`: (sample_id, "<sample_id>_<interval>.g.vcf", its .idx, and the interval file itself).
+process HaplotypeCallerGVCF {
+    tag {"GATK HaplotypeCallerGVCF ${sample_id} - ${interval_file.baseName}"}
+    label 'GATK_3_8_1_0_gf15c1c3ef'
+    label 'GATK_3_8_1_0_gf15c1c3ef_HaplotypeCallerGVCF'
+    container = 'quay.io/biocontainers/gatk:3.8--py27_1'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        tuple(val(sample_id), path(bam_file), path(bai_file), path(interval_file))
+
+    output:
+        tuple(val(sample_id), path("${sample_id}_${interval_file.baseName}.g.vcf"), path("${sample_id}_${interval_file.baseName}.g.vcf.idx"), path(interval_file), emit: vcf_file)
+
+    script:
+        """
+        java -Xmx${task.memory.toGiga()-4}G -jar ${params.gatk_path} -T HaplotypeCaller \
+        --reference_sequence ${params.genome} \
+        --input_file ${bam_file} \
+        --intervals ${interval_file} \
+        --out ${sample_id}_${interval_file.baseName}.g.vcf \
+        --emitRefConfidence GVCF \
+        ${params.optional}
+        """
+}
diff --git a/Manta/1.6.0/Manta.nf b/Manta/1.6.0/Manta.nf
new file mode 100644
index 00000000..cb7f7f70
--- /dev/null
+++ b/Manta/1.6.0/Manta.nf
@@ -0,0 +1,28 @@
+// Configures Manta in the task directory (--runDir .), runs the generated workflow locally with
+// task.cpus jobs, then moves the result VCFs and their .tbi indexes to Manta_<sample_id>.* names.
+process Manta {
+    tag {"Manta ConfigAndRun ${sample_id}"}
+    label 'Manta_1_6_0'
+    label 'Manta_1_6_0_ConfigAndRun'
+    container = 'quay.io/biocontainers/manta:1.6.0--py27_0'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        tuple(val(sample_id), path(bam_file), path(bai_file))
+
+    output:
+        tuple(val(sample_id), path("*.candidateSmallIndels.*"), path("*.candidateSV.*"), path("*.diploidSV.*"), emit: sv)
+
+    script:
+        """
+        configManta.py --referenceFasta ${params.genome_fasta} --runDir . --bam ${bam_file}
+        ./runWorkflow.py -m local -j ${task.cpus}
+
+        mv results/variants/candidateSmallIndels.vcf.gz Manta_${sample_id}.candidateSmallIndels.vcf.gz
+        mv results/variants/candidateSmallIndels.vcf.gz.tbi Manta_${sample_id}.candidateSmallIndels.vcf.gz.tbi
+        mv results/variants/candidateSV.vcf.gz Manta_${sample_id}.candidateSV.vcf.gz
+        mv results/variants/candidateSV.vcf.gz.tbi Manta_${sample_id}.candidateSV.vcf.gz.tbi
+        mv results/variants/diploidSV.vcf.gz Manta_${sample_id}.diploidSV.vcf.gz
+        mv results/variants/diploidSV.vcf.gz.tbi Manta_${sample_id}.diploidSV.vcf.gz.tbi
+        """
+}
diff --git a/MultiQC/1.9/MultiQC.nf b/MultiQC/1.9/MultiQC.nf
new file mode 100644
index 00000000..7b455ff8
--- /dev/null
+++ b/MultiQC/1.9/MultiQC.nf
@@ -0,0 +1,20 @@
+// Runs MultiQC on the task directory (where qc_files are staged) with analysis_id as the report title.
+// Emits `report`: ("<analysis_id>_multiqc_report.html", "<analysis_id>_multiqc_report_data").
+process MultiQC {
+    tag {"MultiQC"}
+    label 'MultiQC_1_9'
+    container = 'quay.io/biocontainers/multiqc:1.9--pyh9f0ad1d_0'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        val(analysis_id)
+        path(qc_files)
+
+    output:
+        tuple(path("${analysis_id}_multiqc_report.html"), path("${analysis_id}_multiqc_report_data"), emit: report)
+
+    script:
+        """
+        multiqc ${params.optional} --title ${analysis_id} .
+        """
+}
diff --git a/Picard/2.22.0/CollectWgsMetrics.nf b/Picard/2.22.0/CollectWgsMetrics.nf
new file mode 100644
index 00000000..66e51fb2
--- /dev/null
+++ b/Picard/2.22.0/CollectWgsMetrics.nf
@@ -0,0 +1,20 @@
+// Runs Picard CollectWgsMetrics on one BAM against params.genome, using \$TMPDIR as TMP_DIR.
+// Emits `txt_file`: "<sample_id>.wgs_metrics.txt".
+process CollectWgsMetrics {
+    tag {"PICARD CollectWgsMetrics ${sample_id}"}
+    label 'PICARD_2_22_0'
+    label 'PICARD_2_22_0_CollectWgsMetrics'
+    container = 'quay.io/biocontainers/picard:2.22.0--0'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        tuple(val(sample_id), path(bam_file), path(bai_file))
+
+    output:
+        path("${sample_id}.wgs_metrics.txt", emit: txt_file)
+
+    script:
+        """
+        picard -Xmx${task.memory.toGiga()-4}G CollectWgsMetrics TMP_DIR=\$TMPDIR R=${params.genome} INPUT=${bam_file} OUTPUT=${sample_id}.wgs_metrics.txt ${params.optional}
+        """
+}
diff --git a/Picard/2.22.0/IntervalListTools.nf b/Picard/2.22.0/IntervalListTools.nf
index d81bde50..838e5801 100644
--- a/Picard/2.22.0/IntervalListTools.nf
+++ b/Picard/2.22.0/IntervalListTools.nf
@@ -18,7 +18,8 @@ process IntervalListTools {
         SUBDIVISION_MODE=BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW \
         SCATTER_COUNT=${params.scatter_count} \
         UNIQUE=true \
-
+        ${params.optional}
+
         for folder in temp*; do mv \$folder/scattered.interval_list \$folder/\$folder\\.interval_list; done
         """
 }