diff --git a/definitions/pipelines/cle_aml_trio.cwl b/definitions/pipelines/cle_aml_trio.cwl index dce91e30a..688c7fa68 100644 --- a/definitions/pipelines/cle_aml_trio.cwl +++ b/definitions/pipelines/cle_aml_trio.cwl @@ -7,28 +7,23 @@ requirements: - class: SchemaDefRequirement types: - $import: ../types/labelled_file.yml + - $import: ../types/sequence_data.yml - class: SubworkflowFeatureRequirement - class: StepInputExpressionRequirement inputs: reference: string - tumor_bams: - type: File[] - tumor_readgroups: - type: string[] + tumor_sequence: + type: ../types/sequence_data.yml#sequence_data[] tumor_name: type: string? default: 'tumor' - normal_bams: - type: File[] - normal_readgroups: - type: string[] + normal_sequence: + type: ../types/sequence_data.yml#sequence_data[] normal_name: type: string? default: 'normal' - followup_bams: - type: File[] - followup_readgroups: - type: string[] + followup_sequence: + type: ../types/sequence_data.yml#sequence_data[] followup_name: type: string? 
default: 'followup' @@ -367,8 +362,7 @@ steps: run: exome_alignment.cwl in: reference: reference - bams: normal_bams - readgroups: normal_readgroups + sequence: normal_sequence mills: mills known_indels: known_indels dbsnp_vcf: dbsnp_vcf @@ -391,8 +385,7 @@ steps: run: exome_alignment.cwl in: reference: reference - bams: tumor_bams - readgroups: tumor_readgroups + sequence: tumor_sequence mills: mills known_indels: known_indels dbsnp_vcf: dbsnp_vcf @@ -415,8 +408,7 @@ steps: run: exome_alignment.cwl in: reference: reference - bams: followup_bams - readgroups: followup_readgroups + sequence: followup_sequence mills: mills known_indels: known_indels dbsnp_vcf: dbsnp_vcf diff --git a/definitions/pipelines/cle_somatic_exome.cwl b/definitions/pipelines/cle_somatic_exome.cwl index 3337e329a..413ae0dc9 100644 --- a/definitions/pipelines/cle_somatic_exome.cwl +++ b/definitions/pipelines/cle_somatic_exome.cwl @@ -7,21 +7,18 @@ requirements: - class: SchemaDefRequirement types: - $import: ../types/labelled_file.yml + - $import: ../types/sequence_data.yml - class: SubworkflowFeatureRequirement - class: StepInputExpressionRequirement inputs: reference: string - tumor_bams: - type: File[] - tumor_readgroups: - type: string[] + tumor_sequence: + type: ../types/sequence_data.yml#sequence_data[] tumor_name: type: string? default: 'tumor' - normal_bams: - type: File[] - normal_readgroups: - type: string[] + normal_sequence: + type: ../types/sequence_data.yml#sequence_data[] normal_name: type: string? 
default: 'normal' @@ -295,8 +292,7 @@ steps: run: exome_alignment.cwl in: reference: reference - bams: tumor_bams - readgroups: tumor_readgroups + sequence: tumor_sequence mills: mills known_indels: known_indels dbsnp_vcf: dbsnp_vcf @@ -319,8 +315,7 @@ steps: run: exome_alignment.cwl in: reference: reference - bams: normal_bams - readgroups: normal_readgroups + sequence: normal_sequence mills: mills known_indels: known_indels dbsnp_vcf: dbsnp_vcf diff --git a/definitions/pipelines/exome.cwl b/definitions/pipelines/exome.cwl index 8048641b7..148b6c7fa 100644 --- a/definitions/pipelines/exome.cwl +++ b/definitions/pipelines/exome.cwl @@ -7,13 +7,12 @@ requirements: - class: SchemaDefRequirement types: - $import: ../types/labelled_file.yml + - $import: ../types/sequence_data.yml - class: SubworkflowFeatureRequirement inputs: reference: string - bams: - type: File[] - readgroups: - type: string[] + sequence: + type: ../types/sequence_data.yml#sequence_data[] mills: type: File secondaryFiles: [.tbi] @@ -179,8 +178,7 @@ steps: run: exome_alignment.cwl in: reference: reference - bams: bams - readgroups: readgroups + sequence: sequence mills: mills known_indels: known_indels dbsnp_vcf: dbsnp_vcf diff --git a/definitions/pipelines/exome_alignment.cwl b/definitions/pipelines/exome_alignment.cwl index 976187eed..bd56f45be 100644 --- a/definitions/pipelines/exome_alignment.cwl +++ b/definitions/pipelines/exome_alignment.cwl @@ -7,13 +7,12 @@ requirements: - class: SchemaDefRequirement types: - $import: ../types/labelled_file.yml + - $import: ../types/sequence_data.yml - class: SubworkflowFeatureRequirement inputs: reference: string - bams: - type: File[] - readgroups: - type: string[] + sequence: + type: ../types/sequence_data.yml#sequence_data[] mills: type: File secondaryFiles: [.tbi] @@ -92,11 +91,10 @@ outputs: outputSource: qc/verify_bam_id_depth steps: alignment: - run: ../subworkflows/bam_to_bqsr.cwl + run: ../subworkflows/sequence_to_bqsr.cwl in: reference: reference - 
bams: bams - readgroups: readgroups + unaligned: sequence mills: mills known_indels: known_indels dbsnp_vcf: dbsnp_vcf diff --git a/definitions/pipelines/gathered_cle_aml_trio.cwl b/definitions/pipelines/gathered_cle_aml_trio.cwl index b02265c1b..761392e83 100644 --- a/definitions/pipelines/gathered_cle_aml_trio.cwl +++ b/definitions/pipelines/gathered_cle_aml_trio.cwl @@ -7,28 +7,23 @@ requirements: - class: SchemaDefRequirement types: - $import: ../types/labelled_file.yml + - $import: ../types/sequence_data.yml - class: SubworkflowFeatureRequirement - class: StepInputExpressionRequirement inputs: reference: string - tumor_bams: - type: File[] - tumor_readgroups: - type: string[] + tumor_sequence: + type: ../types/sequence_data.yml#sequence_data[] tumor_name: type: string? default: 'tumor' - normal_bams: - type: File[] - normal_readgroups: - type: string[] + normal_sequence: + type: ../types/sequence_data.yml#sequence_data[] normal_name: type: string? default: 'normal' - followup_bams: - type: File[] - followup_readgroups: - type: string[] + followup_sequence: + type: ../types/sequence_data.yml#sequence_data[] followup_name: type: string? 
default: 'followup' @@ -189,14 +184,11 @@ steps: run: cle_aml_trio.cwl in: reference: reference - tumor_bams: tumor_bams - tumor_readgroups: tumor_readgroups + tumor_sequence: tumor_sequence tumor_name: tumor_name - normal_bams: normal_bams - normal_readgroups: normal_readgroups + normal_sequence: normal_sequence normal_name: normal_name - followup_bams: followup_bams - followup_readgroups: followup_readgroups + followup_sequence: followup_sequence followup_name: followup_name mills: mills known_indels: known_indels diff --git a/definitions/pipelines/gathered_cle_somatic_exome.cwl b/definitions/pipelines/gathered_cle_somatic_exome.cwl index 9a70899db..e8e713cb8 100644 --- a/definitions/pipelines/gathered_cle_somatic_exome.cwl +++ b/definitions/pipelines/gathered_cle_somatic_exome.cwl @@ -7,21 +7,18 @@ requirements: - class: SchemaDefRequirement types: - $import: ../types/labelled_file.yml + - $import: ../types/sequence_data.yml - class: SubworkflowFeatureRequirement - class: StepInputExpressionRequirement inputs: reference: string - tumor_bams: - type: File[] - tumor_readgroups: - type: string[] + tumor_sequence: + type: ../types/sequence_data.yml#sequence_data[] tumor_cram_name: type: string? default: 'tumor.cram' - normal_bams: - type: File[] - normal_readgroups: - type: string[] + normal_sequence: + type: ../types/sequence_data.yml#sequence_data[] normal_cram_name: type: string? 
default: 'normal.cram' @@ -152,11 +149,9 @@ steps: run: cle_somatic_exome.cwl in: reference: reference - tumor_bams: tumor_bams - tumor_readgroups: tumor_readgroups + tumor_sequence: tumor_sequence tumor_cram_name: tumor_cram_name - normal_bams: normal_bams - normal_readgroups: normal_readgroups + normal_sequence: normal_sequence normal_cram_name: normal_cram_name mills: mills known_indels: known_indels diff --git a/definitions/pipelines/gathered_somatic_exome.cwl b/definitions/pipelines/gathered_somatic_exome.cwl index 6d12c42ad..0d1e6f56d 100644 --- a/definitions/pipelines/gathered_somatic_exome.cwl +++ b/definitions/pipelines/gathered_somatic_exome.cwl @@ -7,21 +7,18 @@ requirements: - class: SchemaDefRequirement types: - $import: ../types/labelled_file.yml + - $import: ../types/sequence_data.yml - class: SubworkflowFeatureRequirement - class: StepInputExpressionRequirement inputs: reference: string - tumor_bams: - type: File[] - tumor_readgroups: - type: string[] + tumor_sequence: + type: ../types/sequence_data.yml#sequence_data[] tumor_cram_name: type: string? default: 'tumor.cram' - normal_bams: - type: File[] - normal_readgroups: - type: string[] + normal_sequence: + type: ../types/sequence_data.yml#sequence_data[] normal_cram_name: type: string? 
default: 'normal.cram' @@ -149,11 +146,9 @@ steps: run: somatic_exome.cwl in: reference: reference - tumor_bams: tumor_bams - tumor_readgroups: tumor_readgroups + tumor_sequence: tumor_sequence tumor_cram_name: tumor_cram_name - normal_bams: normal_bams - normal_readgroups: normal_readgroups + normal_sequence: normal_sequence normal_cram_name: normal_cram_name mills: mills known_indels: known_indels diff --git a/definitions/pipelines/germline_exome.cwl b/definitions/pipelines/germline_exome.cwl index 3103c8e95..0ad178adc 100644 --- a/definitions/pipelines/germline_exome.cwl +++ b/definitions/pipelines/germline_exome.cwl @@ -7,13 +7,12 @@ requirements: - class: SchemaDefRequirement types: - $import: ../types/labelled_file.yml + - $import: ../types/sequence_data.yml - class: SubworkflowFeatureRequirement inputs: reference: string - bams: - type: File[] - readgroups: - type: string[] + sequence: + type: ../types/sequence_data.yml#sequence_data[] mills: type: File secondaryFiles: [.tbi] @@ -153,8 +152,7 @@ steps: run: exome_alignment.cwl in: reference: reference - bams: bams - readgroups: readgroups + sequence: sequence mills: mills known_indels: known_indels dbsnp_vcf: dbsnp_vcf diff --git a/definitions/pipelines/germline_exome_hla_typing.cwl b/definitions/pipelines/germline_exome_hla_typing.cwl index a610f5c16..b0e0be618 100644 --- a/definitions/pipelines/germline_exome_hla_typing.cwl +++ b/definitions/pipelines/germline_exome_hla_typing.cwl @@ -7,13 +7,12 @@ requirements: - class: SchemaDefRequirement types: - $import: ../types/labelled_file.yml + - $import: ../types/sequence_data.yml - class: SubworkflowFeatureRequirement inputs: reference: string - bams: - type: File[] - readgroups: - type: string[] + sequence: + type: ../types/sequence_data.yml#sequence_data[] mills: type: File secondaryFiles: [.tbi] @@ -149,8 +148,7 @@ steps: run: germline_exome.cwl in: reference: reference - bams: bams - readgroups: readgroups + sequence: sequence mills: mills known_indels: 
known_indels dbsnp_vcf: dbsnp_vcf diff --git a/definitions/pipelines/germline_wgs.cwl b/definitions/pipelines/germline_wgs.cwl index fb35e969e..5e6801be6 100644 --- a/definitions/pipelines/germline_wgs.cwl +++ b/definitions/pipelines/germline_wgs.cwl @@ -7,13 +7,12 @@ requirements: - class: SchemaDefRequirement types: - $import: ../types/labelled_file.yml + - $import: ../types/sequence_data.yml - class: SubworkflowFeatureRequirement inputs: reference: string - bams: - type: File[] - readgroups: - type: string[] + sequence: + type: ../types/sequence_data.yml#sequence_data[] mills: type: File secondaryFiles: [.tbi] @@ -263,8 +262,7 @@ steps: run: wgs_alignment.cwl in: reference: reference - bams: bams - readgroups: readgroups + sequence: sequence mills: mills known_indels: known_indels dbsnp_vcf: dbsnp_vcf diff --git a/definitions/pipelines/immuno.cwl b/definitions/pipelines/immuno.cwl index 689bc42fa..ced9d2569 100644 --- a/definitions/pipelines/immuno.cwl +++ b/definitions/pipelines/immuno.cwl @@ -7,6 +7,7 @@ requirements: - class: SchemaDefRequirement types: - $import: ../types/labelled_file.yml + - $import: ../types/sequence_data.yml - class: SubworkflowFeatureRequirement inputs: #rnaseq inputs @@ -53,17 +54,13 @@ inputs: #somatic inputs reference: string - tumor_bams: - type: File[] - tumor_readgroups: - type: string[] + tumor_sequence: + type: ../types/sequence_data.yml#sequence_data[] tumor_name: type: string? default: 'tumor' - normal_bams: - type: File[] - normal_readgroups: - type: string[] + normal_sequence: + type: ../types/sequence_data.yml#sequence_data[] normal_name: type: string? 
default: 'normal' @@ -641,11 +638,9 @@ steps: run: somatic_exome.cwl in: reference: reference - tumor_bams: tumor_bams - tumor_readgroups: tumor_readgroups + tumor_sequence: tumor_sequence tumor_name: tumor_name - normal_bams: normal_bams - normal_readgroups: normal_readgroups + normal_sequence: normal_sequence normal_name: normal_name mills: mills known_indels: known_indels @@ -699,8 +694,7 @@ steps: run: germline_exome_hla_typing.cwl in: reference: reference - bams: normal_bams - readgroups: normal_readgroups + sequence: normal_sequence mills: mills known_indels: known_indels dbsnp_vcf: dbsnp_vcf diff --git a/definitions/pipelines/somatic_exome.cwl b/definitions/pipelines/somatic_exome.cwl index cf6380eb7..1c80978b2 100644 --- a/definitions/pipelines/somatic_exome.cwl +++ b/definitions/pipelines/somatic_exome.cwl @@ -7,21 +7,18 @@ requirements: - class: SchemaDefRequirement types: - $import: ../types/labelled_file.yml + - $import: ../types/sequence_data.yml - class: SubworkflowFeatureRequirement - class: StepInputExpressionRequirement inputs: reference: string - tumor_bams: - type: File[] - tumor_readgroups: - type: string[] + tumor_sequence: + type: ../types/sequence_data.yml#sequence_data[] tumor_name: type: string? default: 'tumor' - normal_bams: - type: File[] - normal_readgroups: - type: string[] + normal_sequence: + type: ../types/sequence_data.yml#sequence_data[] normal_name: type: string? 
default: 'normal' @@ -353,8 +350,7 @@ steps: run: exome_alignment.cwl in: reference: reference - bams: tumor_bams - readgroups: tumor_readgroups + sequence: tumor_sequence mills: mills known_indels: known_indels dbsnp_vcf: dbsnp_vcf @@ -377,8 +373,7 @@ steps: run: exome_alignment.cwl in: reference: reference - bams: normal_bams - readgroups: normal_readgroups + sequence: normal_sequence mills: mills known_indels: known_indels dbsnp_vcf: dbsnp_vcf diff --git a/definitions/pipelines/wgs.cwl b/definitions/pipelines/wgs.cwl index 348db2102..6f401bf0e 100644 --- a/definitions/pipelines/wgs.cwl +++ b/definitions/pipelines/wgs.cwl @@ -7,13 +7,12 @@ requirements: - class: SchemaDefRequirement types: - $import: ../types/labelled_file.yml + - $import: ../types/sequence_data.yml - class: SubworkflowFeatureRequirement inputs: reference: string - bams: - type: File[] - readgroups: - type: string[] + sequence: + type: ../types/sequence_data.yml#sequence_data[] mills: type: File secondaryFiles: [.tbi] @@ -154,8 +153,7 @@ steps: run: wgs_alignment.cwl in: reference: reference - bams: bams - readgroups: readgroups + sequence: sequence mills: mills known_indels: known_indels dbsnp_vcf: dbsnp_vcf diff --git a/definitions/pipelines/wgs_alignment.cwl b/definitions/pipelines/wgs_alignment.cwl index fcb1ebfd5..60a8e6814 100644 --- a/definitions/pipelines/wgs_alignment.cwl +++ b/definitions/pipelines/wgs_alignment.cwl @@ -7,13 +7,12 @@ requirements: - class: SchemaDefRequirement types: - $import: ../types/labelled_file.yml + - $import: ../types/sequence_data.yml - class: SubworkflowFeatureRequirement inputs: reference: string - bams: - type: File[] - readgroups: - type: string[] + sequence: + type: ../types/sequence_data.yml#sequence_data[] mills: type: File secondaryFiles: [.tbi] @@ -99,11 +98,10 @@ outputs: outputSource: qc/bamcoverage_bigwig steps: alignment: - run: ../subworkflows/bam_to_bqsr.cwl + run: ../subworkflows/sequence_to_bqsr.cwl in: reference: reference - bams: bams - 
readgroups: readgroups + unaligned: sequence mills: mills known_indels: known_indels dbsnp_vcf: dbsnp_vcf diff --git a/definitions/subworkflows/sequence_align_and_tag_adapter.cwl b/definitions/subworkflows/sequence_align_and_tag_adapter.cwl new file mode 100644 index 000000000..f839ad013 --- /dev/null +++ b/definitions/subworkflows/sequence_align_and_tag_adapter.cwl @@ -0,0 +1,44 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +label: "adapter for sequence_align_and_tag" +doc: "Some workflow engines won't stage files in our nested structure, so parse it out here" +requirements: + - class: InlineJavascriptRequirement + - class: SchemaDefRequirement + types: + - $import: ../types/sequence_data.yml + - class: StepInputExpressionRequirement + - class: SubworkflowFeatureRequirement +inputs: + unaligned: + type: ../types/sequence_data.yml#sequence_data + doc: "the unaligned sequence data with readgroup information" + reference: + type: string + doc: 'bwa-indexed reference file' +outputs: + aligned_bam: + type: File + outputSource: align_and_tag/aligned_bam +steps: + align_and_tag: + run: ../tools/sequence_align_and_tag.cwl + in: + reference: reference + bam: + source: unaligned + valueFrom: "$(self.sequence.hasOwnProperty('bam')? self.sequence.bam : null)" + fastq1: + source: unaligned + valueFrom: "$(self.sequence.hasOwnProperty('fastq1')? self.sequence.fastq1 : null)" + fastq2: + source: unaligned + valueFrom: "$(self.sequence.hasOwnProperty('fastq2')? 
self.sequence.fastq2 : null)" + readgroup: + source: unaligned + valueFrom: $(self.readgroup) + + out: + [aligned_bam] diff --git a/definitions/subworkflows/sequence_to_bqsr.cwl b/definitions/subworkflows/sequence_to_bqsr.cwl new file mode 100644 index 000000000..aa9b81bbe --- /dev/null +++ b/definitions/subworkflows/sequence_to_bqsr.cwl @@ -0,0 +1,93 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +label: "Raw sequence data to BQSR" +requirements: + - class: SchemaDefRequirement + types: + - $import: ../types/sequence_data.yml + - class: ScatterFeatureRequirement + - class: SubworkflowFeatureRequirement + - class: MultipleInputFeatureRequirement + +inputs: + unaligned: + type: ../types/sequence_data.yml#sequence_data[] + bqsr_intervals: + type: string[] + reference: + type: string + dbsnp_vcf: + type: File + secondaryFiles: [.tbi] + final_name: + type: string? + default: 'final.bam' + mills: + type: File + secondaryFiles: [.tbi] + known_indels: + type: File + secondaryFiles: [.tbi] +outputs: + final_bam: + type: File + outputSource: index_bam/indexed_bam + secondaryFiles: [.bai, ^.bai] + mark_duplicates_metrics_file: + type: File + outputSource: mark_duplicates_and_sort/metrics_file +steps: + align: + scatter: [unaligned] + scatterMethod: dotproduct + run: sequence_align_and_tag_adapter.cwl + in: + unaligned: unaligned + reference: reference + out: + [aligned_bam] + merge: + run: ../tools/merge_bams_samtools.cwl + in: + bams: align/aligned_bam + name: final_name + out: + [merged_bam] + name_sort: + run: ../tools/name_sort.cwl + in: + bam: merge/merged_bam + out: + [name_sorted_bam] + mark_duplicates_and_sort: + run: ../tools/mark_duplicates_and_sort.cwl + in: + bam: name_sort/name_sorted_bam + out: + [sorted_bam, metrics_file] + bqsr: + run: ../tools/bqsr.cwl + in: + reference: reference + bam: mark_duplicates_and_sort/sorted_bam + intervals: bqsr_intervals + known_sites: [dbsnp_vcf, mills, known_indels] + out: + [bqsr_table] + apply_bqsr: + 
run: ../tools/apply_bqsr.cwl + in: + reference: reference + bam: mark_duplicates_and_sort/sorted_bam + bqsr_table: bqsr/bqsr_table + output_name: final_name + out: + [bqsr_bam] + index_bam: + run: ../tools/index_bam.cwl + in: + bam: apply_bqsr/bqsr_bam + out: + [indexed_bam] diff --git a/definitions/tools/sequence_align_and_tag.cwl b/definitions/tools/sequence_align_and_tag.cwl new file mode 100644 index 000000000..0f9deadc7 --- /dev/null +++ b/definitions/tools/sequence_align_and_tag.cwl @@ -0,0 +1,88 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: CommandLineTool +label: "align with bwa_mem and tag" +doc: "Due to workflow runner limitations, use sequence_align_and_tag_adapter.cwl subworkflow to call this" +baseCommand: ["/bin/bash", "sequence_alignment_helper.sh"] +requirements: + - class: SchemaDefRequirement + types: + - $import: ../types/sequence_data.yml + - class: ResourceRequirement + coresMin: 8 + ramMin: 20000 + - class: DockerRequirement + dockerPull: "mgibio/alignment_helper-cwl:1.0.0" + - class: InitialWorkDirRequirement + listing: + - entryname: 'sequence_alignment_helper.sh' + entry: | + set -o pipefail + set -o errexit + set -o nounset + + while getopts "b:1:2:g:r:n:" opt; do + case "$opt" in + b) + MODE=bam + BAM="$OPTARG" + ;; + 1) + MODE=fastq + FASTQ1="$OPTARG" + ;; + 2) + MODE=fastq + FASTQ2="$OPTARG" + ;; + g) + READGROUP="$OPTARG" + ;; + r) + REFERENCE="$OPTARG" + ;; + n) + NTHREADS="$OPTARG" + ;; + esac + done + + # fastq mode passes mates as two separate files, so bwa mem must run without -p: -p marks the first file as interleaved and makes bwa ignore the second one + if [[ "$MODE" == 'fastq' ]]; then + /usr/local/bin/bwa mem -K 100000000 -t "$NTHREADS" -Y -R "$READGROUP" "$REFERENCE" "$FASTQ1" "$FASTQ2" | /usr/local/bin/samblaster -a --addMateTags | /opt/samtools/bin/samtools view -b -S /dev/stdin + fi + if [[ "$MODE" == 'bam' ]]; then + /usr/bin/java -Xmx4g -jar /opt/picard/picard.jar SamToFastq I="$BAM" INTERLEAVE=true INCLUDE_NON_PF_READS=true FASTQ=/dev/stdout | /usr/local/bin/bwa mem -K 100000000 -t "$NTHREADS" -Y -p -R "$READGROUP" "$REFERENCE" /dev/stdin | 
/usr/local/bin/samblaster -a --addMateTags | /opt/samtools/bin/samtools view -b -S /dev/stdin + fi +stdout: "refAlign.bam" +arguments: + - valueFrom: $(runtime.cores) + position: 5 + prefix: '-n' +inputs: + bam: + type: File? + inputBinding: + prefix: '-b' + fastq1: + type: File? + inputBinding: + prefix: '-1' + fastq2: + type: File? + inputBinding: + prefix: '-2' + readgroup: + type: string + inputBinding: + prefix: '-g' + reference: + type: string + inputBinding: + position: 4 + prefix: '-r' + doc: 'bwa-indexed reference file' +outputs: + aligned_bam: + type: stdout diff --git a/definitions/types/sequence_data.yml b/definitions/types/sequence_data.yml new file mode 100644 index 000000000..324e059a2 --- /dev/null +++ b/definitions/types/sequence_data.yml @@ -0,0 +1,20 @@ +type: record +name: sequence_data +label: sequence data with readgroup information +fields: + sequence: + type: + - type: record + name: bam + fields: + bam: + type: File + - type: record + name: fastqs + fields: + fastq1: + type: File + fastq2: + type: File + readgroup: + type: string diff --git a/example_data/cle_IDT_somatic_exome_template.yaml b/example_data/cle_IDT_somatic_exome_template.yaml index f44a6f2c4..0b7cb3851 100644 --- a/example_data/cle_IDT_somatic_exome_template.yaml +++ b/example_data/cle_IDT_somatic_exome_template.yaml @@ -54,11 +54,12 @@ varscan_strand_filter: 1 varscan_min_var_freq: 0.08 varscan_p_value: 0.1 varscan_max_normal_freq: 0.1 -normal_bams: -- class: File - path: NORMAL_BAM_PATH -normal_readgroups: -- "NORMAL_RG_STR" +normal_sequence: + - sequence: + bam: + class: File + path: NORMAL_BAM_PATH + readgroup: "NORMAL_RG_STR" omni_vcf: class: File path: /gscmnt/gc2709/info/production_reference_GRCh38DH/accessory_vcf/omni25-ld-pruned-20000-2000-0.5-annotated.wchr.sites_only.b38.autosomes_only.vcf.gz @@ -76,11 +77,12 @@ synonyms_file: target_intervals: class: File path: 
/gscmnt/gc2709/info/production_reference_GRCh38DH/CLE/IDTExome/xgen-exome-research-panel-targets.interval_list -tumor_bams: -- class: File - path: TUMOR_BAM_PATH -tumor_readgroups: -- "TUMOR_RG_STR" +tumor_sequence: + - sequence: + bam: + class: File + path: TUMOR_BAM_PATH + readgroup: "TUMOR_RG_STR" filter_docm_variants: true filter_minimum_depth: 20 cle_vcf_filter: true diff --git a/example_data/cle_aml_trio_template.yaml b/example_data/cle_aml_trio_template.yaml index 232d0f46b..898655c59 100644 --- a/example_data/cle_aml_trio_template.yaml +++ b/example_data/cle_aml_trio_template.yaml @@ -87,11 +87,12 @@ varscan_strand_filter: 1 varscan_min_var_freq: 0.08 varscan_p_value: 0.1 varscan_max_normal_freq: 0.1 -normal_bams: -- class: File - path: NORMAL_BAM_PATH -normal_readgroups: -- "NORMAL_RG_STR" +normal_sequence: + - sequence: + bam: + class: File + path: NORMAL_BAM_PATH + readgroup: "NORMAL_RG_STR" omni_vcf: class: File path: /gscmnt/gc2709/info/production_reference_GRCh38DH/accessory_vcf/omni25-ld-pruned-20000-2000-0.5-annotated.wchr.sites_only.b38.autosomes_only.vcf.gz @@ -116,16 +117,18 @@ synonyms_file: target_intervals: class: File path: /gscmnt/gc13016/cle/54f8f7b915cb472aa183c721307369ab_scratch_space/new_AML_trio/new_cwl/bed_file/IDT_targets_plus50bp_build38.resorted.merged.interval_list -tumor_bams: -- class: File - path: TUMOR_BAM_PATH -tumor_readgroups: -- "TUMOR_RG_STR" -followup_bams: -- class: File - path: FOLLOWUP_BAM_PATH -followup_readgroups: -- "FOLLOWUP_RG_STR" +tumor_sequence: + - sequence: + bam: + class: File + path: TUMOR_BAM_PATH + readgroup: "TUMOR_RG_STR" +followup_sequence: + - sequence: + bam: + class: File + path: FOLLOWUP_BAM_PATH + readgroup: "FOLLOWUP_RG_STR" filter_docm_variants: false filter_minimum_depth: 20 annotate_coding_only: false diff --git a/example_data/exome_workflow.yaml b/example_data/exome_workflow.yaml index d96ff75e5..ae3ae7175 100644 --- a/example_data/exome_workflow.yaml +++ b/example_data/exome_workflow.yaml 
@@ -2,11 +2,6 @@ bait_intervals: class: File path: exome_workflow/chr17_test_bait.interval_list -bams: -- class: File - path: exome_workflow/2895499223.bam -- class: File - path: exome_workflow/2895499237.bam bqsr_intervals: - chr17 custom_gnomad_vcf: @@ -40,11 +35,19 @@ per_target_intervals: class: File path: exome_workflow/chr17_test_target.interval_list picard_metric_accumulation_level: ALL_READS -readgroups: -- "@RG\tID:2895499223\tPU:H7HY2CCXX.3.ATCACGGT\tSM:H_NJ-HCC1395-HCC1395\tLB:H_NJ-HCC1395-HCC1395-lg24-lib1\tPL:Illumina\tCN:WUGSC" -- "@RG\tID:2895499237\tPU:H7HY2CCXX.4.ATCACGGT\tSM:H_NJ-HCC1395-HCC1395\tLB:H_NJ-HCC1395-HCC1395-lg24-lib1\tPL:Illumina\tCN:WUGSC" reference: /gscmnt/gc2764/cad/jwalker/toil_test/cancer-genomics-workflow/example_data/exome_workflow/chr17_test.fa sample_name: H_NJ-HCC1395-HCC1395 +sequence: + - sequence: + bam: + class: File + path: exome_workflow/2895499223.bam + readgroup: "@RG\tID:2895499223\tPU:H7HY2CCXX.3.ATCACGGT\tSM:H_NJ-HCC1395-HCC1395\tLB:H_NJ-HCC1395-HCC1395-lg24-lib1\tPL:Illumina\tCN:WUGSC" + - sequence: + bam: + class: File + path: exome_workflow/2895499237.bam + readgroup: "@RG\tID:2895499237\tPU:H7HY2CCXX.4.ATCACGGT\tSM:H_NJ-HCC1395-HCC1395\tLB:H_NJ-HCC1395-HCC1395-lg24-lib1\tPL:Illumina\tCN:WUGSC" synonyms_file: class: File path: exome_workflow/chromAlias.ensembl.txt diff --git a/example_data/germline_exome.yaml b/example_data/germline_exome.yaml index f1ccc9ab8..c789dbff1 100644 --- a/example_data/germline_exome.yaml +++ b/example_data/germline_exome.yaml @@ -2,11 +2,6 @@ bait_intervals: class: File path: exome_workflow/chr17_test_bait.interval_list -bams: -- class: File - path: exome_workflow/2895499223.bam -- class: File - path: exome_workflow/2895499237.bam bqsr_intervals: - chr17 custom_gnomad_vcf: @@ -47,10 +42,18 @@ summary_intervals: class: File path: exome_workflow/chr17_test_genes.interval_list picard_metric_accumulation_level: ALL_READS -readgroups: -- 
"@RG\tID:2895499223\tPU:H7HY2CCXX.3.ATCACGGT\tSM:H_NJ-HCC1395-HCC1395\tLB:H_NJ-HCC1395-HCC1395-lg24-lib1\tPL:Illumina\tCN:WUGSC" -- "@RG\tID:2895499237\tPU:H7HY2CCXX.4.ATCACGGT\tSM:H_NJ-HCC1395-HCC1395\tLB:H_NJ-HCC1395-HCC1395-lg24-lib1\tPL:Illumina\tCN:WUGSC" reference: "/gscmnt/gc2764/cad/jgarza/shared/exome_workflow/chr17_test.fa" +sequence: + - sequence: + bam: + class: File + path: exome_workflow/2895499223.bam + readgroup: "@RG\tID:2895499223\tPU:H7HY2CCXX.3.ATCACGGT\tSM:H_NJ-HCC1395-HCC1395\tLB:H_NJ-HCC1395-HCC1395-lg24-lib1\tPL:Illumina\tCN:WUGSC" + - sequence: + bam: + class: File + path: exome_workflow/2895499237.bam + readgroup: "@RG\tID:2895499237\tPU:H7HY2CCXX.4.ATCACGGT\tSM:H_NJ-HCC1395-HCC1395\tLB:H_NJ-HCC1395-HCC1395-lg24-lib1\tPL:Illumina\tCN:WUGSC" synonyms_file: class: File path: exome_workflow/chromAlias.ensembl.txt diff --git a/example_data/germline_wgs.yaml b/example_data/germline_wgs.yaml index 4ae5b7e6d..42b2b4433 100644 --- a/example_data/germline_wgs.yaml +++ b/example_data/germline_wgs.yaml @@ -1,9 +1,4 @@ --- -bams: -- class: File - path: exome_workflow/2895499223.bam -- class: File - path: exome_workflow/2895499237.bam bqsr_intervals: ["chr17"] custom_gnomad_vcf: class: File @@ -32,9 +27,17 @@ qc_intervals: class: File path: exome_workflow/chr17_test_bait.interval_list reference: "/gscmnt/gc2764/cad/jgarza/pipeline_data/exome_workflow/chr17_test.fa" -readgroups: -- "@RG\tID:2895499223\tPU:H7HY2CCXX.3.ATCACGGT\tSM:H_NJ-HCC1395-HCC1395\tLB:H_NJ-HCC1395-HCC1395-lg24-lib1\tPL:Illumina\tCN:WUGSC" -- "@RG\tID:2895499237\tPU:H7HY2CCXX.4.ATCACGGT\tSM:H_NJ-HCC1395-HCC1395\tLB:H_NJ-HCC1395-HCC1395-lg24-lib1\tPL:Illumina\tCN:WUGSC" +sequence: + - sequence: + bam: + class: File + path: exome_workflow/2895499223.bam + readgroup: "@RG\tID:2895499223\tPU:H7HY2CCXX.3.ATCACGGT\tSM:H_NJ-HCC1395-HCC1395\tLB:H_NJ-HCC1395-HCC1395-lg24-lib1\tPL:Illumina\tCN:WUGSC" + - sequence: + bam: + class: File + path: exome_workflow/2895499237.bam + readgroup: 
"@RG\tID:2895499237\tPU:H7HY2CCXX.4.ATCACGGT\tSM:H_NJ-HCC1395-HCC1395\tLB:H_NJ-HCC1395-HCC1395-lg24-lib1\tPL:Illumina\tCN:WUGSC" summary_intervals: [ {label: "genes", file: { class: File, path: 'exome_workflow/chr17_test_genes.interval_list' }} ] synonyms_file: diff --git a/example_data/somatic_exome.yaml b/example_data/somatic_exome.yaml index 49619cfb8..856d12bf2 100644 --- a/example_data/somatic_exome.yaml +++ b/example_data/somatic_exome.yaml @@ -2,22 +2,28 @@ bait_intervals: class: File path: exome_workflow/chr17_test_bait.interval_list -tumor_bams: -- class: File - path: exome_workflow/2895499223.bam -- class: File - path: exome_workflow/2895499237.bam -tumor_readgroups: -- "@RG\tID:2895499223\tPU:H7HY2CCXX.3.ATCACGGT\tSM:H_NJ-HCC1395-HCC1395\tLB:H_NJ-HCC1395-HCC1395-lg24-lib1\tPL:Illumina\tCN:WUGSC" -- "@RG\tID:2895499237\tPU:H7HY2CCXX.4.ATCACGGT\tSM:H_NJ-HCC1395-HCC1395\tLB:H_NJ-HCC1395-HCC1395-lg24-lib1\tPL:Illumina\tCN:WUGSC" -normal_bams: -- class: File - path: exome_workflow/2895499331.bam -- class: File - path: exome_workflow/2895499399.bam -normal_readgroups: -- "@RG\tID:2895499331\tPU:H7HY2CCXX.3.TGACCACG\tSM:H_NJ-HCC1395-HCC1395_BL\tLB:H_NJ-HCC1395-HCC1395_BL-lg21-lib1\tPL:Illumina\tCN:WUGSC" -- "@RG\tID:2895499399\tPU:H7HY2CCXX.4.TGACCACG\tSM:H_NJ-HCC1395-HCC1395_BL\tLB:H_NJ-HCC1395-HCC1395_BL-lg21-lib1\tPL:Illumina\tCN:WUGSC" +tumor_sequence: + - sequence: + bam: + class: File + path: exome_workflow/2895499223.bam + readgroup: "@RG\tID:2895499223\tPU:H7HY2CCXX.3.ATCACGGT\tSM:H_NJ-HCC1395-HCC1395\tLB:H_NJ-HCC1395-HCC1395-lg24-lib1\tPL:Illumina\tCN:WUGSC" + - sequence: + bam: + class: File + path: exome_workflow/2895499237.bam + readgroup: "@RG\tID:2895499237\tPU:H7HY2CCXX.4.ATCACGGT\tSM:H_NJ-HCC1395-HCC1395\tLB:H_NJ-HCC1395-HCC1395-lg24-lib1\tPL:Illumina\tCN:WUGSC" +normal_sequence: + - sequence: + bam: + class: File + path: exome_workflow/2895499331.bam + readgroup: 
"@RG\tID:2895499331\tPU:H7HY2CCXX.3.TGACCACG\tSM:H_NJ-HCC1395-HCC1395_BL\tLB:H_NJ-HCC1395-HCC1395_BL-lg21-lib1\tPL:Illumina\tCN:WUGSC" + - sequence: + bam: + class: File + path: exome_workflow/2895499399.bam + readgroup: "@RG\tID:2895499399\tPU:H7HY2CCXX.4.TGACCACG\tSM:H_NJ-HCC1395-HCC1395_BL\tLB:H_NJ-HCC1395-HCC1395_BL-lg21-lib1\tPL:Illumina\tCN:WUGSC" custom_gnomad_vcf: class: File path: exome_workflow/chr17_test_gnomADe.vcf.gz