Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ process {
// Memory request for every process carrying the 'process_high_memory' label:
// 64 GB multiplied by the task attempt number, so a retried task asks for more.
// NOTE(review): check_max is defined outside this view; presumably it caps the
// request at the pipeline-wide maximum for the 'memory' resource - confirm.
withLabel:process_high_memory {
memory = { check_max( 64.GB * task.attempt, 'memory' ) }
}
// Memory request for every process carrying the 'process_med_memory' label:
// 16 GB multiplied by the task attempt number, so a retried task asks for more.
// NOTE(review): check_max is defined outside this view; presumably it caps the
// request at the pipeline-wide maximum for the 'memory' resource - confirm.
withLabel:process_med_memory {
memory = { check_max( 16.GB * task.attempt, 'memory' ) }
}
// Processes carrying the 'error_ignore' label use the 'ignore' error strategy,
// i.e. a failing task does not stop the rest of the run.
withLabel:error_ignore {
errorStrategy = 'ignore'
}
Expand Down
7 changes: 7 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@
*/

process {
// Publish the declared outputs of the LONGEST process to <params.outdir>/longest.
// The path is a closure so params.outdir is resolved when the task runs; the
// publish mode is taken from the pipeline-wide params.publish_dir_mode setting.
withName: 'LONGEST' {
publishDir = [
path: { "${params.outdir}/longest" },
mode: params.publish_dir_mode
]
}

withName: 'GFFREAD' {
publishDir = [
path: { "${params.outdir}/gffread" },
Expand Down
13 changes: 12 additions & 1 deletion main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ include { ORTHOFINDER as ORTHOFINDER_CAFE } from './modules/nf-core/orthofinder/
include { GO_ASSIGN } from './modules/local/go_assign.nf'
include { GO_EXPANSION } from './modules/local/go_expansion.nf'
include { NCBIGENOMEDOWNLOAD } from './modules/nf-core/ncbigenomedownload/main.nf'
include { LONGEST } from './modules/local/longest.nf'
include { GFFREAD } from './modules/local/gffread.nf'
include { CAFE } from './modules/local/cafe.nf'
include { CHROMO_GO } from './modules/local/chromo_go.nf'
Expand Down Expand Up @@ -63,7 +64,17 @@ workflow {
NCBIGENOMEDOWNLOAD ( input_type.ncbi.map { it[0] }, input_type.ncbi.map { it[1] }, [], params.groups)
ch_versions = ch_versions.mix(NCBIGENOMEDOWNLOAD.out.versions.first())

GFFREAD ( NCBIGENOMEDOWNLOAD.out.fna.mix( input_type.local.map { [it[0],file(it[1])] } ), NCBIGENOMEDOWNLOAD.out.gff.mix(input_type.local.map { [it[0],file(it[2])] } ) )
// Combine NCBI downloaded files with local files for LONGEST process
fasta_inputs = NCBIGENOMEDOWNLOAD.out.fna.mix( input_type.local.map { [it[0],file(it[1])] } )
gff_inputs = NCBIGENOMEDOWNLOAD.out.gff.mix(input_type.local.map { [it[0],file(it[2])] } )

// Join the fasta and gff channels on sample ID so that LONGEST receives one [sample_id, fasta, gff] tuple per sample
fasta_inputs.join(gff_inputs).set { fasta_gff_inputs }

LONGEST ( fasta_gff_inputs )
ch_versions = ch_versions.mix(LONGEST.out.versions.first())

GFFREAD ( LONGEST.out.longest_proteins )
ch_versions = ch_versions.mix(GFFREAD.out.versions.first())

if (params.stats){
Expand Down
24 changes: 6 additions & 18 deletions modules/local/gffread.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,18 @@ process GFFREAD {
container = 'ecoflowucl/gffread_python:python-3.11.9_Linux_x86_64_perl-5.36.0'

input:
tuple val(sample_id), path(fasta)
tuple val(sample_id), path(gff)
tuple val(sample_id), path(fasta), path(gff)

output:
path( "${sample_id}.prot.fa" ), emit: proteins
tuple val(sample_id), path("${sample_id}.prot.fa.largestIsoform.fa" ), emit: proteins_busco
path( "${sample_id}.prot.fa.largestIsoform.fa" ), emit: longest
tuple val(sample_id), path("${sample_id}.prot.fa" ), emit: proteins_busco
path( "${sample_id}.prot.fa" ), emit: longest
path( "${sample_id}.splicedcds.fa" )
path( "${sample_id}.splicedexons.fa" )
path( "${sample_id}.gff_for_jvci.gff3" ), emit: gffs
tuple val(sample_id), path("${sample_id}.gff_for_jvci.gff3"), emit: gffs_agat
path( "${sample_id}_gene_alltran_list.txt" ), emit: gene_to_isoforms
path( "${sample_id}.splicedcds.fa.nucl.longest.fa" )
path( "${sample_id}.splicedcds.fa" )
tuple val( "${sample_id}" ), path( "${fasta}" ), emit: fasta_quast
path "versions.yml", emit: versions

Expand All @@ -36,6 +35,7 @@ process GFFREAD {
fi

#Convert Augustus gff files if found, then do gffread to print out the nucleotide files for each gene.
# Note: The GFF input now comes from LONGEST process (agat longest isoform), so it should already be processed

head -n 1 gff_temp > tbd

Expand All @@ -59,20 +59,8 @@ process GFFREAD {

fi

# Create gene to isoform mapping (still needed for downstream processes)
${projectDir}/bin/gff_to_genetranshash.2.pl
${projectDir}/bin/prot_fasta_to_longest.pl ${sample_id}.prot.fa ${sample_id}_longestisoform.txt
${projectDir}/bin/fasta_topIsoform.pl ${sample_id}.splicedcds.fa ${sample_id}_longestisoform.txt


#This part checks if longest isoform worked, if not we will continue with all proteins into Orthofinder. Warning sent to screen.
#Largest isoforms has content if true
#Largest isoforms does not have content if false. Just use full protein file (could be a genome without isoforms)

if [[ -s ${sample_id}.prot.fa.largestIsoform.fa ]];then
echo all_good
else
cp ${sample_id}.prot.fa ${sample_id}.prot.fa.largestIsoform.fa
fi

cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
34 changes: 34 additions & 0 deletions modules/local/longest.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
 * LONGEST
 *
 * Reduces a genome annotation to a single (longest) isoform per gene with AGAT
 * and writes functional-statistics reports for the annotation before and after
 * the reduction.
 *
 * Input:
 *   tuple(sample_id, fasta, gff)  - sample identifier, genome FASTA and its GFF annotation.
 *
 * Output:
 *   longest_proteins       - tuple(sample_id, fasta, <sample_id>.longest.gff3); the FASTA is
 *                            passed through untouched so the next process gets both files together.
 *   agat_summary_original  - tuple(sample_id, <sample_id>.stat.original.txt); statistics of the input GFF.
 *   agat_summary_longest   - tuple(sample_id, <sample_id>.stat.long.txt); statistics of the reduced GFF.
 *   versions               - versions.yml recording the Perl version found in the container.
 */
process LONGEST {

    label 'process_medium'
    label 'process_med_memory'
    tag "$sample_id"
    container = 'quay.io/biocontainers/agat:1.4.1--pl5321hdfd78af_0'

    input:
    tuple val (sample_id), path(fasta), path(gff)

    output:
    tuple val (sample_id), path( fasta ), path( "${sample_id}.longest.gff3" ), emit: longest_proteins
    tuple val (sample_id), path( "${sample_id}.stat.original.txt" ),           emit: agat_summary_original
    tuple val (sample_id), path( "${sample_id}.stat.long.txt" ),               emit: agat_summary_longest
    path "versions.yml",                                                       emit: versions

    script:
    // Every interpolated file name / sample ID is double-quoted in the shell so that
    // values containing whitespace or glob characters are passed as a single argument
    // (the md5sum call already did this; the AGAT calls now match it).
    """
    # Run agat to find longest orf for each gene
    agat_sp_keep_longest_isoform.pl -gff "${gff}" -o "${sample_id}.longest.gff3"

    # Run a few summarisation scripts to report the actual genes being considered.
    agat_sp_functional_statistics.pl --gff "${gff}" -o "${sample_id}.stat.original.txt"
    agat_sp_functional_statistics.pl --gff "${sample_id}.longest.gff3" -o "${sample_id}.stat.long.txt"

    # Checksum of the reduced annotation. The .md5 file is not declared in the output
    # block, so it stays in the task work directory only.
    md5sum "${sample_id}.longest.gff3" > "${sample_id}.longest.gff3.md5"

    # NOTE(review): only the Perl version is recorded here; the AGAT version is currently
    # visible only through the container tag - consider adding it.
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        Perl version: \$(perl --version | grep "version" | sed 's/.*(//g' | sed 's/[)].*//')
    END_VERSIONS
    """

}