diff --git a/tiny/rna/collapser.py b/tiny/rna/collapser.py index 4e3504ff..2b9422ff 100644 --- a/tiny/rna/collapser.py +++ b/tiny/rna/collapser.py @@ -8,21 +8,18 @@ import argparse import builtins -import gzip import os from collections import Counter -from functools import partial from typing import Tuple, Iterable +from tiny.rna.util import gzip_open as gz_f + try: from _collections import _count_elements # Load Counter's C helper function if it is available except ImportError: from collections import _count_elements # Slower mapping[elem] = mapping.get(elem,default_val)+1 -# The GZIP read/write interface used by seq_counter() and seq2fasta() -gz_f = partial(gzip.GzipFile, compresslevel=6, fileobj=None, mtime=0) - def get_args() -> 'argparse.NameSpace': """Get command line arguments""" diff --git a/tiny/rna/counter/validation.py b/tiny/rna/counter/validation.py index 437ba538..dbc6d5cc 100644 --- a/tiny/rna/counter/validation.py +++ b/tiny/rna/counter/validation.py @@ -8,7 +8,7 @@ from tiny.rna.counter.hts_parsing import parse_gff, ReferenceTables from tiny.rna.counter.features import FeatureSelector -from tiny.rna.util import sorted_natural +from tiny.rna.util import sorted_natural, gzip_open class ReportFormatter: @@ -215,7 +215,12 @@ def chroms_shared_with_genomes(self, genome_fastas): for fasta in genome_fastas: if not os.path.isfile(fasta): continue - with open(fasta, 'rb') as f: + elif fasta.endswith('.gz'): + file_if = gzip_open + else: + file_if = open + + with file_if(fasta, 'rb') as f: for line in f: if line[0] == ord('>'): genome_chroms.add(line[1:].strip().decode()) diff --git a/tiny/rna/util.py b/tiny/rna/util.py index ac94fd0c..c222469e 100644 --- a/tiny/rna/util.py +++ b/tiny/rna/util.py @@ -1,6 +1,7 @@ import argparse import functools import textwrap +import gzip import time import os import re @@ -93,4 +94,8 @@ def sorted_natural(lines, reverse=False): convert = lambda text: int(text) if text.isdigit() else text.lower() alphanum_key = lambda key: [convert(c) for c in re.split(r'(\d+)', key)] - return sorted(lines, key=alphanum_key, reverse=reverse) \ No newline at end of file + return sorted(lines, key=alphanum_key, reverse=reverse) + + +# File IO interface for reading and writing Gzip files +gzip_open = functools.partial(gzip.GzipFile, compresslevel=6, fileobj=None, mtime=0) \ No newline at end of file