From 43414d238781a123e47b05ba5eb0426fd3d86c84 Mon Sep 17 00:00:00 2001
From: Alex Tate <0xalextate@gmail.com>
Date: Wed, 9 Nov 2022 11:04:00 -0500
Subject: [PATCH 1/2] GFFValidator can now read gzipped FASTA files. The gzip
 read/write interface has been moved from tiny-collapse into tiny.rna.util so
 that both tiny-collapse and GFFValidator can use it

---
 tiny/rna/collapser.py          | 7 ++-----
 tiny/rna/counter/validation.py | 7 +++++--
 tiny/rna/util.py               | 7 ++++++-
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/tiny/rna/collapser.py b/tiny/rna/collapser.py
index 4e3504ff..2b9422ff 100644
--- a/tiny/rna/collapser.py
+++ b/tiny/rna/collapser.py
@@ -8,21 +8,18 @@
 
 import argparse
 import builtins
-import gzip
 import os
 
 from collections import Counter
-from functools import partial
 from typing import Tuple, Iterable
 
+from tiny.rna.util import gzip_open as gz_f
+
 try:
     from _collections import _count_elements  # Load Counter's C helper function if it is available
 except ImportError:
     from collections import _count_elements   # Slower mapping[elem] = mapping.get(elem,default_val)+1
 
-# The GZIP read/write interface used by seq_counter() and seq2fasta()
-gz_f = partial(gzip.GzipFile, compresslevel=6, fileobj=None, mtime=0)
-
 
 def get_args() -> 'argparse.NameSpace':
     """Get command line arguments"""
diff --git a/tiny/rna/counter/validation.py b/tiny/rna/counter/validation.py
index 40485e25..b9d21615 100644
--- a/tiny/rna/counter/validation.py
+++ b/tiny/rna/counter/validation.py
@@ -1,11 +1,12 @@
 import functools
 import subprocess
 import sys
+import os
 
 from collections import Counter, defaultdict
 
-from tiny.rna.util import sorted_natural
 from tiny.rna.counter.hts_parsing import parse_gff, ReferenceTables
+from tiny.rna.util import sorted_natural, gzip_open
 
 
 class ReportFormatter:
@@ -198,7 +199,9 @@ def chroms_shared_with_genomes(self, genome_fastas):
 
         genome_chroms = set()
         for fasta in genome_fastas:
-            with open(fasta, 'rb') as f:
+            _, ext = os.path.splitext(fasta)
+            file_if = gzip_open if ext == '.gz' else open
+            with file_if(fasta, 'rb') as f:
                 for line in f:
                     if line[0] == ord('>'):
                         genome_chroms.add(line[1:].strip().decode())
diff --git a/tiny/rna/util.py b/tiny/rna/util.py
index ac94fd0c..c222469e 100644
--- a/tiny/rna/util.py
+++ b/tiny/rna/util.py
@@ -1,6 +1,7 @@
 import argparse
 import functools
 import textwrap
+import gzip
 import time
 import os
 import re
@@ -93,4 +94,8 @@ def sorted_natural(lines, reverse=False):
 
     convert = lambda text: int(text) if text.isdigit() else text.lower()
     alphanum_key = lambda key: [convert(c) for c in re.split(r'(\d+)', key)]
-    return sorted(lines, key=alphanum_key, reverse=reverse)
\ No newline at end of file
+    return sorted(lines, key=alphanum_key, reverse=reverse)
+
+
+# File IO interface for reading and writing gzip files (mtime=0 pins the header timestamp written when compressing)
+gzip_open = functools.partial(gzip.GzipFile, compresslevel=6, fileobj=None, mtime=0)
\ No newline at end of file

From 46e0e983beb96a61178ae63a7dd435f0412ae615 Mon Sep 17 00:00:00 2001
From: Alex Tate <0xalextate@gmail.com>
Date: Wed, 9 Nov 2022 15:38:47 -0500
Subject: [PATCH 2/2] Slightly clarified logic in
 GFFValidator.chroms_shared_with_genomes()

---
 tiny/rna/counter/validation.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tiny/rna/counter/validation.py b/tiny/rna/counter/validation.py
index 0cb8eb7a..dbc6d5cc 100644
--- a/tiny/rna/counter/validation.py
+++ b/tiny/rna/counter/validation.py
@@ -213,10 +213,12 @@ def chroms_shared_with_genomes(self, genome_fastas):
 
         genome_chroms = set()
         for fasta in genome_fastas:
-            if not os.path.isfile(fasta): continue
-
-            _, ext = os.path.splitext(fasta)
-            file_if = gzip_open if ext == '.gz' else open
+            if not os.path.isfile(fasta):
+                continue
+            elif fasta.endswith('.gz'):
+                file_if = gzip_open
+            else:
+                file_if = open
 
             with file_if(fasta, 'rb') as f:
                 for line in f: