From ddb767a860d8f35e1a44bd0a80c3b89a2a106a48 Mon Sep 17 00:00:00 2001
From: Chris <17653365+odcambc@users.noreply.github.com>
Date: Mon, 25 Nov 2024 13:10:09 -0600
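Subject: [PATCH 01/10] Run 2to3 to port the code base to Python 3

Conversions in this patch include: xrange -> range, dict.iteritems() ->
dict.items(), dict.keys() and zip() results wrapped in list(),
itertools.izip_longest -> itertools.zip_longest, the Tkinter, ttk,
tkFileDialog, tkMessageBox and tkSimpleDialog imports renamed to their
tkinter.* equivalents, u"" string prefixes dropped, and the
print_function __future__ imports removed.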
---
 enrich2/aligner.py                  | 10 ++--
 enrich2/barcode.py                  |  2 +-
 enrich2/barcodeid.py                |  4 +-
 enrich2/barcodevariant.py           |  4 +-
 enrich2/config_check.py             |  8 +--
 enrich2/dataframe.py                |  4 +-
 enrich2/experiment.py               | 12 ++--
 enrich2/fqread.py                   |  6 +-
 enrich2/gui/configurator.py         | 88 ++++++++++++++---------------
 enrich2/gui/create_root_dialog.py   | 28 ++++-----
 enrich2/gui/create_seqlib_dialog.py | 16 +++---
 enrich2/gui/delete_dialog.py        | 20 +++----
 enrich2/gui/dialog_elements.py      | 54 +++++++++---------
 enrich2/gui/edit_dialog.py          | 32 +++++------
 enrich2/gui/runner_window.py        | 34 +++++------
 enrich2/gui/seqlib_apply_dialog.py  | 18 +++---
 enrich2/main.py                     | 10 ++--
 enrich2/overlap.py                  |  6 +-
 enrich2/plots.py                    |  8 +--
 enrich2/random_effects.py           |  2 +-
 enrich2/selection.py                | 16 +++---
 enrich2/seqlib.py                   | 14 ++---
 enrich2/sfmap.py                    | 10 ++--
 enrich2/storemanager.py             | 12 ++--
 enrich2/variant.py                  |  8 +--
 enrich2/wildtype.py                 |  6 +-
 26 files changed, 216 insertions(+), 216 deletions(-)

diff --git a/enrich2/aligner.py b/enrich2/aligner.py
index 30a1f06..eec84aa 100644
--- a/enrich2/aligner.py
+++ b/enrich2/aligner.py
@@ -43,7 +43,7 @@ class Aligner(object):
     _END = 4  # end of traceback
 
     def __init__(self, similarity=_simple_similarity):
-        similarity_keys = similarity.keys()
+        similarity_keys = list(similarity.keys())
         if "gap" in similarity_keys:
             similarity_keys.remove("gap")
         for key in similarity_keys:
@@ -80,12 +80,12 @@ def align(self, seq1, seq2):
         seq2 = seq2.upper()
 
         # build matrix of scores/traceback information
-        for i in xrange(len(seq1) + 1):
+        for i in range(len(seq1) + 1):
             self.matrix[i, 0] = (self.similarity["gap"] * i, Aligner._DEL)
-        for j in xrange(len(seq2) + 1):
+        for j in range(len(seq2) + 1):
             self.matrix[0, j] = (self.similarity["gap"] * j, Aligner._INS)
-        for i in xrange(1, len(seq1) + 1):
-            for j in xrange(1, len(seq2) + 1):
+        for i in range(1, len(seq1) + 1):
+            for j in range(1, len(seq2) + 1):
                 match = (
                     self.matrix[i - 1, j - 1]["score"]
                     + self.similarity[seq1[i - 1]][seq2[j - 1]],
diff --git a/enrich2/barcode.py b/enrich2/barcode.py
index 6b02434..00052ac 100644
--- a/enrich2/barcode.py
+++ b/enrich2/barcode.py
@@ -1,4 +1,4 @@
-from __future__ import print_function
+
 import logging
 import sys
 from .seqlib import SeqLib
diff --git a/enrich2/barcodeid.py b/enrich2/barcodeid.py
index 245d2e7..26c79b4 100644
--- a/enrich2/barcodeid.py
+++ b/enrich2/barcodeid.py
@@ -112,7 +112,7 @@ def calculate(self):
                 "identifiers",
                 {
                     k: v
-                    for k, v in df_dict.iteritems()
+                    for k, v in df_dict.items()
                     if v >= self.identifier_min_count
                 },
                 raw=False,
@@ -120,7 +120,7 @@ def calculate(self):
             del df_dict
 
             # write the active subset of the BarcodeMap to the store
-            barcodes = barcode_identifiers.keys()
+            barcodes = list(barcode_identifiers.keys())
             barcode_identifiers = pd.DataFrame(
                 {"value": [barcode_identifiers[bc] for bc in barcodes]}, index=barcodes
             )
diff --git a/enrich2/barcodevariant.py b/enrich2/barcodevariant.py
index c6245e7..a7e6b41 100644
--- a/enrich2/barcodevariant.py
+++ b/enrich2/barcodevariant.py
@@ -112,13 +112,13 @@ def calculate(self):
             # save counts, filtering based on the min count
             self.save_counts(
                 "variants",
-                {k: v for k, v in df_dict.iteritems() if v >= self.variant_min_count},
+                {k: v for k, v in df_dict.items() if v >= self.variant_min_count},
                 raw=False,
             )
             del df_dict
 
             # write the active subset of the BarcodeMap to the store
-            barcodes = barcode_variants.keys()
+            barcodes = list(barcode_variants.keys())
             barcode_variants = pd.DataFrame(
                 {"value": [barcode_variants[bc] for bc in barcodes]}, index=barcodes
             )
diff --git a/enrich2/config_check.py b/enrich2/config_check.py
index 3d61d28..8ec4876 100644
--- a/enrich2/config_check.py
+++ b/enrich2/config_check.py
@@ -20,7 +20,7 @@ def is_experiment(cfg):
         :py:class:`~enrich2.experiment.Experiment`, else False.
 
     """
-    if "conditions" in cfg.keys():
+    if "conditions" in list(cfg.keys()):
         return True
     else:
         return False
@@ -39,7 +39,7 @@ def is_condition(cfg):
         :py:class:`~enrich2.condition.Condition`, else False.
 
     """
-    if "selections" in cfg.keys():
+    if "selections" in list(cfg.keys()):
         return True
     else:
         return False
@@ -58,7 +58,7 @@ def is_selection(cfg):
         :py:class:`~enrich2.selection.Selection`, else False.
 
     """
-    if "libraries" in cfg.keys():
+    if "libraries" in list(cfg.keys()):
         return True
     else:
         return False
@@ -77,7 +77,7 @@ def is_seqlib(cfg):
         derived object, else False.
 
     """
-    if "fastq" in cfg.keys() or "identifiers" in cfg.keys():
+    if "fastq" in list(cfg.keys()) or "identifiers" in list(cfg.keys()):
         return True
     else:
         return False
diff --git a/enrich2/dataframe.py b/enrich2/dataframe.py
index 22707ae..fea518a 100644
--- a/enrich2/dataframe.py
+++ b/enrich2/dataframe.py
@@ -144,10 +144,10 @@ def fill_position_gaps(positions, gap_size):
 
     # fill in short gaps
     fill = set()
-    for i in xrange(len(positions) - 1):
+    for i in range(len(positions) - 1):
         delta = positions[i + 1] - positions[i]
         if delta > 1 and delta <= gap_size:
-            fill.update(positions[i] + n + 1 for n in xrange(delta))
+            fill.update(positions[i] + n + 1 for n in range(delta))
     fill.update(positions)
 
     return sorted(list(fill))
diff --git a/enrich2/experiment.py b/enrich2/experiment.py
index d9ab19a..dcb7c68 100644
--- a/enrich2/experiment.py
+++ b/enrich2/experiment.py
@@ -1,4 +1,4 @@
-from __future__ import print_function
+
 import logging
 import pandas as pd
 import numpy as np
@@ -180,7 +180,7 @@ def combine_barcode_maps(self):
 
         bcm = None
         for sel in self.selection_list():
-            if "/main/barcodemap" in sel.store.keys():
+            if "/main/barcodemap" in list(sel.store.keys()):
                 if bcm is None:
                     bcm = sel.store["/main/barcodemap"]
                 else:
@@ -218,7 +218,7 @@ def calc_counts(self, label):
                 selections_index.extend([sel.name] * len(sel.timepoints))
                 values_index.extend(["c_{}".format(x) for x in sorted(sel.timepoints)])
         columns = pd.MultiIndex.from_tuples(
-            zip(conditions_index, selections_index, values_index),
+            list(zip(conditions_index, selections_index, values_index)),
             names=["condition", "selection", "timepoint"],
         )
 
@@ -282,7 +282,7 @@ def calc_shared_full(self, label):
                 selections_index.extend([sel.name] * len(values_list))
                 values_index.extend(sorted(values_list))
         columns = pd.MultiIndex.from_tuples(
-            zip(conditions_index, selections_index, values_index),
+            list(zip(conditions_index, selections_index, values_index)),
             names=["condition", "selection", "value"],
         )
 
@@ -468,7 +468,7 @@ def calc_pvalues_pairwise(self, label):
                 cnd2_index.extend([cnd2] * len(values_list))
                 values_index.extend(sorted(values_list))
         columns = pd.MultiIndex.from_tuples(
-            zip(cnd1_index, cnd2_index, values_index),
+            list(zip(cnd1_index, cnd2_index, values_index)),
             names=["condition1", "condition2", "value"],
         )
 
@@ -528,7 +528,7 @@ def write_tsv(self):
         """
         if self.tsv_requested:
             self.logger.info("Generating tab-separated output files")
-            for k in self.store.keys():
+            for k in list(self.store.keys()):
                 self.write_table_tsv(k)
         for s in self.selection_list():
             s.write_tsv()
diff --git a/enrich2/fqread.py b/enrich2/fqread.py
index 891337a..7c30b54 100644
--- a/enrich2/fqread.py
+++ b/enrich2/fqread.py
@@ -1,4 +1,4 @@
-from __future__ import print_function
+
 from sys import stderr
 import os.path
 import re
@@ -255,7 +255,7 @@ def read_fastq(fname, filter_function=None, buffer_size=BUFFER_SIZE, qbase=33):
             leftover = "\n".join(lines[len(lines) - dangling :])
 
         # index into the list of lines to pull out the FASTQ records
-        for i in xrange(fastq_count):
+        for i in range(fastq_count):
             # (header, sequence, header2, quality)
             fq = FQRead(*lines[i * 4 : (i + 1) * 4], qbase=qbase)
             if filter_function is None:  # no filtering
@@ -289,7 +289,7 @@ def read_fastq_multi(
             read_fastq(f, filter_function=None, buffer_size=BUFFER_SIZE, qbase=qbase)
         )
 
-    for records in itertools.izip_longest(*fq_generators, fillvalue=None):
+    for records in itertools.zip_longest(*fq_generators, fillvalue=None):
         if None in records:  # mismatched file lengths
             if match_lengths:
                 yield None
diff --git a/enrich2/gui/configurator.py b/enrich2/gui/configurator.py
index aee12d5..e562a39 100644
--- a/enrich2/gui/configurator.py
+++ b/enrich2/gui/configurator.py
@@ -1,8 +1,8 @@
-from __future__ import print_function
-import Tkinter as tk
-import ttk
-import tkFileDialog
-import tkMessageBox
+
+import tkinter as tk
+import tkinter.ttk
+import tkinter.filedialog
+import tkinter.messagebox
 import platform
 import json
 from ..config_check import is_seqlib, is_experiment, is_selection, seqlib_type
@@ -93,13 +93,13 @@ def treeview_context_menu(self, click):
 
     def create_main_frame(self):
         # Frame for the Treeview and New/Edit/Delete buttons
-        main = ttk.Frame(self, padding=(3, 3, 12, 12))
+        main = tkinter.ttk.Frame(self, padding=(3, 3, 12, 12))
         main.rowconfigure(0, weight=1)
         main.columnconfigure(0, weight=1)
         main.grid(row=0, column=0, sticky="nsew")
 
         # Frame for the Treeview and its scrollbars
-        tree_frame = ttk.Frame(main, padding=(3, 3, 12, 12))
+        tree_frame = tkinter.ttk.Frame(main, padding=(3, 3, 12, 12))
         tree_frame.rowconfigure(0, weight=1)
         tree_frame.rowconfigure(1, weight=0)
         tree_frame.columnconfigure(0, weight=1)
@@ -107,7 +107,7 @@ def create_main_frame(self):
         tree_frame.grid(row=0, column=0, sticky="nsew")
 
         # Treeview with column headings
-        self.treeview = ttk.Treeview(tree_frame)
+        self.treeview = tkinter.ttk.Treeview(tree_frame)
         self.treeview["columns"] = ("class", "barcodes", "variants")
         self.treeview.column("class", width=120)
         self.treeview.heading("class", text="Type")
@@ -132,35 +132,35 @@ def create_main_frame(self):
         self.treeview.config(yscroll=tree_ysb.set, xscroll=tree_xsb.set)
 
         # Frame for New/Edit/Delete buttons
-        button_frame = ttk.Frame(main, padding=(3, 3, 12, 12))
+        button_frame = tkinter.ttk.Frame(main, padding=(3, 3, 12, 12))
         button_frame.grid(row=1, column=0)
-        new_button = ttk.Button(
+        new_button = tkinter.ttk.Button(
             button_frame, text="New...", command=self.new_button_press
         )
         new_button.grid(row=0, column=0)
-        edit_button = ttk.Button(
+        edit_button = tkinter.ttk.Button(
             button_frame, text="Edit...", command=self.edit_button_press
         )
         edit_button.grid(row=0, column=1)
-        delete_button = ttk.Button(
+        delete_button = tkinter.ttk.Button(
             button_frame, text="Delete", command=self.delete_button_press
         )
         delete_button.grid(row=0, column=2)
 
         # Frame for Analysis Options
-        options_frame = ttk.Frame(main, padding=(3, 3, 12, 12))
+        options_frame = tkinter.ttk.Frame(main, padding=(3, 3, 12, 12))
         options_frame.grid(row=0, column=1, rowspan=2, sticky="nsew")
 
         row = 0
-        heading = ttk.Label(options_frame, text="Analysis Options")
+        heading = tkinter.ttk.Label(options_frame, text="Analysis Options")
         heading.grid(column=0, row=row)
         row += 1
 
-        scoring_heading = ttk.Label(options_frame, text="Scoring Method")
+        scoring_heading = tkinter.ttk.Label(options_frame, text="Scoring Method")
         scoring_heading.grid(column=0, row=row)
         row += 1
         for i, k in enumerate(SCORING_METHODS.keys()):
-            rb = ttk.Radiobutton(
+            rb = tkinter.ttk.Radiobutton(
                 options_frame,
                 text=SCORING_METHODS[k].title(),
                 variable=self.scoring_method,
@@ -171,11 +171,11 @@ def create_main_frame(self):
             if i == 0:
                 rb.invoke()
 
-        logr_heading = ttk.Label(options_frame, text="Normalization Method")
+        logr_heading = tkinter.ttk.Label(options_frame, text="Normalization Method")
         logr_heading.grid(column=0, row=row)
         row += 1
         for i, k in enumerate(LOGR_METHODS.keys()):
-            rb = ttk.Radiobutton(
+            rb = tkinter.ttk.Radiobutton(
                 options_frame,
                 text=LOGR_METHODS[k].title(),
                 variable=self.logr_method,
@@ -186,19 +186,19 @@ def create_main_frame(self):
             if i == 0:
                 rb.invoke()
 
-        other_heading = ttk.Label(options_frame, text="Other Options")
+        other_heading = tkinter.ttk.Label(options_frame, text="Other Options")
         other_heading.grid(column=0, row=row)
         row += 1
 
         # force recalculate
-        force_recalculate = ttk.Checkbutton(
+        force_recalculate = tkinter.ttk.Checkbutton(
             options_frame, text="Force Recalculation", variable=self.force_recalculate
         )
         force_recalculate.grid(column=0, row=row, sticky="w")
         row += 1
 
         # component outliers
-        component_outliers = ttk.Checkbutton(
+        component_outliers = tkinter.ttk.Checkbutton(
             options_frame,
             text="Component Outlier Statistics",
             variable=self.component_outliers,
@@ -207,7 +207,7 @@ def create_main_frame(self):
         row += 1
 
         # make plots
-        plots_requested = ttk.Checkbutton(
+        plots_requested = tkinter.ttk.Checkbutton(
             options_frame, text="Make Plots", variable=self.plots_requested
         )
         plots_requested.grid(column=0, row=row, sticky="w")
@@ -215,21 +215,21 @@ def create_main_frame(self):
         row += 1
 
         # write tsv
-        tsv_requested = ttk.Checkbutton(
+        tsv_requested = tkinter.ttk.Checkbutton(
             options_frame, text="Write TSV Files", variable=self.tsv_requested
         )
         tsv_requested.grid(column=0, row=row, sticky="w")
         tsv_requested.invoke()
         row += 1
 
-        go_button = ttk.Button(
+        go_button = tkinter.ttk.Button(
             options_frame, text="Run Analysis", command=self.go_button_press
         )
         go_button.grid(column=0, row=row, sticky="sew")
 
     def go_button_press(self):
         if self.root_element is None:
-            tkMessageBox.showwarning("", "No experimental design specified.")
+            tkinter.messagebox.showwarning("", "No experimental design specified.")
         else:
             RunnerSavePrompt(self)
             RunnerWindow(self)
@@ -273,7 +273,7 @@ def create_new_element(self):
 
     def new_button_press(self):
         if self.treeview.focus() == "" and self.root_element is not None:
-            tkMessageBox.showwarning(None, "No parent element selected.")
+            tkinter.messagebox.showwarning(None, "No parent element selected.")
         else:
             if self.treeview.focus() == "" and self.root_element is None:
                 element = CreateRootDialog(self).element
@@ -288,7 +288,7 @@ def new_button_press(self):
             self.refresh_treeview()
 
             # select the newly added element if it was successfully added
-            if element.treeview_id in self.element_dict.keys():
+            if element.treeview_id in list(self.element_dict.keys()):
                 self.treeview.focus(element.treeview_id)
                 self.treeview.selection_set(element.treeview_id)
             else:
@@ -299,13 +299,13 @@ def new_button_press(self):
 
     def edit_button_press(self):
         if self.treeview.focus() == "":
-            tkMessageBox.showwarning(None, "No element selected.")
+            tkinter.messagebox.showwarning(None, "No element selected.")
         else:
             EditDialog(self, self, self.get_focused_element())
 
     def delete_button_press(self):
         if self.treeview.focus() == "":
-            tkMessageBox.showwarning(None, "No element selected.")
+            tkinter.messagebox.showwarning(None, "No element selected.")
         else:
             DeleteDialog(self, self)
 
@@ -359,15 +359,15 @@ def create_menubar(self):
         self.bind("<{}a>".format(accel_bind), lambda event: self.menu_selectall())
 
     def menu_open(self):
-        fname = tkFileDialog.askopenfilename()
+        fname = tkinter.filedialog.askopenfilename()
         if len(fname) > 0:  # file was selected
             try:
                 with open(fname, "rU") as handle:
                     cfg = json.load(handle)
             except ValueError:
-                tkMessageBox.showerror(None, "Failed to parse config file.")
+                tkinter.messagebox.showerror(None, "Failed to parse config file.")
             except IOError:
-                tkMessageBox.showerror(None, "Could not read config file.")
+                tkinter.messagebox.showerror(None, "Could not read config file.")
             else:
                 if is_experiment(cfg):
                     obj = Experiment()
@@ -376,13 +376,13 @@ def menu_open(self):
                 elif is_seqlib(cfg):
                     obj = SEQLIB_CLASSES[seqlib_type(cfg)]()
                 else:
-                    tkMessageBox.showerror(None, "Unrecognized config format.")
+                    tkinter.messagebox.showerror(None, "Unrecognized config format.")
                     return
                 obj.output_dir_override = False
                 try:
                     obj.configure(cfg)
                 except Exception as e:
-                    tkMessageBox.showerror(
+                    tkinter.messagebox.showerror(
                         None, "Failed to process config file:\n{}".format(e)
                     )
                 else:
@@ -394,32 +394,32 @@ def menu_save(self):
         if len(self.cfg_file_name.get()) == 0:
             self.menu_saveas()
         elif self.root_element is None:
-            tkMessageBox.showwarning(None, "Cannot save empty configuration.")
+            tkinter.messagebox.showwarning(None, "Cannot save empty configuration.")
         else:
             try:
                 with open(self.cfg_file_name.get(), "w") as handle:
                     write_json(self.root_element.serialize(), handle)
             except IOError:
-                tkMessageBox.showerror(None, "Failed to save config file.")
+                tkinter.messagebox.showerror(None, "Failed to save config file.")
             else:
-                tkMessageBox.showinfo(
+                tkinter.messagebox.showinfo(
                     None, "Save successful:\n{}".format(self.cfg_file_name.get())
                 )
 
     def menu_saveas(self):
         if self.root_element is None:
-            tkMessageBox.showwarning(None, "Cannot save empty configuration.")
+            tkinter.messagebox.showwarning(None, "Cannot save empty configuration.")
         else:
-            fname = tkFileDialog.asksaveasfilename()
+            fname = tkinter.filedialog.asksaveasfilename()
             if len(fname) > 0:  # file was selected
                 try:
                     with open(fname, "w") as handle:
                         write_json(self.root_element.serialize(), handle)
                 except IOError:
-                    tkMessageBox.showerror(None, "Failed to save config file.")
+                    tkinter.messagebox.showerror(None, "Failed to save config file.")
                 else:
                     self.cfg_file_name.set(fname)
-                    tkMessageBox.showinfo(
+                    tkinter.messagebox.showinfo(
                         None, "Save successful:\n{}".format(self.cfg_file_name.get())
                     )
 
@@ -427,7 +427,7 @@ def menu_selectall(self):
         """
         Add all elements in the Treeview to the selection.
         """
-        for k in self.element_dict.keys():
+        for k in list(self.element_dict.keys()):
             self.treeview.selection_add(k)
 
     def delete_element(self, tree_id):
@@ -486,11 +486,11 @@ def set_treeview_properties(self, element):
 
         # add the check marks for barcodes/variants
         if "variants" in element.labels:
-            self.treeview.set(element.treeview_id, "variants", u"\u2713")
+            self.treeview.set(element.treeview_id, "variants", "\u2713")
         else:
             self.treeview.set(element.treeview_id, "variants", "")
         if "barcodes" in element.labels:
-            self.treeview.set(element.treeview_id, "barcodes", u"\u2713")
+            self.treeview.set(element.treeview_id, "barcodes", "\u2713")
         else:
             self.treeview.set(element.treeview_id, "barcodes", "")
 
diff --git a/enrich2/gui/create_root_dialog.py b/enrich2/gui/create_root_dialog.py
index c306d20..dfe61ab 100644
--- a/enrich2/gui/create_root_dialog.py
+++ b/enrich2/gui/create_root_dialog.py
@@ -1,7 +1,7 @@
-from __future__ import print_function
-import Tkinter as tk
-import ttk
-import tkSimpleDialog
+
+import tkinter as tk
+import tkinter.ttk
+import tkinter.simpledialog
 from .dialog_elements import FileEntry, StringEntry, DEFAULT_COLUMNS
 from .create_seqlib_dialog import SEQLIB_LABEL_TEXT
 from ..barcode import BarcodeSeqLib
@@ -27,7 +27,7 @@
 }
 
 
-class CreateRootDialog(tkSimpleDialog.Dialog):
+class CreateRootDialog(tkinter.simpledialog.Dialog):
     """
     Dialog box for creating a new root element.
     """
@@ -44,23 +44,23 @@ def __init__(self, parent_window, title="Create Root Object"):
         )
         self.name_tk = StringEntry("Name", self.cfg_dict, "name", optional=False)
         self.element = None
-        tkSimpleDialog.Dialog.__init__(self, parent_window, title)
+        tkinter.simpledialog.Dialog.__init__(self, parent_window, title)
 
     def body(self, master):
         row_no = self.name_tk.body(master, 0)
         row_no += self.output_directory_tk.body(master, row_no)
 
-        element_types = ttk.Frame(master, padding=(3, 3, 12, 12))
+        element_types = tkinter.ttk.Frame(master, padding=(3, 3, 12, 12))
         element_types.grid(
             column=0, row=row_no, sticky="nsew", columnspan=DEFAULT_COLUMNS
         )
 
-        message = ttk.Label(element_types, text="Root object type:")
+        message = tkinter.ttk.Label(element_types, text="Root object type:")
         message.grid(column=0, row=0)
 
-        label = ttk.Label(element_types, text="Experiment")
+        label = tkinter.ttk.Label(element_types, text="Experiment")
         label.grid(column=0, row=1, sticky="w")
-        rb = ttk.Radiobutton(
+        rb = tkinter.ttk.Radiobutton(
             element_types,
             text="Experiment",
             variable=self.element_tkstring,
@@ -69,9 +69,9 @@ def body(self, master):
         rb.grid(column=0, row=2, sticky="w")
         rb.invoke()
 
-        label = ttk.Label(element_types, text="Selection")
+        label = tkinter.ttk.Label(element_types, text="Selection")
         label.grid(column=0, row=3, sticky="w")
-        rb = ttk.Radiobutton(
+        rb = tkinter.ttk.Radiobutton(
             element_types,
             text="Selection",
             variable=self.element_tkstring,
@@ -79,10 +79,10 @@ def body(self, master):
         )
         rb.grid(column=0, row=4, sticky="w")
 
-        label = ttk.Label(element_types, text="SeqLib")
+        label = tkinter.ttk.Label(element_types, text="SeqLib")
         label.grid(column=0, row=5, sticky="w")
         for i, k in enumerate(SEQLIB_LABEL_TEXT.keys()):
-            rb = ttk.Radiobutton(
+            rb = tkinter.ttk.Radiobutton(
                 element_types,
                 text=SEQLIB_LABEL_TEXT[k],
                 variable=self.element_tkstring,
diff --git a/enrich2/gui/create_seqlib_dialog.py b/enrich2/gui/create_seqlib_dialog.py
index f741746..4983399 100644
--- a/enrich2/gui/create_seqlib_dialog.py
+++ b/enrich2/gui/create_seqlib_dialog.py
@@ -1,7 +1,7 @@
-from __future__ import print_function
-import Tkinter as tk
-import ttk
-import tkSimpleDialog
+
+import tkinter as tk
+import tkinter.ttk
+import tkinter.simpledialog
 from collections import OrderedDict
 from ..barcode import BarcodeSeqLib
 from ..barcodevariant import BcvSeqLib
@@ -33,7 +33,7 @@
 }
 
 
-class CreateSeqLibDialog(tkSimpleDialog.Dialog):
+class CreateSeqLibDialog(tkinter.simpledialog.Dialog):
     """
     Dialog box for creating a new SeqLib.
     """
@@ -41,14 +41,14 @@ class CreateSeqLibDialog(tkSimpleDialog.Dialog):
     def __init__(self, parent_window, title="New SeqLib"):
         self.element_tkstring = tk.StringVar()
         self.element_type = None
-        tkSimpleDialog.Dialog.__init__(self, parent_window, title)
+        tkinter.simpledialog.Dialog.__init__(self, parent_window, title)
 
     def body(self, master):
-        message = ttk.Label(master, text="SeqLib type:")
+        message = tkinter.ttk.Label(master, text="SeqLib type:")
         message.grid(column=0, row=0)
 
         for i, k in enumerate(SEQLIB_LABEL_TEXT.keys()):
-            rb = ttk.Radiobutton(
+            rb = tkinter.ttk.Radiobutton(
                 master,
                 text=SEQLIB_LABEL_TEXT[k],
                 variable=self.element_tkstring,
diff --git a/enrich2/gui/delete_dialog.py b/enrich2/gui/delete_dialog.py
index 8e93164..c63bf91 100644
--- a/enrich2/gui/delete_dialog.py
+++ b/enrich2/gui/delete_dialog.py
@@ -1,6 +1,6 @@
-import Tkinter as tk
-import ttk
-import tkSimpleDialog
+import tkinter as tk
+import tkinter.ttk
+import tkinter.simpledialog
 
 
 def subtree_ids(treeview, x, level=0):
@@ -16,7 +16,7 @@ def subtree_ids(treeview, x, level=0):
     return id_list
 
 
-class DeleteDialog(tkSimpleDialog.Dialog):
+class DeleteDialog(tkinter.simpledialog.Dialog):
     """
     Confirmation dialog box for deleting the selected items from the Treeview.
     """
@@ -27,7 +27,7 @@ def __init__(self, parent_window, tree, title="Confirm Deletion"):
         for x in self.tree.treeview.selection():
             if x not in [y[0] for y in self.id_tuples]:
                 self.id_tuples.extend(subtree_ids(self.tree.treeview, x))
-        tkSimpleDialog.Dialog.__init__(self, parent_window, title)
+        tkinter.simpledialog.Dialog.__init__(self, parent_window, title)
 
     def body(self, master):
         """
@@ -45,13 +45,13 @@ def body(self, master):
             message_string = "Delete the following items?\n"
             for x, level in self.id_tuples:
                 if level == 0:
-                    bullet = "    " + u"\u25C6"
+                    bullet = "    " + "\u25C6"
                 else:
-                    bullet = "    " * (level + 1) + u"\u25C7"
-                message_string += u"{bullet} {name}\n".format(
+                    bullet = "    " * (level + 1) + "\u25C7"
+                message_string += "{bullet} {name}\n".format(
                     bullet=bullet, name=self.tree.get_element(x).name
                 )
-        message = ttk.Label(master, text=message_string, justify="left")
+        message = tkinter.ttk.Label(master, text=message_string, justify="left")
         message.grid(row=0, sticky="w")
 
     def buttonbox(self):
@@ -70,7 +70,7 @@ def buttonbox(self):
 
             box.pack()
         else:
-            tkSimpleDialog.Dialog.buttonbox(self)
+            tkinter.simpledialog.Dialog.buttonbox(self)
 
     def apply(self):
         """
diff --git a/enrich2/gui/dialog_elements.py b/enrich2/gui/dialog_elements.py
index 2b9495e..4d73534 100644
--- a/enrich2/gui/dialog_elements.py
+++ b/enrich2/gui/dialog_elements.py
@@ -1,8 +1,8 @@
-from __future__ import print_function
-import Tkinter as tk
-import ttk
-import tkMessageBox
-import tkFileDialog
+
+import tkinter as tk
+import tkinter.ttk
+import tkinter.messagebox
+import tkinter.filedialog
 import os.path
 
 DEFAULT_COLUMNS = 3
@@ -13,7 +13,7 @@ def __init__(self, text):
         self.text = text
 
     def body(self, master, row, columns=DEFAULT_COLUMNS, **kwargs):
-        label = ttk.Label(master, text=self.text)
+        label = tkinter.ttk.Label(master, text=self.text)
         label.grid(row=row, column=0, columnspan=columns, sticky="w")
         return 1
 
@@ -53,7 +53,7 @@ def body(self, master, row, columns=DEFAULT_COLUMNS, **kwargs):
 
         Returns the number of rows taken by this element.
         """
-        self.checkbox = ttk.Checkbutton(master, text=self.text, variable=self.value)
+        self.checkbox = tkinter.ttk.Checkbutton(master, text=self.text, variable=self.value)
         self.checkbox.grid(row=row, column=0, columnspan=columns, sticky="w")
         return 1
 
@@ -105,9 +105,9 @@ def body(self, master, row, columns=DEFAULT_COLUMNS, **kwargs):
 
         Returns the number of rows taken by this element.
         """
-        label = ttk.Label(master, text=self.text)
+        label = tkinter.ttk.Label(master, text=self.text)
         label.grid(row=row, column=0, columnspan=1, sticky="e")
-        self.entry = ttk.Entry(master, textvariable=self.value)
+        self.entry = tkinter.ttk.Entry(master, textvariable=self.value)
         self.entry.grid(row=row, column=1, columnspan=columns - 1, sticky="ew")
         return 1
 
@@ -119,7 +119,7 @@ def validate(self):
         if not self.enabled:
             return True
         elif not self.optional and len(self.value.get()) == 0:
-            tkMessageBox.showwarning("", "{} not specified.".format(self.text))
+            tkinter.messagebox.showwarning("", "{} not specified.".format(self.text))
             return False
         else:
             return True
@@ -168,25 +168,25 @@ def body(self, master, row, columns=DEFAULT_COLUMNS, **kwargs):
 
         Returns the number of rows taken by this element.
         """
-        label = ttk.Label(master, text=self.text)
+        label = tkinter.ttk.Label(master, text=self.text)
         label.grid(row=row, column=0, columnspan=1, sticky="e")
-        self.entry = ttk.Entry(master, textvariable=self.value)
+        self.entry = tkinter.ttk.Entry(master, textvariable=self.value)
         self.entry.grid(row=row, column=1, columnspan=columns - 1, sticky="ew")
         if self.directory:
-            self.choose = ttk.Button(
+            self.choose = tkinter.ttk.Button(
                 master,
                 text="Choose...",
-                command=lambda: self.value.set(tkFileDialog.askdirectory()),
+                command=lambda: self.value.set(tkinter.filedialog.askdirectory()),
             )
         else:
-            self.choose = ttk.Button(
+            self.choose = tkinter.ttk.Button(
                 master,
                 text="Choose...",
-                command=lambda: self.value.set(tkFileDialog.askopenfilename()),
+                command=lambda: self.value.set(tkinter.filedialog.askopenfilename()),
             )
         self.choose.grid(row=row + 1, column=1, sticky="w")
         if self.optional:
-            self.clear = ttk.Button(
+            self.clear = tkinter.ttk.Button(
                 master, text="Clear", command=lambda: self.value.set("")
             )
             self.clear.grid(row=row + 1, column=2, sticky="e")
@@ -197,7 +197,7 @@ def validate(self):
             return True
         elif len(self.value.get()) == 0:
             if not self.optional:
-                tkMessageBox.showwarning("", "{} not specified.".format(self.text))
+                tkinter.messagebox.showwarning("", "{} not specified.".format(self.text))
                 return False
             else:
                 return True
@@ -209,14 +209,14 @@ def validate(self):
                     ):
                         return True
                     else:
-                        tkMessageBox.showwarning(
+                        tkinter.messagebox.showwarning(
                             "", "Invalid file extension " "for {}.".format(self.text)
                         )
                         return False
                 else:  # no extension restriction
                     return True
             else:
-                tkMessageBox.showwarning(
+                tkinter.messagebox.showwarning(
                     "", "{} file does not exist." "".format(self.text)
                 )
                 return False
@@ -252,9 +252,9 @@ def body(self, master, row, columns=DEFAULT_COLUMNS, **kwargs):
 
         Returns the number of rows taken by this element.
         """
-        label = ttk.Label(master, text=self.text)
+        label = tkinter.ttk.Label(master, text=self.text)
         label.grid(row=row, column=0, columnspan=1, sticky="e")
-        self.entry = ttk.Entry(master, textvariable=self.value)
+        self.entry = tkinter.ttk.Entry(master, textvariable=self.value)
         self.entry.grid(row=row, column=1, columnspan=columns - 1, sticky="ew")
         return 1
 
@@ -295,11 +295,11 @@ def body(self, master, row, columns=DEFAULT_COLUMNS, width=4, left=False, **kwar
             label_sticky = "e"
             label_width = 1
 
-        label = ttk.Label(master, text=self.text)
+        label = tkinter.ttk.Label(master, text=self.text)
         label.grid(
             row=row, column=label_column, columnspan=label_width, sticky=label_sticky
         )
-        self.entry = ttk.Entry(master, textvariable=self.value, width=width)
+        self.entry = tkinter.ttk.Entry(master, textvariable=self.value, width=width)
         self.entry.grid(
             row=row, column=entry_column, columnspan=entry_width, sticky=entry_sticky
         )
@@ -320,20 +320,20 @@ def validate(self):
             except ValueError:
                 if len(self.value.get()) == 0:
                     if not self.optional:
-                        tkMessageBox.showwarning(
+                        tkinter.messagebox.showwarning(
                             "", "{} not specified." "".format(self.text)
                         )
                         return False
                     else:
                         return True
                 else:
-                    tkMessageBox.showwarning(
+                    tkinter.messagebox.showwarning(
                         "", "{} is not an integer." "".format(self.text)
                     )
                     return False
             else:
                 if intvalue < self.minvalue:
-                    tkMessageBox.showwarning(
+                    tkinter.messagebox.showwarning(
                         "",
                         "{} lower than minimum value "
                         "({}).".format(self.text, self.minvalue),
diff --git a/enrich2/gui/edit_dialog.py b/enrich2/gui/edit_dialog.py
index 891c167..e974bc5 100644
--- a/enrich2/gui/edit_dialog.py
+++ b/enrich2/gui/edit_dialog.py
@@ -1,8 +1,8 @@
-from __future__ import print_function
-import Tkinter as tk
-import ttk
-import tkSimpleDialog
-import tkMessageBox
+
+import tkinter as tk
+import tkinter.ttk
+import tkinter.simpledialog
+import tkinter.messagebox
 from sys import maxsize
 from collections import OrderedDict
 from .dialog_elements import (
@@ -30,7 +30,7 @@ def clear_nones_filter(v):
     Returns False if v is None, else True.
     """
     if isinstance(v, dict):
-        if len(v.keys()) == 0:
+        if len(list(v.keys())) == 0:
             # removing empty dictionaries breaks SeqLib recognition
             # return False
             return True
@@ -50,7 +50,7 @@ def clear_nones(d):
         return d
     else:
         return dict(
-            (k, clear_nones(v)) for k, v in d.iteritems() if clear_nones_filter(v)
+            (k, clear_nones(v)) for k, v in d.items() if clear_nones_filter(v)
         )
 
 
@@ -102,7 +102,7 @@ def __init__(self, frame_dict):
         self.rb_coutns = None
 
     def body(self, master, row, columns=DEFAULT_COLUMNS, **kwargs):
-        self.rb_fastq = ttk.Radiobutton(
+        self.rb_fastq = tkinter.ttk.Radiobutton(
             master,
             text="FASTQ File Mode",
             variable=self.mode,
@@ -110,7 +110,7 @@ def body(self, master, row, columns=DEFAULT_COLUMNS, **kwargs):
             command=self.fastq_mode,
         )
         self.rb_fastq.grid(row=row, column=0, columnspan=columns, sticky="ew")
-        self.rb_counts = ttk.Radiobutton(
+        self.rb_counts = tkinter.ttk.Radiobutton(
             master,
             text="Count File Mode",
             variable=self.mode,
@@ -165,7 +165,7 @@ def disable(self):
         pass
 
 
-class EditDialog(tkSimpleDialog.Dialog):
+class EditDialog(tkinter.simpledialog.Dialog):
     """
     Dialog box for editing elements. Also used to set properties on newly-created elements.
 
@@ -433,19 +433,19 @@ def __init__(self, parent_window, tree, element, title="Configure Object"):
                     Checkbox("Use Aligner", self.element_cfg["variants"], "use aligner")
                 )
 
-        tkSimpleDialog.Dialog.__init__(self, parent_window, title)
+        tkinter.simpledialog.Dialog.__init__(self, parent_window, title)
 
     def body(self, master):
         """
         Add the UI elements to the edit window. Ordering and placement of UI 
         elements in columns is defined by the ``element_layouts`` dictionary.
         """
-        main = ttk.Frame(master, padding=(3, 3, 12, 12))
+        main = tkinter.ttk.Frame(master, padding=(3, 3, 12, 12))
         main.grid(row=0, column=0, sticky="nsew")
 
         layout = element_layouts[type(self.element).__name__]
         for i, column_tuple in enumerate(layout):
-            new_frame = ttk.Frame(master, padding=(3, 3, 12, 12))
+            new_frame = tkinter.ttk.Frame(master, padding=(3, 3, 12, 12))
             new_frame.grid(row=0, column=i, sticky="nsew")
             row_no = 0
             for row_frame_key in layout[i]:
@@ -463,14 +463,14 @@ def validate(self):
 
         Also checks that child name is unique.
         """
-        for tk_list in self.frame_dict.values():
+        for tk_list in list(self.frame_dict.values()):
             if not all(x.validate() for x in tk_list):
                 return False
 
         if self.element.parent is not None:
             if self.element not in self.element.parent.children:
                 if self.name_entry.value.get() in self.element.parent.child_names():
-                    tkMessageBox.showwarning("", "Sibling names must be unique.")
+                    tkinter.messagebox.showwarning("", "Sibling names must be unique.")
                     return False
 
         return True
@@ -480,7 +480,7 @@ def apply(self):
         Called when the user chooses "OK" and the box closes.
         """
         # apply all changes to the config object
-        for tk_list in self.frame_dict.values():
+        for tk_list in list(self.frame_dict.values()):
             for tk_element in tk_list:
                 tk_element.apply()
 
diff --git a/enrich2/gui/runner_window.py b/enrich2/gui/runner_window.py
index 9986cdf..c6fa54d 100644
--- a/enrich2/gui/runner_window.py
+++ b/enrich2/gui/runner_window.py
@@ -1,14 +1,14 @@
-from __future__ import print_function
-import Tkinter as tk
-import ttk
-import tkSimpleDialog
-import tkMessageBox
+
+import tkinter as tk
+import tkinter.ttk
+import tkinter.simpledialog
+import tkinter.messagebox
 import logging
 
 logger = logging.getLogger(__name__)
 
 
-class RunnerSavePrompt(tkSimpleDialog.Dialog):
+class RunnerSavePrompt(tkinter.simpledialog.Dialog):
     """
     Dialog box for prompting the user to save before running.
     """
@@ -19,20 +19,20 @@ def __init__(self, parent_window, title="Enrich2"):
         self.dialog_text = tk.StringVar()
         self.dialog_text.set("Would you like to save your config changes?")
 
-        tkSimpleDialog.Dialog.__init__(self, parent_window, title)
+        tkinter.simpledialog.Dialog.__init__(self, parent_window, title)
 
     def body(self, master):
-        frame = ttk.Frame(master, padding=(12, 6, 12, 6))
+        frame = tkinter.ttk.Frame(master, padding=(12, 6, 12, 6))
         frame.pack()
 
-        dialog_text_label = ttk.Label(frame, textvariable=self.dialog_text)
+        dialog_text_label = tkinter.ttk.Label(frame, textvariable=self.dialog_text)
         dialog_text_label.grid(column=0, row=0, sticky="nsew")
 
     def apply(self):
         self.pw.menu_save()
 
 
-class RunnerWindow(tkSimpleDialog.Dialog):
+class RunnerWindow(tkinter.simpledialog.Dialog):
     """
     Dialog box for blocking input while running the analysis.
     """
@@ -44,13 +44,13 @@ def __init__(self, parent_window, title="Enrich2"):
         self.dialog_text = tk.StringVar()
         self.dialog_text.set("Ready to start analysis...")
 
-        tkSimpleDialog.Dialog.__init__(self, parent_window, title)
+        tkinter.simpledialog.Dialog.__init__(self, parent_window, title)
 
     def body(self, master):
-        frame = ttk.Frame(master, padding=(12, 6, 12, 6))
+        frame = tkinter.ttk.Frame(master, padding=(12, 6, 12, 6))
         frame.pack()
 
-        dialog_text_label = ttk.Label(frame, textvariable=self.dialog_text)
+        dialog_text_label = tkinter.ttk.Label(frame, textvariable=self.dialog_text)
         dialog_text_label.grid(column=0, row=0, sticky="nsew")
 
         self.run_button = tk.Button(
@@ -91,7 +91,7 @@ def runner(self):
         except Exception as e:
             # display error
             logger.error(e)
-            tkMessageBox.showerror(
+            tkinter.messagebox.showerror(
                 "Enrich2 Error", "Enrich2 encountered an error:\n{}".format(e)
             )
 
@@ -102,7 +102,7 @@ def runner(self):
                 try:
                     self.pw.root_element.make_plots()
                 except Exception as e:
-                    tkMessageBox.showwarning(
+                    tkinter.messagebox.showwarning(
                         None,
                         "Calculations completed, but plotting failed:\n{}".format(e),
                     )
@@ -110,13 +110,13 @@ def runner(self):
                 try:
                     self.pw.root_element.write_tsv()
                 except Exception as e:
-                    tkMessageBox.showwarning(
+                    tkinter.messagebox.showwarning(
                         None,
                         "Calculations completed, but tsv output failed:\n{}".format(e),
                     )
 
             # show the dialog box
-            tkMessageBox.showinfo("", "Analysis completed.")
+            tkinter.messagebox.showinfo("", "Analysis completed.")
 
         finally:
             # close the HDF5 files
diff --git a/enrich2/gui/seqlib_apply_dialog.py b/enrich2/gui/seqlib_apply_dialog.py
index dfb7d5e..45bb030 100644
--- a/enrich2/gui/seqlib_apply_dialog.py
+++ b/enrich2/gui/seqlib_apply_dialog.py
@@ -1,9 +1,9 @@
-import Tkinter as tk
-import ttk
-import tkSimpleDialog
+import tkinter as tk
+import tkinter.ttk
+import tkinter.simpledialog
 
 
-class SeqLibApplyDialog(tkSimpleDialog.Dialog):
+class SeqLibApplyDialog(tkinter.simpledialog.Dialog):
     """
     Confirmation dialog box for applying FASTQ filtering options to selected SeqLibs from the Treeview.
     """
@@ -20,7 +20,7 @@ def __init__(
             and type(self.tree.get_element(self.source_id))
             == type(self.tree.get_element(x))
         ]
-        tkSimpleDialog.Dialog.__init__(self, parent_window, title)
+        tkinter.simpledialog.Dialog.__init__(self, parent_window, title)
 
     def body(self, master):
         """
@@ -36,15 +36,15 @@ def body(self, master):
                 self.tree.get_element(self.target_ids[0]).name,
             )
         else:
-            bullet = "    " + u"\u25C6"
+            bullet = "    " + "\u25C6"
             message_string = 'Apply FASTQ filtering options from "{}"" to the following?\n'.format(
                 self.tree.get_element(self.source_id).name
             )
             for x in self.target_ids:
-                message_string += u"{bullet} {name}\n".format(
+                message_string += "{bullet} {name}\n".format(
                     bullet=bullet, name=self.tree.get_element(x).name
                 )
-        message = ttk.Label(master, text=message_string, justify="left")
+        message = tkinter.ttk.Label(master, text=message_string, justify="left")
         message.grid(row=0, sticky="w")
 
     def buttonbox(self):
@@ -63,7 +63,7 @@ def buttonbox(self):
 
             box.pack()
         else:
-            tkSimpleDialog.Dialog.buttonbox(self)
+            tkinter.simpledialog.Dialog.buttonbox(self)
 
     def apply(self):
         """
diff --git a/enrich2/main.py b/enrich2/main.py
index b3359a8..566d686 100755
--- a/enrich2/main.py
+++ b/enrich2/main.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 #
-from __future__ import print_function
+
 from argparse import ArgumentParser, RawDescriptionHelpFormatter
 import logging
 import json
@@ -75,9 +75,9 @@ def main_cmd():
     desc_string = (
         "Command-line driver for Enrich2 v{}".format(__version__)
         + "\n\nscoring methods:\n"
-        + "\n".join(["  {:22}{}".format(k, v) for k, v in SCORING_METHODS.items()])
+        + "\n".join(["  {:22}{}".format(k, v) for k, v in list(SCORING_METHODS.items())])
         + "\n\nlog ratio methods:\n"
-        + "\n".join(["  {:22}{}".format(k, v) for k, v in LOGR_METHODS.items()])
+        + "\n".join(["  {:22}{}".format(k, v) for k, v in list(LOGR_METHODS.items())])
     )
 
     # create parser and add description
@@ -90,10 +90,10 @@ def main_cmd():
     # add command line arguments
     parser.add_argument("config", help="JSON configuration file")
     parser.add_argument(
-        "scoring_method", help="scoring method", choices=SCORING_METHODS.keys()
+        "scoring_method", help="scoring method", choices=list(SCORING_METHODS.keys())
     )
     parser.add_argument(
-        "logr_method", help="log ratio method", choices=LOGR_METHODS.keys()
+        "logr_method", help="log ratio method", choices=list(LOGR_METHODS.keys())
     )
 
     # add support for semantic version checking
diff --git a/enrich2/overlap.py b/enrich2/overlap.py
index 2c6737b..aec274f 100644
--- a/enrich2/overlap.py
+++ b/enrich2/overlap.py
@@ -1,4 +1,4 @@
-from __future__ import print_function
+
 import pandas as pd
 import logging
 from matplotlib.backends.backend_pdf import PdfPages
@@ -198,7 +198,7 @@ def merge_reads(self, fwd, rev):
 
         mismatches = 0
         first = True
-        for i in xrange(self.overlap_length):
+        for i in range(self.overlap_length):
             a = self.fwd_start - 1 + i
             b = len(rev) - self.rev_start - self.overlap_length + i + 1
             try:
@@ -247,7 +247,7 @@ def counts_from_reads(self):
             data=0,
             index=[
                 x + self.fwd_start + self.wt.dna_offset
-                for x in xrange(0, self.overlap_length)
+                for x in range(0, self.overlap_length)
             ],
             columns=["resolved", "unresolved", "first"],
         )
diff --git a/enrich2/plots.py b/enrich2/plots.py
index b459fd8..7b369dd 100644
--- a/enrich2/plots.py
+++ b/enrich2/plots.py
@@ -1,4 +1,4 @@
-from __future__ import print_function
+
 import collections
 import logging
 import numpy as np
@@ -292,7 +292,7 @@ def barcodemap_plot(
         )
         return
 
-    if len(data.keys()) <= 1:
+    if len(list(data.keys())) <= 1:
         logger.warning("Not enough elements to make barcodemap plot")
         return
 
@@ -302,7 +302,7 @@ def barcodemap_plot(
     configure_axes(ax)
 
     # plot the histogram
-    ax.hist(data.values(), bins=bins, log=log, color=color)
+    ax.hist(list(data.values()), bins=bins, log=log, color=color)
 
     # set the title and axes labels
     ax.set_title("Barcodes per Variant\n{}".format(obj.name))
@@ -653,7 +653,7 @@ def density_ax(ax, ys, xmin, xmax, xlabel, line_params, legend_loc="best"):
 
     xs = np.linspace(xmin, xmax, 1000)
 
-    for i in xrange(len(ys)):
+    for i in range(len(ys)):
         ax.plot(xs, d_ys[i].evaluate(xs), label=ys[i].name, **line_params[i])
 
     ax.legend(loc=legend_loc, **legend_params)
diff --git a/enrich2/random_effects.py b/enrich2/random_effects.py
index 273ae30..563defe 100644
--- a/enrich2/random_effects.py
+++ b/enrich2/random_effects.py
@@ -20,7 +20,7 @@ def rml_estimator(y, sigma2i, iterations=50):
     sigma2ML = np.sum((y - np.mean(y, axis=0)) ** 2 / (len(beta0) - 1), axis=0)
     eps = np.zeros(beta0.shape)
     betaML = None
-    for _ in xrange(iterations):
+    for _ in range(iterations):
         w = 1 / (sigma2i + sigma2ML)
         sw = np.sum(w, axis=0)
         sw2 = np.sum(w ** 2, axis=0)
diff --git a/enrich2/selection.py b/enrich2/selection.py
index da6b4f0..b0dbf6e 100644
--- a/enrich2/selection.py
+++ b/enrich2/selection.py
@@ -1,4 +1,4 @@
-from __future__ import print_function
+
 from .barcode import BarcodeSeqLib
 from .barcodevariant import BcvSeqLib
 from .barcodeid import BcidSeqLib
@@ -166,7 +166,7 @@ def configure(self, cfg, configure_children=True):
                     lib = SEQLIB_CLASSES[libtype]()
                     # don't re-parse the barcode maps if possible
                     mapfile = lib_cfg["barcodes"]["map file"]
-                    if mapfile in self.barcode_maps.keys():
+                    if mapfile in list(self.barcode_maps.keys()):
                         lib.configure(lib_cfg, barcode_map=self.barcode_maps[mapfile])
                     else:
                         lib.configure(lib_cfg)
@@ -247,7 +247,7 @@ def is_barcodevariant(self):
         """
         return (
             all(isinstance(lib, BcvSeqLib) for lib in self.children)
-            and len(self.barcode_maps.keys()) == 1
+            and len(list(self.barcode_maps.keys())) == 1
         )
 
     def is_barcodeid(self):
@@ -259,7 +259,7 @@ def is_barcodeid(self):
         """
         return (
             all(isinstance(lib, BcidSeqLib) for lib in self.children)
-            and len(self.barcode_maps.keys()) == 1
+            and len(list(self.barcode_maps.keys())) == 1
         )
 
     def is_coding(self):
@@ -298,7 +298,7 @@ def merge_counts_unfiltered(self, label):
         self.logger.info("Aggregating SeqLib data")
 
         destination = "/main/{}/counts_unfiltered".format(label)
-        if destination in self.store.keys():
+        if destination in list(self.store.keys()):
             # need to remove the current destination table because we are using append
             # append is required because it takes the "min_itemsize" argument, and put doesn't
             self.logger.info("Replacing existing '{}'".format(destination))
@@ -325,7 +325,7 @@ def merge_counts_unfiltered(self, label):
 
         # perform operation in chunks
         tp_frame = None
-        for i in xrange(0, len(complete_index), self.chunksize):
+        for i in range(0, len(complete_index), self.chunksize):
             # don't duplicate the index if the chunksize is large
             if self.chunksize < len(complete_index):
                 index_chunk = complete_index[i : i + self.chunksize]
@@ -724,7 +724,7 @@ def calc_regression(self, label):
         """
         if self.check_store("/main/{}/scores".format(label)):
             return
-        elif "/main/{}/scores".format(label) in self.store.keys():
+        elif "/main/{}/scores".format(label) in list(self.store.keys()):
             # need to remove the current keys because we are using append
             self.store.remove("/main/{}/scores".format(label))
 
@@ -1107,7 +1107,7 @@ def write_tsv(self):
         """
         if self.tsv_requested:
             self.logger.info("Generating tab-separated output files")
-            for k in self.store.keys():
+            for k in list(self.store.keys()):
                 self.write_table_tsv(k)
         for lib in self.children:
             lib.write_tsv()
diff --git a/enrich2/seqlib.py b/enrich2/seqlib.py
index 07765bc..65b241c 100644
--- a/enrich2/seqlib.py
+++ b/enrich2/seqlib.py
@@ -1,4 +1,4 @@
-from __future__ import print_function
+
 import logging
 import os.path
 import pandas as pd
@@ -80,7 +80,7 @@ def serialize_filters(self):
         Return a dictionary of filtering options that have non-default values.
         """
         cfg = dict()
-        for key in self.filters.keys():
+        for key in list(self.filters.keys()):
             if self.filters[key] != self.default_filters[key]:
                 cfg[key] = self.filters[key]
         return cfg
@@ -190,7 +190,7 @@ def save_counts(self, label, df_dict, raw):
         If *raw* is ``True``, the counts are stored under
         ``"/raw/label/counts"``; else ``"/main/label/counts"``.
         """
-        if len(df_dict.keys()) == 0:
+        if len(list(df_dict.keys())) == 0:
             raise ValueError("Failed to count {} [{}]".format(label, self.name))
         df = pd.DataFrame.from_dict(df_dict, orient="index", dtype=np.int32)
         df.columns = ["count"]
@@ -262,8 +262,8 @@ def save_filter_stats(self):
 
         This DataFrame contains the same information as ``report_filter_stats``
         """
-        df = pd.DataFrame(index=SeqLib.filter_messages.values(), columns=["count"])
-        for key in self.filter_stats.keys():
+        df = pd.DataFrame(index=list(SeqLib.filter_messages.values()), columns=["count"])
+        for key in list(self.filter_stats.keys()):
             if self.filter_stats[key] > 0 or key == "total":
                 df.loc[SeqLib.filter_messages[key], "count"] = self.filter_stats[key]
         df.dropna(inplace=True)
@@ -346,7 +346,7 @@ def write_tsv(self):
         """
         if self.tsv_requested:
             self.logger.info("Generating tab-separated output files")
-            for k in self.store.keys():
+            for k in list(self.store.keys()):
                 self.write_table_tsv(k)
 
     def counts_from_file_h5(self, fname):
@@ -362,7 +362,7 @@ def counts_from_file_h5(self, fname):
         )
         # this could probably be much more efficient, but the PyTables docs
         # don't explain copying subsets of files adequately
-        raw_keys = [key for key in store.keys() if key.startswith("/raw/")]
+        raw_keys = [key for key in list(store.keys()) if key.startswith("/raw/")]
         if len(raw_keys) == 0:
             raise ValueError(
                 "No raw counts found in '{}' [{}]" "".format(fname, self.name)
diff --git a/enrich2/sfmap.py b/enrich2/sfmap.py
index 43bb438..10dd4e3 100644
--- a/enrich2/sfmap.py
+++ b/enrich2/sfmap.py
@@ -1,4 +1,4 @@
-from __future__ import print_function
+
 import logging
 import numpy as np
 import matplotlib.pyplot as plt
@@ -319,9 +319,9 @@ def sfmap_axes(
     # add marks on wild type positions
     wt = list(wt)
     if tall:
-        wt_xy = zip((list(df.columns).index(x) for x in wt), reversed(xrange(len(wt))))
+        wt_xy = list(zip((list(df.columns).index(x) for x in wt), reversed(list(range(len(wt))))))
     else:
-        wt_xy = zip(xrange(len(wt)), (list(df.index).index(x) for x in wt))
+        wt_xy = list(zip(list(range(len(wt))), (list(df.index).index(x) for x in wt)))
     for x, y in wt_xy:
         ax.add_patch(
             Circle(
@@ -341,8 +341,8 @@ def sfmap_axes(
         # rescale the SE's onto 0 .. 0.98
         # rescaling onto 0 .. 1.0 causes the corners to look funny
         masked_se = masked_se / vmax_se * 0.98
-        for x in xrange(len(df.index)):
-            for y in xrange(len(df.columns)):
+        for x in range(len(df.index)):
+            for y in range(len(df.columns)):
                 value = masked_se[x, y]
                 if value and value >= 0.02:  # not masked, above threshold
                     corner_dist = (1.0 - value) / 2.0
diff --git a/enrich2/storemanager.py b/enrich2/storemanager.py
index e428c80..ae4c328 100644
--- a/enrich2/storemanager.py
+++ b/enrich2/storemanager.py
@@ -1,4 +1,4 @@
-from __future__ import print_function
+
 import os
 import logging
 import pandas as pd
@@ -118,7 +118,7 @@ def child_labels(self):
         for x in self.children:
             shared.extend(x.labels)
         shared = collections.Counter(shared)
-        shared = [x for x in shared.keys() if shared[x] == len(self.children)]
+        shared = [x for x in list(shared.keys()) if shared[x] == len(self.children)]
         return sorted(shared, key=lambda a: ELEMENT_LABELS.index(a))
 
     @property
@@ -285,7 +285,7 @@ def scoring_method(self, value):
         """
         Make sure the *value* is valid and set it.
         """
-        if value in SCORING_METHODS.keys():
+        if value in list(SCORING_METHODS.keys()):
             self._scoring_method = value
         else:
             raise ValueError(
@@ -427,7 +427,7 @@ def logr_method(self, value):
         """
         Make sure the *value* is valid and set it.
         """
-        if value in LOGR_METHODS.keys():
+        if value in list(LOGR_METHODS.keys()):
             self._logr_method = value
         else:
             raise ValueError(
@@ -692,7 +692,7 @@ def check_store(self, key):
         Returns:
             bool: True if the key exists in the HDF5 store, else False.
         """
-        if key in self.store.keys():
+        if key in list(self.store.keys()):
             self.logger.info("Found existing '{}'".format(key))
             return True
         else:
@@ -712,7 +712,7 @@ def map_table(
 
         This method really needs a better name.
         """
-        if destination in self.store.keys():
+        if destination in list(self.store.keys()):
             # remove the current destination table because we are using append
             # append takes the "min_itemsize" argument, and put doesn't
             self.logger.info("Overwriting existing '{}'".format(destination))
diff --git a/enrich2/variant.py b/enrich2/variant.py
index f919a25..9576504 100644
--- a/enrich2/variant.py
+++ b/enrich2/variant.py
@@ -1,4 +1,4 @@
-from __future__ import print_function
+
 import re
 from .aligner import Aligner
 from .seqlib import SeqLib
@@ -296,7 +296,7 @@ def align_variant(self, variant_dna):
         .. warning:: Using the :py:class:`~seqlib.aligner.Aligner` \
         dramatically increases runtime.
         """
-        if variant_dna in self.aligner_cache.keys():
+        if variant_dna in self.aligner_cache:
             return self.aligner_cache[variant_dna]
 
         mutations = list()
@@ -355,7 +355,7 @@ def count_variant(self, variant_dna, include_indels=True):
                 return None
         else:
             mutations = list()
-            for i in xrange(len(variant_dna)):
+            for i in range(len(variant_dna)):
                 if variant_dna[i] != self.wt.dna_seq[i]:
                     mutations.append(
                         (
@@ -381,7 +381,7 @@ def count_variant(self, variant_dna, include_indels=True):
         mutation_strings = list()
         if self.is_coding():
             variant_protein = ""
-            for i in xrange(0, len(variant_dna), 3):
+            for i in range(0, len(variant_dna), 3):
                 try:
                     variant_protein += CODON_TABLE[variant_dna[i : i + 3]]
                 except KeyError:  # garbage codon due to indel, X, or N
diff --git a/enrich2/wildtype.py b/enrich2/wildtype.py
index ab2b565..bfcffd2 100644
--- a/enrich2/wildtype.py
+++ b/enrich2/wildtype.py
@@ -1,4 +1,4 @@
-from __future__ import print_function
+
 import logging
 import re
 from .constants import CODON_TABLE
@@ -68,7 +68,7 @@ def configure(self, cfg):
 
                 # perform translation
                 self.protein_seq = ""
-                for i in xrange(0, len(self.dna_seq), 3):
+                for i in range(0, len(self.dna_seq), 3):
                     self.protein_seq += CODON_TABLE[self.dna_seq[i : i + 3]]
 
                 # set the reference offset if it's a multiple of three
@@ -139,4 +139,4 @@ def position_tuples(self, protein=False):
             seq = self.dna_seq
             offset = self.dna_offset
 
-        return [(i + offset + 1, seq[i]) for i in xrange(len(seq))]
+        return [(i + offset + 1, seq[i]) for i in range(len(seq))]

From 73ca809b673bfcc1e6b891f66f4440cec01dab7e Mon Sep 17 00:00:00 2001
From: Chris <17653365+odcambc@users.noreply.github.com>
Date: Mon, 25 Nov 2024 13:23:48 -0600
Subject: [PATCH 02/10] Minor fixes

---
 enrich2/barcodemap.py   |  2 +-
 enrich2/seqlib.py       |  2 +-
 enrich2/storemanager.py |  5 +++--
 enrich2/wildtype.py     | 11 +++++------
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/enrich2/barcodemap.py b/enrich2/barcodemap.py
index b3f03b9..29fa36e 100644
--- a/enrich2/barcodemap.py
+++ b/enrich2/barcodemap.py
@@ -86,7 +86,7 @@ def __init__(self, mapfile, is_variant=False):
                 if self[barcode] != value:
                     raise ValueError(
                         "Barcode '{}' assigned to multiple "
-                        "unique values".format(barcode, self.name)
+                        "unique values: {}".format(barcode, self.name)
                     )
             else:
                 self[barcode] = value
diff --git a/enrich2/seqlib.py b/enrich2/seqlib.py
index 65b241c..032c864 100644
--- a/enrich2/seqlib.py
+++ b/enrich2/seqlib.py
@@ -173,7 +173,7 @@ def report_filtered_read(self, fq, filter_flags):
         ``SeqLib.filter_messages`` dictionary.
         """
         self.logger.debug(
-            "Filtered read ({messages})\n{read!s}".format(
+            "Filtered read ({messages})\n{read!s}\n{name}".format(
                 messages=", ".join(
                     SeqLib.filter_messages[x] for x in filter_flags if filter_flags[x]
                 ),
diff --git a/enrich2/storemanager.py b/enrich2/storemanager.py
index ae4c328..e3b9224 100644
--- a/enrich2/storemanager.py
+++ b/enrich2/storemanager.py
@@ -5,6 +5,7 @@
 import collections
 import getpass
 import time
+from typing import Optional
 
 
 #: Dictionary specifying available scoring methods for the analysis
@@ -65,9 +66,9 @@ class StoreManager(object):
     store and directory management.
     """
 
-    store_suffix = None
+    store_suffix: Optional[str] = None
     has_store = True
-    treeview_class_name = None
+    treeview_class_name: Optional[str] = None
 
     def __init__(self):
         self.logger = logging.getLogger("{}.{}".format(__name__, self.__class__))
diff --git a/enrich2/wildtype.py b/enrich2/wildtype.py
index bfcffd2..872c1d4 100644
--- a/enrich2/wildtype.py
+++ b/enrich2/wildtype.py
@@ -1,4 +1,3 @@
-
 import logging
 import re
 from .constants import CODON_TABLE
@@ -6,11 +5,11 @@
 
 class WildTypeSequence(object):
     """
-    Container class for wild type sequence information. Used by :py:class:`~seqlib.seqlib.VariantSeqLib` objects and 
-    :py:class:`~enrich2.selection.Selection` or :py:class:`~enrich2.experiment.Experiment` objects that contain 
+    Container class for wild type sequence information. Used by :py:class:`~seqlib.seqlib.VariantSeqLib` objects and
+    :py:class:`~enrich2.selection.Selection` or :py:class:`~enrich2.experiment.Experiment` objects that contain
     variant information.
 
-    Requires a *parent_name* that associates this object with a StoreManager object for the 
+    Requires a *parent_name* that associates this object with a StoreManager object for the
     purposes of error reporting and logging.
     """
 
@@ -69,7 +68,7 @@ def configure(self, cfg):
                 # perform translation
                 self.protein_seq = ""
                 for i in range(0, len(self.dna_seq), 3):
-                    self.protein_seq += CODON_TABLE[self.dna_seq[i : i + 3]]
+                    self.protein_seq += CODON_TABLE[self.dna_seq[i: i + 3]]
 
                 # set the reference offset if it's a multiple of three
                 if self.dna_offset % 3 == 0:
@@ -122,7 +121,7 @@ def duplicate(self, new_parent_name):
 
     def position_tuples(self, protein=False):
         """
-        Return a list of tuples containing the position number (after offset adjustment) and 
+        Return a list of tuples containing the position number (after offset adjustment) and
         single-letter symbol (nucleotide or amino acid) for each position the wild type sequence.
         """
         if protein:

From b1a6e733301642e6fa5c3dc7331b7af7e3517d49 Mon Sep 17 00:00:00 2001
From: Chris <17653365+odcambc@users.noreply.github.com>
Date: Tue, 3 Dec 2024 19:40:02 -0600
Subject: [PATCH 03/10] update fastq header format

---
 enrich2/fqread.py | 112 +++++++++++++++++++++++++---------------------
 1 file changed, 62 insertions(+), 50 deletions(-)

diff --git a/enrich2/fqread.py b/enrich2/fqread.py
index 7c30b54..f22d44c 100644
--- a/enrich2/fqread.py
+++ b/enrich2/fqread.py
@@ -2,7 +2,6 @@
 from sys import stderr
 import os.path
 import re
-import string
 import itertools
 import bz2
 import gzip
@@ -10,32 +9,45 @@
 
 # The following regex is referenced by line number in the class documentation.
 # Matches FASTQ headers based on the following pattern (modify as needed):
-# @<MachineName>:<Lane>:<Tile>:<X>:<Y>:<Chastity>#<IndexRead>/<ReadNumber>
+# @<instrument>:<run number>:<flowcell ID>:<lane>:<tile>:<x-pos>:<y-pos> <read>:<is filtered>:<control number>:<sample number>
+
+# Example: @M02564:876:000000000-L3775:1:1101:16862:1800 1:N:0:TCACTCGA+TAACGGTT
+# Sample number contains indexes if they are present.
+
+# See: https://help.basespace.illumina.com/files-used-by-basespace/fastq-files
 header_pattern = re.compile(
-    "@(?P<MachineName>.+)"
-    ":(?P<Lane>\d+)"
-    ":(?P<Tile>\d+)"
-    ":(?P<X>\d+)"
-    ":(?P<Y>\d+)"
-    ":(?P<Chastity>[01])"
-    "#(?P<IndexRead>\d)"
-    "/(?P<ReadNumber>\d)"
+    r"""
+    @(?P<Instrument>[^:]+):
+    (?P<RunNumber>\d+):
+    (?P<FlowcellID>[^:]+):
+    (?P<Lane>\d+):
+    (?P<Tile>\d+):
+    (?P<XPos>\d+):
+    (?P<YPos>\d+)
+    \s
+    (?P<Read>\d+):
+    (?P<IsFiltered>[YN]):
+    (?P<ControlNumber>[^:]+):
+    (?P<SampleNumber>[^:]+)
+    """,
+    re.VERBOSE,
 )
 
-
 BUFFER_SIZE = 100000  # empirically optimized for reading FASTQ files
 
 
-dna_trans = string.maketrans("actgACTG", "tgacTGAC")
+dna_trans = str.maketrans(
+    {"a": "t", "c": "g", "t": "a", "g": "c", "A": "T", "C": "G", "T": "A", "G": "C"}
+)
 
 
 class FQRead(object):
     """
-    Stores a single record from a FASTQ_ file. Quality values are stored 
+    Stores a single record from a FASTQ_ file. Quality values are stored
     internally as a list of integer `Phred quality scores \
-    <http://www.phrap.com/phred/#qualityscores>`_. The *qbase* parameter is 
-    the ASCII value that correponds to Phred score of 0. The *sequence* and 
-    *quality* strings must be the same length. 
+    <http://www.phrap.com/phred/#qualityscores>`_. The *qbase* parameter is
+    the ASCII value that correponds to Phred score of 0. The *sequence* and
+    *quality* strings must be the same length.
     """
 
     # use slots for memory efficiency
@@ -56,7 +68,7 @@ def __init__(self, header, sequence, header2, quality, qbase=33):
 
     def __str__(self):
         """
-        Reformat as a four-line FASTQ_ record. This method converts the 
+        Reformat as a four-line FASTQ_ record. This method converts the
         integer quality values back into a string.
         """
         return "\n".join(
@@ -76,7 +88,7 @@ def __len__(self):
 
     def trim(self, start=1, end=None):
         """
-        Trims this :py:class:`~fqread.FQRead` to contain bases between 
+        Trims this :py:class:`~fqread.FQRead` to contain bases between
         *start* and *end* (inclusive). Bases are numbered starting at 1.
         """
         self.sequence = self.sequence[start - 1 : end]
@@ -84,14 +96,14 @@ def trim(self, start=1, end=None):
 
     def trim_length(self, length, start=1):
         """
-        Trims this :py:class:`~fqread.FQRead` to contain *length* bases, 
+        Trims this :py:class:`~fqread.FQRead` to contain *length* bases,
         beginning with *start*. Bases are numbered starting at 1.
         """
         self.trim(start=start, end=start + length - 1)
 
     def revcomp(self):
         """
-        Reverse-complement the sequence in place. Also reverses the array of 
+        Reverse-complement the sequence in place. Also reverses the array of
         quality values.
         """
         self.sequence = self.sequence.translate(dna_trans)[::-1]
@@ -100,9 +112,9 @@ def revcomp(self):
     def header_information(self, pattern=header_pattern):
         """header_information(pattern=header_pattern)
 
-        Parses the first FASTQ_ header (@ header) and returns a dictionary. 
-        Dictionary keys are the named groups in the regular expression 
-        *pattern*. Unnamed matches are ignored. Integer values are converted 
+        Parses the first FASTQ_ header (@ header) and returns a dictionary.
+        Dictionary keys are the named groups in the regular expression
+        *pattern*. Unnamed matches are ignored. Integer values are converted
         from strings to integers.
 
         The default pattern matches a header in the format::
@@ -134,13 +146,13 @@ def mean_quality(self):
 
     def is_chaste(self, raises=True):
         """
-        Returns ``True`` if the chastity bit is set in the header. The 
-        regular experession used by :py:meth:`header_information` must  
-        include a ``'Chastity'`` match that equals ``1`` if the read is 
+        Returns ``True`` if the chastity bit is set in the header. The
+        regular experession used by :py:meth:`header_information` must
+        include a ``'Chastity'`` match that equals ``1`` if the read is
         chaste.
 
-        If ``raises`` is ``True``, raises an informative error if the 
-        chastity information in the header is not found. Otherwise, a 
+        If ``raises`` is ``True``, raises an informative error if the
+        chastity information in the header is not found. Otherwise, a
         read without chastity information is treated as unchaste.
         """
         try:
@@ -162,15 +174,15 @@ def is_chaste(self, raises=True):
 
 def split_fastq_path(fname):
     """
-    Check that *fname* exists and has a valid FASTQ_ file extension. Valid 
-    file extensions are ``.fastq`` or ``.fq``, optionally followed by ``.gz`` 
-    or ``.bz2`` if the file is compressed. 
+    Check that *fname* exists and has a valid FASTQ_ file extension. Valid
+    file extensions are ``.fastq`` or ``.fq``, optionally followed by ``.gz``
+    or ``.bz2`` if the file is compressed.
 
-    Returns a tuple containing the directory, the file base name with no 
-    extension, the FASTQ_ file extension used, and the compression format 
+    Returns a tuple containing the directory, the file base name with no
+    extension, the FASTQ_ file extension used, and the compression format
     (``"gz"``, ``"bz2"``, or ``None``).
 
-    Raises an ``IOError`` if the file doesn't exist. Returns ``None`` if the 
+    Raises an ``IOError`` if the file doesn't exist. Returns ``None`` if the
     file extension is not recognized.
     """
     if os.path.isfile(fname):
@@ -197,9 +209,9 @@ def split_fastq_path(fname):
 
 def create_compressed_outfile(fname, compression):
     """
-    Utility function for opening compressed output files. Accepted values for 
-    *compression* are ``"gz"``, ``"bz2"``, or ``None``. Returns a file handle 
-    of the appropriate type opened for writing. Existing files with the same 
+    Utility function for opening compressed output files. Accepted values for
+    *compression* are ``"gz"``, ``"bz2"``, or ``None``. Returns a file handle
+    of the appropriate type opened for writing. Existing files with the same
     name are overwritten.
     """
     if compression == "bz2":
@@ -215,10 +227,10 @@ def create_compressed_outfile(fname, compression):
 
 def read_fastq(fname, filter_function=None, buffer_size=BUFFER_SIZE, qbase=33):
     """
-    Generator function for reading from FASTQ_ file *fname*. Yields an 
-    :py:class:`~fqread.FQRead` object for each FASTQ_ record in the file. The 
-    *filter_function* must operate on an :py:class:`~fqread.FQRead` object 
-    and return ``True`` or ``False``. If the result is ``False``, the record 
+    Generator function for reading from FASTQ_ file *fname*. Yields an
+    :py:class:`~fqread.FQRead` object for each FASTQ_ record in the file. The
+    *filter_function* must operate on an :py:class:`~fqread.FQRead` object
+    and return ``True`` or ``False``. If the result is ``False``, the record
     will be skipped silently.
 
     .. note:: To read multiple files in parallel (such as index or \
@@ -272,15 +284,15 @@ def read_fastq_multi(
     fnames, filter_function=None, buffer_size=BUFFER_SIZE, match_lengths=True, qbase=33
 ):
     """
-    Generator function for reading from multiple FASTQ_ files in parallel. 
-    The argument *fnames* is an iterable of FASTQ_ file names. Yields a 
-    tuple of :py:class:`~fqread.FQRead` objects, one for each file in 
-    *fnames*. The *filter_function* must operate on an :py:class:`FQRead` 
-    object and return ``True`` or ``False``. If the result is ``False`` for 
+    Generator function for reading from multiple FASTQ_ files in parallel.
+    The argument *fnames* is an iterable of FASTQ_ file names. Yields a
+    tuple of :py:class:`~fqread.FQRead` objects, one for each file in
+    *fnames*. The *filter_function* must operate on an :py:class:`FQRead`
+    object and return ``True`` or ``False``. If the result is ``False`` for
     any :py:class:`FQRead` in the tuple, the entire tuple will be skipped.
 
-    If *match_lengths* is ``True``, the generator will yield ``None`` if the 
-    files do not contain the same number of FASTQ_ records. Otherwise, it 
+    If *match_lengths* is ``True``, the generator will yield ``None`` if the
+    files do not contain the same number of FASTQ_ records. Otherwise, it
     will silently ignore partial records.
     """
     fq_generators = list()
@@ -305,8 +317,8 @@ def read_fastq_multi(
 
 def fastq_filter_chastity(fq):
     """
-    Filtering function for :py:func:`read_fastq` and 
-    :py:func:`read_fastq_multi`. Returns ``True`` if the 
+    Filtering function for :py:func:`read_fastq` and
+    :py:func:`read_fastq_multi`. Returns ``True`` if the
     :py:class:`~fqread.FQRead` object *fq* is chaste.
     """
     return fq.is_chaste()

From 5458f0fa5bb693ae8afa850d535f3842eb20558b Mon Sep 17 00:00:00 2001
From: Chris <17653365+odcambc@users.noreply.github.com>
Date: Tue, 3 Dec 2024 19:40:29 -0600
Subject: [PATCH 04/10] fix escaping in cite

---
 enrich2/random_effects.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/enrich2/random_effects.py b/enrich2/random_effects.py
index 563defe..55c2211 100644
--- a/enrich2/random_effects.py
+++ b/enrich2/random_effects.py
@@ -10,7 +10,7 @@ def rml_estimator(y, sigma2i, iterations=50):
               title={Mixed models: theory and applications with R},
               author={Demidenko, Eugene},
               year={2013},
-              publisher={John Wiley \& Sons}
+              publisher={John Wiley \\& Sons}
             }
 
     """

From 7713e4e6a5362f38a407a2e958d2d8a02bf4ec30 Mon Sep 17 00:00:00 2001
From: Chris <17653365+odcambc@users.noreply.github.com>
Date: Tue, 3 Dec 2024 19:46:59 -0600
Subject: [PATCH 05/10] fixes to types, file mode

---
 enrich2/experiment.py | 12 ++++-----
 enrich2/main.py       |  2 +-
 enrich2/selection.py  | 62 +++++++++++++++++++++----------------------
 3 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/enrich2/experiment.py b/enrich2/experiment.py
index dcb7c68..b44183a 100644
--- a/enrich2/experiment.py
+++ b/enrich2/experiment.py
@@ -130,16 +130,16 @@ def validate(self):
 
     def is_coding(self):
         """
-        Return ``True`` if the all :py:class:`~selection.Selection` in the 
-        :py:class:`~experiment.Experiment` count protein-coding variants, else 
+        Return ``True`` if the all :py:class:`~selection.Selection` in the
+        :py:class:`~experiment.Experiment` count protein-coding variants, else
         ``False``.
         """
         return all(x.is_coding() for x in self.selection_list())
 
     def has_wt_sequence(self):
         """
-        Return ``True`` if the all :py:class:`~selection.Selection` in the 
-        :py:class:`~experiment.Experiment` have a wild type sequence, else 
+        Return ``True`` if the all :py:class:`~selection.Selection` in the
+        :py:class:`~experiment.Experiment` have a wild type sequence, else
         ``False``.
         """
         return all(x.has_wt_sequence() for x in self.selection_list())
@@ -172,7 +172,7 @@ def combine_barcode_maps(self):
         If multiple variants or IDs map to the same barcode, only the first one
         will be present in the barcode map table.
 
-        The ``'/main/barcodemap'`` table is not created if no 
+        The ``'/main/barcodemap'`` table is not created if no
         :py:class:`~selection.Selection` has barcode map information.
         """
         if self.check_store("/main/barcodemap"):
@@ -427,7 +427,7 @@ def calc_pvalues_wt(self, label):
             [sorted(self.child_names()), sorted(["z", "pvalue_raw"])],
             names=["condition", "value"],
         )
-        result_df = pd.DataFrame(index=data.index, columns=columns)
+        result_df = pd.DataFrame(index=data.index, columns=columns, dtype=float)
 
         condition_labels = data.columns.levels[0]
         for cnd in condition_labels:
diff --git a/enrich2/main.py b/enrich2/main.py
index 566d686..a2c93db 100755
--- a/enrich2/main.py
+++ b/enrich2/main.py
@@ -157,7 +157,7 @@ def main_cmd():
 
     # read the JSON file
     try:
-        cfg = json.load(open(args.config, "U"))
+        cfg = json.load(open(args.config, "r"))
     except IOError:
         raise IOError("Failed to open '{}' [{}]".format(args.config, DRIVER_NAME))
     except ValueError:
diff --git a/enrich2/selection.py b/enrich2/selection.py
index b0dbf6e..8cd46cd 100644
--- a/enrich2/selection.py
+++ b/enrich2/selection.py
@@ -42,7 +42,7 @@
 
 def regression_apply(row, timepoints, weighted):
     """
-    :py:meth:`pandas.DataFrame.apply` apply function for calculating 
+    :py:meth:`pandas.DataFrame.apply` apply function for calculating
     enrichment using linear regression. If *weighted* is ``True`` perform
     weighted least squares; else perform ordinary least squares.
 
@@ -77,8 +77,8 @@ def regression_apply(row, timepoints, weighted):
 
 class Selection(StoreManager):
     """
-    Class for a single selection replicate, consisting of multiple 
-    timepoints. This class coordinates :py:class:`~seqlib.seqlib.SeqLib` 
+    Class for a single selection replicate, consisting of multiple
+    timepoints. This class coordinates :py:class:`~seqlib.seqlib.SeqLib`
     objects.
     """
 
@@ -94,7 +94,7 @@ def __init__(self):
 
     def _children(self):
         """
-        Return the :py:class:`~seqlib.seqlib.SeqLib` objects as a list, 
+        Return the :py:class:`~seqlib.seqlib.SeqLib` objects as a list,
         sorted by timepoint and then by name.
         """
         libs = list()
@@ -104,7 +104,7 @@ def _children(self):
 
     def remove_child_id(self, tree_id):
         """
-        Remove the reference to a :py:class:`~seqlib.seqlib.SeqLib` with 
+        Remove the reference to a :py:class:`~seqlib.seqlib.SeqLib` with
         Treeview id *tree_id*. Deletes empty time points.
         """
         empty = None
@@ -140,10 +140,10 @@ def wt(self):
 
     def configure(self, cfg, configure_children=True):
         """
-        Set up the :py:class:`~selection.Selection` using the *cfg* object, 
+        Set up the :py:class:`~selection.Selection` using the *cfg* object,
         usually from a ``.json`` configuration file.
 
-        If *configure_children* is false, do not configure the children in 
+        If *configure_children* is false, do not configure the children in
         *cfg*.
         """
         StoreManager.configure(self, cfg)
@@ -240,9 +240,9 @@ def add_child(self, child):
 
     def is_barcodevariant(self):
         """
-        Return ``True`` if all :py:class:`~seqlib.seqlib.SeqLib` in the 
-        :py:class:`~selection.Selection` are 
-        :py:class:`~barcodevariant.BcvSeqLib` objects with 
+        Return ``True`` if all :py:class:`~seqlib.seqlib.SeqLib` in the
+        :py:class:`~selection.Selection` are
+        :py:class:`~barcodevariant.BcvSeqLib` objects with
         the same barcode map, else ``False``.
         """
         return (
@@ -252,9 +252,9 @@ def is_barcodevariant(self):
 
     def is_barcodeid(self):
         """
-        Return ``True`` if all :py:class:`~seqlib.SeqLib` in the 
-        :py:class:`~selection.Selection` are 
-        :py:class:`~barcodeid.BcidSeqLib` objects with 
+        Return ``True`` if all :py:class:`~seqlib.SeqLib` in the
+        :py:class:`~selection.Selection` are
+        :py:class:`~barcodeid.BcidSeqLib` objects with
         the same barcode map, else ``False``.
         """
         return (
@@ -264,24 +264,24 @@ def is_barcodeid(self):
 
     def is_coding(self):
         """
-        Return ``True`` if the all :py:class:`~seqlib.seqlib.SeqLib` in the 
-        :py:class:`~selection.Selection` count protein-coding variants, else 
+        Return ``True`` if the all :py:class:`~seqlib.seqlib.SeqLib` in the
+        :py:class:`~selection.Selection` count protein-coding variants, else
         ``False``.
         """
         return all(x.is_coding() for x in self.children)
 
     def has_wt_sequence(self):
         """
-        Return ``True`` if the all :py:class:`~seqlib.seqlib.SeqLib` in the 
-        :py:class:`~selection.Selection` have a wild type sequence, else 
+        Return ``True`` if the all :py:class:`~seqlib.seqlib.SeqLib` in the
+        :py:class:`~selection.Selection` have a wild type sequence, else
         ``False``.
         """
         return all(x.has_wt_sequence() for x in self.children)
 
     def merge_counts_unfiltered(self, label):
         """
-        Counts :py:class:`~seqlib.seqlib.SeqLib` objects and tabulates counts 
-        for each timepoint. :py:class:`~seqlib.seqlib.SeqLib` objects from 
+        Counts :py:class:`~seqlib.seqlib.SeqLib` objects and tabulates counts
+        for each timepoint. :py:class:`~seqlib.seqlib.SeqLib` objects from
         the same timepoint are combined by summing the counts.
 
         Stores the unfiltered counts under ``/main/label/counts_unfiltered``.
@@ -364,14 +364,14 @@ def merge_counts_unfiltered(self, label):
 
     def filter_counts(self, label):
         """
-        Converts unfiltered counts stored in ``/main/label/counts_unfiltered`` 
-        into filtered counts calculated from complete cases (elements with a 
+        Converts unfiltered counts stored in ``/main/label/counts_unfiltered``
+        into filtered counts calculated from complete cases (elements with a
         non-zero count in each time point).
 
-        For the most basic element type (variant or barcode, depending on the 
-        experimental design), the result of this operation simply drops any 
-        rows that have missing counts. For other element types, such as 
-        synonymous variants, the counts are re-aggregated using only the 
+        For the most basic element type (variant or barcode, depending on the
+        experimental design), the result of this operation simply drops any
+        rows that have missing counts. For other element types, such as
+        synonymous variants, the counts are re-aggregated using only the
         complete cases in the underlying element type.
         """
         if (self.is_barcodeid() or self.is_barcodevariant()) and label != "barcodes":
@@ -387,7 +387,7 @@ def filter_counts(self, label):
         df.dropna(axis="index", how="any", inplace=True)
         self.store.put(
             "/main/{}/counts".format(label),
-            df.astype(float),
+            df.astype(int),
             format="table",
             data_columns=df.columns,
         )
@@ -414,7 +414,7 @@ def combine_barcode_maps(self):
 
     def calculate(self):
         """
-        Wrapper method to calculate counts and enrichment scores 
+        Wrapper method to calculate counts and enrichment scores
         for all data in the :py:class:`~selection.Selection`.
         """
         if len(self.labels) == 0:
@@ -819,7 +819,7 @@ def wt_plot(self, pdf):
 
         *pdf* is an open PdfPages instance.
 
-        Only created for selections that use WLS or OLS scoring and have a wild type specified. 
+        Only created for selections that use WLS or OLS scoring and have a wild type specified.
         Uses :py:func:`~plots.fit_axes` for the plotting.
         """
         self.logger.info("Creating wild type fit plot")
@@ -1102,7 +1102,7 @@ def write_tsv(self):
         """
         Write each table from the store to its own tab-separated file.
 
-        Files are written to a ``tsv`` directory in the default output location. 
+        Files are written to a ``tsv`` directory in the default output location.
         File names are the HDF5 key with ``'_'`` substituted for ``'/'``.
         """
         if self.tsv_requested:
@@ -1114,7 +1114,7 @@ def write_tsv(self):
 
     def synonymous_variants(self):
         """
-        Populate and return a dictionary mapping synonymous variants to the 
+        Populate and return a dictionary mapping synonymous variants to the
         list of associated variants in ``/main/variants/counts``.
         """
         mapping = dict()
@@ -1258,7 +1258,7 @@ def barcodemap_mapping(self):
 
     def calc_outliers(self, label, minimum_components=4, log_chunksize=20000):
         """
-        Test whether an element's individual components have significantly different 
+        Test whether an element's individual components have significantly different
         scores from the element. Results are stored in ``'/main/<label>/outliers'``.
 
         Args:

From 8ad9cde923bfbfc76148aaa44aee177b994ffe6e Mon Sep 17 00:00:00 2001
From: Chris <17653365+odcambc@users.noreply.github.com>
Date: Tue, 3 Dec 2024 20:00:23 -0600
Subject: [PATCH 06/10] fix indexing, type handling

---
 enrich2/config_check.py     | 26 +++++++++++++-------------
 enrich2/experiment.py       |  4 ++--
 enrich2/gui/configurator.py | 10 +++++-----
 enrich2/seqlib.py           |  6 ++++--
 4 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/enrich2/config_check.py b/enrich2/config_check.py
index 8ec4876..c84fb25 100644
--- a/enrich2/config_check.py
+++ b/enrich2/config_check.py
@@ -9,14 +9,14 @@
 
 def is_experiment(cfg):
     """
-    Check if the given configuration object specifies an 
+    Check if the given configuration object specifies an
     :py:class:`~enrich2.experiment.Experiment`.
 
     Args:
         cfg (dict): decoded JSON object
 
     Returns:
-        bool: True if `cfg` if specifies an 
+        bool: True if `cfg` if specifies an
         :py:class:`~enrich2.experiment.Experiment`, else False.
 
     """
@@ -28,14 +28,14 @@ def is_experiment(cfg):
 
 def is_condition(cfg):
     """
-    Check if the given configuration object specifies a 
+    Check if the given configuration object specifies a
     :py:class:`~enrich2.condition.Condition`.
 
     Args:
         cfg (dict): decoded JSON object
 
     Returns:
-        bool: True if `cfg` if specifies a 
+        bool: True if `cfg` if specifies a
         :py:class:`~enrich2.condition.Condition`, else False.
 
     """
@@ -47,14 +47,14 @@ def is_condition(cfg):
 
 def is_selection(cfg):
     """
-    Check if the given configuration object specifies a 
+    Check if the given configuration object specifies a
     :py:class:`~enrich2.selection.Selection`.
 
     Args:
         cfg (dict): decoded JSON object
 
     Returns:
-        bool: True if `cfg` if specifies a 
+        bool: True if `cfg` if specifies a
         :py:class:`~enrich2.selection.Selection`, else False.
 
     """
@@ -66,14 +66,14 @@ def is_selection(cfg):
 
 def is_seqlib(cfg):
     """
-    Check if the given configuration object specifies a 
+    Check if the given configuration object specifies a
     :py:class:`~enrich2.seqlib.SeqLib` derived object.
 
     Args:
         cfg (dict): decoded JSON object
 
     Returns:
-        bool: True if `cfg` if specifies a :py:class:`~enrich2.seqlib.SeqLib` 
+        bool: True if `cfg` if specifies a :py:class:`~enrich2.seqlib.SeqLib`
         derived object, else False.
 
     """
@@ -85,14 +85,14 @@ def is_seqlib(cfg):
 
 def seqlib_type(cfg):
     """
-    Get the type of :py:class:`~enrich2.seqlib.SeqLib` derived object 
+    Get the type of :py:class:`~enrich2.seqlib.SeqLib` derived object
     specified by the configuration object.
 
     Args:
         cfg (dict): decoded JSON object
 
     Returns:
-        str: The class name of the :py:class:`~seqlib.seqlib.SeqLib` derived 
+        str: The class name of the :py:class:`~seqlib.seqlib.SeqLib` derived
         object specified by `cfg`.
 
     Raises:
@@ -123,15 +123,15 @@ def seqlib_type(cfg):
 
 def element_type(cfg):
     """
-    Get the type of :py:class:`~enrich2.storemanager.StoreManager` derived 
+    Get the type of :py:class:`~enrich2.storemanager.StoreManager` derived
     object specified by the configuration object.
 
     Args:
         cfg (dict): decoded JSON object
 
     Returns:
-        str: The class name of the 
-        :py:class:`~enrich2.storemanager.StoreManager` derived object specified 
+        str: The class name of the
+        :py:class:`~enrich2.storemanager.StoreManager` derived object specified
         by `cfg`.
 
     Raises:
diff --git a/enrich2/experiment.py b/enrich2/experiment.py
index b44183a..329b099 100644
--- a/enrich2/experiment.py
+++ b/enrich2/experiment.py
@@ -251,7 +251,7 @@ def calc_counts(self, label):
                     "/main/{}/counts_unfiltered" "".format(label)
                 )
                 for tp in sel.timepoints:
-                    data.loc[:][cnd.name, sel.name, "c_{}".format(tp)] = sel_data[
+                    data.loc[:, (cnd.name, sel.name, "c_{}".format(tp))] = sel_data[
                         "c_{}".format(tp)
                     ]
         self.store.put("/main/{}/counts".format(label), data, format="table")
@@ -308,7 +308,7 @@ def calc_shared_full(self, label):
         self.logger.info(
             "Populating Experiment data frame with scores ({})".format(label)
         )
-        data = pd.DataFrame(index=combined, columns=columns)
+        data = pd.DataFrame(index=combined, columns=columns).astype(float)
         for cnd in self.children:
             for sel in cnd.children:
                 sel_data = sel.store.select("/main/{}/scores".format(label))
diff --git a/enrich2/gui/configurator.py b/enrich2/gui/configurator.py
index e562a39..e43678f 100644
--- a/enrich2/gui/configurator.py
+++ b/enrich2/gui/configurator.py
@@ -238,7 +238,7 @@ def create_new_element(self):
         """
         Create and return a new element based on the current selection.
 
-        This element is not added to the treeview. 
+        This element is not added to the treeview.
         """
         element = None
         parent_element = self.get_focused_element()
@@ -362,7 +362,7 @@ def menu_open(self):
         fname = tkinter.filedialog.askopenfilename()
         if len(fname) > 0:  # file was selected
             try:
-                with open(fname, "rU") as handle:
+                with open(fname, "r") as handle:
                     cfg = json.load(handle)
             except ValueError:
                 tkinter.messagebox.showerror(None, "Failed to parse config file.")
@@ -432,11 +432,11 @@ def menu_selectall(self):
 
     def delete_element(self, tree_id):
         """
-        Delete element with Treeview id *tree_id* from the tree, from the element 
-        dictionary, and from the associated data structure. Recursively 
+        Delete element with Treeview id *tree_id* from the tree, from the element
+        dictionary, and from the associated data structure. Recursively
         deletes all children of *tree_id*.
 
-        The tree should be refreshed using :py:meth:`refresh_tree` after 
+        The tree should be refreshed using :py:meth:`refresh_tree` after
         each deletion. This is the responsibility of the caller.
 
         """
diff --git a/enrich2/seqlib.py b/enrich2/seqlib.py
index 032c864..2c27bd2 100644
--- a/enrich2/seqlib.py
+++ b/enrich2/seqlib.py
@@ -278,7 +278,7 @@ def read_quality_filter(self, fq):
         Checks ``'chastity'``, ``'min quality'``, ``'avg quality'``,
         ``'max N'``, and ``'remove unresolvable'``.
         Counts failed reads for later output and reports the filtered read if
-        desired. 
+        desired.
         Returns ``True`` if the read passes all filters, else ``False``.
         """
         filter_flags = dict()
@@ -397,7 +397,9 @@ def counts_from_file_tsv(self, fname):
         if label is None:
             raise ValueError("No valid element labels [{}]".format(self.name))
         key = "/raw/{}/counts".format(label)
-        self.store.put(key, df, format="table", data_columns=df.columns, dtype=np.int32)
+        self.store.put(
+            key, df.astype(np.int32), format="table", data_columns=df.columns
+        )
 
     def counts_from_file(self, fname):
         """Get raw counts from a counts file instead of FASTQ_ file.

From 9940687f131d52193c4d5bbfde2e042c593f41d7 Mon Sep 17 00:00:00 2001
From: Chris <17653365+odcambc@users.noreply.github.com>
Date: Wed, 4 Dec 2024 09:58:08 -0600
Subject: [PATCH 07/10] update open newline mode

---
 enrich2/barcodemap.py | 2 +-
 enrich2/fqread.py     | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/enrich2/barcodemap.py b/enrich2/barcodemap.py
index 29fa36e..387d12a 100644
--- a/enrich2/barcodemap.py
+++ b/enrich2/barcodemap.py
@@ -43,7 +43,7 @@ def __init__(self, mapfile, is_variant=False):
             elif ext in (".gz"):
                 handle = gzip.GzipFile(mapfile, "rU")
             else:
-                handle = open(mapfile, "rU")
+                handle = open(mapfile, "r")
         except IOError:
             raise IOError(
                 "Could not open barcode map file '{}' [{}]".format(mapfile, self.name)
diff --git a/enrich2/fqread.py b/enrich2/fqread.py
index f22d44c..f152413 100644
--- a/enrich2/fqread.py
+++ b/enrich2/fqread.py
@@ -238,11 +238,11 @@ def read_fastq(fname, filter_function=None, buffer_size=BUFFER_SIZE, qbase=33):
     """
     _, _, _, compression = split_fastq_path(fname)
     if compression is None:  # raw FASTQ
-        handle = open(fname, "rU")
+        handle = open(fname, "rb")
     elif compression == "bz2":
-        handle = bz2.BZ2File(fname, "rU")
+        handle = bz2.BZ2File(fname, "r")
     elif compression == "gz":
-        handle = gzip.GzipFile(fname, "rU")
+        handle = gzip.GzipFile(fname, "r")
     else:
         raise IOError("unrecognized compression mode '{mode}'".format(mode=compression))
 

From 26dcb67904cfbc3d3b83f3de27a1cbb3c19e987c Mon Sep 17 00:00:00 2001
From: Chris <17653365+odcambc@users.noreply.github.com>
Date: Wed, 4 Dec 2024 10:01:17 -0600
Subject: [PATCH 08/10] update env to py3

---
 docs/_static/enrich2_env.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/_static/enrich2_env.yml b/docs/_static/enrich2_env.yml
index f21afb1..f735506 100644
--- a/docs/_static/enrich2_env.yml
+++ b/docs/_static/enrich2_env.yml
@@ -1,6 +1,6 @@
 name: enrich2
 dependencies:
-  - python=2.7
+  - python=3
   - numpy
   - scipy
   - pandas

From 0f2c913e861883745d693deb2bb53314430a081a Mon Sep 17 00:00:00 2001
From: Chris <17653365+odcambc@users.noreply.github.com>
Date: Wed, 4 Dec 2024 14:16:15 -0600
Subject: [PATCH 09/10] byte handling and regex fixes

---
 enrich2/barcodemap.py |  5 +++--
 enrich2/fqread.py     | 10 +++++-----
 enrich2/main.py       |  6 +++++-
 enrich2/seqlib.py     |  7 +++++--
 enrich2/variant.py    | 27 +++++++++++++--------------
 5 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/enrich2/barcodemap.py b/enrich2/barcodemap.py
index 387d12a..357dfbd 100644
--- a/enrich2/barcodemap.py
+++ b/enrich2/barcodemap.py
@@ -39,9 +39,9 @@ def __init__(self, mapfile, is_variant=False):
         try:
             ext = os.path.splitext(mapfile)[-1].lower()
-            if ext in (".bz2"):
-                handle = bz2.BZ2File(mapfile, "rU")
+            if ext == ".bz2":
+                handle = bz2.BZ2File(mapfile, "r")
-            elif ext in (".gz"):
-                handle = gzip.GzipFile(mapfile, "rU")
+            elif ext == ".gz":
+                handle = gzip.GzipFile(mapfile, "r")
             else:
                 handle = open(mapfile, "r")
         except IOError:
@@ -51,6 +51,7 @@ def __init__(self, mapfile, is_variant=False):
 
         # handle each line
         for line in handle:
+            line = line.decode("utf-8") if isinstance(line, bytes) else line
             # skip comments and whitespace-only lines
             if len(line.strip()) == 0 or line[0] == "#":
                 continue
diff --git a/enrich2/fqread.py b/enrich2/fqread.py
index f152413..2c1a8d0 100644
--- a/enrich2/fqread.py
+++ b/enrich2/fqread.py
@@ -56,7 +56,7 @@ class FQRead(object):
     def __init__(self, header, sequence, header2, quality, qbase=33):
         if len(sequence) != len(quality):
             raise ValueError("different lengths for sequence and quality")
-        elif header[0] != "@" or header2[0] != "+":
+        elif chr(header[0]) != "@" or chr(header2[0]) != "+":
             raise ValueError("improperly formatted FASTQ record")
         else:
             self.header = header
@@ -247,7 +247,7 @@ def read_fastq(fname, filter_function=None, buffer_size=BUFFER_SIZE, qbase=33):
         raise IOError("unrecognized compression mode '{mode}'".format(mode=compression))
 
     eof = False
-    leftover = ""
+    leftover = b""
 
     while not eof:
         buf = handle.read(buffer_size)
@@ -255,8 +255,8 @@ def read_fastq(fname, filter_function=None, buffer_size=BUFFER_SIZE, qbase=33):
             eof = True
 
         buf = leftover + buf  # prepend partial record from previous buffer
-        lines = buf.split("\n")
-        fastq_count = len(lines) / 4
+        lines = buf.split(b"\n")
+        fastq_count = len(lines) // 4
 
         if not eof:  # handle lines from the trailing partial FASTQ record
             dangling = len(lines) % 4
@@ -264,7 +264,7 @@ def read_fastq(fname, filter_function=None, buffer_size=BUFFER_SIZE, qbase=33):
                 dangling = 4
                 fastq_count = fastq_count - 1
             # join the leftover lines back into a string
-            leftover = "\n".join(lines[len(lines) - dangling :])
+            leftover = b"\n".join(lines[len(lines) - dangling :])
 
         # index into the list of lines to pull out the FASTQ records
         for i in range(fastq_count):
diff --git a/enrich2/main.py b/enrich2/main.py
index a2c93db..d612b95 100755
--- a/enrich2/main.py
+++ b/enrich2/main.py
@@ -150,7 +150,11 @@ def main_cmd():
 
     # start the logs
     if args.log_file is not None:
-        logging.basicConfig(filename=args.log_file, level=LOG_LEVEL, format=LOG_FORMAT)
+        # Create the log directory if needed; dirname is "" for a bare filename
+        log_dir = os.path.dirname(args.log_file)
+        if log_dir:
+            os.makedirs(log_dir, exist_ok=True)
+        logging.basicConfig(filename=args.log_file, encoding='utf-8', level=LOG_LEVEL, format=LOG_FORMAT)
     else:
         logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT)
     logger = logging.getLogger(__name__)
diff --git a/enrich2/seqlib.py b/enrich2/seqlib.py
index 2c27bd2..7ba4257 100644
--- a/enrich2/seqlib.py
+++ b/enrich2/seqlib.py
@@ -190,6 +190,9 @@ def save_counts(self, label, df_dict, raw):
         If *raw* is ``True``, the counts are stored under
         ``"/raw/label/counts"``; else ``"/main/label/counts"``.
         """
+        # FASTQ data is read as bytes; decode the keys and keep their counts
+        if isinstance(next(iter(df_dict), None), bytes):
+            df_dict = {k.decode("utf-8"): v for k, v in df_dict.items()}
         if len(list(df_dict.keys())) == 0:
             raise ValueError("Failed to count {} [{}]".format(label, self.name))
         df = pd.DataFrame.from_dict(df_dict, orient="index", dtype=np.int32)
@@ -301,13 +304,13 @@ def read_quality_filter(self, fq):
                 filter_flags["avg quality"] = True
 
         if self.filters["max N"] >= 0:
-            if fq.sequence.upper().count("N") > self.filters["max N"]:
+            if fq.sequence.upper().count(b"N") > self.filters["max N"]:
                 self.filter_stats["max N"] += 1
                 filter_flags["max N"] = True
 
         if "remove unresolvable" in self.filters:  # OverlapSeqLib only
             if self.filters["remove unresolvable"]:
-                if "X" in fq.sequence:
+                if b"X" in fq.sequence:
                     self.filter_stats["remove unresolvable"] += 1
                     filter_flags["remove unresolvable"] = True
 
diff --git a/enrich2/variant.py b/enrich2/variant.py
index 9576504..2e2a59b 100644
--- a/enrich2/variant.py
+++ b/enrich2/variant.py
@@ -13,24 +13,23 @@
 
 #: Matches a single amino acid substitution in HGVS_ format.
 re_protein = re.compile(
-    "(?P<match>p\.(?P<pre>[A-Z][a-z][a-z])(?P<pos>-?\d+)" "(?P<post>[A-Z][a-z][a-z]))"
+    r"(?P<match>p\.(?P<pre>[A-Z][a-z][a-z])(?P<pos>-?\d+)" "(?P<post>[A-Z][a-z][a-z]))"
 )
 
 #: Matches a single nucleotide substitution (coding or noncoding)
 #: in HGVS_ format.
 re_nucleotide = re.compile(
-    "(?P<match>[nc]\.(?P<pos>-?\d+)(?P<pre>[ACGT])>(?P<post>[ACGT]))"
+    r"(?P<match>[nc]\.(?P<pos>-?\d+)(?P<pre>[ACGT])>(?P<post>[ACGT]))"
 )
 
 #: Matches a single coding nucleotide substitution in HGVS_ format.
 re_coding = re.compile(
-    "(?P<match>c\.(?P<pos>-?\d+)(?P<pre>[ACGT])>(?P<post>[ACGT]) "
-    "\(p\.(?:=|[A-Z][a-z][a-z]-?\d+[A-Z][a-z][a-z])\))"
+    r"(?P<match>c\.(?P<pos>-?\d+)(?P<pre>[ACGT])>(?P<post>[ACGT]) \(p\.(?:=|[A-Z][a-z][a-z]-?\d+[A-Z][a-z][a-z])\))"
 )
 
 #: Matches a single noncoding nucleotide substitution in HGVS_ format.
 re_noncoding = re.compile(
-    "(?P<match>n\.(?P<pos>-?\d+)(?P<pre>[ACGT])>(?P<post>[ACGT]))"
+    r"(?P<match>n\.(?P<pos>-?\d+)(?P<pre>[ACGT])>(?P<post>[ACGT]))"
 )
 
 
@@ -69,13 +68,13 @@ def hgvs2single(s):
 def single2hgvs(s):
     """
     Convert single-letter amino acid changes in the form
-    <pre><pos><post> into HGVS strings that match Enrich2 
+    <pre><pos><post> into HGVS strings that match Enrich2
     output.
 
     Searches the string s for all instances of the above
     pattern and returns a list of Enrich2 variants.
     """
-    t = re.findall("[A-Z*]\d+[A-Z*]", s)
+    t = re.findall(r"[A-Z*]\d+[A-Z*]", s)
     return ["p.{}{}{}".format(AA_CODES[x[0]], x[1:-1], AA_CODES[x[-1]]) for x in t]
 
 
@@ -174,7 +173,7 @@ def protein_variant(variant):
     elif variant == SYNONYMOUS_VARIANT:
         return SYNONYMOUS_VARIANT
     else:
-        matches = re.findall("\((p\.\S*)\)", variant)
+        matches = re.findall(r"\((p\.\S*)\)", variant)
         if len(matches) == 0:
             raise ValueError("Invalid coding variant string.")
         # uniqify and remove synonymous
@@ -389,19 +388,19 @@ def count_variant(self, variant_dna, include_indels=True):
 
             for pos, change in mutations:
                 ref_dna_pos = pos + self.wt.dna_offset + 1
-                ref_pro_pos = pos / 3 + self.wt.protein_offset + 1
+                ref_pro_pos = pos // 3 + self.wt.protein_offset + 1
                 mut = "c.{pos}{change}".format(pos=ref_dna_pos, change=change)
                 if has_indel(change):
                     mut += " (p.{pre}{pos}fs)".format(
-                        pre=AA_CODES[self.wt.protein_seq[pos / 3]], pos=ref_pro_pos
+                        pre=AA_CODES[self.wt.protein_seq[pos // 3]], pos=ref_pro_pos
                     )
-                elif variant_protein[pos / 3] == self.wt.protein_seq[pos / 3]:
+                elif variant_protein[pos // 3] == self.wt.protein_seq[pos // 3]:
                     mut += " (p.=)"
                 else:
                     mut += " (p.{pre}{pos}{post})".format(
-                        pre=AA_CODES[self.wt.protein_seq[pos / 3]],
+                        pre=AA_CODES[self.wt.protein_seq[pos // 3]],
                         pos=ref_pro_pos,
-                        post=AA_CODES[variant_protein[pos / 3]],
+                        post=AA_CODES[variant_protein[pos // 3]],
                     )
                 mutation_strings.append(mut)
         else:
@@ -419,7 +418,7 @@ def count_variant(self, variant_dna, include_indels=True):
     def count_synonymous(self):
         """
         Combine counts for synonymous variants (defined as variants that differ
-        at the nucleotide level but not at the amino acid level) and store them 
+        at the nucleotide level but not at the amino acid level) and store them
         under the ``synonymous`` label.
 
         This method should be called only after ``variants`` have been counted.

From d2abab2525bf222e07829e51aed15280e341dcf5 Mon Sep 17 00:00:00 2001
From: Chris <17653365+odcambc@users.noreply.github.com>
Date: Wed, 4 Dec 2024 16:04:11 -0600
Subject: [PATCH 10/10] fix writing count csv

---
 enrich2/experiment.py | 3 ++-
 enrich2/wildtype.py   | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/enrich2/experiment.py b/enrich2/experiment.py
index 329b099..9efb98f 100644
--- a/enrich2/experiment.py
+++ b/enrich2/experiment.py
@@ -251,9 +251,10 @@ def calc_counts(self, label):
                     "/main/{}/counts_unfiltered" "".format(label)
                 )
                 for tp in sel.timepoints:
-                    data.loc[:][(cnd.name, sel.name, "c_{}".format(tp))] = sel_data[
+                    data[(cnd.name, sel.name, "c_{}".format(tp))] = sel_data[
                         "c_{}".format(tp)
                     ]
+
         self.store.put("/main/{}/counts".format(label), data, format="table")
 
     def calc_shared_full(self, label):
diff --git a/enrich2/wildtype.py b/enrich2/wildtype.py
index 872c1d4..3670b75 100644
--- a/enrich2/wildtype.py
+++ b/enrich2/wildtype.py
@@ -72,7 +72,7 @@ def configure(self, cfg):
 
                 # set the reference offset if it's a multiple of three
                 if self.dna_offset % 3 == 0:
-                    self.protein_offset = self.dna_offset / 3
+                    self.protein_offset = self.dna_offset // 3
                 else:
                     self.logger.warning(
                         "Ignoring reference offset for protein changes (not a multiple of three)"