FowlerLab · afrubin · Dec 7, 2024 · Nov 25, 2024 · Nov 25, 2024 · Dec 4, 2024
diff --git a/docs/_static/enrich2_env.yml b/docs/_static/enrich2_env.yml
@@ -1,6 +1,6 @@
 name: enrich2
 dependencies:
-  - python=2.7
+  - python=3
   - numpy
   - scipy
   - pandas

diff --git a/enrich2/aligner.py b/enrich2/aligner.py
@@ -43,7 +43,7 @@ class Aligner(object):
     _END = 4  # end of traceback
 
     def __init__(self, similarity=_simple_similarity):
-        similarity_keys = similarity.keys()
+        similarity_keys = list(similarity.keys())
         if "gap" in similarity_keys:
             similarity_keys.remove("gap")
         for key in similarity_keys:
@@ -80,12 +80,12 @@ def align(self, seq1, seq2):
         seq2 = seq2.upper()
 
         # build matrix of scores/traceback information
-        for i in xrange(len(seq1) + 1):
+        for i in range(len(seq1) + 1):
             self.matrix[i, 0] = (self.similarity["gap"] * i, Aligner._DEL)
-        for j in xrange(len(seq2) + 1):
+        for j in range(len(seq2) + 1):
             self.matrix[0, j] = (self.similarity["gap"] * j, Aligner._INS)
-        for i in xrange(1, len(seq1) + 1):
-            for j in xrange(1, len(seq2) + 1):
+        for i in range(1, len(seq1) + 1):
+            for j in range(1, len(seq2) + 1):
                 match = (
                     self.matrix[i - 1, j - 1]["score"]
                     + self.similarity[seq1[i - 1]][seq2[j - 1]],

diff --git a/enrich2/barcode.py b/enrich2/barcode.py
@@ -1,4 +1,4 @@
-from __future__ import print_function
+
 import logging
 import sys
 from .seqlib import SeqLib

diff --git a/enrich2/barcodeid.py b/enrich2/barcodeid.py
@@ -112,15 +112,15 @@ def calculate(self):
                 "identifiers",
                 {
                     k: v
-                    for k, v in df_dict.iteritems()
+                    for k, v in df_dict.items()
                     if v >= self.identifier_min_count
                 },
                 raw=False,
             )
             del df_dict
 
             # write the active subset of the BarcodeMap to the store
-            barcodes = barcode_identifiers.keys()
+            barcodes = list(barcode_identifiers.keys())
             barcode_identifiers = pd.DataFrame(
                 {"value": [barcode_identifiers[bc] for bc in barcodes]}, index=barcodes
             )

diff --git a/enrich2/barcodemap.py b/enrich2/barcodemap.py
@@ -39,18 +39,19 @@ def __init__(self, mapfile, is_variant=False):
         try:
             ext = os.path.splitext(mapfile)[-1].lower()
-            if ext in (".bz2"):
-                handle = bz2.BZ2File(mapfile, "rU")
-            elif ext in (".gz"):
-                handle = gzip.GzipFile(mapfile, "rU")
+            # open every format in text mode so iteration always yields str
+            if ext == ".bz2":
+                handle = bz2.open(mapfile, "rt", encoding="utf-8")
+            elif ext == ".gz":
+                handle = gzip.open(mapfile, "rt", encoding="utf-8")
             else:
-                handle = open(mapfile, "rU")
+                handle = open(mapfile, "r", encoding="utf-8")
         except IOError:
             raise IOError(
                 "Could not open barcode map file '{}' [{}]".format(mapfile, self.name)
             )
 
         # handle each line
         for line in handle:
             # skip comments and whitespace-only lines
             if len(line.strip()) == 0 or line[0] == "#":
                 continue
@@ -86,7 +87,7 @@ def __init__(self, mapfile, is_variant=False):
                 if self[barcode] != value:
                     raise ValueError(
                         "Barcode '{}' assigned to multiple "
-                        "unique values".format(barcode, self.name)
+                        "unique values [{}]".format(barcode, self.name)
                     )
             else:
                 self[barcode] = value

diff --git a/enrich2/barcodevariant.py b/enrich2/barcodevariant.py
@@ -112,13 +112,13 @@ def calculate(self):
             # save counts, filtering based on the min count
             self.save_counts(
                 "variants",
-                {k: v for k, v in df_dict.iteritems() if v >= self.variant_min_count},
+                {k: v for k, v in df_dict.items() if v >= self.variant_min_count},
                 raw=False,
             )
             del df_dict
 
             # write the active subset of the BarcodeMap to the store
-            barcodes = barcode_variants.keys()
+            barcodes = list(barcode_variants.keys())
             barcode_variants = pd.DataFrame(
                 {"value": [barcode_variants[bc] for bc in barcodes]}, index=barcodes
             )

diff --git a/enrich2/config_check.py b/enrich2/config_check.py
@@ -9,90 +9,90 @@
 
 def is_experiment(cfg):
     """
-    Check if the given configuration object specifies an 
+    Check if the given configuration object specifies an
     :py:class:`~enrich2.experiment.Experiment`.
 
     Args:
         cfg (dict): decoded JSON object
 
     Returns:
-        bool: True if `cfg` if specifies an 
+        bool: True if `cfg` specifies an
         :py:class:`~enrich2.experiment.Experiment`, else False.
 
     """
-    if "conditions" in cfg.keys():
+    if "conditions" in cfg:
         return True
     else:
         return False
 
 
 def is_condition(cfg):
     """
-    Check if the given configuration object specifies a 
+    Check if the given configuration object specifies a
     :py:class:`~enrich2.condition.Condition`.
 
     Args:
         cfg (dict): decoded JSON object
 
     Returns:
-        bool: True if `cfg` if specifies a 
+        bool: True if `cfg` specifies a
         :py:class:`~enrich2.condition.Condition`, else False.
 
     """
-    if "selections" in cfg.keys():
+    if "selections" in cfg:
         return True
     else:
         return False
 
 
 def is_selection(cfg):
     """
-    Check if the given configuration object specifies a 
+    Check if the given configuration object specifies a
     :py:class:`~enrich2.selection.Selection`.
 
     Args:
         cfg (dict): decoded JSON object
 
     Returns:
-        bool: True if `cfg` if specifies a 
+        bool: True if `cfg` specifies a
         :py:class:`~enrich2.selection.Selection`, else False.
 
     """
-    if "libraries" in cfg.keys():
+    if "libraries" in cfg:
         return True
     else:
         return False
 
 
 def is_seqlib(cfg):
     """
-    Check if the given configuration object specifies a 
+    Check if the given configuration object specifies a
     :py:class:`~enrich2.seqlib.SeqLib` derived object.
 
     Args:
         cfg (dict): decoded JSON object
 
     Returns:
-        bool: True if `cfg` if specifies a :py:class:`~enrich2.seqlib.SeqLib` 
+        bool: True if `cfg` specifies a :py:class:`~enrich2.seqlib.SeqLib`
         derived object, else False.
 
     """
-    if "fastq" in cfg.keys() or "identifiers" in cfg.keys():
+    if "fastq" in cfg or "identifiers" in cfg:
         return True
     else:
         return False
 
 
 def seqlib_type(cfg):
     """
-    Get the type of :py:class:`~enrich2.seqlib.SeqLib` derived object 
+    Get the type of :py:class:`~enrich2.seqlib.SeqLib` derived object
     specified by the configuration object.
 
     Args:
         cfg (dict): decoded JSON object
 
     Returns:
-        str: The class name of the :py:class:`~seqlib.seqlib.SeqLib` derived 
+        str: The class name of the :py:class:`~seqlib.seqlib.SeqLib` derived
         object specified by `cfg`.
 
     Raises:
@@ -123,15 +123,15 @@ def seqlib_type(cfg):
 
 def element_type(cfg):
     """
-    Get the type of :py:class:`~enrich2.storemanager.StoreManager` derived 
+    Get the type of :py:class:`~enrich2.storemanager.StoreManager` derived
     object specified by the configuration object.
 
     Args:
         cfg (dict): decoded JSON object
 
     Returns:
-        str: The class name of the 
-        :py:class:`~enrich2.storemanager.StoreManager` derived object specified 
+        str: The class name of the
+        :py:class:`~enrich2.storemanager.StoreManager` derived object specified
         by `cfg`.
 
     Raises:

diff --git a/enrich2/dataframe.py b/enrich2/dataframe.py
@@ -144,10 +144,10 @@ def fill_position_gaps(positions, gap_size):
 
     # fill in short gaps
     fill = set()
-    for i in xrange(len(positions) - 1):
+    for i in range(len(positions) - 1):
         delta = positions[i + 1] - positions[i]
         if delta > 1 and delta <= gap_size:
-            fill.update(positions[i] + n + 1 for n in xrange(delta))
+            fill.update(positions[i] + n + 1 for n in range(delta))
     fill.update(positions)
 
     return sorted(list(fill))

diff --git a/enrich2/experiment.py b/enrich2/experiment.py
@@ -1,4 +1,4 @@
-from __future__ import print_function
+
 import logging
 import pandas as pd
 import numpy as np
@@ -130,16 +130,16 @@ def validate(self):
 
     def is_coding(self):
         """
-        Return ``True`` if the all :py:class:`~selection.Selection` in the 
-        :py:class:`~experiment.Experiment` count protein-coding variants, else 
+        Return ``True`` if all :py:class:`~selection.Selection` in the
+        :py:class:`~experiment.Experiment` count protein-coding variants, else
         ``False``.
         """
         return all(x.is_coding() for x in self.selection_list())
 
     def has_wt_sequence(self):
         """
-        Return ``True`` if the all :py:class:`~selection.Selection` in the 
-        :py:class:`~experiment.Experiment` have a wild type sequence, else 
+        Return ``True`` if all :py:class:`~selection.Selection` in the
+        :py:class:`~experiment.Experiment` have a wild type sequence, else
         ``False``.
         """
         return all(x.has_wt_sequence() for x in self.selection_list())
@@ -172,15 +172,15 @@ def combine_barcode_maps(self):
         If multiple variants or IDs map to the same barcode, only the first one
         will be present in the barcode map table.
 
-        The ``'/main/barcodemap'`` table is not created if no 
+        The ``'/main/barcodemap'`` table is not created if no
         :py:class:`~selection.Selection` has barcode map information.
         """
         if self.check_store("/main/barcodemap"):
             return
 
         bcm = None
         for sel in self.selection_list():
-            if "/main/barcodemap" in sel.store.keys():
+            if "/main/barcodemap" in list(sel.store.keys()):
                 if bcm is None:
                     bcm = sel.store["/main/barcodemap"]
                 else:
@@ -218,7 +218,7 @@ def calc_counts(self, label):
                 selections_index.extend([sel.name] * len(sel.timepoints))
                 values_index.extend(["c_{}".format(x) for x in sorted(sel.timepoints)])
         columns = pd.MultiIndex.from_tuples(
-            zip(conditions_index, selections_index, values_index),
+            list(zip(conditions_index, selections_index, values_index)),
             names=["condition", "selection", "timepoint"],
         )
 
@@ -251,9 +251,10 @@ def calc_counts(self, label):
                     "/main/{}/counts_unfiltered" "".format(label)
                 )
                 for tp in sel.timepoints:
-                    data.loc[:][cnd.name, sel.name, "c_{}".format(tp)] = sel_data[
+                    data[(cnd.name, sel.name, "c_{}".format(tp))] = sel_data[
                         "c_{}".format(tp)
                     ]
+
         self.store.put("/main/{}/counts".format(label), data, format="table")
 
     def calc_shared_full(self, label):
@@ -282,7 +283,7 @@ def calc_shared_full(self, label):
                 selections_index.extend([sel.name] * len(values_list))
                 values_index.extend(sorted(values_list))
         columns = pd.MultiIndex.from_tuples(
-            zip(conditions_index, selections_index, values_index),
+            list(zip(conditions_index, selections_index, values_index)),
             names=["condition", "selection", "value"],
         )
 
@@ -308,7 +309,7 @@ def calc_shared_full(self, label):
         self.logger.info(
             "Populating Experiment data frame with scores ({})".format(label)
         )
-        data = pd.DataFrame(index=combined, columns=columns)
+        data = pd.DataFrame(index=combined, columns=columns).astype(float)
         for cnd in self.children:
             for sel in cnd.children:
                 sel_data = sel.store.select("/main/{}/scores".format(label))
@@ -427,7 +428,7 @@ def calc_pvalues_wt(self, label):
             [sorted(self.child_names()), sorted(["z", "pvalue_raw"])],
             names=["condition", "value"],
         )
-        result_df = pd.DataFrame(index=data.index, columns=columns)
+        result_df = pd.DataFrame(index=data.index, columns=columns, dtype=float)
 
         condition_labels = data.columns.levels[0]
         for cnd in condition_labels:
@@ -468,7 +469,7 @@ def calc_pvalues_pairwise(self, label):
                 cnd2_index.extend([cnd2] * len(values_list))
                 values_index.extend(sorted(values_list))
         columns = pd.MultiIndex.from_tuples(
-            zip(cnd1_index, cnd2_index, values_index),
+            list(zip(cnd1_index, cnd2_index, values_index)),
             names=["condition1", "condition2", "value"],
         )
 
@@ -528,7 +529,7 @@ def write_tsv(self):
         """
         if self.tsv_requested:
             self.logger.info("Generating tab-separated output files")
-            for k in self.store.keys():
+            for k in list(self.store.keys()):
                 self.write_table_tsv(k)
         for s in self.selection_list():
             s.write_tsv()