From ddb767a860d8f35e1a44bd0a80c3b89a2a106a48 Mon Sep 17 00:00:00 2001 From: Chris <17653365+odcambc@users.noreply.github.com> Date: Mon, 25 Nov 2024 13:10:09 -0600 Subject: [PATCH 01/10] 2to3 --- enrich2/aligner.py | 10 ++-- enrich2/barcode.py | 2 +- enrich2/barcodeid.py | 4 +- enrich2/barcodevariant.py | 4 +- enrich2/config_check.py | 8 +-- enrich2/dataframe.py | 4 +- enrich2/experiment.py | 12 ++-- enrich2/fqread.py | 6 +- enrich2/gui/configurator.py | 88 ++++++++++++++--------------- enrich2/gui/create_root_dialog.py | 28 ++++----- enrich2/gui/create_seqlib_dialog.py | 16 +++--- enrich2/gui/delete_dialog.py | 20 +++---- enrich2/gui/dialog_elements.py | 54 +++++++++--------- enrich2/gui/edit_dialog.py | 32 +++++------ enrich2/gui/runner_window.py | 34 +++++------ enrich2/gui/seqlib_apply_dialog.py | 18 +++--- enrich2/main.py | 10 ++-- enrich2/overlap.py | 6 +- enrich2/plots.py | 8 +-- enrich2/random_effects.py | 2 +- enrich2/selection.py | 16 +++--- enrich2/seqlib.py | 14 ++--- enrich2/sfmap.py | 10 ++-- enrich2/storemanager.py | 12 ++-- enrich2/variant.py | 8 +-- enrich2/wildtype.py | 6 +- 26 files changed, 216 insertions(+), 216 deletions(-) diff --git a/enrich2/aligner.py b/enrich2/aligner.py index 30a1f06..eec84aa 100644 --- a/enrich2/aligner.py +++ b/enrich2/aligner.py @@ -43,7 +43,7 @@ class Aligner(object): _END = 4 # end of traceback def __init__(self, similarity=_simple_similarity): - similarity_keys = similarity.keys() + similarity_keys = list(similarity.keys()) if "gap" in similarity_keys: similarity_keys.remove("gap") for key in similarity_keys: @@ -80,12 +80,12 @@ def align(self, seq1, seq2): seq2 = seq2.upper() # build matrix of scores/traceback information - for i in xrange(len(seq1) + 1): + for i in range(len(seq1) + 1): self.matrix[i, 0] = (self.similarity["gap"] * i, Aligner._DEL) - for j in xrange(len(seq2) + 1): + for j in range(len(seq2) + 1): self.matrix[0, j] = (self.similarity["gap"] * j, Aligner._INS) - for i in xrange(1, len(seq1) + 1): - for j in xrange(1, len(seq2) + 1): + for i in range(1, len(seq1) + 1): + for j in range(1, len(seq2) + 1): match = ( self.matrix[i - 1, j - 1]["score"] + self.similarity[seq1[i - 1]][seq2[j - 1]], diff --git a/enrich2/barcode.py b/enrich2/barcode.py index 6b02434..00052ac 100644 --- a/enrich2/barcode.py +++ b/enrich2/barcode.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import logging import sys from .seqlib import SeqLib diff --git a/enrich2/barcodeid.py b/enrich2/barcodeid.py index 245d2e7..26c79b4 100644 --- a/enrich2/barcodeid.py +++ b/enrich2/barcodeid.py @@ -112,7 +112,7 @@ def calculate(self): "identifiers", { k: v - for k, v in df_dict.iteritems() + for k, v in df_dict.items() if v >= self.identifier_min_count }, raw=False, @@ -120,7 +120,7 @@ def calculate(self): del df_dict # write the active subset of the BarcodeMap to the store - barcodes = barcode_identifiers.keys() + barcodes = list(barcode_identifiers.keys()) barcode_identifiers = pd.DataFrame( {"value": [barcode_identifiers[bc] for bc in barcodes]}, index=barcodes ) diff --git a/enrich2/barcodevariant.py b/enrich2/barcodevariant.py index c6245e7..a7e6b41 100644 --- a/enrich2/barcodevariant.py +++ b/enrich2/barcodevariant.py @@ -112,13 +112,13 @@ def calculate(self): # save counts, filtering based on the min count self.save_counts( "variants", - {k: v for k, v in df_dict.iteritems() if v >= self.variant_min_count}, + {k: v for k, v in df_dict.items() if v >= self.variant_min_count}, raw=False, ) del df_dict # write the active subset of the BarcodeMap to the store - barcodes = barcode_variants.keys() + barcodes = list(barcode_variants.keys()) barcode_variants = pd.DataFrame( {"value": [barcode_variants[bc] for bc in barcodes]}, index=barcodes ) diff --git a/enrich2/config_check.py b/enrich2/config_check.py index 3d61d28..8ec4876 100644 --- a/enrich2/config_check.py +++ b/enrich2/config_check.py @@ -20,7 +20,7 @@ def is_experiment(cfg): :py:class:`~enrich2.experiment.Experiment`, else False. """ - if "conditions" in cfg.keys(): + if "conditions" in list(cfg.keys()): return True else: return False @@ -39,7 +39,7 @@ def is_condition(cfg): :py:class:`~enrich2.condition.Condition`, else False. """ - if "selections" in cfg.keys(): + if "selections" in list(cfg.keys()): return True else: return False @@ -58,7 +58,7 @@ def is_selection(cfg): :py:class:`~enrich2.selection.Selection`, else False. """ - if "libraries" in cfg.keys(): + if "libraries" in list(cfg.keys()): return True else: return False @@ -77,7 +77,7 @@ def is_seqlib(cfg): derived object, else False. """ - if "fastq" in cfg.keys() or "identifiers" in cfg.keys(): + if "fastq" in list(cfg.keys()) or "identifiers" in list(cfg.keys()): return True else: return False diff --git a/enrich2/dataframe.py b/enrich2/dataframe.py index 22707ae..fea518a 100644 --- a/enrich2/dataframe.py +++ b/enrich2/dataframe.py @@ -144,10 +144,10 @@ def fill_position_gaps(positions, gap_size): # fill in short gaps fill = set() - for i in xrange(len(positions) - 1): + for i in range(len(positions) - 1): delta = positions[i + 1] - positions[i] if delta > 1 and delta <= gap_size: - fill.update(positions[i] + n + 1 for n in xrange(delta)) + fill.update(positions[i] + n + 1 for n in range(delta)) fill.update(positions) return sorted(list(fill)) diff --git a/enrich2/experiment.py b/enrich2/experiment.py index d9ab19a..dcb7c68 100644 --- a/enrich2/experiment.py +++ b/enrich2/experiment.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import logging import pandas as pd import numpy as np @@ -180,7 +180,7 @@ def combine_barcode_maps(self): bcm = None for sel in self.selection_list(): - if "/main/barcodemap" in sel.store.keys(): + if "/main/barcodemap" in list(sel.store.keys()): if bcm is None: bcm = sel.store["/main/barcodemap"] else: @@ -218,7 +218,7 @@ def calc_counts(self, label): selections_index.extend([sel.name] * len(sel.timepoints)) values_index.extend(["c_{}".format(x) for x in sorted(sel.timepoints)]) columns = pd.MultiIndex.from_tuples( - zip(conditions_index, selections_index, values_index), + list(zip(conditions_index, selections_index, values_index)), names=["condition", "selection", "timepoint"], ) @@ -282,7 +282,7 @@ def calc_shared_full(self, label): selections_index.extend([sel.name] * len(values_list)) values_index.extend(sorted(values_list)) columns = pd.MultiIndex.from_tuples( - zip(conditions_index, selections_index, values_index), + list(zip(conditions_index, selections_index, values_index)), names=["condition", "selection", "value"], ) @@ -468,7 +468,7 @@ def calc_pvalues_pairwise(self, label): cnd2_index.extend([cnd2] * len(values_list)) values_index.extend(sorted(values_list)) columns = pd.MultiIndex.from_tuples( - zip(cnd1_index, cnd2_index, values_index), + list(zip(cnd1_index, cnd2_index, values_index)), names=["condition1", "condition2", "value"], ) @@ -528,7 +528,7 @@ def write_tsv(self): """ if self.tsv_requested: self.logger.info("Generating tab-separated output files") - for k in self.store.keys(): + for k in list(self.store.keys()): self.write_table_tsv(k) for s in self.selection_list(): s.write_tsv() diff --git a/enrich2/fqread.py b/enrich2/fqread.py index 891337a..7c30b54 100644 --- a/enrich2/fqread.py +++ b/enrich2/fqread.py @@ -1,4 +1,4 @@ -from __future__ import print_function + from sys import stderr import os.path import re @@ -255,7 +255,7 @@ def read_fastq(fname, filter_function=None, buffer_size=BUFFER_SIZE, qbase=33): leftover = "\n".join(lines[len(lines) - dangling :]) # index into the list of lines to pull out the FASTQ records - for i in xrange(fastq_count): + for i in range(fastq_count): # (header, sequence, header2, quality) fq = FQRead(*lines[i * 4 : (i + 1) * 4], qbase=qbase) if filter_function is None: # no filtering @@ -289,7 +289,7 @@ def read_fastq_multi( read_fastq(f, filter_function=None, buffer_size=BUFFER_SIZE, qbase=qbase) ) - for records in itertools.izip_longest(*fq_generators, fillvalue=None): + for records in itertools.zip_longest(*fq_generators, fillvalue=None): if None in records: # mismatched file lengths if match_lengths: yield None diff --git a/enrich2/gui/configurator.py b/enrich2/gui/configurator.py index aee12d5..e562a39 100644 --- a/enrich2/gui/configurator.py +++ b/enrich2/gui/configurator.py @@ -1,8 +1,8 @@ -from __future__ import print_function -import Tkinter as tk -import ttk -import tkFileDialog -import tkMessageBox + +import tkinter as tk +import tkinter.ttk +import tkinter.filedialog +import tkinter.messagebox import platform import json from ..config_check import is_seqlib, is_experiment, is_selection, seqlib_type @@ -93,13 +93,13 @@ def treeview_context_menu(self, click): def create_main_frame(self): # Frame for the Treeview and New/Edit/Delete buttons - main = ttk.Frame(self, padding=(3, 3, 12, 12)) + main = tkinter.ttk.Frame(self, padding=(3, 3, 12, 12)) main.rowconfigure(0, weight=1) main.columnconfigure(0, weight=1) main.grid(row=0, column=0, sticky="nsew") # Frame for the Treeview and its scrollbars - tree_frame = ttk.Frame(main, padding=(3, 3, 12, 12)) + tree_frame = tkinter.ttk.Frame(main, padding=(3, 3, 12, 12)) tree_frame.rowconfigure(0, weight=1) tree_frame.rowconfigure(1, weight=0) tree_frame.columnconfigure(0, weight=1) @@ -107,7 +107,7 @@ def create_main_frame(self): tree_frame.grid(row=0, column=0, sticky="nsew") # Treeview with column headings - self.treeview = ttk.Treeview(tree_frame) + self.treeview = tkinter.ttk.Treeview(tree_frame) self.treeview["columns"] = ("class", "barcodes", "variants") self.treeview.column("class", width=120) self.treeview.heading("class", text="Type") @@ -132,35 +132,35 @@ def create_main_frame(self): self.treeview.config(yscroll=tree_ysb.set, xscroll=tree_xsb.set) # Frame for New/Edit/Delete buttons - button_frame = ttk.Frame(main, padding=(3, 3, 12, 12)) + button_frame = tkinter.ttk.Frame(main, padding=(3, 3, 12, 12)) button_frame.grid(row=1, column=0) - new_button = ttk.Button( + new_button = tkinter.ttk.Button( button_frame, text="New...", command=self.new_button_press ) new_button.grid(row=0, column=0) - edit_button = ttk.Button( + edit_button = tkinter.ttk.Button( button_frame, text="Edit...", command=self.edit_button_press ) edit_button.grid(row=0, column=1) - delete_button = ttk.Button( + delete_button = tkinter.ttk.Button( button_frame, text="Delete", command=self.delete_button_press ) delete_button.grid(row=0, column=2) # Frame for Analysis Options - options_frame = ttk.Frame(main, padding=(3, 3, 12, 12)) + options_frame = tkinter.ttk.Frame(main, padding=(3, 3, 12, 12)) options_frame.grid(row=0, column=1, rowspan=2, sticky="nsew") row = 0 - heading = ttk.Label(options_frame, text="Analysis Options") + heading = tkinter.ttk.Label(options_frame, text="Analysis Options") heading.grid(column=0, row=row) row += 1 - scoring_heading = ttk.Label(options_frame, text="Scoring Method") + scoring_heading = tkinter.ttk.Label(options_frame, text="Scoring Method") scoring_heading.grid(column=0, row=row) row += 1 for i, k in enumerate(SCORING_METHODS.keys()): - rb = ttk.Radiobutton( + rb = tkinter.ttk.Radiobutton( options_frame, text=SCORING_METHODS[k].title(), variable=self.scoring_method, @@ -171,11 +171,11 @@ def create_main_frame(self): if i == 0: rb.invoke() - logr_heading = ttk.Label(options_frame, text="Normalization Method") + logr_heading = tkinter.ttk.Label(options_frame, text="Normalization Method") logr_heading.grid(column=0, row=row) row += 1 for i, k in enumerate(LOGR_METHODS.keys()): - rb = ttk.Radiobutton( + rb = tkinter.ttk.Radiobutton( options_frame, text=LOGR_METHODS[k].title(), variable=self.logr_method, @@ -186,19 +186,19 @@ def create_main_frame(self): if i == 0: rb.invoke() - other_heading = ttk.Label(options_frame, text="Other Options") + other_heading = tkinter.ttk.Label(options_frame, text="Other Options") other_heading.grid(column=0, row=row) row += 1 # force recalculate - force_recalculate = ttk.Checkbutton( + force_recalculate = tkinter.ttk.Checkbutton( options_frame, text="Force Recalculation", variable=self.force_recalculate ) force_recalculate.grid(column=0, row=row, sticky="w") row += 1 # component outliers - component_outliers = ttk.Checkbutton( + component_outliers = tkinter.ttk.Checkbutton( options_frame, text="Component Outlier Statistics", variable=self.component_outliers, @@ -207,7 +207,7 @@ def create_main_frame(self): row += 1 # make plots - plots_requested = ttk.Checkbutton( + plots_requested = tkinter.ttk.Checkbutton( options_frame, text="Make Plots", variable=self.plots_requested ) plots_requested.grid(column=0, row=row, sticky="w") @@ -215,21 +215,21 @@ def create_main_frame(self): row += 1 # write tsv - tsv_requested = ttk.Checkbutton( + tsv_requested = tkinter.ttk.Checkbutton( options_frame, text="Write TSV Files", variable=self.tsv_requested ) tsv_requested.grid(column=0, row=row, sticky="w") tsv_requested.invoke() row += 1 - go_button = ttk.Button( + go_button = tkinter.ttk.Button( options_frame, text="Run Analysis", command=self.go_button_press ) go_button.grid(column=0, row=row, sticky="sew") def go_button_press(self): if self.root_element is None: - tkMessageBox.showwarning("", "No experimental design specified.") + tkinter.messagebox.showwarning("", "No experimental design specified.") else: RunnerSavePrompt(self) RunnerWindow(self) @@ -273,7 +273,7 @@ def create_new_element(self): def new_button_press(self): if self.treeview.focus() == "" and self.root_element is not None: - tkMessageBox.showwarning(None, "No parent element selected.") + tkinter.messagebox.showwarning(None, "No parent element selected.") else: if self.treeview.focus() == "" and self.root_element is None: element = CreateRootDialog(self).element @@ -288,7 +288,7 @@ def new_button_press(self): self.refresh_treeview() # select the newly added element if it was successfully added - if element.treeview_id in self.element_dict.keys(): + if element.treeview_id in list(self.element_dict.keys()): self.treeview.focus(element.treeview_id) self.treeview.selection_set(element.treeview_id) else: @@ -299,13 +299,13 @@ def new_button_press(self): def edit_button_press(self): if self.treeview.focus() == "": - tkMessageBox.showwarning(None, "No element selected.") + tkinter.messagebox.showwarning(None, "No element selected.") else: EditDialog(self, self, self.get_focused_element()) def delete_button_press(self): if self.treeview.focus() == "": - tkMessageBox.showwarning(None, "No element selected.") + tkinter.messagebox.showwarning(None, "No element selected.") else: DeleteDialog(self, self) @@ -359,15 +359,15 @@ def create_menubar(self): self.bind("<{}a>".format(accel_bind), lambda event: self.menu_selectall()) def menu_open(self): - fname = tkFileDialog.askopenfilename() + fname = tkinter.filedialog.askopenfilename() if len(fname) > 0: # file was selected try: with open(fname, "rU") as handle: cfg = json.load(handle) except ValueError: - tkMessageBox.showerror(None, "Failed to parse config file.") + tkinter.messagebox.showerror(None, "Failed to parse config file.") except IOError: - tkMessageBox.showerror(None, "Could not read config file.") + tkinter.messagebox.showerror(None, "Could not read config file.") else: if is_experiment(cfg): obj = Experiment() @@ -376,13 +376,13 @@ def menu_open(self): elif is_seqlib(cfg): obj = SEQLIB_CLASSES[seqlib_type(cfg)]() else: - tkMessageBox.showerror(None, "Unrecognized config format.") + tkinter.messagebox.showerror(None, "Unrecognized config format.") return obj.output_dir_override = False try: obj.configure(cfg) except Exception as e: - tkMessageBox.showerror( + tkinter.messagebox.showerror( None, "Failed to process config file:\n{}".format(e) ) else: @@ -394,32 +394,32 @@ def menu_save(self): if len(self.cfg_file_name.get()) == 0: self.menu_saveas() elif self.root_element is None: - tkMessageBox.showwarning(None, "Cannot save empty configuration.") + tkinter.messagebox.showwarning(None, "Cannot save empty configuration.") else: try: with open(self.cfg_file_name.get(), "w") as handle: write_json(self.root_element.serialize(), handle) except IOError: - tkMessageBox.showerror(None, "Failed to save config file.") + tkinter.messagebox.showerror(None, "Failed to save config file.") else: - tkMessageBox.showinfo( + tkinter.messagebox.showinfo( None, "Save successful:\n{}".format(self.cfg_file_name.get()) ) def menu_saveas(self): if self.root_element is None: - tkMessageBox.showwarning(None, "Cannot save empty configuration.") + tkinter.messagebox.showwarning(None, "Cannot save empty configuration.") else: - fname = tkFileDialog.asksaveasfilename() + fname = tkinter.filedialog.asksaveasfilename() if len(fname) > 0: # file was selected try: with open(fname, "w") as handle: write_json(self.root_element.serialize(), handle) except IOError: - tkMessageBox.showerror(None, "Failed to save config file.") + tkinter.messagebox.showerror(None, "Failed to save config file.") else: self.cfg_file_name.set(fname) - tkMessageBox.showinfo( + tkinter.messagebox.showinfo( None, "Save successful:\n{}".format(self.cfg_file_name.get()) ) @@ -427,7 +427,7 @@ def menu_selectall(self): """ Add all elements in the Treeview to the selection. """ - for k in self.element_dict.keys(): + for k in list(self.element_dict.keys()): self.treeview.selection_add(k) def delete_element(self, tree_id): @@ -486,11 +486,11 @@ def set_treeview_properties(self, element): # add the check marks for barcodes/variants if "variants" in element.labels: - self.treeview.set(element.treeview_id, "variants", u"\u2713") + self.treeview.set(element.treeview_id, "variants", "\u2713") else: self.treeview.set(element.treeview_id, "variants", "") if "barcodes" in element.labels: - self.treeview.set(element.treeview_id, "barcodes", u"\u2713") + self.treeview.set(element.treeview_id, "barcodes", "\u2713") else: self.treeview.set(element.treeview_id, "barcodes", "") diff --git a/enrich2/gui/create_root_dialog.py b/enrich2/gui/create_root_dialog.py index c306d20..dfe61ab 100644 --- a/enrich2/gui/create_root_dialog.py +++ b/enrich2/gui/create_root_dialog.py @@ -1,7 +1,7 @@ -from __future__ import print_function -import Tkinter as tk -import ttk -import tkSimpleDialog + +import tkinter as tk +import tkinter.ttk +import tkinter.simpledialog from .dialog_elements import FileEntry, StringEntry, DEFAULT_COLUMNS from .create_seqlib_dialog import SEQLIB_LABEL_TEXT from ..barcode import BarcodeSeqLib @@ -27,7 +27,7 @@ } -class CreateRootDialog(tkSimpleDialog.Dialog): +class CreateRootDialog(tkinter.simpledialog.Dialog): """ Dialog box for creating a new root element. """ @@ -44,23 +44,23 @@ def __init__(self, parent_window, title="Create Root Object"): ) self.name_tk = StringEntry("Name", self.cfg_dict, "name", optional=False) self.element = None - tkSimpleDialog.Dialog.__init__(self, parent_window, title) + tkinter.simpledialog.Dialog.__init__(self, parent_window, title) def body(self, master): row_no = self.name_tk.body(master, 0) row_no += self.output_directory_tk.body(master, row_no) - element_types = ttk.Frame(master, padding=(3, 3, 12, 12)) + element_types = tkinter.ttk.Frame(master, padding=(3, 3, 12, 12)) element_types.grid( column=0, row=row_no, sticky="nsew", columnspan=DEFAULT_COLUMNS ) - message = ttk.Label(element_types, text="Root object type:") + message = tkinter.ttk.Label(element_types, text="Root object type:") message.grid(column=0, row=0) - label = ttk.Label(element_types, text="Experiment") + label = tkinter.ttk.Label(element_types, text="Experiment") label.grid(column=0, row=1, sticky="w") - rb = ttk.Radiobutton( + rb = tkinter.ttk.Radiobutton( element_types, text="Experiment", variable=self.element_tkstring, @@ -69,9 +69,9 @@ def body(self, master): rb.grid(column=0, row=2, sticky="w") rb.invoke() - label = ttk.Label(element_types, text="Selection") + label = tkinter.ttk.Label(element_types, text="Selection") label.grid(column=0, row=3, sticky="w") - rb = ttk.Radiobutton( + rb = tkinter.ttk.Radiobutton( element_types, text="Selection", variable=self.element_tkstring, @@ -79,10 +79,10 @@ def body(self, master): ) rb.grid(column=0, row=4, sticky="w") - label = ttk.Label(element_types, text="SeqLib") + label = tkinter.ttk.Label(element_types, text="SeqLib") label.grid(column=0, row=5, sticky="w") for i, k in enumerate(SEQLIB_LABEL_TEXT.keys()): - rb = ttk.Radiobutton( + rb = tkinter.ttk.Radiobutton( element_types, text=SEQLIB_LABEL_TEXT[k], variable=self.element_tkstring, diff --git a/enrich2/gui/create_seqlib_dialog.py b/enrich2/gui/create_seqlib_dialog.py index f741746..4983399 100644 --- a/enrich2/gui/create_seqlib_dialog.py +++ b/enrich2/gui/create_seqlib_dialog.py @@ -1,7 +1,7 @@ -from __future__ import print_function -import Tkinter as tk -import ttk -import tkSimpleDialog + +import tkinter as tk +import tkinter.ttk +import tkinter.simpledialog from collections import OrderedDict from ..barcode import BarcodeSeqLib from ..barcodevariant import BcvSeqLib @@ -33,7 +33,7 @@ } -class CreateSeqLibDialog(tkSimpleDialog.Dialog): +class CreateSeqLibDialog(tkinter.simpledialog.Dialog): """ Dialog box for creating a new SeqLib. """ @@ -41,14 +41,14 @@ class CreateSeqLibDialog(tkSimpleDialog.Dialog): def __init__(self, parent_window, title="New SeqLib"): self.element_tkstring = tk.StringVar() self.element_type = None - tkSimpleDialog.Dialog.__init__(self, parent_window, title) + tkinter.simpledialog.Dialog.__init__(self, parent_window, title) def body(self, master): - message = ttk.Label(master, text="SeqLib type:") + message = tkinter.ttk.Label(master, text="SeqLib type:") message.grid(column=0, row=0) for i, k in enumerate(SEQLIB_LABEL_TEXT.keys()): - rb = ttk.Radiobutton( + rb = tkinter.ttk.Radiobutton( master, text=SEQLIB_LABEL_TEXT[k], variable=self.element_tkstring, diff --git a/enrich2/gui/delete_dialog.py b/enrich2/gui/delete_dialog.py index 8e93164..c63bf91 100644 --- a/enrich2/gui/delete_dialog.py +++ b/enrich2/gui/delete_dialog.py @@ -1,6 +1,6 @@ -import Tkinter as tk -import ttk -import tkSimpleDialog +import tkinter as tk +import tkinter.ttk +import tkinter.simpledialog def subtree_ids(treeview, x, level=0): @@ -16,7 +16,7 @@ def subtree_ids(treeview, x, level=0): return id_list -class DeleteDialog(tkSimpleDialog.Dialog): +class DeleteDialog(tkinter.simpledialog.Dialog): """ Confirmation dialog box for deleting the selected items from the Treeview. """ @@ -27,7 +27,7 @@ def __init__(self, parent_window, tree, title="Confirm Deletion"): for x in self.tree.treeview.selection(): if x not in [y[0] for y in self.id_tuples]: self.id_tuples.extend(subtree_ids(self.tree.treeview, x)) - tkSimpleDialog.Dialog.__init__(self, parent_window, title) + tkinter.simpledialog.Dialog.__init__(self, parent_window, title) def body(self, master): """ @@ -45,13 +45,13 @@ def body(self, master): message_string = "Delete the following items?\n" for x, level in self.id_tuples: if level == 0: - bullet = " " + u"\u25C6" + bullet = " " + "\u25C6" else: - bullet = " " * (level + 1) + u"\u25C7" - message_string += u"{bullet} {name}\n".format( + bullet = " " * (level + 1) + "\u25C7" + message_string += "{bullet} {name}\n".format( bullet=bullet, name=self.tree.get_element(x).name ) - message = ttk.Label(master, text=message_string, justify="left") + message = tkinter.ttk.Label(master, text=message_string, justify="left") message.grid(row=0, sticky="w") def buttonbox(self): @@ -70,7 +70,7 @@ def buttonbox(self): box.pack() else: - tkSimpleDialog.Dialog.buttonbox(self) + tkinter.simpledialog.Dialog.buttonbox(self) def apply(self): """ diff --git a/enrich2/gui/dialog_elements.py b/enrich2/gui/dialog_elements.py index 2b9495e..4d73534 100644 --- a/enrich2/gui/dialog_elements.py +++ b/enrich2/gui/dialog_elements.py @@ -1,8 +1,8 @@ -from __future__ import print_function -import Tkinter as tk -import ttk -import tkMessageBox -import tkFileDialog + +import tkinter as tk +import tkinter.ttk +import tkinter.messagebox +import tkinter.filedialog import os.path DEFAULT_COLUMNS = 3 @@ -13,7 +13,7 @@ def __init__(self, text): self.text = text def body(self, master, row, columns=DEFAULT_COLUMNS, **kwargs): - label = ttk.Label(master, text=self.text) + label = tkinter.ttk.Label(master, text=self.text) label.grid(row=row, column=0, columnspan=columns, sticky="w") return 1 @@ -53,7 +53,7 @@ def body(self, master, row, columns=DEFAULT_COLUMNS, **kwargs): Returns the number of rows taken by this element. """ - self.checkbox = ttk.Checkbutton(master, text=self.text, variable=self.value) + self.checkbox = tkinter.ttk.Checkbutton(master, text=self.text, variable=self.value) self.checkbox.grid(row=row, column=0, columnspan=columns, sticky="w") return 1 @@ -105,9 +105,9 @@ def body(self, master, row, columns=DEFAULT_COLUMNS, **kwargs): Returns the number of rows taken by this element. """ - label = ttk.Label(master, text=self.text) + label = tkinter.ttk.Label(master, text=self.text) label.grid(row=row, column=0, columnspan=1, sticky="e") - self.entry = ttk.Entry(master, textvariable=self.value) + self.entry = tkinter.ttk.Entry(master, textvariable=self.value) self.entry.grid(row=row, column=1, columnspan=columns - 1, sticky="ew") return 1 @@ -119,7 +119,7 @@ def validate(self): if not self.enabled: return True elif not self.optional and len(self.value.get()) == 0: - tkMessageBox.showwarning("", "{} not specified.".format(self.text)) + tkinter.messagebox.showwarning("", "{} not specified.".format(self.text)) return False else: return True @@ -168,25 +168,25 @@ def body(self, master, row, columns=DEFAULT_COLUMNS, **kwargs): Returns the number of rows taken by this element. """ - label = ttk.Label(master, text=self.text) + label = tkinter.ttk.Label(master, text=self.text) label.grid(row=row, column=0, columnspan=1, sticky="e") - self.entry = ttk.Entry(master, textvariable=self.value) + self.entry = tkinter.ttk.Entry(master, textvariable=self.value) self.entry.grid(row=row, column=1, columnspan=columns - 1, sticky="ew") if self.directory: - self.choose = ttk.Button( + self.choose = tkinter.ttk.Button( master, text="Choose...", - command=lambda: self.value.set(tkFileDialog.askdirectory()), + command=lambda: self.value.set(tkinter.filedialog.askdirectory()), ) else: - self.choose = ttk.Button( + self.choose = tkinter.ttk.Button( master, text="Choose...", - command=lambda: self.value.set(tkFileDialog.askopenfilename()), + command=lambda: self.value.set(tkinter.filedialog.askopenfilename()), ) self.choose.grid(row=row + 1, column=1, sticky="w") if self.optional: - self.clear = ttk.Button( + self.clear = tkinter.ttk.Button( master, text="Clear", command=lambda: self.value.set("") ) self.clear.grid(row=row + 1, column=2, sticky="e") @@ -197,7 +197,7 @@ def validate(self): return True elif len(self.value.get()) == 0: if not self.optional: - tkMessageBox.showwarning("", "{} not specified.".format(self.text)) + tkinter.messagebox.showwarning("", "{} not specified.".format(self.text)) return False else: return True @@ -209,14 +209,14 @@ def validate(self): ): return True else: - tkMessageBox.showwarning( + tkinter.messagebox.showwarning( "", "Invalid file extension " "for {}.".format(self.text) ) return False else: # no extension restriction return True else: - tkMessageBox.showwarning( + tkinter.messagebox.showwarning( "", "{} file does not exist." "".format(self.text) ) return False @@ -252,9 +252,9 @@ def body(self, master, row, columns=DEFAULT_COLUMNS, **kwargs): Returns the number of rows taken by this element. """ - label = ttk.Label(master, text=self.text) + label = tkinter.ttk.Label(master, text=self.text) label.grid(row=row, column=0, columnspan=1, sticky="e") - self.entry = ttk.Entry(master, textvariable=self.value) + self.entry = tkinter.ttk.Entry(master, textvariable=self.value) self.entry.grid(row=row, column=1, columnspan=columns - 1, sticky="ew") return 1 @@ -295,11 +295,11 @@ def body(self, master, row, columns=DEFAULT_COLUMNS, width=4, left=False, **kwar label_sticky = "e" label_width = 1 - label = ttk.Label(master, text=self.text) + label = tkinter.ttk.Label(master, text=self.text) label.grid( row=row, column=label_column, columnspan=label_width, sticky=label_sticky ) - self.entry = ttk.Entry(master, textvariable=self.value, width=width) + self.entry = tkinter.ttk.Entry(master, textvariable=self.value, width=width) self.entry.grid( row=row, column=entry_column, columnspan=entry_width, sticky=entry_sticky ) @@ -320,20 +320,20 @@ def validate(self): except ValueError: if len(self.value.get()) == 0: if not self.optional: - tkMessageBox.showwarning( + tkinter.messagebox.showwarning( "", "{} not specified." "".format(self.text) ) return False else: return True else: - tkMessageBox.showwarning( + tkinter.messagebox.showwarning( "", "{} is not an integer." "".format(self.text) ) return False else: if intvalue < self.minvalue: - tkMessageBox.showwarning( + tkinter.messagebox.showwarning( "", "{} lower than minimum value " "({}).".format(self.text, self.minvalue), diff --git a/enrich2/gui/edit_dialog.py b/enrich2/gui/edit_dialog.py index 891c167..e974bc5 100644 --- a/enrich2/gui/edit_dialog.py +++ b/enrich2/gui/edit_dialog.py @@ -1,8 +1,8 @@ -from __future__ import print_function -import Tkinter as tk -import ttk -import tkSimpleDialog -import tkMessageBox + +import tkinter as tk +import tkinter.ttk +import tkinter.simpledialog +import tkinter.messagebox from sys import maxsize from collections import OrderedDict from .dialog_elements import ( @@ -30,7 +30,7 @@ def clear_nones_filter(v): Returns False if v is None, else True. """ if isinstance(v, dict): - if len(v.keys()) == 0: + if len(list(v.keys())) == 0: # removing empty dictionaries breaks SeqLib recognition # return False return True @@ -50,7 +50,7 @@ def clear_nones(d): return d else: return dict( - (k, clear_nones(v)) for k, v in d.iteritems() if clear_nones_filter(v) + (k, clear_nones(v)) for k, v in d.items() if clear_nones_filter(v) ) @@ -102,7 +102,7 @@ def __init__(self, frame_dict): self.rb_coutns = None def body(self, master, row, columns=DEFAULT_COLUMNS, **kwargs): - self.rb_fastq = ttk.Radiobutton( + self.rb_fastq = tkinter.ttk.Radiobutton( master, text="FASTQ File Mode", variable=self.mode, @@ -110,7 +110,7 @@ def body(self, master, row, columns=DEFAULT_COLUMNS, **kwargs): command=self.fastq_mode, ) self.rb_fastq.grid(row=row, column=0, columnspan=columns, sticky="ew") - self.rb_counts = ttk.Radiobutton( + self.rb_counts = tkinter.ttk.Radiobutton( master, text="Count File Mode", variable=self.mode, @@ -165,7 +165,7 @@ def disable(self): pass -class EditDialog(tkSimpleDialog.Dialog): +class EditDialog(tkinter.simpledialog.Dialog): """ Dialog box for editing elements. Also used to set properties on newly-created elements. @@ -433,19 +433,19 @@ def __init__(self, parent_window, tree, element, title="Configure Object"): Checkbox("Use Aligner", self.element_cfg["variants"], "use aligner") ) - tkSimpleDialog.Dialog.__init__(self, parent_window, title) + tkinter.simpledialog.Dialog.__init__(self, parent_window, title) def body(self, master): """ Add the UI elements to the edit window. Ordering and placement of UI elements in columns is defined by the ``element_layouts`` dictionary. """ - main = ttk.Frame(master, padding=(3, 3, 12, 12)) + main = tkinter.ttk.Frame(master, padding=(3, 3, 12, 12)) main.grid(row=0, column=0, sticky="nsew") layout = element_layouts[type(self.element).__name__] for i, column_tuple in enumerate(layout): - new_frame = ttk.Frame(master, padding=(3, 3, 12, 12)) + new_frame = tkinter.ttk.Frame(master, padding=(3, 3, 12, 12)) new_frame.grid(row=0, column=i, sticky="nsew") row_no = 0 for row_frame_key in layout[i]: @@ -463,14 +463,14 @@ def validate(self): Also checks that child name is unique. """ - for tk_list in self.frame_dict.values(): + for tk_list in list(self.frame_dict.values()): if not all(x.validate() for x in tk_list): return False if self.element.parent is not None: if self.element not in self.element.parent.children: if self.name_entry.value.get() in self.element.parent.child_names(): - tkMessageBox.showwarning("", "Sibling names must be unique.") + tkinter.messagebox.showwarning("", "Sibling names must be unique.") return False return True @@ -480,7 +480,7 @@ def apply(self): Called when the user chooses "OK" and the box closes. """ # apply all changes to the config object - for tk_list in self.frame_dict.values(): + for tk_list in list(self.frame_dict.values()): for tk_element in tk_list: tk_element.apply() diff --git a/enrich2/gui/runner_window.py b/enrich2/gui/runner_window.py index 9986cdf..c6fa54d 100644 --- a/enrich2/gui/runner_window.py +++ b/enrich2/gui/runner_window.py @@ -1,14 +1,14 @@ -from __future__ import print_function -import Tkinter as tk -import ttk -import tkSimpleDialog -import tkMessageBox + +import tkinter as tk +import tkinter.ttk +import tkinter.simpledialog +import tkinter.messagebox import logging logger = logging.getLogger(__name__) -class RunnerSavePrompt(tkSimpleDialog.Dialog): +class RunnerSavePrompt(tkinter.simpledialog.Dialog): """ Dialog box for prompting the user to save before running. """ @@ -19,20 +19,20 @@ def __init__(self, parent_window, title="Enrich2"): self.dialog_text = tk.StringVar() self.dialog_text.set("Would you like to save your config changes?") - tkSimpleDialog.Dialog.__init__(self, parent_window, title) + tkinter.simpledialog.Dialog.__init__(self, parent_window, title) def body(self, master): - frame = ttk.Frame(master, padding=(12, 6, 12, 6)) + frame = tkinter.ttk.Frame(master, padding=(12, 6, 12, 6)) frame.pack() - dialog_text_label = ttk.Label(frame, textvariable=self.dialog_text) + dialog_text_label = tkinter.ttk.Label(frame, textvariable=self.dialog_text) dialog_text_label.grid(column=0, row=0, sticky="nsew") def apply(self): self.pw.menu_save() -class RunnerWindow(tkSimpleDialog.Dialog): +class RunnerWindow(tkinter.simpledialog.Dialog): """ Dialog box for blocking input while running the analysis. """ @@ -44,13 +44,13 @@ def __init__(self, parent_window, title="Enrich2"): self.dialog_text = tk.StringVar() self.dialog_text.set("Ready to start analysis...") - tkSimpleDialog.Dialog.__init__(self, parent_window, title) + tkinter.simpledialog.Dialog.__init__(self, parent_window, title) def body(self, master): - frame = ttk.Frame(master, padding=(12, 6, 12, 6)) + frame = tkinter.ttk.Frame(master, padding=(12, 6, 12, 6)) frame.pack() - dialog_text_label = ttk.Label(frame, textvariable=self.dialog_text) + dialog_text_label = tkinter.ttk.Label(frame, textvariable=self.dialog_text) dialog_text_label.grid(column=0, row=0, sticky="nsew") self.run_button = tk.Button( @@ -91,7 +91,7 @@ def runner(self): except Exception as e: # display error logger.error(e) - tkMessageBox.showerror( + tkinter.messagebox.showerror( "Enrich2 Error", "Enrich2 encountered an error:\n{}".format(e) ) @@ -102,7 +102,7 @@ def runner(self): try: self.pw.root_element.make_plots() except Exception as e: - tkMessageBox.showwarning( + tkinter.messagebox.showwarning( None, "Calculations completed, but plotting failed:\n{}".format(e), ) @@ -110,13 +110,13 @@ def runner(self): try: self.pw.root_element.write_tsv() except Exception as e: - tkMessageBox.showwarning( + tkinter.messagebox.showwarning( None, "Calculations completed, but tsv output failed:\n{}".format(e), ) # show the dialog box - tkMessageBox.showinfo("", "Analysis completed.") + tkinter.messagebox.showinfo("", "Analysis completed.") finally: # close the HDF5 files diff --git a/enrich2/gui/seqlib_apply_dialog.py b/enrich2/gui/seqlib_apply_dialog.py index dfb7d5e..45bb030 100644 --- a/enrich2/gui/seqlib_apply_dialog.py +++ b/enrich2/gui/seqlib_apply_dialog.py @@ -1,9 +1,9 @@ -import Tkinter as tk -import ttk -import tkSimpleDialog +import tkinter as tk +import tkinter.ttk +import tkinter.simpledialog -class SeqLibApplyDialog(tkSimpleDialog.Dialog): +class SeqLibApplyDialog(tkinter.simpledialog.Dialog): """ Confirmation dialog box for applying FASTQ filtering options to selected SeqLibs from the Treeview. """ @@ -20,7 +20,7 @@ def __init__( and type(self.tree.get_element(self.source_id)) == type(self.tree.get_element(x)) ] - tkSimpleDialog.Dialog.__init__(self, parent_window, title) + tkinter.simpledialog.Dialog.__init__(self, parent_window, title) def body(self, master): """ @@ -36,15 +36,15 @@ def body(self, master): self.tree.get_element(self.target_ids[0]).name, ) else: - bullet = " " + u"\u25C6" + bullet = " " + "\u25C6" message_string = 'Apply FASTQ filtering options from "{}"" to the following?\n'.format( self.tree.get_element(self.source_id).name ) for x in self.target_ids: - message_string += u"{bullet} {name}\n".format( + message_string += "{bullet} {name}\n".format( bullet=bullet, name=self.tree.get_element(x).name ) - message = ttk.Label(master, text=message_string, justify="left") + message = tkinter.ttk.Label(master, text=message_string, justify="left") message.grid(row=0, sticky="w") def buttonbox(self): @@ -63,7 +63,7 @@ def buttonbox(self): box.pack() else: - tkSimpleDialog.Dialog.buttonbox(self) + tkinter.simpledialog.Dialog.buttonbox(self) def apply(self): """ diff --git a/enrich2/main.py b/enrich2/main.py index b3359a8..566d686 100755 --- a/enrich2/main.py +++ b/enrich2/main.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -from __future__ import print_function + from argparse import ArgumentParser, RawDescriptionHelpFormatter import logging import json @@ -75,9 +75,9 @@ def main_cmd(): desc_string = ( "Command-line driver for Enrich2 v{}".format(__version__) + "\n\nscoring methods:\n" - + "\n".join([" {:22}{}".format(k, v) for k, v in SCORING_METHODS.items()]) + + "\n".join([" {:22}{}".format(k, v) for k, v in list(SCORING_METHODS.items())]) + "\n\nlog ratio methods:\n" - + "\n".join([" {:22}{}".format(k, v) for k, v in LOGR_METHODS.items()]) + + "\n".join([" {:22}{}".format(k, v) for k, v in list(LOGR_METHODS.items())]) ) # create parser and add description @@ -90,10 +90,10 @@ def main_cmd(): # add command line arguments parser.add_argument("config", help="JSON configuration file") parser.add_argument( - "scoring_method", help="scoring method", choices=SCORING_METHODS.keys() + "scoring_method", help="scoring method", choices=list(SCORING_METHODS.keys()) ) parser.add_argument( - "logr_method", help="log ratio method", choices=LOGR_METHODS.keys() + "logr_method", help="log ratio method", choices=list(LOGR_METHODS.keys()) ) # add support for semantic version checking diff --git a/enrich2/overlap.py b/enrich2/overlap.py index 2c6737b..aec274f 100644 --- a/enrich2/overlap.py +++ b/enrich2/overlap.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import pandas as pd import logging from matplotlib.backends.backend_pdf import PdfPages @@ -198,7 +198,7 @@ def merge_reads(self, fwd, rev): mismatches = 0 first = True - for i in xrange(self.overlap_length): + for i in range(self.overlap_length): a = self.fwd_start - 1 + i b = len(rev) - self.rev_start - self.overlap_length + i + 1 try: @@ -247,7 +247,7 @@ def counts_from_reads(self): data=0, index=[ x + self.fwd_start + self.wt.dna_offset - for x in xrange(0, self.overlap_length) + for x in range(0, self.overlap_length) ], columns=["resolved", "unresolved", "first"], ) diff --git a/enrich2/plots.py b/enrich2/plots.py index b459fd8..7b369dd 100644 --- a/enrich2/plots.py +++ b/enrich2/plots.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import collections import logging import numpy as np @@ -292,7 +292,7 @@ def barcodemap_plot( ) return - if len(data.keys()) <= 1: + if len(list(data.keys())) <= 1: logger.warning("Not enough elements to make barcodemap plot") return @@ -302,7 +302,7 @@ def barcodemap_plot( configure_axes(ax) # plot the histogram - ax.hist(data.values(), bins=bins, log=log, color=color) + ax.hist(list(data.values()), bins=bins, log=log, color=color) # set the title and axes labels ax.set_title("Barcodes per Variant\n{}".format(obj.name)) @@ -653,7 +653,7 @@ def density_ax(ax, ys, xmin, xmax, xlabel, line_params, legend_loc="best"): xs = np.linspace(xmin, xmax, 1000) - for i in xrange(len(ys)): + for i in range(len(ys)): ax.plot(xs, d_ys[i].evaluate(xs), label=ys[i].name, **line_params[i]) ax.legend(loc=legend_loc, **legend_params) diff --git a/enrich2/random_effects.py b/enrich2/random_effects.py index 273ae30..563defe 100644 --- a/enrich2/random_effects.py +++ b/enrich2/random_effects.py @@ -20,7 +20,7 @@ def rml_estimator(y, sigma2i, iterations=50): sigma2ML = np.sum((y - np.mean(y, axis=0)) ** 2 / (len(beta0) - 1), axis=0) eps = np.zeros(beta0.shape) betaML = None - for _ in xrange(iterations): + for _ in range(iterations): w = 1 / (sigma2i + sigma2ML) sw = np.sum(w, axis=0) sw2 = np.sum(w ** 2, axis=0) diff --git a/enrich2/selection.py b/enrich2/selection.py index da6b4f0..b0dbf6e 100644 --- a/enrich2/selection.py +++ b/enrich2/selection.py @@ -1,4 +1,4 @@ -from __future__ import print_function + from .barcode import BarcodeSeqLib from .barcodevariant import BcvSeqLib from .barcodeid import BcidSeqLib @@ -166,7 +166,7 @@ def configure(self, cfg, configure_children=True): lib = SEQLIB_CLASSES[libtype]() # don't re-parse the barcode maps if possible mapfile = lib_cfg["barcodes"]["map file"] - if mapfile in self.barcode_maps.keys(): + if mapfile in list(self.barcode_maps.keys()): lib.configure(lib_cfg, barcode_map=self.barcode_maps[mapfile]) else: lib.configure(lib_cfg) @@ -247,7 +247,7 @@ def is_barcodevariant(self): """ return ( all(isinstance(lib, BcvSeqLib) for lib in self.children) - and len(self.barcode_maps.keys()) == 1 + and len(list(self.barcode_maps.keys())) == 1 ) def is_barcodeid(self): @@ -259,7 +259,7 @@ def is_barcodeid(self): """ return ( all(isinstance(lib, BcidSeqLib) for lib in self.children) - and len(self.barcode_maps.keys()) == 1 + and len(list(self.barcode_maps.keys())) == 1 ) def is_coding(self): @@ -298,7 +298,7 @@ def merge_counts_unfiltered(self, label): self.logger.info("Aggregating SeqLib data") destination = "/main/{}/counts_unfiltered".format(label) - if destination in self.store.keys(): + if destination in list(self.store.keys()): # need to remove the current destination table because we are using append # append is required because it takes the "min_itemsize" argument, and put doesn't self.logger.info("Replacing existing '{}'".format(destination)) @@ -325,7 +325,7 @@ def merge_counts_unfiltered(self, label): # perform operation in chunks tp_frame = None - for i in xrange(0, len(complete_index), self.chunksize): + for i in range(0, len(complete_index), self.chunksize): # don't duplicate the index if the chunksize is large if self.chunksize < len(complete_index): index_chunk = complete_index[i : i + self.chunksize] @@ -724,7 +724,7 @@ def calc_regression(self, label): """ if self.check_store("/main/{}/scores".format(label)): return - elif "/main/{}/scores".format(label) in self.store.keys(): + elif "/main/{}/scores".format(label) in list(self.store.keys()): # need to remove the current keys because we are using append self.store.remove("/main/{}/scores".format(label)) @@ -1107,7 +1107,7 @@ def write_tsv(self): """ if self.tsv_requested: self.logger.info("Generating tab-separated output files") - for k in self.store.keys(): + for k in list(self.store.keys()): self.write_table_tsv(k) for lib in self.children: lib.write_tsv() diff --git a/enrich2/seqlib.py b/enrich2/seqlib.py index 07765bc..65b241c 100644 --- a/enrich2/seqlib.py +++ b/enrich2/seqlib.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import logging import os.path import pandas as pd @@ -80,7 +80,7 @@ def serialize_filters(self): Return a dictionary of filtering options that have non-default values. """ cfg = dict() - for key in self.filters.keys(): + for key in list(self.filters.keys()): if self.filters[key] != self.default_filters[key]: cfg[key] = self.filters[key] return cfg @@ -190,7 +190,7 @@ def save_counts(self, label, df_dict, raw): If *raw* is ``True``, the counts are stored under ``"/raw/label/counts"``; else ``"/main/label/counts"``. """ - if len(df_dict.keys()) == 0: + if len(list(df_dict.keys())) == 0: raise ValueError("Failed to count {} [{}]".format(label, self.name)) df = pd.DataFrame.from_dict(df_dict, orient="index", dtype=np.int32) df.columns = ["count"] @@ -262,8 +262,8 @@ def save_filter_stats(self): This DataFrame contains the same information as ``report_filter_stats`` """ - df = pd.DataFrame(index=SeqLib.filter_messages.values(), columns=["count"]) - for key in self.filter_stats.keys(): + df = pd.DataFrame(index=list(SeqLib.filter_messages.values()), columns=["count"]) + for key in list(self.filter_stats.keys()): if self.filter_stats[key] > 0 or key == "total": df.loc[SeqLib.filter_messages[key], "count"] = self.filter_stats[key] df.dropna(inplace=True) @@ -346,7 +346,7 @@ def write_tsv(self): """ if self.tsv_requested: self.logger.info("Generating tab-separated output files") - for k in self.store.keys(): + for k in list(self.store.keys()): self.write_table_tsv(k) def counts_from_file_h5(self, fname): @@ -362,7 +362,7 @@ def counts_from_file_h5(self, fname): ) # this could probably be much more efficient, but the PyTables docs # don't explain copying subsets of files adequately - raw_keys = [key for key in store.keys() if key.startswith("/raw/")] + raw_keys = [key for key in list(store.keys()) if key.startswith("/raw/")] if len(raw_keys) == 0: raise ValueError( "No raw counts found in '{}' [{}]" "".format(fname, self.name) diff --git a/enrich2/sfmap.py b/enrich2/sfmap.py index 43bb438..10dd4e3 100644 --- a/enrich2/sfmap.py +++ b/enrich2/sfmap.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import logging import numpy as np import matplotlib.pyplot as plt @@ -319,9 +319,9 @@ def sfmap_axes( # add marks on wild type positions wt = list(wt) if tall: - wt_xy = zip((list(df.columns).index(x) for x in wt), reversed(xrange(len(wt)))) + wt_xy = list(zip((list(df.columns).index(x) for x in wt), reversed(list(range(len(wt)))))) else: - wt_xy = zip(xrange(len(wt)), (list(df.index).index(x) for x in wt)) + wt_xy = list(zip(list(range(len(wt))), (list(df.index).index(x) for x in wt))) for x, y in wt_xy: ax.add_patch( Circle( @@ -341,8 +341,8 @@ def sfmap_axes( # rescale the SE's onto 0 .. 0.98 # rescaling onto 0 .. 1.0 causes the corners to look funny masked_se = masked_se / vmax_se * 0.98 - for x in xrange(len(df.index)): - for y in xrange(len(df.columns)): + for x in range(len(df.index)): + for y in range(len(df.columns)): value = masked_se[x, y] if value and value >= 0.02: # not masked, above threshold corner_dist = (1.0 - value) / 2.0 diff --git a/enrich2/storemanager.py b/enrich2/storemanager.py index e428c80..ae4c328 100644 --- a/enrich2/storemanager.py +++ b/enrich2/storemanager.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import os import logging import pandas as pd @@ -118,7 +118,7 @@ def child_labels(self): for x in self.children: shared.extend(x.labels) shared = collections.Counter(shared) - shared = [x for x in shared.keys() if shared[x] == len(self.children)] + shared = [x for x in list(shared.keys()) if shared[x] == len(self.children)] return sorted(shared, key=lambda a: ELEMENT_LABELS.index(a)) @property @@ -285,7 +285,7 @@ def scoring_method(self, value): """ Make sure the *value* is valid and set it. """ - if value in SCORING_METHODS.keys(): + if value in list(SCORING_METHODS.keys()): self._scoring_method = value else: raise ValueError( @@ -427,7 +427,7 @@ def logr_method(self, value): """ Make sure the *value* is valid and set it. """ - if value in LOGR_METHODS.keys(): + if value in list(LOGR_METHODS.keys()): self._logr_method = value else: raise ValueError( @@ -692,7 +692,7 @@ def check_store(self, key): Returns: bool: True if the key exists in the HDF5 store, else False. """ - if key in self.store.keys(): + if key in list(self.store.keys()): self.logger.info("Found existing '{}'".format(key)) return True else: @@ -712,7 +712,7 @@ def map_table( This method really needs a better name. """ - if destination in self.store.keys(): + if destination in list(self.store.keys()): # remove the current destination table because we are using append # append takes the "min_itemsize" argument, and put doesn't self.logger.info("Overwriting existing '{}'".format(destination)) diff --git a/enrich2/variant.py b/enrich2/variant.py index f919a25..9576504 100644 --- a/enrich2/variant.py +++ b/enrich2/variant.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import re from .aligner import Aligner from .seqlib import SeqLib @@ -296,7 +296,7 @@ def align_variant(self, variant_dna): .. warning:: Using the :py:class:`~seqlib.aligner.Aligner` \ dramatically increases runtime. """ - if variant_dna in self.aligner_cache.keys(): + if variant_dna in list(self.aligner_cache.keys()): return self.aligner_cache[variant_dna] mutations = list() @@ -355,7 +355,7 @@ def count_variant(self, variant_dna, include_indels=True): return None else: mutations = list() - for i in xrange(len(variant_dna)): + for i in range(len(variant_dna)): if variant_dna[i] != self.wt.dna_seq[i]: mutations.append( ( @@ -381,7 +381,7 @@ def count_variant(self, variant_dna, include_indels=True): mutation_strings = list() if self.is_coding(): variant_protein = "" - for i in xrange(0, len(variant_dna), 3): + for i in range(0, len(variant_dna), 3): try: variant_protein += CODON_TABLE[variant_dna[i : i + 3]] except KeyError: # garbage codon due to indel, X, or N diff --git a/enrich2/wildtype.py b/enrich2/wildtype.py index ab2b565..bfcffd2 100644 --- a/enrich2/wildtype.py +++ b/enrich2/wildtype.py @@ -1,4 +1,4 @@ -from __future__ import print_function + import logging import re from .constants import CODON_TABLE @@ -68,7 +68,7 @@ def configure(self, cfg): # perform translation self.protein_seq = "" - for i in xrange(0, len(self.dna_seq), 3): + for i in range(0, len(self.dna_seq), 3): self.protein_seq += CODON_TABLE[self.dna_seq[i : i + 3]] # set the reference offset if it's a multiple of three @@ -139,4 +139,4 @@ def position_tuples(self, protein=False): seq = self.dna_seq offset = self.dna_offset - return [(i + offset + 1, seq[i]) for i in xrange(len(seq))] + return [(i + offset + 1, seq[i]) for i in range(len(seq))] From 73ca809b673bfcc1e6b891f66f4440cec01dab7e Mon Sep 17 00:00:00 2001 From: Chris <17653365+odcambc@users.noreply.github.com> Date: Mon, 25 Nov 2024 13:23:48 -0600 Subject: [PATCH 02/10] Minor fixes --- enrich2/barcodemap.py | 2 +- enrich2/seqlib.py | 2 +- enrich2/storemanager.py | 5 +++-- enrich2/wildtype.py | 11 +++++------ 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/enrich2/barcodemap.py b/enrich2/barcodemap.py index b3f03b9..29fa36e 100644 --- a/enrich2/barcodemap.py +++ b/enrich2/barcodemap.py @@ -86,7 +86,7 @@ def __init__(self, mapfile, is_variant=False): if self[barcode] != value: raise ValueError( "Barcode '{}' assigned to multiple " - "unique values".format(barcode, self.name) + "unique values: {}".format(barcode, self.name) ) else: self[barcode] = value diff --git a/enrich2/seqlib.py b/enrich2/seqlib.py index 65b241c..032c864 100644 --- a/enrich2/seqlib.py +++ b/enrich2/seqlib.py @@ -173,7 +173,7 @@ def report_filtered_read(self, fq, filter_flags): ``SeqLib.filter_messages`` dictionary. """ self.logger.debug( - "Filtered read ({messages})\n{read!s}".format( + "Filtered read ({messages})\n{read!s}\n{name}".format( messages=", ".join( SeqLib.filter_messages[x] for x in filter_flags if filter_flags[x] ), diff --git a/enrich2/storemanager.py b/enrich2/storemanager.py index ae4c328..e3b9224 100644 --- a/enrich2/storemanager.py +++ b/enrich2/storemanager.py @@ -5,6 +5,7 @@ import collections import getpass import time +from typing import Optional #: Dictionary specifying available scoring methods for the analysis @@ -65,9 +66,9 @@ class StoreManager(object): store and directory management. """ - store_suffix = None + store_suffix = Optional[str] has_store = True - treeview_class_name = None + treeview_class_name = Optional[str] def __init__(self): self.logger = logging.getLogger("{}.{}".format(__name__, self.__class__)) diff --git a/enrich2/wildtype.py b/enrich2/wildtype.py index bfcffd2..872c1d4 100644 --- a/enrich2/wildtype.py +++ b/enrich2/wildtype.py @@ -1,4 +1,3 @@ - import logging import re from .constants import CODON_TABLE @@ -6,11 +5,11 @@ class WildTypeSequence(object): """ - Container class for wild type sequence information. Used by :py:class:`~seqlib.seqlib.VariantSeqLib` objects and - :py:class:`~enrich2.selection.Selection` or :py:class:`~enrich2.experiment.Experiment` objects that contain + Container class for wild type sequence information. Used by :py:class:`~seqlib.seqlib.VariantSeqLib` objects and + :py:class:`~enrich2.selection.Selection` or :py:class:`~enrich2.experiment.Experiment` objects that contain variant information. - Requires a *parent_name* that associates this object with a StoreManager object for the + Requires a *parent_name* that associates this object with a StoreManager object for the purposes of error reporting and logging. """ @@ -69,7 +68,7 @@ def configure(self, cfg): # perform translation self.protein_seq = "" for i in range(0, len(self.dna_seq), 3): - self.protein_seq += CODON_TABLE[self.dna_seq[i : i + 3]] + self.protein_seq += CODON_TABLE[self.dna_seq[i: i + 3]] # set the reference offset if it's a multiple of three if self.dna_offset % 3 == 0: @@ -122,7 +121,7 @@ def duplicate(self, new_parent_name): def position_tuples(self, protein=False): """ - Return a list of tuples containing the position number (after offset adjustment) and + Return a list of tuples containing the position number (after offset adjustment) and single-letter symbol (nucleotide or amino acid) for each position the wild type sequence. """ if protein: From b1a6e733301642e6fa5c3dc7331b7af7e3517d49 Mon Sep 17 00:00:00 2001 From: Chris <17653365+odcambc@users.noreply.github.com> Date: Tue, 3 Dec 2024 19:40:02 -0600 Subject: [PATCH 03/10] update fastq header format --- enrich2/fqread.py | 112 +++++++++++++++++++++++++--------------------- 1 file changed, 62 insertions(+), 50 deletions(-) diff --git a/enrich2/fqread.py b/enrich2/fqread.py index 7c30b54..f22d44c 100644 --- a/enrich2/fqread.py +++ b/enrich2/fqread.py @@ -2,7 +2,6 @@ from sys import stderr import os.path import re -import string import itertools import bz2 import gzip @@ -10,32 +9,45 @@ # The following regex is referenced by line number in the class documentation. # Matches FASTQ headers based on the following pattern (modify as needed): -# @:::::#/ +# @:::::: ::: + +# Example: @M02564:876:000000000-L3775:1:1101:16862:1800 1:N:0:TCACTCGA+TAACGGTT +# Sample number contains indexes if they are present. + +# See: https://help.basespace.illumina.com/files-used-by-basespace/fastq-files header_pattern = re.compile( - "@(?P.+)" - ":(?P\d+)" - ":(?P\d+)" - ":(?P\d+)" - ":(?P\d+)" - ":(?P[01])" - "#(?P\d)" - "/(?P\d)" + r""" + @(?P[^:]+): + (?P\d+): + (?P[^:]+): + (?P\d+): + (?P\d+): + (?P\d+): + (?P\d+) + \s + (?P\d+): + (?P[YN]): + (?P[^:]+): + (?P[^:]+) + """, + re.VERBOSE, ) - BUFFER_SIZE = 100000 # empirically optimized for reading FASTQ files -dna_trans = string.maketrans("actgACTG", "tgacTGAC") +dna_trans = str.maketrans( + {"a": "t", "c": "g", "t": "a", "g": "c", "A": "T", "C": "G", "T": "A", "G": "C"} +) class FQRead(object): """ - Stores a single record from a FASTQ_ file. Quality values are stored + Stores a single record from a FASTQ_ file. Quality values are stored internally as a list of integer `Phred quality scores \ - `_. The *qbase* parameter is - the ASCII value that correponds to Phred score of 0. The *sequence* and - *quality* strings must be the same length. + `_. The *qbase* parameter is + the ASCII value that correponds to Phred score of 0. The *sequence* and + *quality* strings must be the same length. """ # use slots for memory efficiency @@ -56,7 +68,7 @@ def __init__(self, header, sequence, header2, quality, qbase=33): def __str__(self): """ - Reformat as a four-line FASTQ_ record. This method converts the + Reformat as a four-line FASTQ_ record. This method converts the integer quality values back into a string. """ return "\n".join( @@ -76,7 +88,7 @@ def __len__(self): def trim(self, start=1, end=None): """ - Trims this :py:class:`~fqread.FQRead` to contain bases between + Trims this :py:class:`~fqread.FQRead` to contain bases between *start* and *end* (inclusive). Bases are numbered starting at 1. """ self.sequence = self.sequence[start - 1 : end] @@ -84,14 +96,14 @@ def trim(self, start=1, end=None): def trim_length(self, length, start=1): """ - Trims this :py:class:`~fqread.FQRead` to contain *length* bases, + Trims this :py:class:`~fqread.FQRead` to contain *length* bases, beginning with *start*. Bases are numbered starting at 1. """ self.trim(start=start, end=start + length - 1) def revcomp(self): """ - Reverse-complement the sequence in place. Also reverses the array of + Reverse-complement the sequence in place. Also reverses the array of quality values. """ self.sequence = self.sequence.translate(dna_trans)[::-1] @@ -100,9 +112,9 @@ def revcomp(self): def header_information(self, pattern=header_pattern): """header_information(pattern=header_pattern) - Parses the first FASTQ_ header (@ header) and returns a dictionary. - Dictionary keys are the named groups in the regular expression - *pattern*. Unnamed matches are ignored. Integer values are converted + Parses the first FASTQ_ header (@ header) and returns a dictionary. + Dictionary keys are the named groups in the regular expression + *pattern*. Unnamed matches are ignored. Integer values are converted from strings to integers. The default pattern matches a header in the format:: @@ -134,13 +146,13 @@ def mean_quality(self): def is_chaste(self, raises=True): """ - Returns ``True`` if the chastity bit is set in the header. The - regular experession used by :py:meth:`header_information` must - include a ``'Chastity'`` match that equals ``1`` if the read is + Returns ``True`` if the chastity bit is set in the header. The + regular experession used by :py:meth:`header_information` must + include a ``'Chastity'`` match that equals ``1`` if the read is chaste. - If ``raises`` is ``True``, raises an informative error if the - chastity information in the header is not found. Otherwise, a + If ``raises`` is ``True``, raises an informative error if the + chastity information in the header is not found. Otherwise, a read without chastity information is treated as unchaste. """ try: @@ -162,15 +174,15 @@ def is_chaste(self, raises=True): def split_fastq_path(fname): """ - Check that *fname* exists and has a valid FASTQ_ file extension. Valid - file extensions are ``.fastq`` or ``.fq``, optionally followed by ``.gz`` - or ``.bz2`` if the file is compressed. + Check that *fname* exists and has a valid FASTQ_ file extension. Valid + file extensions are ``.fastq`` or ``.fq``, optionally followed by ``.gz`` + or ``.bz2`` if the file is compressed. - Returns a tuple containing the directory, the file base name with no - extension, the FASTQ_ file extension used, and the compression format + Returns a tuple containing the directory, the file base name with no + extension, the FASTQ_ file extension used, and the compression format (``"gz"``, ``"bz2"``, or ``None``). - Raises an ``IOError`` if the file doesn't exist. Returns ``None`` if the + Raises an ``IOError`` if the file doesn't exist. Returns ``None`` if the file extension is not recognized. """ if os.path.isfile(fname): @@ -197,9 +209,9 @@ def split_fastq_path(fname): def create_compressed_outfile(fname, compression): """ - Utility function for opening compressed output files. Accepted values for - *compression* are ``"gz"``, ``"bz2"``, or ``None``. Returns a file handle - of the appropriate type opened for writing. Existing files with the same + Utility function for opening compressed output files. Accepted values for + *compression* are ``"gz"``, ``"bz2"``, or ``None``. Returns a file handle + of the appropriate type opened for writing. Existing files with the same name are overwritten. """ if compression == "bz2": @@ -215,10 +227,10 @@ def create_compressed_outfile(fname, compression): def read_fastq(fname, filter_function=None, buffer_size=BUFFER_SIZE, qbase=33): """ - Generator function for reading from FASTQ_ file *fname*. Yields an - :py:class:`~fqread.FQRead` object for each FASTQ_ record in the file. The - *filter_function* must operate on an :py:class:`~fqread.FQRead` object - and return ``True`` or ``False``. If the result is ``False``, the record + Generator function for reading from FASTQ_ file *fname*. Yields an + :py:class:`~fqread.FQRead` object for each FASTQ_ record in the file. The + *filter_function* must operate on an :py:class:`~fqread.FQRead` object + and return ``True`` or ``False``. If the result is ``False``, the record will be skipped silently. .. note:: To read multiple files in parallel (such as index or \ @@ -272,15 +284,15 @@ def read_fastq_multi( fnames, filter_function=None, buffer_size=BUFFER_SIZE, match_lengths=True, qbase=33 ): """ - Generator function for reading from multiple FASTQ_ files in parallel. - The argument *fnames* is an iterable of FASTQ_ file names. Yields a - tuple of :py:class:`~fqread.FQRead` objects, one for each file in - *fnames*. The *filter_function* must operate on an :py:class:`FQRead` - object and return ``True`` or ``False``. If the result is ``False`` for + Generator function for reading from multiple FASTQ_ files in parallel. + The argument *fnames* is an iterable of FASTQ_ file names. Yields a + tuple of :py:class:`~fqread.FQRead` objects, one for each file in + *fnames*. The *filter_function* must operate on an :py:class:`FQRead` + object and return ``True`` or ``False``. If the result is ``False`` for any :py:class:`FQRead` in the tuple, the entire tuple will be skipped. - If *match_lengths* is ``True``, the generator will yield ``None`` if the - files do not contain the same number of FASTQ_ records. Otherwise, it + If *match_lengths* is ``True``, the generator will yield ``None`` if the + files do not contain the same number of FASTQ_ records. Otherwise, it will silently ignore partial records. """ fq_generators = list() @@ -305,8 +317,8 @@ def read_fastq_multi( def fastq_filter_chastity(fq): """ - Filtering function for :py:func:`read_fastq` and - :py:func:`read_fastq_multi`. Returns ``True`` if the + Filtering function for :py:func:`read_fastq` and + :py:func:`read_fastq_multi`. Returns ``True`` if the :py:class:`~fqread.FQRead` object *fq* is chaste. """ return fq.is_chaste() From 5458f0fa5bb693ae8afa850d535f3842eb20558b Mon Sep 17 00:00:00 2001 From: Chris <17653365+odcambc@users.noreply.github.com> Date: Tue, 3 Dec 2024 19:40:29 -0600 Subject: [PATCH 04/10] fix escaping in cite --- enrich2/random_effects.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/enrich2/random_effects.py b/enrich2/random_effects.py index 563defe..55c2211 100644 --- a/enrich2/random_effects.py +++ b/enrich2/random_effects.py @@ -10,7 +10,7 @@ def rml_estimator(y, sigma2i, iterations=50): title={Mixed models: theory and applications with R}, author={Demidenko, Eugene}, year={2013}, - publisher={John Wiley \& Sons} + publisher={John Wiley & Sons} } """ From 7713e4e6a5362f38a407a2e958d2d8a02bf4ec30 Mon Sep 17 00:00:00 2001 From: Chris <17653365+odcambc@users.noreply.github.com> Date: Tue, 3 Dec 2024 19:46:59 -0600 Subject: [PATCH 05/10] fixes to types, file mode --- enrich2/experiment.py | 12 ++++----- enrich2/main.py | 2 +- enrich2/selection.py | 62 +++++++++++++++++++++---------------------- 3 files changed, 38 insertions(+), 38 deletions(-) diff --git a/enrich2/experiment.py b/enrich2/experiment.py index dcb7c68..b44183a 100644 --- a/enrich2/experiment.py +++ b/enrich2/experiment.py @@ -130,16 +130,16 @@ def validate(self): def is_coding(self): """ - Return ``True`` if the all :py:class:`~selection.Selection` in the - :py:class:`~experiment.Experiment` count protein-coding variants, else + Return ``True`` if the all :py:class:`~selection.Selection` in the + :py:class:`~experiment.Experiment` count protein-coding variants, else ``False``. """ return all(x.is_coding() for x in self.selection_list()) def has_wt_sequence(self): """ - Return ``True`` if the all :py:class:`~selection.Selection` in the - :py:class:`~experiment.Experiment` have a wild type sequence, else + Return ``True`` if the all :py:class:`~selection.Selection` in the + :py:class:`~experiment.Experiment` have a wild type sequence, else ``False``. """ return all(x.has_wt_sequence() for x in self.selection_list()) @@ -172,7 +172,7 @@ def combine_barcode_maps(self): If multiple variants or IDs map to the same barcode, only the first one will be present in the barcode map table. - The ``'/main/barcodemap'`` table is not created if no + The ``'/main/barcodemap'`` table is not created if no :py:class:`~selection.Selection` has barcode map information. """ if self.check_store("/main/barcodemap"): @@ -427,7 +427,7 @@ def calc_pvalues_wt(self, label): [sorted(self.child_names()), sorted(["z", "pvalue_raw"])], names=["condition", "value"], ) - result_df = pd.DataFrame(index=data.index, columns=columns) + result_df = pd.DataFrame(index=data.index, columns=columns, dtype=float) condition_labels = data.columns.levels[0] for cnd in condition_labels: diff --git a/enrich2/main.py b/enrich2/main.py index 566d686..a2c93db 100755 --- a/enrich2/main.py +++ b/enrich2/main.py @@ -157,7 +157,7 @@ def main_cmd(): # read the JSON file try: - cfg = json.load(open(args.config, "U")) + cfg = json.load(open(args.config, "r")) except IOError: raise IOError("Failed to open '{}' [{}]".format(args.config, DRIVER_NAME)) except ValueError: diff --git a/enrich2/selection.py b/enrich2/selection.py index b0dbf6e..8cd46cd 100644 --- a/enrich2/selection.py +++ b/enrich2/selection.py @@ -42,7 +42,7 @@ def regression_apply(row, timepoints, weighted): """ - :py:meth:`pandas.DataFrame.apply` apply function for calculating + :py:meth:`pandas.DataFrame.apply` apply function for calculating enrichment using linear regression. If *weighted* is ``True`` perform weighted least squares; else perform ordinary least squares. @@ -77,8 +77,8 @@ def regression_apply(row, timepoints, weighted): class Selection(StoreManager): """ - Class for a single selection replicate, consisting of multiple - timepoints. This class coordinates :py:class:`~seqlib.seqlib.SeqLib` + Class for a single selection replicate, consisting of multiple + timepoints. This class coordinates :py:class:`~seqlib.seqlib.SeqLib` objects. """ @@ -94,7 +94,7 @@ def __init__(self): def _children(self): """ - Return the :py:class:`~seqlib.seqlib.SeqLib` objects as a list, + Return the :py:class:`~seqlib.seqlib.SeqLib` objects as a list, sorted by timepoint and then by name. """ libs = list() @@ -104,7 +104,7 @@ def _children(self): def remove_child_id(self, tree_id): """ - Remove the reference to a :py:class:`~seqlib.seqlib.SeqLib` with + Remove the reference to a :py:class:`~seqlib.seqlib.SeqLib` with Treeview id *tree_id*. Deletes empty time points. """ empty = None @@ -140,10 +140,10 @@ def wt(self): def configure(self, cfg, configure_children=True): """ - Set up the :py:class:`~selection.Selection` using the *cfg* object, + Set up the :py:class:`~selection.Selection` using the *cfg* object, usually from a ``.json`` configuration file. - If *configure_children* is false, do not configure the children in + If *configure_children* is false, do not configure the children in *cfg*. """ StoreManager.configure(self, cfg) @@ -240,9 +240,9 @@ def add_child(self, child): def is_barcodevariant(self): """ - Return ``True`` if all :py:class:`~seqlib.seqlib.SeqLib` in the - :py:class:`~selection.Selection` are - :py:class:`~barcodevariant.BcvSeqLib` objects with + Return ``True`` if all :py:class:`~seqlib.seqlib.SeqLib` in the + :py:class:`~selection.Selection` are + :py:class:`~barcodevariant.BcvSeqLib` objects with the same barcode map, else ``False``. """ return ( @@ -252,9 +252,9 @@ def is_barcodevariant(self): def is_barcodeid(self): """ - Return ``True`` if all :py:class:`~seqlib.SeqLib` in the - :py:class:`~selection.Selection` are - :py:class:`~barcodeid.BcidSeqLib` objects with + Return ``True`` if all :py:class:`~seqlib.SeqLib` in the + :py:class:`~selection.Selection` are + :py:class:`~barcodeid.BcidSeqLib` objects with the same barcode map, else ``False``. """ return ( @@ -264,24 +264,24 @@ def is_barcodeid(self): def is_coding(self): """ - Return ``True`` if the all :py:class:`~seqlib.seqlib.SeqLib` in the - :py:class:`~selection.Selection` count protein-coding variants, else + Return ``True`` if the all :py:class:`~seqlib.seqlib.SeqLib` in the + :py:class:`~selection.Selection` count protein-coding variants, else ``False``. """ return all(x.is_coding() for x in self.children) def has_wt_sequence(self): """ - Return ``True`` if the all :py:class:`~seqlib.seqlib.SeqLib` in the - :py:class:`~selection.Selection` have a wild type sequence, else + Return ``True`` if the all :py:class:`~seqlib.seqlib.SeqLib` in the + :py:class:`~selection.Selection` have a wild type sequence, else ``False``. """ return all(x.has_wt_sequence() for x in self.children) def merge_counts_unfiltered(self, label): """ - Counts :py:class:`~seqlib.seqlib.SeqLib` objects and tabulates counts - for each timepoint. :py:class:`~seqlib.seqlib.SeqLib` objects from + Counts :py:class:`~seqlib.seqlib.SeqLib` objects and tabulates counts + for each timepoint. :py:class:`~seqlib.seqlib.SeqLib` objects from the same timepoint are combined by summing the counts. Stores the unfiltered counts under ``/main/label/counts_unfiltered``. @@ -364,14 +364,14 @@ def merge_counts_unfiltered(self, label): def filter_counts(self, label): """ - Converts unfiltered counts stored in ``/main/label/counts_unfiltered`` - into filtered counts calculated from complete cases (elements with a + Converts unfiltered counts stored in ``/main/label/counts_unfiltered`` + into filtered counts calculated from complete cases (elements with a non-zero count in each time point). - For the most basic element type (variant or barcode, depending on the - experimental design), the result of this operation simply drops any - rows that have missing counts. For other element types, such as - synonymous variants, the counts are re-aggregated using only the + For the most basic element type (variant or barcode, depending on the + experimental design), the result of this operation simply drops any + rows that have missing counts. For other element types, such as + synonymous variants, the counts are re-aggregated using only the complete cases in the underlying element type. """ if (self.is_barcodeid() or self.is_barcodevariant()) and label != "barcodes": @@ -387,7 +387,7 @@ def filter_counts(self, label): df.dropna(axis="index", how="any", inplace=True) self.store.put( "/main/{}/counts".format(label), - df.astype(float), + df.astype(int), format="table", data_columns=df.columns, ) @@ -414,7 +414,7 @@ def combine_barcode_maps(self): def calculate(self): """ - Wrapper method to calculate counts and enrichment scores + Wrapper method to calculate counts and enrichment scores for all data in the :py:class:`~selection.Selection`. """ if len(self.labels) == 0: @@ -819,7 +819,7 @@ def wt_plot(self, pdf): *pdf* is an open PdfPages instance. - Only created for selections that use WLS or OLS scoring and have a wild type specified. + Only created for selections that use WLS or OLS scoring and have a wild type specified. Uses :py:func:`~plots.fit_axes` for the plotting. """ self.logger.info("Creating wild type fit plot") @@ -1102,7 +1102,7 @@ def write_tsv(self): """ Write each table from the store to its own tab-separated file. - Files are written to a ``tsv`` directory in the default output location. + Files are written to a ``tsv`` directory in the default output location. File names are the HDF5 key with ``'_'`` substituted for ``'/'``. """ if self.tsv_requested: @@ -1114,7 +1114,7 @@ def write_tsv(self): def synonymous_variants(self): """ - Populate and return a dictionary mapping synonymous variants to the + Populate and return a dictionary mapping synonymous variants to the list of associated variants in ``/main/variants/counts``. """ mapping = dict() @@ -1258,7 +1258,7 @@ def barcodemap_mapping(self): def calc_outliers(self, label, minimum_components=4, log_chunksize=20000): """ - Test whether an element's individual components have significantly different + Test whether an element's individual components have significantly different scores from the element. Results are stored in ``'/main/