Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/_static/enrich2_env.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: enrich2
dependencies:
- python=2.7
- python=3
- numpy
- scipy
- pandas
Expand Down
10 changes: 5 additions & 5 deletions enrich2/aligner.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class Aligner(object):
_END = 4 # end of traceback

def __init__(self, similarity=_simple_similarity):
similarity_keys = similarity.keys()
similarity_keys = list(similarity.keys())
if "gap" in similarity_keys:
similarity_keys.remove("gap")
for key in similarity_keys:
Expand Down Expand Up @@ -80,12 +80,12 @@ def align(self, seq1, seq2):
seq2 = seq2.upper()

# build matrix of scores/traceback information
for i in xrange(len(seq1) + 1):
for i in range(len(seq1) + 1):
self.matrix[i, 0] = (self.similarity["gap"] * i, Aligner._DEL)
for j in xrange(len(seq2) + 1):
for j in range(len(seq2) + 1):
self.matrix[0, j] = (self.similarity["gap"] * j, Aligner._INS)
for i in xrange(1, len(seq1) + 1):
for j in xrange(1, len(seq2) + 1):
for i in range(1, len(seq1) + 1):
for j in range(1, len(seq2) + 1):
match = (
self.matrix[i - 1, j - 1]["score"]
+ self.similarity[seq1[i - 1]][seq2[j - 1]],
Expand Down
2 changes: 1 addition & 1 deletion enrich2/barcode.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from __future__ import print_function

import logging
import sys
from .seqlib import SeqLib
Expand Down
4 changes: 2 additions & 2 deletions enrich2/barcodeid.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,15 +112,15 @@ def calculate(self):
"identifiers",
{
k: v
for k, v in df_dict.iteritems()
for k, v in df_dict.items()
if v >= self.identifier_min_count
},
raw=False,
)
del df_dict

# write the active subset of the BarcodeMap to the store
barcodes = barcode_identifiers.keys()
barcodes = list(barcode_identifiers.keys())
barcode_identifiers = pd.DataFrame(
{"value": [barcode_identifiers[bc] for bc in barcodes]}, index=barcodes
)
Expand Down
9 changes: 5 additions & 4 deletions enrich2/barcodemap.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,18 +39,19 @@ def __init__(self, mapfile, is_variant=False):
try:
ext = os.path.splitext(mapfile)[-1].lower()
if ext in (".bz2"):
handle = bz2.BZ2File(mapfile, "rU")
handle = bz2.BZ2File(mapfile, "r")
elif ext in (".gz"):
handle = gzip.GzipFile(mapfile, "rU")
handle = gzip.GzipFile(mapfile, "r")
else:
handle = open(mapfile, "rU")
handle = open(mapfile, "r")
except IOError:
raise IOError(
"Could not open barcode map file '{}' [{}]".format(mapfile, self.name)
)

# handle each line
for line in handle:
line = line.decode("utf-8")
# skip comments and whitespace-only lines
if len(line.strip()) == 0 or line[0] == "#":
continue
Expand Down Expand Up @@ -86,7 +87,7 @@ def __init__(self, mapfile, is_variant=False):
if self[barcode] != value:
raise ValueError(
"Barcode '{}' assigned to multiple "
"unique values".format(barcode, self.name)
"unique values: {}".format(barcode, self.name)
)
else:
self[barcode] = value
Expand Down
4 changes: 2 additions & 2 deletions enrich2/barcodevariant.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,13 @@ def calculate(self):
# save counts, filtering based on the min count
self.save_counts(
"variants",
{k: v for k, v in df_dict.iteritems() if v >= self.variant_min_count},
{k: v for k, v in df_dict.items() if v >= self.variant_min_count},
raw=False,
)
del df_dict

# write the active subset of the BarcodeMap to the store
barcodes = barcode_variants.keys()
barcodes = list(barcode_variants.keys())
barcode_variants = pd.DataFrame(
{"value": [barcode_variants[bc] for bc in barcodes]}, index=barcodes
)
Expand Down
34 changes: 17 additions & 17 deletions enrich2/config_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,90 +9,90 @@

def is_experiment(cfg):
    """
    Check if the given configuration object specifies an
    :py:class:`~enrich2.experiment.Experiment`.

    The check is purely structural: the object counts as an experiment
    when it has a ``"conditions"`` key.

    Args:
        cfg (dict): decoded JSON object

    Returns:
        bool: True if `cfg` specifies an
        :py:class:`~enrich2.experiment.Experiment`, else False.

    """
    # Membership on the keys view is a hash lookup; copying the keys into a
    # list first only adds a linear scan. Calling ``.keys()`` explicitly keeps
    # the AttributeError for input that is not a mapping.
    return "conditions" in cfg.keys()


def is_condition(cfg):
    """
    Check if the given configuration object specifies a
    :py:class:`~enrich2.condition.Condition`.

    The check is purely structural: the object counts as a condition when
    it has a ``"selections"`` key.

    Args:
        cfg (dict): decoded JSON object

    Returns:
        bool: True if `cfg` specifies a
        :py:class:`~enrich2.condition.Condition`, else False.

    """
    # Membership on the keys view is a hash lookup; copying the keys into a
    # list first only adds a linear scan. Calling ``.keys()`` explicitly keeps
    # the AttributeError for input that is not a mapping.
    return "selections" in cfg.keys()


def is_selection(cfg):
    """
    Check if the given configuration object specifies a
    :py:class:`~enrich2.selection.Selection`.

    The check is purely structural: the object counts as a selection when
    it has a ``"libraries"`` key.

    Args:
        cfg (dict): decoded JSON object

    Returns:
        bool: True if `cfg` specifies a
        :py:class:`~enrich2.selection.Selection`, else False.

    """
    # Membership on the keys view is a hash lookup; copying the keys into a
    # list first only adds a linear scan. Calling ``.keys()`` explicitly keeps
    # the AttributeError for input that is not a mapping.
    return "libraries" in cfg.keys()


def is_seqlib(cfg):
    """
    Check if the given configuration object specifies a
    :py:class:`~enrich2.seqlib.SeqLib` derived object.

    The check is purely structural: the object counts as a sequencing
    library when it has a ``"fastq"`` key or an ``"identifiers"`` key.

    Args:
        cfg (dict): decoded JSON object

    Returns:
        bool: True if `cfg` specifies a :py:class:`~enrich2.seqlib.SeqLib`
        derived object, else False.

    """
    # Fetch the keys view once; membership on it is a hash lookup, so there
    # is no need to copy the keys into a list for each test. Calling
    # ``.keys()`` explicitly keeps the AttributeError for non-mapping input.
    keys = cfg.keys()
    return "fastq" in keys or "identifiers" in keys


def seqlib_type(cfg):
"""
Get the type of :py:class:`~enrich2.seqlib.SeqLib` derived object
Get the type of :py:class:`~enrich2.seqlib.SeqLib` derived object
specified by the configuration object.

Args:
cfg (dict): decoded JSON object

Returns:
str: The class name of the :py:class:`~seqlib.seqlib.SeqLib` derived
str: The class name of the :py:class:`~seqlib.seqlib.SeqLib` derived
object specified by `cfg`.

Raises:
Expand Down Expand Up @@ -123,15 +123,15 @@ def seqlib_type(cfg):

def element_type(cfg):
"""
Get the type of :py:class:`~enrich2.storemanager.StoreManager` derived
Get the type of :py:class:`~enrich2.storemanager.StoreManager` derived
object specified by the configuration object.

Args:
cfg (dict): decoded JSON object

Returns:
str: The class name of the
:py:class:`~enrich2.storemanager.StoreManager` derived object specified
str: The class name of the
:py:class:`~enrich2.storemanager.StoreManager` derived object specified
by `cfg`.

Raises:
Expand Down
4 changes: 2 additions & 2 deletions enrich2/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,10 +144,10 @@ def fill_position_gaps(positions, gap_size):

# fill in short gaps
fill = set()
for i in xrange(len(positions) - 1):
for i in range(len(positions) - 1):
delta = positions[i + 1] - positions[i]
if delta > 1 and delta <= gap_size:
fill.update(positions[i] + n + 1 for n in xrange(delta))
fill.update(positions[i] + n + 1 for n in range(delta))
fill.update(positions)

return sorted(list(fill))
Expand Down
29 changes: 15 additions & 14 deletions enrich2/experiment.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from __future__ import print_function

import logging
import pandas as pd
import numpy as np
Expand Down Expand Up @@ -130,16 +130,16 @@ def validate(self):

def is_coding(self):
    """
    Return ``True`` if every :py:class:`~selection.Selection` in the
    :py:class:`~experiment.Experiment` reports ``is_coding()`` as true,
    else ``False``.

    An experiment with no selections yields ``True``.
    """
    for selection in self.selection_list():
        # Stop at the first selection that is not coding.
        if not selection.is_coding():
            return False
    return True

def has_wt_sequence(self):
    """
    Return ``True`` if every :py:class:`~selection.Selection` in the
    :py:class:`~experiment.Experiment` reports ``has_wt_sequence()`` as
    true, else ``False``.

    An experiment with no selections yields ``True``.
    """
    for selection in self.selection_list():
        # Stop at the first selection without a wild type sequence.
        if not selection.has_wt_sequence():
            return False
    return True
Expand Down Expand Up @@ -172,15 +172,15 @@ def combine_barcode_maps(self):
If multiple variants or IDs map to the same barcode, only the first one
will be present in the barcode map table.

The ``'/main/barcodemap'`` table is not created if no
The ``'/main/barcodemap'`` table is not created if no
:py:class:`~selection.Selection` has barcode map information.
"""
if self.check_store("/main/barcodemap"):
return

bcm = None
for sel in self.selection_list():
if "/main/barcodemap" in sel.store.keys():
if "/main/barcodemap" in list(sel.store.keys()):
if bcm is None:
bcm = sel.store["/main/barcodemap"]
else:
Expand Down Expand Up @@ -218,7 +218,7 @@ def calc_counts(self, label):
selections_index.extend([sel.name] * len(sel.timepoints))
values_index.extend(["c_{}".format(x) for x in sorted(sel.timepoints)])
columns = pd.MultiIndex.from_tuples(
zip(conditions_index, selections_index, values_index),
list(zip(conditions_index, selections_index, values_index)),
names=["condition", "selection", "timepoint"],
)

Expand Down Expand Up @@ -251,9 +251,10 @@ def calc_counts(self, label):
"/main/{}/counts_unfiltered" "".format(label)
)
for tp in sel.timepoints:
data.loc[:][cnd.name, sel.name, "c_{}".format(tp)] = sel_data[
data[(cnd.name, sel.name, "c_{}".format(tp))] = sel_data[
"c_{}".format(tp)
]

self.store.put("/main/{}/counts".format(label), data, format="table")

def calc_shared_full(self, label):
Expand Down Expand Up @@ -282,7 +283,7 @@ def calc_shared_full(self, label):
selections_index.extend([sel.name] * len(values_list))
values_index.extend(sorted(values_list))
columns = pd.MultiIndex.from_tuples(
zip(conditions_index, selections_index, values_index),
list(zip(conditions_index, selections_index, values_index)),
names=["condition", "selection", "value"],
)

Expand All @@ -308,7 +309,7 @@ def calc_shared_full(self, label):
self.logger.info(
"Populating Experiment data frame with scores ({})".format(label)
)
data = pd.DataFrame(index=combined, columns=columns)
data = pd.DataFrame(index=combined, columns=columns).astype(float)
for cnd in self.children:
for sel in cnd.children:
sel_data = sel.store.select("/main/{}/scores".format(label))
Expand Down Expand Up @@ -427,7 +428,7 @@ def calc_pvalues_wt(self, label):
[sorted(self.child_names()), sorted(["z", "pvalue_raw"])],
names=["condition", "value"],
)
result_df = pd.DataFrame(index=data.index, columns=columns)
result_df = pd.DataFrame(index=data.index, columns=columns, dtype=float)

condition_labels = data.columns.levels[0]
for cnd in condition_labels:
Expand Down Expand Up @@ -468,7 +469,7 @@ def calc_pvalues_pairwise(self, label):
cnd2_index.extend([cnd2] * len(values_list))
values_index.extend(sorted(values_list))
columns = pd.MultiIndex.from_tuples(
zip(cnd1_index, cnd2_index, values_index),
list(zip(cnd1_index, cnd2_index, values_index)),
names=["condition1", "condition2", "value"],
)

Expand Down Expand Up @@ -528,7 +529,7 @@ def write_tsv(self):
"""
if self.tsv_requested:
self.logger.info("Generating tab-separated output files")
for k in self.store.keys():
for k in list(self.store.keys()):
self.write_table_tsv(k)
for s in self.selection_list():
s.write_tsv()
Expand Down
Loading