From 478912e1fa185054e5c59de352489d8f5865ca67 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 3 Dec 2020 11:42:25 -0800 Subject: [PATCH 1/4] Wrap `numcodecs.compat` `import` line --- zarr/storage.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index 956a8ecaaf..32e7adbbee 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -35,7 +35,10 @@ import uuid import time -from numcodecs.compat import ensure_bytes, ensure_contiguous_ndarray +from numcodecs.compat import ( + ensure_bytes, + ensure_contiguous_ndarray +) from numcodecs.registry import codec_registry from zarr.errors import ( From 1390983147f9e8b51fba2c455a43e89e76ab3b79 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 3 Dec 2020 11:42:27 -0800 Subject: [PATCH 2/4] Use `ensure_text` for LMDB decoding --- zarr/storage.py | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 32e7adbbee..dbf5e28b08 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -37,6 +37,7 @@ from numcodecs.compat import ( ensure_bytes, + ensure_text, ensure_contiguous_ndarray ) from numcodecs.registry import codec_registry @@ -1770,16 +1771,6 @@ def __contains__(self, key): return key in self.db -def _lmdb_decode_key_buffer(key): - # assume buffers=True - return key.tobytes().decode('ascii') - - -def _lmdb_decode_key_bytes(key): - # assume buffers=False - return key.decode('ascii') - - class LMDBStore(MutableMapping): """Storage class using LMDB. Requires the `lmdb `_ package to be installed. @@ -1869,10 +1860,6 @@ def __init__(self, path, buffers=True, **kwargs): self.db = lmdb.open(path, **kwargs) # store properties - if buffers: - self.decode_key = _lmdb_decode_key_buffer - else: - self.decode_key = _lmdb_decode_key_bytes self.buffers = buffers self.path = path self.kwargs = kwargs @@ -1933,13 +1920,13 @@ def items(self): with self.db.begin(buffers=self.buffers) as txn: with txn.cursor() as cursor: for k, v in cursor.iternext(keys=True, values=True): - yield self.decode_key(k), v + yield ensure_text(k, "ascii"), v def keys(self): with self.db.begin(buffers=self.buffers) as txn: with txn.cursor() as cursor: for k in cursor.iternext(keys=True, values=False): - yield self.decode_key(k) + yield ensure_text(k, "ascii") def values(self): with self.db.begin(buffers=self.buffers) as txn: From 818a34b89f27e98d5a32bef453516ba2740556ef Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 3 Dec 2020 11:42:28 -0800 Subject: [PATCH 3/4] Use `ensure_text` in DBMStore --- zarr/storage.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index dbf5e28b08..d5ed049fa3 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1583,12 +1583,6 @@ def _dbm_encode_key(key): return key -def _dbm_decode_key(key): - if hasattr(key, 'decode'): - key = key.decode('ascii') - return key - - # noinspection PyShadowingBuiltins class DBMStore(MutableMapping): """Storage class using a DBM-style database. @@ -1758,7 +1752,7 @@ def __eq__(self, other): ) def keys(self): - return (_dbm_decode_key(k) for k in iter(self.db.keys())) + return (ensure_text(k, "ascii") for k in iter(self.db.keys())) def __iter__(self): return self.keys() From ed1ffcabbbbd012f83bdbd5bdbd4bca674901488 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 3 Dec 2020 11:42:28 -0800 Subject: [PATCH 4/4] Replace encode `str`s to `bytes` directly --- zarr/storage.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index d5ed049fa3..8d918e736b 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1577,12 +1577,6 @@ def migrate_1to2(store): del store['attrs'] -def _dbm_encode_key(key): - if hasattr(key, 'encode'): - key = key.encode('ascii') - return key - - # noinspection PyShadowingBuiltins class DBMStore(MutableMapping): """Storage class using a DBM-style database. @@ -1728,17 +1722,20 @@ def __exit__(self, *args): self.close() def __getitem__(self, key): - key = _dbm_encode_key(key) + if isinstance(key, str): + key = key.encode("ascii") return self.db[key] def __setitem__(self, key, value): - key = _dbm_encode_key(key) + if isinstance(key, str): + key = key.encode("ascii") value = ensure_bytes(value) with self.write_mutex: self.db[key] = value def __delitem__(self, key): - key = _dbm_encode_key(key) + if isinstance(key, str): + key = key.encode("ascii") with self.write_mutex: del self.db[key] @@ -1761,7 +1758,8 @@ def __len__(self): return sum(1 for _ in self.keys()) def __contains__(self, key): - key = _dbm_encode_key(key) + if isinstance(key, str): + key = key.encode("ascii") return key in self.db @@ -1885,7 +1883,8 @@ def __exit__(self, *args): self.close() def __getitem__(self, key): - key = _dbm_encode_key(key) + if isinstance(key, str): + key = key.encode("ascii") # use the buffers option, should avoid a memory copy with self.db.begin(buffers=self.buffers) as txn: value = txn.get(key) @@ -1894,18 +1893,21 @@ def __getitem__(self, key): return value def __setitem__(self, key, value): - key = _dbm_encode_key(key) + if isinstance(key, str): + key = key.encode("ascii") with self.db.begin(write=True, buffers=self.buffers) as txn: txn.put(key, value) def __delitem__(self, key): - key = _dbm_encode_key(key) + if isinstance(key, str): + key = key.encode("ascii") with self.db.begin(write=True) as txn: if not txn.delete(key): raise KeyError(key) def __contains__(self, key): - key = _dbm_encode_key(key) + if isinstance(key, str): + key = key.encode("ascii") with self.db.begin(buffers=self.buffers) as txn: with txn.cursor() as cursor: return cursor.set_key(key)