From 22ef4899af0e4e89567f1c299ae4b27ac8f5ca49 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 15 Feb 2019 20:14:55 -0500 Subject: [PATCH 01/13] Change default store to `DictStore` Instead of using a Python `dict` as the `default` store for a Zarr `Array`, use the `DictStore`. This ensures that all blobs will be represented as `bytes` regardless of what the user provided as data. Thus things like comparisons of stores will work well in the default case. --- zarr/creation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zarr/creation.py b/zarr/creation.py index 0184a4a5da..b46adc5b38 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -7,7 +7,7 @@ from zarr.core import Array -from zarr.storage import (DirectoryStore, init_array, contains_array, contains_group, +from zarr.storage import (DictStore, DirectoryStore, init_array, contains_array, contains_group, default_compressor, normalize_storage_path, ZipStore) from numcodecs.registry import codec_registry from zarr.errors import err_contains_array, err_contains_group, err_array_not_found @@ -125,7 +125,7 @@ def create(shape, chunks=True, dtype=None, compressor='default', return z -def normalize_store_arg(store, clobber=False, default=dict): +def normalize_store_arg(store, clobber=False, default=DictStore): if store is None: return default() elif isinstance(store, str): From 1b0930f7f82286aff30b5472587299ae7e21dde9 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 15 Feb 2019 20:14:56 -0500 Subject: [PATCH 02/13] Update `DictStore` docs to note `Array` uses it --- zarr/storage.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 656b05acf8..4849e4a42b 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -476,12 +476,11 @@ class DictStore(MutableMapping): >>> type(g.store) - Note that the default class when creating an array is the built-in - :class:`dict` class, i.e.:: + Also this is the default class when creating an 
array. E.g.:: >>> z = zarr.zeros(100) >>> type(z.store) - + Notes ----- From fdced9e11a0e2e967b05d63b8fcc4145375cafab Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 15 Feb 2019 20:14:57 -0500 Subject: [PATCH 03/13] Update `Array`'s `info` examples As we are now using `DictStore` to back the `Array`, we can correctly measure how much memory it is using. So update the examples in `info` and the tutorial to show how much memory is being used. Also update the store type listed in info as well. --- docs/tutorial.rst | 8 ++++---- zarr/core.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 3e8e9bac66..9422453375 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -176,7 +176,7 @@ print some diagnostics, e.g.:: Read-only : False Compressor : Blosc(cname='zstd', clevel=3, shuffle=BITSHUFFLE, : blocksize=0) - Store type : builtins.dict + Store type : zarr.storage.DictStore No. bytes : 400000000 (381.5M) No. bytes stored : 3379344 (3.2M) Storage ratio : 118.4 @@ -268,7 +268,7 @@ Here is an example using a delta filter with the Blosc compressor:: Read-only : False Filter [0] : Delta(dtype=' Date: Fri, 15 Feb 2019 20:39:46 -0500 Subject: [PATCH 04/13] Raise if Array's store is the builtin Python dict As we prefer to use the better behaved `DictStore`, raise an error if `dict` is used. Should also help us smoke out where in our tests `dict` is used and change it. 
--- zarr/core.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/zarr/core.py b/zarr/core.py index d3d4ea01c5..944eb1a1d0 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -108,6 +108,9 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, # N.B., expect at this point store is fully initialized with all # configuration metadata fully specified and normalized + if isinstance(store, dict): + raise TypeError("Please use Zarr's DictStore instead") + self._store = store self._chunk_store = chunk_store self._path = normalize_storage_path(path) From 3d7329f9979f93d842abdfbc92711919bbd7d6f4 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 15 Feb 2019 20:45:41 -0500 Subject: [PATCH 05/13] Raise if Group's store is the builtin Python dict As we prefer to use the better behaved `DictStore`, raise an error if `dict` is used. Should also help us smoke out where in our tests `dict` is used and change it. --- zarr/hierarchy.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 17821130eb..6be0e9dc99 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -91,6 +91,9 @@ class Group(MutableMapping): def __init__(self, store, path=None, read_only=False, chunk_store=None, cache_attrs=True, synchronizer=None): + if isinstance(store, dict): + raise TypeError("Please use Zarr's DictStore instead") + self._store = store self._chunk_store = chunk_store self._path = normalize_storage_path(path) From e596c4fc7ab32bcdee36eedc38e22c8e9afc5a58 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 15 Feb 2019 20:50:13 -0500 Subject: [PATCH 06/13] Use DictStore in Array tests --- zarr/tests/test_core.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 97842a6f6c..a2f8283be9 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -15,7 +15,7 @@ import pytest -from zarr.storage import 
(DirectoryStore, init_array, init_group, NestedDirectoryStore, +from zarr.storage import (DictStore, DirectoryStore, init_array, init_group, NestedDirectoryStore, DBMStore, LMDBStore, SQLiteStore, atexit_rmtree, atexit_rmglob, LRUStoreCache) from zarr.core import Array @@ -41,7 +41,7 @@ class TestArray(unittest.TestCase): def test_array_init(self): # normal initialization - store = dict() + store = DictStore() init_array(store, shape=100, chunks=10) a = Array(store) assert isinstance(a, Array) @@ -54,7 +54,7 @@ def test_array_init(self): assert "8fecb7a17ea1493d9c1430d04437b4f5b0b34985" == a.hexdigest() # initialize at path - store = dict() + store = DictStore() init_array(store, shape=100, chunks=10, path='foo/bar') a = Array(store, path='foo/bar') assert isinstance(a, Array) @@ -67,18 +67,18 @@ def test_array_init(self): assert "8fecb7a17ea1493d9c1430d04437b4f5b0b34985" == a.hexdigest() # store not initialized - store = dict() + store = DictStore() with pytest.raises(ValueError): Array(store) # group is in the way - store = dict() + store = DictStore() init_group(store, path='baz') with pytest.raises(ValueError): Array(store, path='baz') def create_array(self, read_only=False, **kwargs): - store = dict() + store = DictStore() kwargs.setdefault('compressor', Zlib(level=1)) cache_metadata = kwargs.pop('cache_metadata', True) cache_attrs = kwargs.pop('cache_attrs', True) @@ -1255,7 +1255,7 @@ class TestArrayWithPath(TestArray): @staticmethod def create_array(read_only=False, **kwargs): - store = dict() + store = DictStore() cache_metadata = kwargs.pop('cache_metadata', True) cache_attrs = kwargs.pop('cache_attrs', True) init_array(store, path='foo/bar', **kwargs) @@ -1308,9 +1308,9 @@ class TestArrayWithChunkStore(TestArray): @staticmethod def create_array(read_only=False, **kwargs): - store = dict() + store = DictStore() # separate chunk store - chunk_store = dict() + chunk_store = DictStore() cache_metadata = kwargs.pop('cache_metadata', True) cache_attrs = 
kwargs.pop('cache_attrs', True) init_array(store, chunk_store=chunk_store, **kwargs) @@ -1516,7 +1516,7 @@ def test_nbytes_stored(self): class TestArrayWithNoCompressor(TestArray): def create_array(self, read_only=False, **kwargs): - store = dict() + store = DictStore() kwargs.setdefault('compressor', None) cache_metadata = kwargs.pop('cache_metadata', True) cache_attrs = kwargs.pop('cache_attrs', True) @@ -1551,7 +1551,7 @@ def test_hexdigest(self): class TestArrayWithBZ2Compressor(TestArray): def create_array(self, read_only=False, **kwargs): - store = dict() + store = DictStore() compressor = BZ2(level=1) kwargs.setdefault('compressor', compressor) cache_metadata = kwargs.pop('cache_metadata', True) @@ -1587,7 +1587,7 @@ def test_hexdigest(self): class TestArrayWithBloscCompressor(TestArray): def create_array(self, read_only=False, **kwargs): - store = dict() + store = DictStore() compressor = Blosc(cname='zstd', clevel=1, shuffle=1) kwargs.setdefault('compressor', compressor) cache_metadata = kwargs.pop('cache_metadata', True) @@ -1630,7 +1630,7 @@ def test_hexdigest(self): class TestArrayWithLZMACompressor(TestArray): def create_array(self, read_only=False, **kwargs): - store = dict() + store = DictStore() compressor = LZMA(preset=1) kwargs.setdefault('compressor', compressor) cache_metadata = kwargs.pop('cache_metadata', True) @@ -1667,7 +1667,7 @@ class TestArrayWithFilters(TestArray): @staticmethod def create_array(read_only=False, **kwargs): - store = dict() + store = DictStore() dtype = kwargs.get('dtype', None) filters = [ Delta(dtype=dtype), @@ -1710,7 +1710,7 @@ def test_astype_no_filters(self): dtype = np.dtype(np.int8) astype = np.dtype(np.float32) - store = dict() + store = DictStore() init_array(store, shape=shape, chunks=10, dtype=dtype) data = np.arange(np.prod(shape), dtype=dtype).reshape(shape) @@ -1834,7 +1834,7 @@ class TestArrayNoCache(TestArray): @staticmethod def create_array(read_only=False, **kwargs): - store = dict() + store = 
DictStore() kwargs.setdefault('compressor', Zlib(level=1)) cache_metadata = kwargs.pop('cache_metadata', True) cache_attrs = kwargs.pop('cache_attrs', True) @@ -1906,7 +1906,7 @@ class TestArrayWithStoreCache(TestArray): @staticmethod def create_array(read_only=False, **kwargs): - store = LRUStoreCache(dict(), max_size=None) + store = LRUStoreCache(DictStore(), max_size=None) kwargs.setdefault('compressor', Zlib(level=1)) cache_metadata = kwargs.pop('cache_metadata', True) cache_attrs = kwargs.pop('cache_attrs', True) From c53a995eaedd9a4fb4e0bdc41723ed90f119394a Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 15 Feb 2019 20:50:14 -0500 Subject: [PATCH 07/13] Use DictStore in Group tests --- zarr/tests/test_hierarchy.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 369cf4b55a..d1cbb842ac 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -41,7 +41,7 @@ class TestGroup(unittest.TestCase): @staticmethod def create_store(): # can be overridden in sub-classes - return dict(), None + return DictStore(), None def create_group(self, store=None, path=None, read_only=False, chunk_store=None, synchronizer=None): @@ -948,7 +948,7 @@ class TestGroupWithChunkStore(TestGroup): @staticmethod def create_store(): - return dict(), dict() + return DictStore(), DictStore() def test_chunk_store(self): # setup @@ -979,7 +979,7 @@ class TestGroupWithStoreCache(TestGroup): @staticmethod def create_store(): - store = LRUStoreCache(dict(), max_size=None) + store = LRUStoreCache(DictStore(), max_size=None) return store, None @@ -993,13 +993,13 @@ def test_group(): assert '/' == g.name # usage with custom store - store = dict() + store = DictStore() g = group(store=store) assert isinstance(g, Group) assert store is g.store # overwrite behaviour - store = dict() + store = DictStore() init_array(store, shape=100, chunks=10) with pytest.raises(ValueError): 
group(store) From 732427c7f00eecc8f9780652144013ebe1586027 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sat, 16 Feb 2019 18:02:01 -0500 Subject: [PATCH 08/13] Drop `ensure_bytes` line for `dict` stores As `dict` stores are not supported in this changeset, there is no need for this specific workaround for them. Given this, go ahead and drop the workaround. --- zarr/core.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 944eb1a1d0..58a3c11001 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -1781,10 +1781,6 @@ def _encode_chunk(self, chunk): else: cdata = chunk - # ensure in-memory data is immutable and easy to compare - if isinstance(self.chunk_store, dict): - cdata = ensure_bytes(cdata) - return cdata def __repr__(self): From 2fba54680e9f3b2221926a4299c0842c8f0f126c Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sat, 16 Feb 2019 18:05:11 -0500 Subject: [PATCH 09/13] Use DictStore in synchronization tests --- zarr/tests/test_sync.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zarr/tests/test_sync.py b/zarr/tests/test_sync.py index 1593845f1c..c85300716f 100644 --- a/zarr/tests/test_sync.py +++ b/zarr/tests/test_sync.py @@ -19,7 +19,7 @@ from zarr.sync import ThreadSynchronizer, ProcessSynchronizer from zarr.core import Array from zarr.attrs import Attributes -from zarr.storage import init_array, DirectoryStore, init_group, atexit_rmtree +from zarr.storage import init_array, DictStore, DirectoryStore, init_group, atexit_rmtree from zarr.hierarchy import Group @@ -100,7 +100,7 @@ def test_parallel_append(self): class TestArrayWithThreadSynchronizer(TestArray, MixinArraySyncTests): def create_array(self, read_only=False, **kwargs): - store = dict() + store = DictStore() cache_metadata = kwargs.pop('cache_metadata', True) cache_attrs = kwargs.pop('cache_attrs', True) init_array(store, **kwargs) From 4dedeba701f7059d8254ba8d3c6d0824f0c02397 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sat, 16 Feb
2019 18:14:03 -0500 Subject: [PATCH 10/13] Drop unsupported test --- zarr/tests/test_core.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index a2f8283be9..d5f6eafa89 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -1299,10 +1299,6 @@ def test_nbytes_stored(self): if k.startswith('foo/bar/')) assert expect_nbytes_stored == z.nbytes_stored - # mess with store - z.store[z._key_prefix + 'foo'] = list(range(10)) - assert -1 == z.nbytes_stored - class TestArrayWithChunkStore(TestArray): @@ -1353,10 +1349,6 @@ def test_nbytes_stored(self): for v in z.chunk_store.values()) assert expect_nbytes_stored == z.nbytes_stored - # mess with store - z.chunk_store[z._key_prefix + 'foo'] = list(range(10)) - assert -1 == z.nbytes_stored - class TestArrayWithDirectoryStore(TestArray): From 819b9ad78d141ed3f52553172a99abb1572e4310 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sat, 16 Feb 2019 18:14:04 -0500 Subject: [PATCH 11/13] Test custom chunk store to determine size --- zarr/tests/test_core.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index d5f6eafa89..fd8e1d2146 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -1822,6 +1822,26 @@ def test_nbytes_stored(self): assert -1 == z.nbytes_stored +class TestArrayWithCustomChunkStore(TestArray): + + @staticmethod + def create_array(read_only=False, **kwargs): + store = CustomMapping() + kwargs["chunk_store"] = CustomMapping() + kwargs.setdefault('compressor', Zlib(1)) + cache_metadata = kwargs.pop('cache_metadata', True) + cache_attrs = kwargs.pop('cache_attrs', True) + init_array(store, **kwargs) + return Array(store, read_only=read_only, cache_metadata=cache_metadata, + cache_attrs=cache_attrs) + + def test_nbytes_stored(self): + z = self.create_array(shape=1000, chunks=100) + assert -1 == z.nbytes_stored + z[:] = 42 + assert -1 == z.nbytes_stored + + 
class TestArrayNoCache(TestArray): @staticmethod From d83a982fd39150579841547b069ff791ca60b393 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sat, 16 Feb 2019 21:41:54 -0500 Subject: [PATCH 12/13] Update example in `create` to use `DictStore` --- zarr/creation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/creation.py b/zarr/creation.py index b46adc5b38..707f16fef7 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -98,7 +98,7 @@ def create(shape, chunks=True, dtype=None, compressor='default', Example with some filters, and also storing chunks separately from metadata:: >>> from numcodecs import Quantize, Adler32 - >>> store, chunk_store = dict(), dict() + >>> store, chunk_store = DictStore(), DictStore() >>> z = zarr.create((10000, 10000), chunks=(1000, 1000), dtype='f8', ... filters=[Quantize(digits=2, dtype='f8'), Adler32()], ... store=store, chunk_store=chunk_store) From 87af9ca34fff53924deb66494037e038c41b876d Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sat, 16 Feb 2019 21:42:42 -0500 Subject: [PATCH 13/13] Use `DictStore` in `info` test --- zarr/tests/test_info.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/tests/test_info.py b/zarr/tests/test_info.py index e0d2330d6b..7ea7859dca 100644 --- a/zarr/tests/test_info.py +++ b/zarr/tests/test_info.py @@ -9,7 +9,7 @@ def test_info(): # setup - g = zarr.group(store=dict(), chunk_store=dict(), + g = zarr.group(store=zarr.DictStore(), chunk_store=zarr.DictStore(), synchronizer=zarr.ThreadSynchronizer()) g.create_group('foo') z = g.zeros('bar', shape=10, filters=[numcodecs.Adler32()])