From e8e5d4854e5121ed8b2ab3552dd7cc79b261ccfa Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 1 Jul 2020 09:05:52 -0700 Subject: [PATCH] Try to converge on internal types consistency. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While it is convenient for user on high level API to be able to pass arround many types (array, bytes, or even dictionary, list, strings), It can make it relatively hard to follow internally if the types are not consistant. In particular this can relatively complicate weaving the v3 spec into the core of Zarr and add a number of conditional if we are not sure of the type we get. This try to be a little more consistent in what the implementation _emits_ right now this try to make sure that all the core only try to converge toward bytes, so that store users can expect to alway get bytes back from Zarr-Python stores, and as much as possible give bytes back, and mostly affects the ConsolidatedMetadataStore which used to – unlike other store – return objects instead of bytes, as well as the tests directly testing the stores. We of course can't be too strict as some existing store may have stored some values as not-bytes (pickle), and we still want to support this for the time being,but with this changes, we can add many `assert isinstance(x, bytes_like)` in many places ans still have the test suite to pass. In particular in all the decode/encode metadata functions, and Group class. One extra change in test is the removal of an old conditional import of h5py in the test suite which is unnecessary since introduction of the pytest's `importorskip` for h5py tests. --- docs/release.rst | 5 +++++ zarr/core.py | 2 +- zarr/storage.py | 10 ++++++++-- zarr/tests/test_convenience.py | 6 ------ zarr/tests/test_meta.py | 8 ++++---- zarr/tests/test_storage.py | 28 ++++++++++++++++++++++------ 6 files changed, 40 insertions(+), 19 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index 5ebb52bdab..55ddae4aa0 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -5,6 +5,11 @@ Release notes Next release ------------ +* For consistency across stores, the `ConsolidatedMetadataStore` will return + bytes instead of objects for metadata. Internally zarr tries to be more type + stable and attempt to always pass bytes-likes object when storing objects on + stores. + * Fix minor bug in `N5Store`. By :user:`gsakkis`, :issue:`550`. diff --git a/zarr/core.py b/zarr/core.py index 6edcbb475f..a034b0f637 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -1489,7 +1489,7 @@ def _set_basic_selection_zd(self, selection, value, fields=None): chunk[selection] = value # encode and store - cdata = self._encode_chunk(chunk) + cdata = ensure_bytes(self._encode_chunk(chunk)) self.chunk_store[ckey] = cdata def _set_basic_selection_nd(self, selection, value, fields=None): diff --git a/zarr/storage.py b/zarr/storage.py index 738bf548ee..6857910cc9 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -44,7 +44,7 @@ from zarr.meta import encode_array_metadata, encode_group_metadata from zarr.util import (buffer_size, json_loads, nolock, normalize_chunks, normalize_dtype, normalize_fill_value, normalize_order, - normalize_shape, normalize_storage_path) + normalize_shape, normalize_storage_path, json_dumps) __doctest_requires__ = { ('RedisStore', 'RedisStore.*'): ['redis'], @@ -2479,6 +2479,10 @@ class ConsolidatedMetadataStore(MutableMapping): .. versionadded:: 2.3 + .. versionchanged:: 2.5 + + __getitem__ will now return bytes for metadata for consistency across stores. + .. note:: This is an experimental feature. Parameters @@ -2507,7 +2511,9 @@ def __init__(self, store, metadata_key='.zmetadata'): consolidated_format) # decode metadata - self.meta_store = meta['metadata'] + self.meta_store = {} + for k, v in meta["metadata"].items(): + self.meta_store[k] = json_dumps(v) def __getitem__(self, key): return self.meta_store[key] diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py index e8e0f2028c..fa6df5310b 100644 --- a/zarr/tests/test_convenience.py +++ b/zarr/tests/test_convenience.py @@ -697,12 +697,6 @@ def test_logging(self): copy(source['foo'], dest, dry_run=True, log=True) -try: - import h5py -except ImportError: # pragma: no cover - h5py = None - - def temp_h5f(): h5py = pytest.importorskip("h5py") fn = tempfile.mktemp() diff --git a/zarr/tests/test_meta.py b/zarr/tests/test_meta.py index 6c094b4dfa..2983526ceb 100644 --- a/zarr/tests/test_meta.py +++ b/zarr/tests/test_meta.py @@ -424,15 +424,15 @@ def test_encode_decode_dtype(): def test_decode_group(): # typical - b = '''{ + b = b'''{ "zarr_format": %s - }''' % ZARR_FORMAT + }''' % str(ZARR_FORMAT).encode() meta = decode_group_metadata(b) assert ZARR_FORMAT == meta['zarr_format'] # unsupported format - b = '''{ + b = b'''{ "zarr_format": %s - }''' % (ZARR_FORMAT - 1) + }''' % str(ZARR_FORMAT - 1).encode() with pytest.raises(MetadataError): decode_group_metadata(b) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index a0c8412e23..f5a3677866 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -30,6 +30,7 @@ attrs_key, default_compressor, getsize, group_meta_key, init_array, init_group, migrate_1to2) from zarr.tests.util import CountingDict, skip_test_env_var +from zarr.util import json_dumps @contextmanager @@ -1627,11 +1628,26 @@ def test_read_write(self): # setup store with consolidated metdata store = dict() consolidated = { - 'zarr_consolidated_format': 1, - 'metadata': { - 'foo': 'bar', - 'baz': 42, - } + "zarr_consolidated_format": 1, + "metadata": { + ".zgroup": {"zarr_format": 2}, + "g2/arr/.zarray": { + "chunks": [5, 5], + "compressor": { + "blocksize": 0, + "clevel": 5, + "cname": "lz4", + "id": "blosc", + "shuffle": 1, + }, + "dtype": "