From e051b17665f2b8b02006325aa7dee6391696df2c Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 15 Jul 2022 01:20:48 -0400 Subject: [PATCH 1/9] support path='/' for zarr v3 to create a root array or group v3 spec states path = '/' for arrays gives /meta/root.array.json path = '/' for groups gives /meta/root.array.json In this implementation path = None or path = '' will also result in a root array. Creation routines default to path=None, so this makes it so that the path argument does not have to be manually specified. --- zarr/_storage/store.py | 10 ++--- zarr/creation.py | 2 +- zarr/hierarchy.py | 3 -- zarr/tests/test_creation.py | 83 +++++++++++++++++++++---------------- zarr/util.py | 3 ++ 5 files changed, 56 insertions(+), 45 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 6faf4a1250..9e265cf383 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -420,11 +420,11 @@ def _listdir_from_keys(store: BaseStore, path: Optional[str] = None) -> List[str def _prefix_to_array_key(store: StoreLike, prefix: str) -> str: if getattr(store, "_store_version", 2) == 3: + sfx = _get_metadata_suffix(store) # type: ignore if prefix: - sfx = _get_metadata_suffix(store) # type: ignore key = meta_root + prefix.rstrip("/") + ".array" + sfx else: - raise ValueError("prefix must be supplied to get a v3 array key") + key = meta_root[:-1] + '.array' + sfx else: key = prefix + array_meta_key return key @@ -432,11 +432,11 @@ def _prefix_to_array_key(store: StoreLike, prefix: str) -> str: def _prefix_to_group_key(store: StoreLike, prefix: str) -> str: if getattr(store, "_store_version", 2) == 3: + sfx = _get_metadata_suffix(store) # type: ignore if prefix: - sfx = _get_metadata_suffix(store) # type: ignore key = meta_root + prefix.rstrip('/') + ".group" + sfx else: - raise ValueError("prefix must be supplied to get a v3 group key") + key = meta_root[:-1] + '.group' + sfx else: key = prefix + group_meta_key return key @@ -449,7 +449,7 @@ def _prefix_to_attrs_key(store: StoreLike, prefix: str) -> str: if prefix: key = meta_root + prefix.rstrip('/') + ".array" + sfx else: - raise ValueError("prefix must be supplied to get a v3 array key") + key = meta_root[:-1] + '.array' + sfx else: key = prefix + attrs_key return key diff --git a/zarr/creation.py b/zarr/creation.py index e77f26b3e2..688cdcc858 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -155,7 +155,7 @@ def create(shape, chunks=True, dtype=None, compressor='default', dimension_separator = normalize_dimension_separator(dimension_separator) if zarr_version > 2 and path is None: - raise ValueError("path must be supplied to initialize a zarr v3 array") + path = '/' # initialize array metadata init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index b9052408b4..5b71737986 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -1333,9 +1333,6 @@ def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=N ) store_version = getattr(store, '_store_version', 2) - if store_version == 3 and path is None: - raise ValueError("path must be supplied to initialize a zarr v3 group") - path = normalize_storage_path(path) # ensure store is initialized diff --git a/zarr/tests/test_creation.py b/zarr/tests/test_creation.py index 48d6aee4f5..a9a3226fe3 100644 --- a/zarr/tests/test_creation.py +++ b/zarr/tests/test_creation.py @@ -53,18 +53,19 @@ def __getitem__(self, item): return self.data[item] -def _init_creation_kwargs(zarr_version): +def _init_creation_kwargs(zarr_version, at_root=False): kwargs = {'zarr_version': zarr_version} - if zarr_version == 3: + if not at_root: kwargs['path'] = 'array' return kwargs @pytest.mark.parametrize('zarr_version', _VERSIONS) -def test_array(zarr_version): +@pytest.mark.parametrize('at_root', [False, True]) +def test_array(zarr_version, at_root): expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version - kwargs = _init_creation_kwargs(zarr_version) + kwargs = _init_creation_kwargs(zarr_version, at_root) # with numpy array a = np.arange(100) @@ -121,16 +122,18 @@ def test_array(zarr_version): @pytest.mark.parametrize('zarr_version', _VERSIONS) -def test_empty(zarr_version): - kwargs = _init_creation_kwargs(zarr_version) +@pytest.mark.parametrize('at_root', [False, True]) +def test_empty(zarr_version, at_root): + kwargs = _init_creation_kwargs(zarr_version, at_root) z = empty(100, chunks=10, **kwargs) assert (100,) == z.shape assert (10,) == z.chunks @pytest.mark.parametrize('zarr_version', _VERSIONS) -def test_zeros(zarr_version): - kwargs = _init_creation_kwargs(zarr_version) +@pytest.mark.parametrize('at_root', [False, True]) +def test_zeros(zarr_version, at_root): + kwargs = _init_creation_kwargs(zarr_version, at_root) z = zeros(100, chunks=10, **kwargs) assert (100,) == z.shape assert (10,) == z.chunks @@ -138,8 +141,9 @@ def test_zeros(zarr_version): @pytest.mark.parametrize('zarr_version', _VERSIONS) -def test_ones(zarr_version): - kwargs = _init_creation_kwargs(zarr_version) +@pytest.mark.parametrize('at_root', [False, True]) +def test_ones(zarr_version, at_root): + kwargs = _init_creation_kwargs(zarr_version, at_root) z = ones(100, chunks=10, **kwargs) assert (100,) == z.shape assert (10,) == z.chunks @@ -147,8 +151,9 @@ def test_ones(zarr_version): @pytest.mark.parametrize('zarr_version', _VERSIONS) -def test_full(zarr_version): - kwargs = _init_creation_kwargs(zarr_version) +@pytest.mark.parametrize('at_root', [False, True]) +def test_full(zarr_version, at_root): + kwargs = _init_creation_kwargs(zarr_version, at_root) z = full(100, chunks=10, fill_value=42, dtype='i4', **kwargs) assert (100,) == z.shape assert (10,) == z.chunks @@ -195,10 +200,11 @@ def test_full_additional_dtypes(zarr_version): @pytest.mark.parametrize('dimension_separator', ['.', '/', None]) @pytest.mark.parametrize('zarr_version', _VERSIONS) -def test_open_array(zarr_version, dimension_separator): +@pytest.mark.parametrize('at_root', [False, True]) +def test_open_array(zarr_version, at_root, dimension_separator): store = 'data/array.zarr' - kwargs = _init_creation_kwargs(zarr_version) + kwargs = _init_creation_kwargs(zarr_version, at_root) # mode == 'w' z = open_array(store, mode='w', shape=100, chunks=10, @@ -391,11 +397,12 @@ def test_open_array_n5(zarr_version): @pytest.mark.parametrize('zarr_version', _VERSIONS) -def test_open_array_dict_store(zarr_version): +@pytest.mark.parametrize('at_root', [False, True]) +def test_open_array_dict_store(zarr_version, at_root): # dict will become a KVStore store = dict() - kwargs = _init_creation_kwargs(zarr_version) + kwargs = _init_creation_kwargs(zarr_version, at_root) expected_store_type = KVStoreV3 if zarr_version == 3 else KVStore # mode == 'w' @@ -409,8 +416,9 @@ def test_open_array_dict_store(zarr_version): @pytest.mark.parametrize('zarr_version', _VERSIONS) -def test_create_in_dict(zarr_version): - kwargs = _init_creation_kwargs(zarr_version) +@pytest.mark.parametrize('at_root', [False, True]) +def test_create_in_dict(zarr_version, at_root): + kwargs = _init_creation_kwargs(zarr_version, at_root) expected_store_type = KVStoreV3 if zarr_version == 3 else KVStore for func in [empty, zeros, ones]: @@ -422,8 +430,9 @@ def test_create_in_dict(zarr_version): @pytest.mark.parametrize('zarr_version', _VERSIONS) -def test_empty_like(zarr_version): - kwargs = _init_creation_kwargs(zarr_version) +@pytest.mark.parametrize('at_root', [False, True]) +def test_empty_like(zarr_version, at_root): + kwargs = _init_creation_kwargs(zarr_version, at_root) expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version # zarr array @@ -471,9 +480,10 @@ def test_empty_like(zarr_version): @pytest.mark.parametrize('zarr_version', _VERSIONS) -def test_zeros_like(zarr_version): +@pytest.mark.parametrize('at_root', [False, True]) +def test_zeros_like(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version) + kwargs = _init_creation_kwargs(zarr_version, at_root) expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version # zarr array @@ -498,9 +508,10 @@ def test_zeros_like(zarr_version): @pytest.mark.parametrize('zarr_version', _VERSIONS) -def test_ones_like(zarr_version): +@pytest.mark.parametrize('at_root', [False, True]) +def test_ones_like(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version) + kwargs = _init_creation_kwargs(zarr_version, at_root) expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version # zarr array @@ -526,9 +537,10 @@ def test_ones_like(zarr_version): @pytest.mark.parametrize('zarr_version', _VERSIONS) -def test_full_like(zarr_version): +@pytest.mark.parametrize('at_root', [False, True]) +def test_full_like(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version) + kwargs = _init_creation_kwargs(zarr_version, at_root) expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version z = full(100, chunks=10, dtype='f4', compressor=Zlib(5), @@ -556,8 +568,9 @@ def test_full_like(zarr_version): @pytest.mark.parametrize('zarr_version', _VERSIONS) -def test_open_like(zarr_version): - kwargs = _init_creation_kwargs(zarr_version) +@pytest.mark.parametrize('at_root', [False, True]) +def test_open_like(zarr_version, at_root): + kwargs = _init_creation_kwargs(zarr_version, at_root) expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version # zarr array @@ -587,16 +600,13 @@ def test_open_like(zarr_version): @pytest.mark.parametrize('zarr_version', _VERSIONS) -def test_create(zarr_version): - kwargs = _init_creation_kwargs(zarr_version) +@pytest.mark.parametrize('at_root', [False, True]) +def test_create(zarr_version, at_root): + kwargs = _init_creation_kwargs(zarr_version, at_root) expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version # defaults z = create(100, **kwargs) - if zarr_version == 3: - with pytest.raises(ValueError): - # cannot create without specifying a path - z = create(100, zarr_version=3) assert isinstance(z, Array) assert (100,) == z.shape assert (100,) == z.chunks # auto-chunks @@ -694,10 +704,11 @@ def test_compression_args(zarr_version): @pytest.mark.parametrize('zarr_version', _VERSIONS) -def test_create_read_only(zarr_version): +@pytest.mark.parametrize('at_root', [False, True]) +def test_create_read_only(zarr_version, at_root): # https://github.com/alimanfoo/zarr/issues/151 - kwargs = _init_creation_kwargs(zarr_version) + kwargs = _init_creation_kwargs(zarr_version, at_root) # create an array initially read-only, then enable writing z = create(100, read_only=True, **kwargs) diff --git a/zarr/util.py b/zarr/util.py index cc3bd50356..6887a774d9 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -316,6 +316,9 @@ def normalize_storage_path(path: Union[str, bytes, None]) -> str: if path is not None and not isinstance(path, str): path = str(path) + if path == '/': + return path + if path: # convert backslash to forward slash From f2f47fb0c024a81fe8c319ead6dd97f1c20564d4 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 15 Jul 2022 16:30:53 -0400 Subject: [PATCH 2/9] revert change to normalize_storage_path update additional tests --- zarr/hierarchy.py | 2 -- zarr/tests/test_convenience.py | 10 -------- zarr/tests/test_core.py | 43 ++++++++++++---------------------- zarr/tests/test_hierarchy.py | 35 ++++++++++----------------- zarr/util.py | 3 --- 5 files changed, 28 insertions(+), 65 deletions(-) diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 5b71737986..ee714adabd 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -1247,8 +1247,6 @@ def group(store=None, overwrite=False, chunk_store=None, if zarr_version != 2: assert_zarr_v3_api_available() - if zarr_version == 3 and path is None: - raise ValueError(f"path must be provided for a v{zarr_version} group") path = normalize_storage_path(path) if zarr_version == 2: diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py index d0d293a694..edd533afec 100644 --- a/zarr/tests/test_convenience.py +++ b/zarr/tests/test_convenience.py @@ -73,11 +73,6 @@ def test_open_array(path_type, zarr_version): assert isinstance(z, Array) assert z.shape == (200,) - if zarr_version == 3: - # cannot open a v3 array without path - with pytest.raises(ValueError): - open(store, mode='w', shape=200, zarr_version=3) - # open array, read-only z = open(store, mode='r', **kwargs) assert isinstance(z, Array) @@ -108,11 +103,6 @@ def test_open_group(path_type, zarr_version): assert isinstance(g, Group) assert 'foo' not in g - if zarr_version == 3: - # cannot open a v3 group without path - with pytest.raises(ValueError): - open(store, mode='w', zarr_version=3) - # open group, read-only g = open(store, mode='r', **kwargs) assert isinstance(g, Group) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index f5f043e6e3..1b002b98e3 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -2712,27 +2712,10 @@ class TestArrayV3(unittest.TestCase): def test_array_init(self): - # normal initialization + # normal initialization without path store = KVStoreV3(dict()) - with pytest.raises(ValueError): - # cannot init_array for v3 without a path - init_array(store, shape=100, chunks=10, dtype=" str: if path is not None and not isinstance(path, str): path = str(path) - if path == '/': - return path - if path: # convert backslash to forward slash From a14a620373c1bd5541cd0bf7ee1e8176e5ae6704 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 15 Jul 2022 19:33:06 -0400 Subject: [PATCH 3/9] fix --- zarr/tests/test_hierarchy.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 9d31c2140b..6840a95030 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -1621,7 +1621,11 @@ def test_open_group(zarr_version): assert 0 == len(g) g.create_groups('foo', 'bar') assert 2 == len(g) - with pytest.raises(ValueError): + if zarr_version == 2: + with pytest.raises(ValueError): + open_group('data/array.zarr', mode='a', zarr_version=zarr_version) + else: + # TODO, root: should this raise an error? open_group('data/array.zarr', mode='a', zarr_version=zarr_version) # mode in 'w-', 'x' From 0f6d5636042ca35cd849af85c76e86169acfd989 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Sun, 17 Jul 2022 10:34:10 -0400 Subject: [PATCH 4/9] update TestArrayV3 to inherit all tests from TestArray --- zarr/tests/test_core.py | 59 ++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 1b002b98e3..4d79d8c419 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -64,13 +64,15 @@ class TestArray(unittest.TestCase): version = 2 + root = '' + KVStoreClass = KVStore def test_array_init(self): # normal initialization - store = KVStore(dict()) + store = self.KVStoreClass(dict()) init_array(store, shape=100, chunks=10, dtype=" Date: Sun, 17 Jul 2022 10:37:15 -0400 Subject: [PATCH 5/9] remove test bypass --- zarr/tests/test_core.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 4d79d8c419..0ca601f7ba 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -2729,8 +2729,7 @@ def expected(self): "c780221df84eb91cb62f633f12d3f1eaa9cee6bd" ] - def test_nbytes_stored(self): - pass # TODO: fix + # TODO: fix test_nbytes_stored @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") From 8e6dca181c30c8193b2c6f4415b22dc5f8701687 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Mon, 18 Jul 2022 20:35:11 -0400 Subject: [PATCH 6/9] fix nbytes_stored for v3 array without path update test_nbytes_stored to handle both v3 and v2 cases properly --- zarr/storage.py | 10 ++++++++-- zarr/tests/test_core.py | 15 ++++++++++++--- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 48b6f049dd..4da33cd01f 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -229,8 +229,14 @@ def _getsize(store: BaseStore, path: Path = None) -> int: size = 0 store_version = getattr(store, '_store_version', 2) if store_version == 3: - members = store.list_prefix(data_root + path) # type: ignore - members += store.list_prefix(meta_root + path) # type: ignore + if path == '': + # have to list the root folders without trailing / in this case + members = store.list_prefix(data_root.rstrip('/')) # type: ignore + members += store.list_prefix(meta_root.rstrip('/')) # type: ignore + else: + members = store.list_prefix(data_root + path) # type: ignore + members += store.list_prefix(meta_root + path) # type: ignore + # also include zarr.json? # members += ['zarr.json'] else: members = listdir(store, path) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 0ca601f7ba..f925622aa7 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -170,15 +170,24 @@ def test_nbytes_stored(self): # dict as store z = self.create_array(shape=1000, chunks=100) - expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values()) + if self.version == 3: + expect_nbytes_stored = sum(buffer_size(v) for k, v in z.store.items() if k != 'zarr.json') + else: + expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values()) assert expect_nbytes_stored == z.nbytes_stored z[:] = 42 - expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values()) + if self.version == 3: + expect_nbytes_stored = sum(buffer_size(v) for k, v in z.store.items() if k != 'zarr.json') + else: + expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values()) assert expect_nbytes_stored == z.nbytes_stored # mess with store try: - z.store[z._key_prefix + 'foo'] = list(range(10)) + if self.version == 2: + z.store[z._key_prefix + 'foo'] = list(range(10)) + else: + z.store['meta/root/foo'] = list(range(10)) assert -1 == z.nbytes_stored except TypeError: pass From abfe36833da52d07ef5b8656890fbee9c99d1442 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Mon, 18 Jul 2022 20:37:31 -0400 Subject: [PATCH 7/9] pep8 --- zarr/tests/test_core.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index f925622aa7..c556314ba3 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -19,9 +19,6 @@ from zarr._storage.store import ( v3_api_available, - _prefix_to_array_key, - _prefix_to_attrs_key, - _prefix_to_group_key ) from zarr.core import Array from zarr.errors import ArrayNotFoundError, ContainsGroupError @@ -171,13 +168,17 @@ def test_nbytes_stored(self): # dict as store z = self.create_array(shape=1000, chunks=100) if self.version == 3: - expect_nbytes_stored = sum(buffer_size(v) for k, v in z.store.items() if k != 'zarr.json') + expect_nbytes_stored = sum( + buffer_size(v) for k, v in z.store.items() if k != 'zarr.json' + ) else: expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values()) assert expect_nbytes_stored == z.nbytes_stored z[:] = 42 if self.version == 3: - expect_nbytes_stored = sum(buffer_size(v) for k, v in z.store.items() if k != 'zarr.json') + expect_nbytes_stored = sum( + buffer_size(v) for k, v in z.store.items() if k != 'zarr.json' + ) else: expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values()) assert expect_nbytes_stored == z.nbytes_stored From e86e67d67c4bd7b6093108cc6eadbfc5c0456e7b Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 19 Jul 2022 19:10:47 -0400 Subject: [PATCH 8/9] fix incorrect default value for at_root in _init_creation_kwargs previous behavior corresponded to at_root=True by default --- zarr/tests/test_creation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/tests/test_creation.py b/zarr/tests/test_creation.py index a9a3226fe3..2b7ddde7a1 100644 --- a/zarr/tests/test_creation.py +++ b/zarr/tests/test_creation.py @@ -53,7 +53,7 @@ def __getitem__(self, item): return self.data[item] -def _init_creation_kwargs(zarr_version, at_root=False): +def _init_creation_kwargs(zarr_version, at_root=True): kwargs = {'zarr_version': zarr_version} if not at_root: kwargs['path'] = 'array' From 168f2c1e4930d3fc4dcf6adfee2e292109b51a9b Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 19 Jul 2022 19:20:05 -0400 Subject: [PATCH 9/9] flake8 --- zarr/hierarchy.py | 1 - 1 file changed, 1 deletion(-) diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index ee714adabd..e57e84ed16 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -1330,7 +1330,6 @@ def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=N "zarr_version of store and chunk_store must match" ) - store_version = getattr(store, '_store_version', 2) path = normalize_storage_path(path) # ensure store is initialized