From e492be23f8c36095a29902953e19a0892dd65c89 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Tue, 26 Mar 2024 21:04:57 +0100 Subject: [PATCH 01/56] feat: functional .children method for groups --- src/zarr/v3/group.py | 37 ++++++++++++++++++++++++++++++++++--- tests/test_group_v3.py | 4 +++- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index acd5ca0d62..4a84cbed80 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -4,7 +4,17 @@ import asyncio import json import logging -from typing import Any, Dict, Literal, Optional, Union, AsyncIterator, Iterator, List +from typing import ( + Any, + AsyncGenerator, + Dict, + Literal, + Optional, + Union, + AsyncIterator, + Iterator, + List, +) from zarr.v3.abc.metadata import Metadata from zarr.v3.array import AsyncArray, Array @@ -271,8 +281,29 @@ def __repr__(self): async def nchildren(self) -> int: raise NotImplementedError - async def children(self) -> AsyncIterator[AsyncArray, AsyncGroup]: - raise NotImplementedError + async def children(self) -> AsyncGenerator[AsyncArray, AsyncGroup]: + """ + Returns an async iterator over the arrays and groups contained in this group. + """ + if not self.store_path.store.supports_listing: + msg = ( + f"The store associated with this group ({type(self.store_path.store)}) " + "does not support listing, " + "specifically the `list_dir` method. " + "This function requires a store that supports listing." + ) + + raise ValueError(msg) + subkeys = await self.store_path.store.list_dir(self.store_path.path) + # would be nice to make these special keys accessible programmatically, + # and scoped to specific zarr versions + subkeys_filtered = filter(lambda v: v not in ("zarr.json", ".zgroup", ".zattrs"), subkeys) + # might be smarter to wrap this in asyncio gather + for subkey in subkeys_filtered: + try: + yield await self.getitem(subkey) + except ValueError: + pass async def contains(self, child: str) -> bool: raise NotImplementedError diff --git a/tests/test_group_v3.py b/tests/test_group_v3.py index 1498d6779b..01859bb7ae 100644 --- a/tests/test_group_v3.py +++ b/tests/test_group_v3.py @@ -21,13 +21,15 @@ def test_group(store_path) -> None: runtime_configuration=RuntimeConfiguration(), ) group = Group(agroup) - assert agroup.metadata is group.metadata # create two groups foo = group.create_group("foo") bar = foo.create_group("bar", attributes={"baz": "qux"}) + # check that bar is in the children of foo + assert foo.children == [bar] + # create an array from the "bar" group data = np.arange(0, 4 * 4, dtype="uint16").reshape((4, 4)) arr = bar.create_array( From b7f66c7ecea7d79fb911da1a4d48d01f67b225ec Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 27 Mar 2024 12:21:48 +0100 Subject: [PATCH 02/56] changes necessary for correctly generating list of children --- src/zarr/v3/group.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index 4a84cbed80..84e0100d81 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -80,7 +80,7 @@ def to_dict(self) -> Dict[str, Any]: class AsyncGroup: metadata: GroupMetadata store_path: StorePath - runtime_configuration: RuntimeConfiguration + runtime_configuration: RuntimeConfiguration = RuntimeConfiguration() @classmethod async def create( @@ -164,11 +164,21 @@ async def getitem( store_path = self.store_path / key + # Note: + # in zarr-python v2, we first check if `key` references an Array, else if `key` references + # a group,using standalone `contains_array` and `contains_group` functions. These functions + # are reusable, but for v3 they would perform redundant I/O operations. + # Not clear how much of that strategy we want to keep here. + + # if `key` names an object in storage, it cannot be an array or group + if await store_path.exists(): + raise KeyError(key) + if self.metadata.zarr_format == 3: zarr_json_bytes = await (store_path / ZARR_JSON).get() if zarr_json_bytes is None: # implicit group? - logger.warning("group at {} is an implicit group", store_path) + logger.warning("group at %s is an implicit group", store_path) zarr_json = { "zarr_format": self.metadata.zarr_format, "node_type": "group", @@ -205,7 +215,7 @@ async def getitem( else: if zgroup_bytes is None: # implicit group? - logger.warning("group at {} is an implicit group", store_path) + logger.warning("group at %s is an implicit group", store_path) zgroup = ( json.loads(zgroup_bytes) if zgroup_bytes is not None @@ -283,13 +293,14 @@ async def nchildren(self) -> int: async def children(self) -> AsyncGenerator[AsyncArray, AsyncGroup]: """ - Returns an async iterator over the arrays and groups contained in this group. + Returns an AsyncGenerator over the arrays and groups contained in this group. + This method requires that `store_path.store` supports directory listing. """ if not self.store_path.store.supports_listing: msg = ( f"The store associated with this group ({type(self.store_path.store)}) " "does not support listing, " - "specifically the `list_dir` method. " + "specifically via the `list_dir` method. " "This function requires a store that supports listing." ) @@ -298,11 +309,13 @@ async def children(self) -> AsyncGenerator[AsyncArray, AsyncGroup]: # would be nice to make these special keys accessible programmatically, # and scoped to specific zarr versions subkeys_filtered = filter(lambda v: v not in ("zarr.json", ".zgroup", ".zattrs"), subkeys) - # might be smarter to wrap this in asyncio gather + # is there a better way to schedule this? for subkey in subkeys_filtered: try: yield await self.getitem(subkey) - except ValueError: + except KeyError: + # keyerror is raised when `subkey``names an object in the store + # in which case `subkey` cannot be the name of a sub-array or sub-group. pass async def contains(self, child: str) -> bool: @@ -436,7 +449,7 @@ def nchildren(self) -> int: return self._sync(self._async_group.nchildren) @property - def children(self) -> List[Array, Group]: + def children(self) -> List[Array | Group]: _children = self._sync_iter(self._async_group.children) return [Array(obj) if isinstance(obj, AsyncArray) else Group(obj) for obj in _children] From c7b333a3064de462d5c590581f6fb70836202a56 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 27 Mar 2024 12:22:18 +0100 Subject: [PATCH 03/56] add stand-alone test for group.children --- tests/test_group_v3.py | 58 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/tests/test_group_v3.py b/tests/test_group_v3.py index 01859bb7ae..3c8e33db8d 100644 --- a/tests/test_group_v3.py +++ b/tests/test_group_v3.py @@ -4,6 +4,8 @@ from zarr.v3.group import AsyncGroup, Group, GroupMetadata from zarr.v3.store import LocalStore, StorePath from zarr.v3.config import RuntimeConfiguration +from zarr.v3.store.remote import RemoteStore +from zarr.v3.sync import sync @pytest.fixture @@ -13,8 +15,59 @@ def store_path(tmpdir): return p -def test_group(store_path) -> None: +@pytest.fixture(scope="function") +def local_store(tmpdir): + return LocalStore(str(tmpdir)) + +@pytest.fixture(scope="function") +def remote_store(): + return RemoteStore() + + +@pytest.mark.parametrize("store_type", ("local_store",)) +def test_group_children(store_type, request): + store: LocalStore | RemoteStore = request.getfixturevalue(store_type) + path = "group" + agroup = AsyncGroup( + metadata=GroupMetadata(), + store_path=StorePath(store=store, path=path), + ) + group = Group(agroup) + + subgroup = group.create_group("subgroup") + # make a sub-sub-subgroup, to ensure that the children calculation doesn't go + # too deep in the hierarchy + _ = subgroup.create_group("subsubgroup") + subarray = group.create_array( + "subarray", shape=(100,), dtype="uint8", chunk_shape=(10,), exists_ok=True + ) + + # add an extra object to the domain of the group. + # the list of children should ignore this object. + sync(store.set(f"{path}/extra_object", b"000000")) + # add an extra object under a directory-like prefix in the domain of the group. + # this creates an implicit group called implicit_subgroup + sync(store.set(f"{path}/implicit_subgroup/extra_object", b"000000")) + # make the implicit subgroup + implicit_subgroup = Group( + AsyncGroup( + metadata=GroupMetadata(), + store_path=StorePath(store=store, path=f"{path}/implicit_subgroup"), + ) + ) + # note: this assertion is order-specific, but it is not clear + # if group.children guarantees a particular order for the children. + # If order is not guaranteed, then the better test is + # to compare two sets, but presently neither the group nor array classes are hashable. + observed = group.children + assert observed == [subarray, implicit_subgroup, subgroup] + + +@pytest.mark.parametrize("store_type", (("local_store",))) +def test_group(store_type, request) -> None: + store = request.getfixturevalue(store_type) + store_path = StorePath(store) agroup = AsyncGroup( metadata=GroupMetadata(), store_path=store_path, @@ -27,9 +80,6 @@ def test_group(store_path) -> None: foo = group.create_group("foo") bar = foo.create_group("bar", attributes={"baz": "qux"}) - # check that bar is in the children of foo - assert foo.children == [bar] - # create an array from the "bar" group data = np.arange(0, 4 * 4, dtype="uint16").reshape((4, 4)) arr = bar.create_array( From a64b3420523165d86946316e60dd46893d909846 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 27 Mar 2024 12:32:35 +0100 Subject: [PATCH 04/56] give type hints a glow-up --- src/zarr/v3/group.py | 59 ++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index 84e0100d81..5bce87376c 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -1,20 +1,19 @@ from __future__ import annotations +from typing import TYPE_CHECKING from dataclasses import asdict, dataclass, field, replace import asyncio import json import logging -from typing import ( - Any, - AsyncGenerator, - Dict, - Literal, - Optional, - Union, - AsyncIterator, - Iterator, - List, -) + +if TYPE_CHECKING: + from typing import ( + Any, + AsyncGenerator, + Literal, + AsyncIterator, + Iterator, + ) from zarr.v3.abc.metadata import Metadata from zarr.v3.array import AsyncArray, Array @@ -35,7 +34,7 @@ def parse_zarr_format(data: Any) -> Literal[2, 3]: # todo: convert None to empty dict -def parse_attributes(data: Any) -> Dict[str, Any]: +def parse_attributes(data: Any) -> dict[str, Any]: if data is None: return {} elif isinstance(data, dict) and all(map(lambda v: isinstance(v, str), data.keys())): @@ -46,12 +45,12 @@ def parse_attributes(data: Any) -> Dict[str, Any]: @dataclass(frozen=True) class GroupMetadata(Metadata): - attributes: Dict[str, Any] = field(default_factory=dict) + attributes: dict[str, Any] = field(default_factory=dict) zarr_format: Literal[2, 3] = 3 node_type: Literal["group"] = field(default="group", init=False) # todo: rename this, since it doesn't return bytes - def to_bytes(self) -> Dict[str, bytes]: + def to_bytes(self) -> dict[str, bytes]: if self.zarr_format == 3: return {ZARR_JSON: json.dumps(self.to_dict()).encode()} else: @@ -60,7 +59,7 @@ def to_bytes(self) -> Dict[str, bytes]: ZATTRS_JSON: json.dumps(self.attributes).encode(), } - def __init__(self, attributes: Dict[str, Any] = None, zarr_format: Literal[2, 3] = 3): + def __init__(self, attributes: dict[str, Any] = {}, zarr_format: Literal[2, 3] = 3): attributes_parsed = parse_attributes(attributes) zarr_format_parsed = parse_zarr_format(zarr_format) @@ -68,11 +67,11 @@ def __init__(self, attributes: Dict[str, Any] = None, zarr_format: Literal[2, 3] object.__setattr__(self, "zarr_format", zarr_format_parsed) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> GroupMetadata: + def from_dict(cls, data: dict[str, Any]) -> GroupMetadata: assert data.pop("node_type", None) in ("group", None) return cls(**data) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: return asdict(self) @@ -87,7 +86,7 @@ async def create( cls, store: StoreLike, *, - attributes: Optional[Dict[str, Any]] = None, + attributes: dict[str, Any] = {}, exists_ok: bool = False, zarr_format: Literal[2, 3] = 3, runtime_configuration: RuntimeConfiguration = RuntimeConfiguration(), @@ -99,7 +98,7 @@ async def create( elif zarr_format == 2: assert not await (store_path / ZGROUP_JSON).exists() group = cls( - metadata=GroupMetadata(attributes=attributes or {}, zarr_format=zarr_format), + metadata=GroupMetadata(attributes=attributes, zarr_format=zarr_format), store_path=store_path, runtime_configuration=runtime_configuration, ) @@ -147,7 +146,7 @@ async def open( def from_dict( cls, store_path: StorePath, - data: Dict[str, Any], + data: dict[str, Any], runtime_configuration: RuntimeConfiguration, ) -> Group: group = cls( @@ -160,7 +159,7 @@ def from_dict( async def getitem( self, key: str, - ) -> Union[AsyncArray, AsyncGroup]: + ) -> AsyncArray | AsyncGroup: store_path = self.store_path / key @@ -267,7 +266,7 @@ async def create_array(self, path: str, **kwargs) -> AsyncArray: **kwargs, ) - async def update_attributes(self, new_attributes: Dict[str, Any]): + async def update_attributes(self, new_attributes: dict[str, Any]): # metadata.attributes is "frozen" so we simply clear and update the dict self.metadata.attributes.clear() self.metadata.attributes.update(new_attributes) @@ -374,7 +373,7 @@ def create( cls, store: StoreLike, *, - attributes: Optional[Dict[str, Any]] = None, + attributes: dict[str, Any] = {}, exists_ok: bool = False, runtime_configuration: RuntimeConfiguration = RuntimeConfiguration(), ) -> Group: @@ -401,7 +400,7 @@ def open( ) return cls(obj) - def __getitem__(self, path: str) -> Union[Array, Group]: + def __getitem__(self, path: str) -> Array | Group: obj = self._sync(self._async_group.getitem(path)) if isinstance(obj, AsyncArray): return Array(obj) @@ -421,7 +420,7 @@ def __setitem__(self, key, value): """__setitem__ is not supported in v3""" raise NotImplementedError - async def update_attributes_async(self, new_attributes: Dict[str, Any]) -> Group: + async def update_attributes_async(self, new_attributes: dict[str, Any]) -> Group: new_metadata = replace(self.metadata, attributes=new_attributes) # Write new metadata @@ -440,7 +439,7 @@ def attrs(self) -> Attributes: def info(self): return self._async_group.info - def update_attributes(self, new_attributes: Dict[str, Any]): + def update_attributes(self, new_attributes: dict[str, Any]): self._sync(self._async_group.update_attributes(new_attributes)) return self @@ -449,7 +448,7 @@ def nchildren(self) -> int: return self._sync(self._async_group.nchildren) @property - def children(self) -> List[Array | Group]: + def children(self) -> list[Array | Group]: _children = self._sync_iter(self._async_group.children) return [Array(obj) if isinstance(obj, AsyncArray) else Group(obj) for obj in _children] @@ -459,14 +458,14 @@ def __contains__(self, child) -> bool: def group_keys(self) -> Iterator[str]: return self._sync_iter(self._async_group.group_keys) - def groups(self) -> List[Group]: + def groups(self) -> list[Group]: # TODO: in v2 this was a generator that return key: Group return [Group(obj) for obj in self._sync_iter(self._async_group.groups)] - def array_keys(self) -> List[str]: + def array_keys(self) -> list[str]: return self._sync_iter(self._async_group.array_keys) - def arrays(self) -> List[Array]: + def arrays(self) -> list[Array]: return [Array(obj) for obj in self._sync_iter(self._async_group.arrays)] def tree(self, expand=False, level=None) -> Any: From 3d11fc0edc175748e7268397dcb14a92bba8c07d Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 27 Mar 2024 14:03:01 +0100 Subject: [PATCH 05/56] test: use separate assert statements to avoid platform-dependent ordering issues --- tests/test_group_v3.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/test_group_v3.py b/tests/test_group_v3.py index 3c8e33db8d..37f90543b1 100644 --- a/tests/test_group_v3.py +++ b/tests/test_group_v3.py @@ -56,12 +56,15 @@ def test_group_children(store_type, request): store_path=StorePath(store=store, path=f"{path}/implicit_subgroup"), ) ) - # note: this assertion is order-specific, but it is not clear + # note: these assertions are order-independent, because it is not clear # if group.children guarantees a particular order for the children. - # If order is not guaranteed, then the better test is + # If order is not guaranteed, then the better version of this test is # to compare two sets, but presently neither the group nor array classes are hashable. observed = group.children - assert observed == [subarray, implicit_subgroup, subgroup] + assert len(observed) == 3 + assert subarray in observed + assert implicit_subgroup in observed + assert subgroup in observed @pytest.mark.parametrize("store_type", (("local_store",))) From cf34afc77cb766fa1d3a3d4c778b63f4d113199c Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 27 Mar 2024 14:39:38 +0100 Subject: [PATCH 06/56] test: put fixtures in conftest, add MemoryStore fixture --- tests/conftest.py | 70 +++++++++++++++++++++++++++++++++++++++++- tests/test_group_v3.py | 36 +++++++++------------- 2 files changed, 83 insertions(+), 23 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index aa73b8691e..38f87a0d9d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,8 +1,76 @@ import pathlib - import pytest +import os + +from zarr.v3.store import LocalStore, StorePath, MemoryStore +from zarr.v3.store.remote import RemoteStore @pytest.fixture(params=[str, pathlib.Path]) def path_type(request): return request.param + + +@pytest.fixture() +def mock_s3(request): + # writable local S3 system + import shlex + import subprocess + import time + + if "BOTO_CONFIG" not in os.environ: # pragma: no cover + os.environ["BOTO_CONFIG"] = "/dev/null" + if "AWS_ACCESS_KEY_ID" not in os.environ: # pragma: no cover + os.environ["AWS_ACCESS_KEY_ID"] = "foo" + if "AWS_SECRET_ACCESS_KEY" not in os.environ: # pragma: no cover + os.environ["AWS_SECRET_ACCESS_KEY"] = "bar" + requests = pytest.importorskip("requests") + s3fs = pytest.importorskip("s3fs") + pytest.importorskip("moto") + port = 5555 + endpoint_uri = "http://127.0.0.1:%d/" % port + proc = subprocess.Popen( + shlex.split("moto_server s3 -p %d" % port), + stderr=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + ) + timeout = 5 + while timeout > 0: + try: + r = requests.get(endpoint_uri) + if r.ok: + break + except Exception: # pragma: no cover + pass + timeout -= 0.1 # pragma: no cover + time.sleep(0.1) # pragma: no cover + s3so = dict(client_kwargs={"endpoint_url": endpoint_uri}, use_listings_cache=False) + s3 = s3fs.S3FileSystem(anon=False, **s3so) + s3.mkdir("test") + request.cls.s3so = s3so + yield + proc.terminate() + proc.wait() + + +# todo: harmonize this with local_store fixture +@pytest.fixture +def store_path(tmpdir): + store = LocalStore(str(tmpdir)) + p = StorePath(store) + return p + + +@pytest.fixture(scope="function") +def local_store(tmpdir): + return LocalStore(str(tmpdir)) + + +@pytest.fixture(scope="function") +def remote_store(): + return RemoteStore() + + +@pytest.fixture(scope="function") +def memory_store(): + return MemoryStore() diff --git a/tests/test_group_v3.py b/tests/test_group_v3.py index 37f90543b1..555374f5b3 100644 --- a/tests/test_group_v3.py +++ b/tests/test_group_v3.py @@ -1,33 +1,25 @@ +from __future__ import annotations +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from zarr.v3.store.remote import MemoryStore, LocalStore import pytest import numpy as np from zarr.v3.group import AsyncGroup, Group, GroupMetadata -from zarr.v3.store import LocalStore, StorePath +from zarr.v3.store import StorePath from zarr.v3.config import RuntimeConfiguration -from zarr.v3.store.remote import RemoteStore from zarr.v3.sync import sync - -@pytest.fixture -def store_path(tmpdir): - store = LocalStore(str(tmpdir)) - p = StorePath(store) - return p - - -@pytest.fixture(scope="function") -def local_store(tmpdir): - return LocalStore(str(tmpdir)) - - -@pytest.fixture(scope="function") -def remote_store(): - return RemoteStore() - - -@pytest.mark.parametrize("store_type", ("local_store",)) +# todo: put RemoteStore in here +@pytest.mark.parametrize("store_type", ("local_store", "memory_store")) def test_group_children(store_type, request): - store: LocalStore | RemoteStore = request.getfixturevalue(store_type) + """ + Test that `Group.children` returns correct values, i.e. the arrays and groups + (explicit and implicit) contained in that group. + """ + + store: LocalStore | MemoryStore = request.getfixturevalue(store_type) path = "group" agroup = AsyncGroup( metadata=GroupMetadata(), From 16cb22610781775c96b2d843b7c85ba5ce8e41e3 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 27 Mar 2024 14:48:08 +0100 Subject: [PATCH 07/56] docs: release notes --- docs/release.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/release.rst b/docs/release.rst index 3ed47ff9f5..b78e709c0e 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -18,6 +18,12 @@ Release notes Unreleased (v3) --------------- +Enhancements +~~~~~~~~~~~~ + +* Implement listing of the sub-arrays and sub-groups for a V3 ``Group``. + By :user:`Davis Bennett ` :issue:`1726`. + Maintenance ~~~~~~~~~~~ From b28eaee2f328e8e9ee2aed9f51d14edf9d49630b Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Sat, 30 Mar 2024 18:14:14 +0100 Subject: [PATCH 08/56] test: remove prematurely-added mock s3 fixture --- tests/conftest.py | 43 ------------------------------------------- 1 file changed, 43 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 38f87a0d9d..40275ba62c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,5 @@ import pathlib import pytest -import os from zarr.v3.store import LocalStore, StorePath, MemoryStore from zarr.v3.store.remote import RemoteStore @@ -11,48 +10,6 @@ def path_type(request): return request.param -@pytest.fixture() -def mock_s3(request): - # writable local S3 system - import shlex - import subprocess - import time - - if "BOTO_CONFIG" not in os.environ: # pragma: no cover - os.environ["BOTO_CONFIG"] = "/dev/null" - if "AWS_ACCESS_KEY_ID" not in os.environ: # pragma: no cover - os.environ["AWS_ACCESS_KEY_ID"] = "foo" - if "AWS_SECRET_ACCESS_KEY" not in os.environ: # pragma: no cover - os.environ["AWS_SECRET_ACCESS_KEY"] = "bar" - requests = pytest.importorskip("requests") - s3fs = pytest.importorskip("s3fs") - pytest.importorskip("moto") - port = 5555 - endpoint_uri = "http://127.0.0.1:%d/" % port - proc = subprocess.Popen( - shlex.split("moto_server s3 -p %d" % port), - stderr=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - ) - timeout = 5 - while timeout > 0: - try: - r = requests.get(endpoint_uri) - if r.ok: - break - except Exception: # pragma: no cover - pass - timeout -= 0.1 # pragma: no cover - time.sleep(0.1) # pragma: no cover - s3so = dict(client_kwargs={"endpoint_url": endpoint_uri}, use_listings_cache=False) - s3 = s3fs.S3FileSystem(anon=False, **s3so) - s3.mkdir("test") - request.cls.s3so = s3so - yield - proc.terminate() - proc.wait() - - # todo: harmonize this with local_store fixture @pytest.fixture def store_path(tmpdir): From 0215b97cc6e5acd3997891e680156aa0ecd45ace Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 13:30:01 +0200 Subject: [PATCH 09/56] chore: move v3 tests into v3 folder --- tests/{ => v3}/test_codecs_v3.py | 0 tests/{ => v3}/test_group_v3.py | 0 tests/{ => v3}/test_storage_v3.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename tests/{ => v3}/test_codecs_v3.py (100%) rename tests/{ => v3}/test_group_v3.py (100%) rename tests/{ => v3}/test_storage_v3.py (100%) diff --git a/tests/test_codecs_v3.py b/tests/v3/test_codecs_v3.py similarity index 100% rename from tests/test_codecs_v3.py rename to tests/v3/test_codecs_v3.py diff --git a/tests/test_group_v3.py b/tests/v3/test_group_v3.py similarity index 100% rename from tests/test_group_v3.py rename to tests/v3/test_group_v3.py diff --git a/tests/test_storage_v3.py b/tests/v3/test_storage_v3.py similarity index 100% rename from tests/test_storage_v3.py rename to tests/v3/test_storage_v3.py From e33ca6ec0cf1e3c0141a1f0710a0456724f4a529 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 14:43:22 +0200 Subject: [PATCH 10/56] chore: type hints --- tests/{ => v3}/conftest.py | 13 +++++++++++++ tests/v3/test_group_v3.py | 18 +++++++++--------- 2 files changed, 22 insertions(+), 9 deletions(-) rename tests/{ => v3}/conftest.py (66%) diff --git a/tests/conftest.py b/tests/v3/conftest.py similarity index 66% rename from tests/conftest.py rename to tests/v3/conftest.py index 40275ba62c..0e166e224e 100644 --- a/tests/conftest.py +++ b/tests/v3/conftest.py @@ -31,3 +31,16 @@ def remote_store(): @pytest.fixture(scope="function") def memory_store(): return MemoryStore() + + +@pytest.fixture(scope="function") +def store(request: str, tmpdir): + param = request.param + if param == "local_store": + return LocalStore(str(tmpdir)) + elif param == "memory_store": + return MemoryStore() + elif param == "remote_store": + return RemoteStore() + else: + assert False diff --git a/tests/v3/test_group_v3.py b/tests/v3/test_group_v3.py index 555374f5b3..c1f5b708da 100644 --- a/tests/v3/test_group_v3.py +++ b/tests/v3/test_group_v3.py @@ -3,6 +3,7 @@ if TYPE_CHECKING: from zarr.v3.store.remote import MemoryStore, LocalStore + import pytest import numpy as np @@ -11,15 +12,15 @@ from zarr.v3.config import RuntimeConfiguration from zarr.v3.sync import sync + # todo: put RemoteStore in here -@pytest.mark.parametrize("store_type", ("local_store", "memory_store")) -def test_group_children(store_type, request): +@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) +def test_group_children(store: MemoryStore | LocalStore): """ Test that `Group.children` returns correct values, i.e. the arrays and groups (explicit and implicit) contained in that group. """ - store: LocalStore | MemoryStore = request.getfixturevalue(store_type) path = "group" agroup = AsyncGroup( metadata=GroupMetadata(), @@ -59,9 +60,8 @@ def test_group_children(store_type, request): assert subgroup in observed -@pytest.mark.parametrize("store_type", (("local_store",))) -def test_group(store_type, request) -> None: - store = request.getfixturevalue(store_type) +@pytest.mark.parametrize("store", (("local_store", "memory_store")), indirect=["store"]) +def test_group(store: MemoryStore | LocalStore) -> None: store_path = StorePath(store) agroup = AsyncGroup( metadata=GroupMetadata(), @@ -103,10 +103,10 @@ def test_group(store_type, request) -> None: assert dict(bar3.attrs) == {"baz": "qux", "name": "bar"} -def test_group_sync_constructor(store_path) -> None: - +@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) +def test_group_sync_constructor(store: MemoryStore | LocalStore) -> None: group = Group.create( - store=store_path, + store=store, attributes={"title": "test 123"}, runtime_configuration=RuntimeConfiguration(), ) From f237172f21fea441e24896b4c311d52c1e642ea1 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 14:59:51 +0200 Subject: [PATCH 11/56] test: add schema for group method tests --- tests/v3/test_group_v3.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/v3/test_group_v3.py b/tests/v3/test_group_v3.py index c1f5b708da..034a21a547 100644 --- a/tests/v3/test_group_v3.py +++ b/tests/v3/test_group_v3.py @@ -3,6 +3,7 @@ if TYPE_CHECKING: from zarr.v3.store.remote import MemoryStore, LocalStore + from typing import Literal import pytest import numpy as np @@ -112,3 +113,27 @@ def test_group_sync_constructor(store: MemoryStore | LocalStore) -> None: ) assert group._async_group.metadata.attributes["title"] == "test 123" + + +@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", ("2", "3")) +@pytest.mark.parametrize("exists_ok", (True, False)) +@pytest.mark.parametrize("runtime_configuration", (None,)) +def test_create( + store: MemoryStore | LocalStore, + exists_ok: bool, + zarr_format: Literal["2", "3"], + runtime_configuration: None, +): + ... + + +@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) +def test_from_dict(store: MemoryStore | LocalStore): + ... + + +@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", ("2", "3")) +def test_getitem(store: MemoryStore | LocalStore, zarr_format: Literal["2", "3"]): + ... From 460544609c799da169876d109d90bcea425b49b0 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 21:43:05 +0200 Subject: [PATCH 12/56] chore: add type for zarr_formats --- src/zarr/v3/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/zarr/v3/common.py b/src/zarr/v3/common.py index 1caf83a764..e92e84ef7b 100644 --- a/src/zarr/v3/common.py +++ b/src/zarr/v3/common.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Union, Tuple, Iterable, Dict, List, TypeVar, overload +from typing import TYPE_CHECKING, Literal, Union, Tuple, Iterable, Dict, List, TypeVar, overload import asyncio import contextvars from dataclasses import dataclass @@ -21,6 +21,7 @@ ChunkCoordsLike = Iterable[int] SliceSelection = Tuple[slice, ...] Selection = Union[slice, SliceSelection] +ZarrFormat = Literal[2, 3] JSON = Union[str, None, int, float, Enum, Dict[str, "JSON"], List["JSON"], Tuple["JSON", ...]] From 6e5ded9aea74f0a3e3ffc0a976fadcba8bcfcdc4 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 22:19:03 +0200 Subject: [PATCH 13/56] chore: remove localstore for now --- tests/conftest.py | 7 +++ tests/v3/conftest.py | 54 ++++++++++++++++++---- tests/v3/test_group_v3.py | 96 +++++++++++++++++++++++++++++---------- 3 files changed, 125 insertions(+), 32 deletions(-) create mode 100644 tests/conftest.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000000..6680e4066b --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,7 @@ +import pytest +import pathlib + + +@pytest.fixture(params=[str, pathlib.Path]) +def path_type(request): + return request.param diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py index 0e166e224e..ceb0b558eb 100644 --- a/tests/v3/conftest.py +++ b/tests/v3/conftest.py @@ -1,10 +1,33 @@ +from __future__ import annotations +from typing import TYPE_CHECKING + +from zarr.v3.common import ZarrFormat +from zarr.v3.group import AsyncGroup, Group + +if TYPE_CHECKING: + from typing import Any, Literal +from dataclasses import dataclass, field import pathlib + import pytest +from zarr.v3.config import RuntimeConfiguration from zarr.v3.store import LocalStore, StorePath, MemoryStore from zarr.v3.store.remote import RemoteStore +def parse_store( + store: Literal["local", "memory", "remote"], path: str +) -> LocalStore | MemoryStore | RemoteStore: + if store == "local": + return LocalStore(path) + if store == "memory": + return MemoryStore() + if store == "remote": + return RemoteStore() + assert False + + @pytest.fixture(params=[str, pathlib.Path]) def path_type(request): return request.param @@ -36,11 +59,26 @@ def memory_store(): @pytest.fixture(scope="function") def store(request: str, tmpdir): param = request.param - if param == "local_store": - return LocalStore(str(tmpdir)) - elif param == "memory_store": - return MemoryStore() - elif param == "remote_store": - return RemoteStore() - else: - assert False + return parse_store(param, str(tmpdir)) + + +@dataclass +class AsyncGroupRequest: + zarr_format: ZarrFormat + store: str + attributes: dict[str, Any] = field(default_factory=dict) + runtime_configuration: RuntimeConfiguration = RuntimeConfiguration() + + +@pytest.fixture(scope="function") +async def async_group(request: AsyncGroupRequest, tmpdir) -> Group: + param: AsyncGroupRequest = request.param + + store = parse_store(param.store, str(tmpdir)) + return await AsyncGroup.create( + store, + attributes=param.attributes, + zarr_format=param.zarr_format, + runtime_configuration=param.runtime_configuration, + exists_ok=False, + ) diff --git a/tests/v3/test_group_v3.py b/tests/v3/test_group_v3.py index 034a21a547..af5c5171b3 100644 --- a/tests/v3/test_group_v3.py +++ b/tests/v3/test_group_v3.py @@ -1,9 +1,12 @@ from __future__ import annotations from typing import TYPE_CHECKING +from zarr.v3.store.core import make_store_path + if TYPE_CHECKING: - from zarr.v3.store.remote import MemoryStore, LocalStore + from zarr.v3.store import MemoryStore, LocalStore from typing import Literal + from zarr.v3.common import ZarrFormat import pytest import numpy as np @@ -15,7 +18,7 @@ # todo: put RemoteStore in here -@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) def test_group_children(store: MemoryStore | LocalStore): """ Test that `Group.children` returns correct values, i.e. the arrays and groups @@ -61,14 +64,10 @@ def test_group_children(store: MemoryStore | LocalStore): assert subgroup in observed -@pytest.mark.parametrize("store", (("local_store", "memory_store")), indirect=["store"]) +@pytest.mark.parametrize("store", (("local", "memory")), indirect=["store"]) def test_group(store: MemoryStore | LocalStore) -> None: store_path = StorePath(store) - agroup = AsyncGroup( - metadata=GroupMetadata(), - store_path=store_path, - runtime_configuration=RuntimeConfiguration(), - ) + agroup = AsyncGroup(metadata=GroupMetadata(), store_path=store_path) group = Group(agroup) assert agroup.metadata is group.metadata @@ -104,36 +103,85 @@ def test_group(store: MemoryStore | LocalStore) -> None: assert dict(bar3.attrs) == {"baz": "qux", "name": "bar"} -@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) -def test_group_sync_constructor(store: MemoryStore | LocalStore) -> None: +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("exists_ok", (True, False)) +@pytest.mark.parametrize( + "runtime_configuration", (RuntimeConfiguration(order="C"), RuntimeConfiguration(order="F")) +) +def test_group_create( + store: MemoryStore | LocalStore, exists_ok: bool, runtime_configuration: RuntimeConfiguration +): + + attributes = {"foo": 100} group = Group.create( - store=store, - attributes={"title": "test 123"}, - runtime_configuration=RuntimeConfiguration(), + store, + attributes=attributes, + exists_ok=exists_ok, + runtime_configuration=runtime_configuration, ) - assert group._async_group.metadata.attributes["title"] == "test 123" + assert group.attrs == attributes + assert group._async_group.runtime_configuration == runtime_configuration + if not exists_ok: + with pytest.raises(AssertionError): + group = Group.create( + store, + attributes=attributes, + exists_ok=exists_ok, + runtime_configuration=runtime_configuration, + ) -@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) -@pytest.mark.parametrize("zarr_format", ("2", "3")) + +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (2, 3)) @pytest.mark.parametrize("exists_ok", (True, False)) -@pytest.mark.parametrize("runtime_configuration", (None,)) -def test_create( +@pytest.mark.parametrize( + "runtime_configuration", (RuntimeConfiguration(order="C"), RuntimeConfiguration(order="F")) +) +async def test_asyncgroup_create( store: MemoryStore | LocalStore, exists_ok: bool, - zarr_format: Literal["2", "3"], - runtime_configuration: None, + zarr_format: ZarrFormat, + runtime_configuration: RuntimeConfiguration, ): - ... + """ + Test that `AsyncGroup.create` works as expected. + """ + attributes = {"foo": 100} + group = await AsyncGroup.create( + store, + attributes=attributes, + exists_ok=exists_ok, + zarr_format=zarr_format, + runtime_configuration=runtime_configuration, + ) + assert group.metadata == GroupMetadata(zarr_format=zarr_format, attributes=attributes) + assert group.store_path == make_store_path(store) + assert group.runtime_configuration == runtime_configuration -@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) -def test_from_dict(store: MemoryStore | LocalStore): + if not exists_ok: + with pytest.raises(AssertionError): + group = await AsyncGroup.create( + store, + attributes=attributes, + exists_ok=exists_ok, + zarr_format=zarr_format, + runtime_configuration=runtime_configuration, + ) + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +async def test_asyncgroup_open(store: MemoryStore | LocalStore): + """ + Test that + """ ... -@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) @pytest.mark.parametrize("zarr_format", ("2", "3")) def test_getitem(store: MemoryStore | LocalStore, zarr_format: Literal["2", "3"]): ... From f81b6f327c7758a989a6f6b60505511817a2d4c6 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 22:49:06 +0200 Subject: [PATCH 14/56] test: add __init__.py to support imports from top-level conftest.py, and add some docstrings, and remove redundant def --- tests/v3/__init__.py | 0 tests/v3/test_group_v3.py | 30 ++++++++++++++++++++++++++---- 2 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 tests/v3/__init__.py diff --git a/tests/v3/__init__.py b/tests/v3/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/v3/test_group_v3.py b/tests/v3/test_group_v3.py index af5c5171b3..bd3407c068 100644 --- a/tests/v3/test_group_v3.py +++ b/tests/v3/test_group_v3.py @@ -111,7 +111,9 @@ def test_group(store: MemoryStore | LocalStore) -> None: def test_group_create( store: MemoryStore | LocalStore, exists_ok: bool, runtime_configuration: RuntimeConfiguration ): - + """ + Test that `Group.create` works as expected. + """ attributes = {"foo": 100} group = Group.create( store, @@ -173,12 +175,32 @@ async def test_asyncgroup_create( ) +@pytest.mark.asyncio @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) -async def test_asyncgroup_open(store: MemoryStore | LocalStore): +@pytest.mark.parametrize("zarr_format", (2, 3)) +@pytest.mark.parametrize("runtime_configuration", (RuntimeConfiguration(),)) +async def test_asyncgroup_open( + store: LocalStore | MemoryStore, + zarr_format: ZarrFormat, + runtime_configuration: RuntimeConfiguration, +) -> None: """ - Test that + Create an `AsyncGroup`, then ensure that we can open it using `AsyncGroup.open` """ - ... + attributes = {"foo": 100} + group_w = await AsyncGroup.create( + store=store, + attributes=attributes, + exists_ok=False, + zarr_format=ZarrFormat, + runtime_configuration=runtime_configuration, + ) + + group_r = AsyncGroup.open( + store=store, zarr_format=zarr_format, runtime_configuration=runtime_configuration + ) + + assert group_r == group_w @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) From c768ca8a052bb38bb916cd487ac2b99c9e51ebe0 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 22:56:13 +0200 Subject: [PATCH 15/56] fix: return valid JSON from GroupMetadata.to_bytes for v2 metadata --- src/zarr/v3/group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index 5bce87376c..4f4fb72019 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -55,7 +55,7 @@ def to_bytes(self) -> dict[str, bytes]: return {ZARR_JSON: json.dumps(self.to_dict()).encode()} else: return { - ZGROUP_JSON: self.zarr_format, + ZGROUP_JSON: json.dumps({"zarr_format": self.zarr_format}).encode(), ZATTRS_JSON: json.dumps(self.attributes).encode(), } From c40550638b578215ff8410585bcf80a372ed4728 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 22:56:34 +0200 Subject: [PATCH 16/56] fix: don't use a type as a value --- tests/v3/test_group_v3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v3/test_group_v3.py b/tests/v3/test_group_v3.py index bd3407c068..9c1bde2ad8 100644 --- a/tests/v3/test_group_v3.py +++ b/tests/v3/test_group_v3.py @@ -192,7 +192,7 @@ async def test_asyncgroup_open( store=store, attributes=attributes, exists_ok=False, - zarr_format=ZarrFormat, + zarr_format=zarr_format, runtime_configuration=runtime_configuration, ) From 5b9655437d1afb0868ab3af717bcf075a419a1a8 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 23:57:29 +0200 Subject: [PATCH 17/56] test: add getitem test --- tests/v3/conftest.py | 7 ++-- tests/v3/test_group_v3.py | 70 +++++++++++++++++++++++++++++++++++---- 2 files changed, 67 insertions(+), 10 deletions(-) diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py index ceb0b558eb..05ab0550e7 100644 --- a/tests/v3/conftest.py +++ b/tests/v3/conftest.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING from zarr.v3.common import ZarrFormat -from zarr.v3.group import AsyncGroup, Group +from zarr.v3.group import AsyncGroup if TYPE_CHECKING: from typing import Any, Literal @@ -71,14 +71,15 @@ class AsyncGroupRequest: @pytest.fixture(scope="function") -async def async_group(request: AsyncGroupRequest, tmpdir) -> Group: +async def async_group(request: AsyncGroupRequest, tmpdir) -> AsyncGroup: param: AsyncGroupRequest = request.param store = parse_store(param.store, str(tmpdir)) - return await AsyncGroup.create( + agroup = await AsyncGroup.create( store, attributes=param.attributes, zarr_format=param.zarr_format, runtime_configuration=param.runtime_configuration, exists_ok=False, ) + return agroup diff --git a/tests/v3/test_group_v3.py b/tests/v3/test_group_v3.py index 9c1bde2ad8..c35c933639 100644 --- a/tests/v3/test_group_v3.py +++ b/tests/v3/test_group_v3.py @@ -1,11 +1,10 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from zarr.v3.store.core import make_store_path if TYPE_CHECKING: from zarr.v3.store import MemoryStore, LocalStore - from typing import Literal from zarr.v3.common import ZarrFormat import pytest @@ -196,14 +195,71 @@ async def test_asyncgroup_open( runtime_configuration=runtime_configuration, ) - group_r = AsyncGroup.open( + group_r = await AsyncGroup.open( store=store, zarr_format=zarr_format, runtime_configuration=runtime_configuration ) - assert group_r == group_w + assert group_w.attrs == group_w.attrs == attributes + assert group_w == group_r + # try opening with the wrong zarr format + if zarr_format == 3: + zarr_format_wrong = 2 + elif zarr_format == 2: + zarr_format_wrong = 3 + else: + assert False + # todo: get more specific than this + with pytest.raises(ValueError): + await AsyncGroup.open(store=store, zarr_format=zarr_format_wrong) + + +# todo: replace the dict[str, Any] type with something a bit more specific +# should this be async? @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) -@pytest.mark.parametrize("zarr_format", ("2", "3")) -def test_getitem(store: MemoryStore | LocalStore, zarr_format: Literal["2", "3"]): - ... +@pytest.mark.parametrize( + "data", + ( + {"zarr_format": 3, "node_type": "group", "attributes": {"foo": 100}}, + {"zarr_format": 2, "attributes": {"foo": 100}}, + ), +) +def test_asyncgroup_from_dict(store: MemoryStore | LocalStore, data: dict[str, Any]): + """ + Test that we can create an AsyncGroup from a dict + """ + path = "test" + store_path = StorePath(store=store, path=path) + group = AsyncGroup.from_dict( + store_path, data=data, runtime_configuration=RuntimeConfiguration() + ) + + assert group.metadata.zarr_format == data["zarr_format"] + assert group.metadata.attributes == data["attributes"] + + +# todo: replace this with a declarative API where we model a full hierarchy +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (2, 3)) +async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat): + """ + Create an `AsyncGroup`, then create members of that group, and ensure that we can access those + members via the `AsyncGroup.getitem` method. + """ + agroup = await AsyncGroup.create(store=store, zarr_format=zarr_format) + + sub_array_path = "sub_array" + sub_array = await agroup.create_array( + path=sub_array_path, shape=(10,), dtype="uint8", chunk_shape=(2,) + ) + assert await agroup.getitem(sub_array_path) == sub_array + + sub_group_path = "sub_group" + sub_group = await agroup.create_group(sub_group_path, attributes={"foo": 100}) + assert await agroup.getitem(sub_group_path) == sub_group + + # check that asking for a nonexistent key raises KeyError + with pytest.raises(KeyError): + agroup.getitem("foo") From d77d55f4c0e31ab19fabe1aa88c82ba80c8ec54d Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 5 Apr 2024 10:47:23 +0200 Subject: [PATCH 18/56] fix: replace reference to nonexistent method in with , which does exist --- src/zarr/v3/group.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index 4f4fb72019..bfb6440cf3 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -113,7 +113,7 @@ async def open( zarr_format: Literal[2, 3] = 3, ) -> AsyncGroup: store_path = make_store_path(store) - zarr_json_bytes = await (store_path / ZARR_JSON).get_async() + zarr_json_bytes = await (store_path / ZARR_JSON).get() assert zarr_json_bytes is not None # TODO: consider trying to autodiscover the zarr-format here @@ -124,7 +124,6 @@ async def open( zarr_json = ( json.loads(zarr_json_bytes) if zarr_json_bytes is not None else {"zarr_format": 3} ) - elif zarr_format == 2: # V2 groups are comprised of a .zgroup and .zattrs objects # (both are optional in the case of implicit groups) From 05f89c658fa1799295e95e4be5bcdfaa141a81d6 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 5 Apr 2024 10:48:07 +0200 Subject: [PATCH 19/56] test: declare v3ness via directory structure, not test file name --- tests/v3/{test_codecs_v3.py => test_codecs.py} | 0 tests/v3/{test_group_v3.py => test_group.py} | 0 tests/v3/{test_storage_v3.py => test_storage.py} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename tests/v3/{test_codecs_v3.py => test_codecs.py} (100%) rename tests/v3/{test_group_v3.py => test_group.py} (100%) rename tests/v3/{test_storage_v3.py => test_storage.py} (100%) diff --git a/tests/v3/test_codecs_v3.py b/tests/v3/test_codecs.py similarity index 100% rename from tests/v3/test_codecs_v3.py rename to tests/v3/test_codecs.py diff --git a/tests/v3/test_group_v3.py b/tests/v3/test_group.py similarity index 100% rename from tests/v3/test_group_v3.py rename to tests/v3/test_group.py diff --git a/tests/v3/test_storage_v3.py b/tests/v3/test_storage.py similarity index 100% rename from tests/v3/test_storage_v3.py rename to tests/v3/test_storage.py From b2f9beed75d405769d76c3e7d0a219eae75237df Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 5 Apr 2024 12:48:08 +0200 Subject: [PATCH 20/56] add a docstring to _get, and pass auto_mkdir to _put --- src/zarr/v3/store/local.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/zarr/v3/store/local.py b/src/zarr/v3/store/local.py index a62eea20f7..1ef4beffec 100644 --- a/src/zarr/v3/store/local.py +++ b/src/zarr/v3/store/local.py @@ -10,6 +10,20 @@ def _get(path: Path, byte_range: Optional[Tuple[int, Optional[int]]] = None) -> bytes: + """ + Fetch a contiguous region of bytes from a file. + + Parameters + ---------- + + path: Path + The file to read bytes from. + byte_range: Optional[Tuple[int, Optional[int]]] = None + The range of bytes to read. If `byte_range` is `None`, then the entire file will be read. + If `byte_range` is a tuple, the first value specifies the index of the first byte to read, + and the second value specifies the total number of bytes to read. If the total value is + `None`, then the entire file after the first byte will be read. + """ if byte_range is not None: start = byte_range[0] end = (start + byte_range[1]) if byte_range[1] is not None else None @@ -95,7 +109,7 @@ async def get_partial_values( async def set(self, key: str, value: BytesLike) -> None: assert isinstance(key, str) path = self.root / key - await to_thread(_put, path, value) + await to_thread(_put, path, value, auto_mkdir=self.auto_mkdir) async def set_partial_values(self, key_start_values: List[Tuple[str, int, bytes]]) -> None: args = [] From 88f4c463e955e17883c6831343a42b8362a8d24a Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 5 Apr 2024 13:42:55 +0200 Subject: [PATCH 21/56] fix: add docstring to LocalStore.get_partial_values; adjust body of LocalStore.get_partial_values to properly handle the byte_range parameter of LocalStore.get. --- src/zarr/v3/store/local.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/zarr/v3/store/local.py b/src/zarr/v3/store/local.py index 1ef4beffec..1677e08ddf 100644 --- a/src/zarr/v3/store/local.py +++ b/src/zarr/v3/store/local.py @@ -96,14 +96,23 @@ async def get( async def get_partial_values( self, key_ranges: List[Tuple[str, Tuple[int, int]]] ) -> List[bytes]: + """ + Read byte ranges from multiple keys. + + Parameters + ---------- + + key_ranges: List[Tuple[str, Tuple[int, int]]] + A list of (key, (start, length)) tuples. The first element of the tuple is the name of + the key in storage to fetch bytes from. The second element the tuple defines the byte + range to retrieve. These values are arguments to `get`, as this method wraps + concurrent invocation of `get`. + """ args = [] for key, byte_range in key_ranges: assert isinstance(key, str) path = self.root / key - if byte_range is not None: - args.append((_get, path, byte_range[0], byte_range[1])) - else: - args.append((_get, path)) + args.append((_get, path, byte_range)) return await concurrent_map(args, to_thread, limit=None) # TODO: fix limit async def set(self, key: str, value: BytesLike) -> None: From 624ff773137ad44c9d604958535fd19d5b353195 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 5 Apr 2024 15:49:49 +0200 Subject: [PATCH 22/56] test: add tests for localstore init, set, get, get_partial --- tests/v3/test_group.py | 2 +- tests/v3/test_storage.py | 101 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 101 insertions(+), 2 deletions(-) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index c35c933639..f3530c6042 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -262,4 +262,4 @@ async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: # check that asking for a nonexistent key raises KeyError with pytest.raises(KeyError): - agroup.getitem("foo") + await agroup.getitem("foo") diff --git a/tests/v3/test_storage.py b/tests/v3/test_storage.py index 3d8024de70..88d83c8e85 100644 --- a/tests/v3/test_storage.py +++ b/tests/v3/test_storage.py @@ -6,9 +6,108 @@ # import tempfile # import numpy as np +from __future__ import annotations +from zarr.v3.store.local import LocalStore +from pathlib import Path import pytest -pytest.skip("old v3 tests are disabled", allow_module_level=True) + +@pytest.mark.parametrize("auto_mkdir", (True, False)) +def test_local_store_init(tmpdir, auto_mkdir: bool) -> None: + tmpdir_str = str(tmpdir) + tmpdir_path = Path(tmpdir_str) + store = LocalStore(root=tmpdir_str, auto_mkdir=auto_mkdir) + + assert store.root == tmpdir_path + assert store.auto_mkdir == auto_mkdir + + # ensure that str and pathlib.Path get normalized to the same output + # a stronger test is to ensure that these two store instances are identical + # but LocalStore.__eq__ is not defined at this time. + assert store.root == LocalStore(root=tmpdir_path, auto_mkdir=auto_mkdir).root + + store_str = f"file://{tmpdir_str}" + assert str(store) == store_str + assert repr(store) == f"LocalStore({repr(store_str)})" + + +@pytest.mark.asyncio +@pytest.mark.parametrize("byte_range", (None, (0, None), (1, None), (1, 2), (None, 1))) +async def test_local_store_get( + local_store, byte_range: None | tuple[int | None, int | None] +) -> None: + payload = b"\x01\x02\x03\x04" + object_name = "foo" + (local_store.root / object_name).write_bytes(payload) + observed = await local_store.get(object_name, byte_range=byte_range) + + if byte_range is None: + start = 0 + length = len(payload) + else: + maybe_start, maybe_len = byte_range + if maybe_start is None: + start = 0 + else: + start = maybe_start + + if maybe_len is None: + length = len(payload) - start + else: + length = maybe_len + + expected = payload[start : start + length] + assert observed == expected + + # test that it's an error to get bytes from a file that doesn't exist + with pytest.raises(FileNotFoundError): + await local_store.get(object_name + "_absent", byte_range=byte_range) + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "key_ranges", + ( + [], + [("key_0", (0, 1))], + [("dir/key_0", (0, 1)), ("key_1", (0, 2))], + [("key_0", (0, 1)), ("key_1", (0, 2)), ("key_1", (0, 2))], + ), +) +async def test_local_store_get_partial( + tmpdir, key_ranges: tuple[list[tuple[str, tuple[int, int]]]] +) -> None: + store = LocalStore(str(tmpdir), auto_mkdir=True) + # use the utf-8 encoding of the key as the bytes + for key, _ in key_ranges: + payload = bytes(key, encoding="utf-8") + (store.root / key).write_bytes(payload) + + results = await store.get_partial_values(key_ranges) + for idx, observed in enumerate(results): + key, byte_range = key_ranges[idx] + expected = await store.get(key, byte_range=byte_range) + assert observed == expected + + +@pytest.mark.asyncio +@pytest.mark.parametrize("path", ("foo", "foo/bar")) +@pytest.mark.parametrize("auto_mkdir", (True, False)) +async def test_local_store_set(tmpdir, path: str, auto_mkdir: bool) -> None: + store = LocalStore(str(tmpdir), auto_mkdir=auto_mkdir) + payload = b"\x01\x02\x03\x04" + + if "/" in path and not auto_mkdir: + with pytest.raises(FileNotFoundError): + await store.set(path, payload) + else: + x = await store.set(path, payload) + + # this method should not return anything + assert x is None + + assert (store.root / path).read_bytes() == payload + # import zarr # from zarr._storage.store import _get_hierarchy_metadata, v3_api_available, StorageTransformer From b762fa47da38d3a1664e3b74dc8f53c72c2e52fa Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 11 Apr 2024 11:15:29 +0200 Subject: [PATCH 23/56] fix: Rename children to members; AsyncGroup.members yields tuples of (name, AsyncArray / AsyncGroup) pairs; Group.members repackages these into a dict. --- src/zarr/v3/group.py | 39 ++++++++++++++++++++++++++------------- tests/test_group_v3.py | 26 +++++++++++--------------- 2 files changed, 37 insertions(+), 28 deletions(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index 5bce87376c..a30d7d1702 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -287,13 +287,15 @@ async def update_attributes(self, new_attributes: dict[str, Any]): def __repr__(self): return f"" - async def nchildren(self) -> int: + async def nmembers(self) -> int: raise NotImplementedError - async def children(self) -> AsyncGenerator[AsyncArray, AsyncGroup]: + async def members(self) -> AsyncGenerator[tuple[str, AsyncArray | AsyncGroup], None]: """ Returns an AsyncGenerator over the arrays and groups contained in this group. This method requires that `store_path.store` supports directory listing. + + The results are not guaranteed to be ordered. """ if not self.store_path.store.supports_listing: msg = ( @@ -311,13 +313,17 @@ async def children(self) -> AsyncGenerator[AsyncArray, AsyncGroup]: # is there a better way to schedule this? for subkey in subkeys_filtered: try: - yield await self.getitem(subkey) + yield (subkey, await self.getitem(subkey)) except KeyError: - # keyerror is raised when `subkey``names an object in the store + # keyerror is raised when `subkey` names an object (in the object storage sense), + # as opposed to a prefix, in the store under the prefix associated with this group # in which case `subkey` cannot be the name of a sub-array or sub-group. + logger.warning( + "Object at %s is not recognized as a component of a Zarr hierarchy.", subkey + ) pass - async def contains(self, child: str) -> bool: + async def contains(self, member: str) -> bool: raise NotImplementedError async def group_keys(self) -> AsyncIterator[str]: @@ -444,16 +450,23 @@ def update_attributes(self, new_attributes: dict[str, Any]): return self @property - def nchildren(self) -> int: - return self._sync(self._async_group.nchildren) + def nmembers(self) -> int: + return self._sync(self._async_group.nmembers) @property - def children(self) -> list[Array | Group]: - _children = self._sync_iter(self._async_group.children) - return [Array(obj) if isinstance(obj, AsyncArray) else Group(obj) for obj in _children] - - def __contains__(self, child) -> bool: - return self._sync(self._async_group.contains(child)) + def members(self) -> dict[str, Array | Group]: + """ + Return the sub-arrays and sub-groups of this group as a `dict` of (name, array | group) + pairs + """ + _members = self._sync_iter(self._async_group.members) + return { + key: Array(value) if isinstance(value, AsyncArray) else Group(value) + for key, value in _members + } + + def __contains__(self, member) -> bool: + return self._sync(self._async_group.contains(member)) def group_keys(self) -> Iterator[str]: return self._sync_iter(self._async_group.group_keys) diff --git a/tests/test_group_v3.py b/tests/test_group_v3.py index 555374f5b3..204c255064 100644 --- a/tests/test_group_v3.py +++ b/tests/test_group_v3.py @@ -13,9 +13,9 @@ # todo: put RemoteStore in here @pytest.mark.parametrize("store_type", ("local_store", "memory_store")) -def test_group_children(store_type, request): +def test_group_members(store_type, request): """ - Test that `Group.children` returns correct values, i.e. the arrays and groups + Test that `Group.members` returns correct values, i.e. the arrays and groups (explicit and implicit) contained in that group. """ @@ -26,12 +26,14 @@ def test_group_children(store_type, request): store_path=StorePath(store=store, path=path), ) group = Group(agroup) + members_expected = {} - subgroup = group.create_group("subgroup") + members_expected["subgroup"] = group.create_group("subgroup") # make a sub-sub-subgroup, to ensure that the children calculation doesn't go # too deep in the hierarchy - _ = subgroup.create_group("subsubgroup") - subarray = group.create_array( + _ = members_expected["subgroup"].create_group("subsubgroup") + + members_expected["subarray"] = group.create_array( "subarray", shape=(100,), dtype="uint8", chunk_shape=(10,), exists_ok=True ) @@ -42,21 +44,15 @@ def test_group_children(store_type, request): # this creates an implicit group called implicit_subgroup sync(store.set(f"{path}/implicit_subgroup/extra_object", b"000000")) # make the implicit subgroup - implicit_subgroup = Group( + members_expected["implicit_subgroup"] = Group( AsyncGroup( metadata=GroupMetadata(), store_path=StorePath(store=store, path=f"{path}/implicit_subgroup"), ) ) - # note: these assertions are order-independent, because it is not clear - # if group.children guarantees a particular order for the children. - # If order is not guaranteed, then the better version of this test is - # to compare two sets, but presently neither the group nor array classes are hashable. - observed = group.children - assert len(observed) == 3 - assert subarray in observed - assert implicit_subgroup in observed - assert subgroup in observed + members_observed = group.members + # members are not guaranteed to be ordered, so sort before comparing + assert sorted(members_observed) == sorted(members_expected) @pytest.mark.parametrize("store_type", (("local_store",))) From 55742269a77f20f19114a7e077b675edf7332d77 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 11 Apr 2024 13:20:32 +0200 Subject: [PATCH 24/56] fix: make Group.members return a tuple of str, Array | Group pairs --- src/zarr/v3/group.py | 10 +++++----- tests/test_group_v3.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index a30d7d1702..f2158d3b28 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -454,16 +454,16 @@ def nmembers(self) -> int: return self._sync(self._async_group.nmembers) @property - def members(self) -> dict[str, Array | Group]: + def members(self) -> tuple[tuple[str, Array | Group], ...]: """ - Return the sub-arrays and sub-groups of this group as a `dict` of (name, array | group) + Return the sub-arrays and sub-groups of this group as a `tuple` of (name, array | group) pairs """ _members = self._sync_iter(self._async_group.members) - return { - key: Array(value) if isinstance(value, AsyncArray) else Group(value) + return tuple( + (key, Array(value)) if isinstance(value, AsyncArray) else (key, Group(value)) for key, value in _members - } + ) def __contains__(self, member) -> bool: return self._sync(self._async_group.contains(member)) diff --git a/tests/test_group_v3.py b/tests/test_group_v3.py index 204c255064..6b1f78df60 100644 --- a/tests/test_group_v3.py +++ b/tests/test_group_v3.py @@ -52,7 +52,7 @@ def test_group_members(store_type, request): ) members_observed = group.members # members are not guaranteed to be ordered, so sort before comparing - assert sorted(members_observed) == sorted(members_expected) + assert sorted(dict(members_observed)) == sorted(members_expected) @pytest.mark.parametrize("store_type", (("local_store",))) From d634cbf211f24290f8d282b2479df1566e317f85 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 11 Apr 2024 20:47:28 +0200 Subject: [PATCH 25/56] fix: revert changes to synchronization code; this is churn that we need to deal with --- src/zarr/v3/group.py | 6 +++++- src/zarr/v3/sync.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index ce5a3a3e58..a93f8404e9 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -437,6 +437,10 @@ async def update_attributes_async(self, new_attributes: dict[str, Any]) -> Group async_group = replace(self._async_group, metadata=new_metadata) return replace(self, _async_group=async_group) + @property + def store_path(self) -> StorePath: + return self._async_group.store_path + @property def metadata(self) -> GroupMetadata: return self._async_group.metadata @@ -463,7 +467,7 @@ def members(self) -> tuple[tuple[str, Array | Group], ...]: Return the sub-arrays and sub-groups of this group as a `tuple` of (name, array | group) pairs """ - _members = self._sync_iter(self._async_group.members) + _members: list[AsyncArray | AsyncGroup] = self._sync_iter(self._async_group.members) return tuple( (key, Array(value)) if isinstance(value, AsyncArray) else (key, Group(value)) for key, value in _members diff --git a/src/zarr/v3/sync.py b/src/zarr/v3/sync.py index 2e94a815cc..592ce8b75b 100644 --- a/src/zarr/v3/sync.py +++ b/src/zarr/v3/sync.py @@ -113,7 +113,7 @@ def _sync(self, coroutine: Coroutine[Any, Any, T]) -> T: def _sync_iter(self, coroutine: Coroutine[Any, Any, AsyncIterator[T]]) -> List[T]: async def iter_to_list() -> List[T]: # TODO: replace with generators so we don't materialize the entire iterator at once - async_iterator = await coroutine - return [item async for item in async_iterator] + # async_iterator = await coroutine + return [item async for item in coroutine()] return self._sync(iter_to_list()) From d264f71a5a5f49f47daef36e8074a468eae3010e Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 13:30:01 +0200 Subject: [PATCH 26/56] chore: move v3 tests into v3 folder --- tests/{ => v3}/test_codecs_v3.py | 0 tests/{ => v3}/test_group_v3.py | 0 tests/{ => v3}/test_storage_v3.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename tests/{ => v3}/test_codecs_v3.py (100%) rename tests/{ => v3}/test_group_v3.py (100%) rename tests/{ => v3}/test_storage_v3.py (100%) diff --git a/tests/test_codecs_v3.py b/tests/v3/test_codecs_v3.py similarity index 100% rename from tests/test_codecs_v3.py rename to tests/v3/test_codecs_v3.py diff --git a/tests/test_group_v3.py b/tests/v3/test_group_v3.py similarity index 100% rename from tests/test_group_v3.py rename to tests/v3/test_group_v3.py diff --git a/tests/test_storage_v3.py b/tests/v3/test_storage_v3.py similarity index 100% rename from tests/test_storage_v3.py rename to tests/v3/test_storage_v3.py From 0741bed728fe724d733250e84c006698cb9899f8 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 12 Apr 2024 14:36:18 +0200 Subject: [PATCH 27/56] chore: type hints --- tests/{ => v3}/conftest.py | 13 +++++++++++++ tests/v3/test_group_v3.py | 16 ++++++++-------- 2 files changed, 21 insertions(+), 8 deletions(-) rename tests/{ => v3}/conftest.py (66%) diff --git a/tests/conftest.py b/tests/v3/conftest.py similarity index 66% rename from tests/conftest.py rename to tests/v3/conftest.py index 40275ba62c..0e166e224e 100644 --- a/tests/conftest.py +++ b/tests/v3/conftest.py @@ -31,3 +31,16 @@ def remote_store(): @pytest.fixture(scope="function") def memory_store(): return MemoryStore() + + +@pytest.fixture(scope="function") +def store(request: str, tmpdir): + param = request.param + if param == "local_store": + return LocalStore(str(tmpdir)) + elif param == "memory_store": + return MemoryStore() + elif param == "remote_store": + return RemoteStore() + else: + assert False diff --git a/tests/v3/test_group_v3.py b/tests/v3/test_group_v3.py index 79bbd4af45..87d59eb31a 100644 --- a/tests/v3/test_group_v3.py +++ b/tests/v3/test_group_v3.py @@ -3,6 +3,7 @@ if TYPE_CHECKING: from zarr.v3.store.remote import MemoryStore, LocalStore + import pytest import numpy as np @@ -13,14 +14,13 @@ # todo: put RemoteStore in here -@pytest.mark.parametrize("store_type", ("local_store", "memory_store")) -def test_group_members(store_type, request): +@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) +def test_group_children(store: MemoryStore | LocalStore): """ Test that `Group.members` returns correct values, i.e. the arrays and groups (explicit and implicit) contained in that group. """ - store: LocalStore | MemoryStore = request.getfixturevalue(store_type) path = "group" agroup = AsyncGroup( metadata=GroupMetadata(), @@ -56,9 +56,8 @@ def test_group_members(store_type, request): assert sorted(dict(members_observed)) == sorted(members_expected) -@pytest.mark.parametrize("store_type", (("local_store",))) -def test_group(store_type, request) -> None: - store = request.getfixturevalue(store_type) +@pytest.mark.parametrize("store", (("local_store", "memory_store")), indirect=["store"]) +def test_group(store: MemoryStore | LocalStore) -> None: store_path = StorePath(store) agroup = AsyncGroup( metadata=GroupMetadata(), @@ -100,9 +99,10 @@ def test_group(store_type, request) -> None: assert dict(bar3.attrs) == {"baz": "qux", "name": "bar"} -def test_group_sync_constructor(store_path) -> None: +@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) +def test_group_sync_constructor(store: MemoryStore | LocalStore) -> None: group = Group.create( - store=store_path, + store=store, attributes={"title": "test 123"}, runtime_configuration=RuntimeConfiguration(), ) From eb8a535368970391fc6378c9c03875fae6d19ff4 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 14:59:51 +0200 Subject: [PATCH 28/56] test: add schema for group method tests --- tests/v3/test_group_v3.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/v3/test_group_v3.py b/tests/v3/test_group_v3.py index 87d59eb31a..b3fedf6c8e 100644 --- a/tests/v3/test_group_v3.py +++ b/tests/v3/test_group_v3.py @@ -3,6 +3,7 @@ if TYPE_CHECKING: from zarr.v3.store.remote import MemoryStore, LocalStore + from typing import Literal import pytest import numpy as np @@ -108,3 +109,27 @@ def test_group_sync_constructor(store: MemoryStore | LocalStore) -> None: ) assert group._async_group.metadata.attributes["title"] == "test 123" + + +@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", ("2", "3")) +@pytest.mark.parametrize("exists_ok", (True, False)) +@pytest.mark.parametrize("runtime_configuration", (None,)) +def test_create( + store: MemoryStore | LocalStore, + exists_ok: bool, + zarr_format: Literal["2", "3"], + runtime_configuration: None, +): + ... + + +@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) +def test_from_dict(store: MemoryStore | LocalStore): + ... + + +@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", ("2", "3")) +def test_getitem(store: MemoryStore | LocalStore, zarr_format: Literal["2", "3"]): + ... From ee2e233e44fc368181989de004239ea2a66b2eb3 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 21:43:05 +0200 Subject: [PATCH 29/56] chore: add type for zarr_formats --- src/zarr/v3/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/zarr/v3/common.py b/src/zarr/v3/common.py index 1caf83a764..e92e84ef7b 100644 --- a/src/zarr/v3/common.py +++ b/src/zarr/v3/common.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Union, Tuple, Iterable, Dict, List, TypeVar, overload +from typing import TYPE_CHECKING, Literal, Union, Tuple, Iterable, Dict, List, TypeVar, overload import asyncio import contextvars from dataclasses import dataclass @@ -21,6 +21,7 @@ ChunkCoordsLike = Iterable[int] SliceSelection = Tuple[slice, ...] Selection = Union[slice, SliceSelection] +ZarrFormat = Literal[2, 3] JSON = Union[str, None, int, float, Enum, Dict[str, "JSON"], List["JSON"], Tuple["JSON", ...]] From 01eec6f269c3ff01d71a20a1c70752775540bd59 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 22:19:03 +0200 Subject: [PATCH 30/56] chore: remove localstore for now --- tests/conftest.py | 7 +++ tests/v3/conftest.py | 54 ++++++++++++++++++---- tests/v3/test_group_v3.py | 96 +++++++++++++++++++++++++++++---------- 3 files changed, 125 insertions(+), 32 deletions(-) create mode 100644 tests/conftest.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000000..6680e4066b --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,7 @@ +import pytest +import pathlib + + +@pytest.fixture(params=[str, pathlib.Path]) +def path_type(request): + return request.param diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py index 0e166e224e..ceb0b558eb 100644 --- a/tests/v3/conftest.py +++ b/tests/v3/conftest.py @@ -1,10 +1,33 @@ +from __future__ import annotations +from typing import TYPE_CHECKING + +from zarr.v3.common import ZarrFormat +from zarr.v3.group import AsyncGroup, Group + +if TYPE_CHECKING: + from typing import Any, Literal +from dataclasses import dataclass, field import pathlib + import pytest +from zarr.v3.config import RuntimeConfiguration from zarr.v3.store import LocalStore, StorePath, MemoryStore from zarr.v3.store.remote import RemoteStore +def parse_store( + store: Literal["local", "memory", "remote"], path: str +) -> LocalStore | MemoryStore | RemoteStore: + if store == "local": + return LocalStore(path) + if store == "memory": + return MemoryStore() + if store == "remote": + return RemoteStore() + assert False + + @pytest.fixture(params=[str, pathlib.Path]) def path_type(request): return request.param @@ -36,11 +59,26 @@ def memory_store(): @pytest.fixture(scope="function") def store(request: str, tmpdir): param = request.param - if param == "local_store": - return LocalStore(str(tmpdir)) - elif param == "memory_store": - return MemoryStore() - elif param == "remote_store": - return RemoteStore() - else: - assert False + return parse_store(param, str(tmpdir)) + + +@dataclass +class AsyncGroupRequest: + zarr_format: ZarrFormat + store: str + attributes: dict[str, Any] = field(default_factory=dict) + runtime_configuration: RuntimeConfiguration = RuntimeConfiguration() + + +@pytest.fixture(scope="function") +async def async_group(request: AsyncGroupRequest, tmpdir) -> Group: + param: AsyncGroupRequest = request.param + + store = parse_store(param.store, str(tmpdir)) + return await AsyncGroup.create( + store, + attributes=param.attributes, + zarr_format=param.zarr_format, + runtime_configuration=param.runtime_configuration, + exists_ok=False, + ) diff --git a/tests/v3/test_group_v3.py b/tests/v3/test_group_v3.py index b3fedf6c8e..ceef30a467 100644 --- a/tests/v3/test_group_v3.py +++ b/tests/v3/test_group_v3.py @@ -1,9 +1,12 @@ from __future__ import annotations from typing import TYPE_CHECKING +from zarr.v3.store.core import make_store_path + if TYPE_CHECKING: - from zarr.v3.store.remote import MemoryStore, LocalStore + from zarr.v3.store import MemoryStore, LocalStore from typing import Literal + from zarr.v3.common import ZarrFormat import pytest import numpy as np @@ -15,7 +18,7 @@ # todo: put RemoteStore in here -@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) def test_group_children(store: MemoryStore | LocalStore): """ Test that `Group.members` returns correct values, i.e. the arrays and groups @@ -57,14 +60,10 @@ def test_group_children(store: MemoryStore | LocalStore): assert sorted(dict(members_observed)) == sorted(members_expected) -@pytest.mark.parametrize("store", (("local_store", "memory_store")), indirect=["store"]) +@pytest.mark.parametrize("store", (("local", "memory")), indirect=["store"]) def test_group(store: MemoryStore | LocalStore) -> None: store_path = StorePath(store) - agroup = AsyncGroup( - metadata=GroupMetadata(), - store_path=store_path, - runtime_configuration=RuntimeConfiguration(), - ) + agroup = AsyncGroup(metadata=GroupMetadata(), store_path=store_path) group = Group(agroup) assert agroup.metadata is group.metadata @@ -100,36 +99,85 @@ def test_group(store: MemoryStore | LocalStore) -> None: assert dict(bar3.attrs) == {"baz": "qux", "name": "bar"} -@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) -def test_group_sync_constructor(store: MemoryStore | LocalStore) -> None: +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("exists_ok", (True, False)) +@pytest.mark.parametrize( + "runtime_configuration", (RuntimeConfiguration(order="C"), RuntimeConfiguration(order="F")) +) +def test_group_create( + store: MemoryStore | LocalStore, exists_ok: bool, runtime_configuration: RuntimeConfiguration +): + + attributes = {"foo": 100} group = Group.create( - store=store, - attributes={"title": "test 123"}, - runtime_configuration=RuntimeConfiguration(), + store, + attributes=attributes, + exists_ok=exists_ok, + runtime_configuration=runtime_configuration, ) - assert group._async_group.metadata.attributes["title"] == "test 123" + assert group.attrs == attributes + assert group._async_group.runtime_configuration == runtime_configuration + if not exists_ok: + with pytest.raises(AssertionError): + group = Group.create( + store, + attributes=attributes, + exists_ok=exists_ok, + runtime_configuration=runtime_configuration, + ) -@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) -@pytest.mark.parametrize("zarr_format", ("2", "3")) + +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (2, 3)) @pytest.mark.parametrize("exists_ok", (True, False)) -@pytest.mark.parametrize("runtime_configuration", (None,)) -def test_create( +@pytest.mark.parametrize( + "runtime_configuration", (RuntimeConfiguration(order="C"), RuntimeConfiguration(order="F")) +) +async def test_asyncgroup_create( store: MemoryStore | LocalStore, exists_ok: bool, - zarr_format: Literal["2", "3"], - runtime_configuration: None, + zarr_format: ZarrFormat, + runtime_configuration: RuntimeConfiguration, ): - ... + """ + Test that `AsyncGroup.create` works as expected. + """ + attributes = {"foo": 100} + group = await AsyncGroup.create( + store, + attributes=attributes, + exists_ok=exists_ok, + zarr_format=zarr_format, + runtime_configuration=runtime_configuration, + ) + assert group.metadata == GroupMetadata(zarr_format=zarr_format, attributes=attributes) + assert group.store_path == make_store_path(store) + assert group.runtime_configuration == runtime_configuration -@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) -def test_from_dict(store: MemoryStore | LocalStore): + if not exists_ok: + with pytest.raises(AssertionError): + group = await AsyncGroup.create( + store, + attributes=attributes, + exists_ok=exists_ok, + zarr_format=zarr_format, + runtime_configuration=runtime_configuration, + ) + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +async def test_asyncgroup_open(store: MemoryStore | LocalStore): + """ + Test that + """ ... -@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) @pytest.mark.parametrize("zarr_format", ("2", "3")) def test_getitem(store: MemoryStore | LocalStore, zarr_format: Literal["2", "3"]): ... From acae77a857f91232ad28a19f36af2c0ced79fcf3 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 22:49:06 +0200 Subject: [PATCH 31/56] test: add __init__.py to support imports from top-level conftest.py, and add some docstrings, and remove redundant def --- tests/v3/__init__.py | 0 tests/v3/test_group_v3.py | 30 ++++++++++++++++++++++++++---- 2 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 tests/v3/__init__.py diff --git a/tests/v3/__init__.py b/tests/v3/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/v3/test_group_v3.py b/tests/v3/test_group_v3.py index ceef30a467..62b7dff178 100644 --- a/tests/v3/test_group_v3.py +++ b/tests/v3/test_group_v3.py @@ -107,7 +107,9 @@ def test_group(store: MemoryStore | LocalStore) -> None: def test_group_create( store: MemoryStore | LocalStore, exists_ok: bool, runtime_configuration: RuntimeConfiguration ): - + """ + Test that `Group.create` works as expected. + """ attributes = {"foo": 100} group = Group.create( store, @@ -169,12 +171,32 @@ async def test_asyncgroup_create( ) +@pytest.mark.asyncio @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) -async def test_asyncgroup_open(store: MemoryStore | LocalStore): +@pytest.mark.parametrize("zarr_format", (2, 3)) +@pytest.mark.parametrize("runtime_configuration", (RuntimeConfiguration(),)) +async def test_asyncgroup_open( + store: LocalStore | MemoryStore, + zarr_format: ZarrFormat, + runtime_configuration: RuntimeConfiguration, +) -> None: """ - Test that + Create an `AsyncGroup`, then ensure that we can open it using `AsyncGroup.open` """ - ... + attributes = {"foo": 100} + group_w = await AsyncGroup.create( + store=store, + attributes=attributes, + exists_ok=False, + zarr_format=ZarrFormat, + runtime_configuration=runtime_configuration, + ) + + group_r = AsyncGroup.open( + store=store, zarr_format=zarr_format, runtime_configuration=runtime_configuration + ) + + assert group_r == group_w @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) From ebe15483abced98144a7f3ede81bba3c745ddaa3 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 22:56:13 +0200 Subject: [PATCH 32/56] fix: return valid JSON from GroupMetadata.to_bytes for v2 metadata --- src/zarr/v3/group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index a93f8404e9..470cf0b0cf 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -54,7 +54,7 @@ def to_bytes(self) -> dict[str, bytes]: return {ZARR_JSON: json.dumps(self.to_dict()).encode()} else: return { - ZGROUP_JSON: json.dumps({"zarr_format": 2}).encode(), + ZGROUP_JSON: json.dumps({"zarr_format": self.zarr_format}).encode(), ZATTRS_JSON: json.dumps(self.attributes).encode(), } From 3dce5e37cf9fe9ee4c6b20a011c9d78fbad67a6a Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 22:56:34 +0200 Subject: [PATCH 33/56] fix: don't use a type as a value --- tests/v3/test_group_v3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v3/test_group_v3.py b/tests/v3/test_group_v3.py index 62b7dff178..04a9479e9b 100644 --- a/tests/v3/test_group_v3.py +++ b/tests/v3/test_group_v3.py @@ -188,7 +188,7 @@ async def test_asyncgroup_open( store=store, attributes=attributes, exists_ok=False, - zarr_format=ZarrFormat, + zarr_format=zarr_format, runtime_configuration=runtime_configuration, ) From 7f82fdfd4ab102e9d9e66ad86d3884f7795cb2ad Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 23:57:29 +0200 Subject: [PATCH 34/56] test: add getitem test --- tests/v3/conftest.py | 7 ++-- tests/v3/test_group_v3.py | 70 +++++++++++++++++++++++++++++++++++---- 2 files changed, 67 insertions(+), 10 deletions(-) diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py index ceb0b558eb..05ab0550e7 100644 --- a/tests/v3/conftest.py +++ b/tests/v3/conftest.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING from zarr.v3.common import ZarrFormat -from zarr.v3.group import AsyncGroup, Group +from zarr.v3.group import AsyncGroup if TYPE_CHECKING: from typing import Any, Literal @@ -71,14 +71,15 @@ class AsyncGroupRequest: @pytest.fixture(scope="function") -async def async_group(request: AsyncGroupRequest, tmpdir) -> Group: +async def async_group(request: AsyncGroupRequest, tmpdir) -> AsyncGroup: param: AsyncGroupRequest = request.param store = parse_store(param.store, str(tmpdir)) - return await AsyncGroup.create( + agroup = await AsyncGroup.create( store, attributes=param.attributes, zarr_format=param.zarr_format, runtime_configuration=param.runtime_configuration, exists_ok=False, ) + return agroup diff --git a/tests/v3/test_group_v3.py b/tests/v3/test_group_v3.py index 04a9479e9b..3e2779fa79 100644 --- a/tests/v3/test_group_v3.py +++ b/tests/v3/test_group_v3.py @@ -1,11 +1,10 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from zarr.v3.store.core import make_store_path if TYPE_CHECKING: from zarr.v3.store import MemoryStore, LocalStore - from typing import Literal from zarr.v3.common import ZarrFormat import pytest @@ -192,14 +191,71 @@ async def test_asyncgroup_open( runtime_configuration=runtime_configuration, ) - group_r = AsyncGroup.open( + group_r = await AsyncGroup.open( store=store, zarr_format=zarr_format, runtime_configuration=runtime_configuration ) - assert group_r == group_w + assert group_w.attrs == group_w.attrs == attributes + assert group_w == group_r + # try opening with the wrong zarr format + if zarr_format == 3: + zarr_format_wrong = 2 + elif zarr_format == 2: + zarr_format_wrong = 3 + else: + assert False + # todo: get more specific than this + with pytest.raises(ValueError): + await AsyncGroup.open(store=store, zarr_format=zarr_format_wrong) + + +# todo: replace the dict[str, Any] type with something a bit more specific +# should this be async? @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) -@pytest.mark.parametrize("zarr_format", ("2", "3")) -def test_getitem(store: MemoryStore | LocalStore, zarr_format: Literal["2", "3"]): - ... +@pytest.mark.parametrize( + "data", + ( + {"zarr_format": 3, "node_type": "group", "attributes": {"foo": 100}}, + {"zarr_format": 2, "attributes": {"foo": 100}}, + ), +) +def test_asyncgroup_from_dict(store: MemoryStore | LocalStore, data: dict[str, Any]): + """ + Test that we can create an AsyncGroup from a dict + """ + path = "test" + store_path = StorePath(store=store, path=path) + group = AsyncGroup.from_dict( + store_path, data=data, runtime_configuration=RuntimeConfiguration() + ) + + assert group.metadata.zarr_format == data["zarr_format"] + assert group.metadata.attributes == data["attributes"] + + +# todo: replace this with a declarative API where we model a full hierarchy +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (2, 3)) +async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat): + """ + Create an `AsyncGroup`, then create members of that group, and ensure that we can access those + members via the `AsyncGroup.getitem` method. + """ + agroup = await AsyncGroup.create(store=store, zarr_format=zarr_format) + + sub_array_path = "sub_array" + sub_array = await agroup.create_array( + path=sub_array_path, shape=(10,), dtype="uint8", chunk_shape=(2,) + ) + assert await agroup.getitem(sub_array_path) == sub_array + + sub_group_path = "sub_group" + sub_group = await agroup.create_group(sub_group_path, attributes={"foo": 100}) + assert await agroup.getitem(sub_group_path) == sub_group + + # check that asking for a nonexistent key raises KeyError + with pytest.raises(KeyError): + agroup.getitem("foo") From 1655ff88b017b5ac2cd0619cd77280c6406d09cc Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 5 Apr 2024 10:47:23 +0200 Subject: [PATCH 35/56] fix: replace reference to nonexistent method in with , which does exist --- src/zarr/v3/group.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index 470cf0b0cf..ff8ef53eba 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -123,7 +123,6 @@ async def open( zarr_json = ( json.loads(zarr_json_bytes) if zarr_json_bytes is not None else {"zarr_format": 3} ) - elif zarr_format == 2: # V2 groups are comprised of a .zgroup and .zattrs objects # (both are optional in the case of implicit groups) From e8514b110576d7433a28a5e970f9b592aaedb2df Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 5 Apr 2024 10:48:07 +0200 Subject: [PATCH 36/56] test: declare v3ness via directory structure, not test file name --- tests/v3/{test_codecs_v3.py => test_codecs.py} | 0 tests/v3/{test_group_v3.py => test_group.py} | 0 tests/v3/{test_storage_v3.py => test_storage.py} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename tests/v3/{test_codecs_v3.py => test_codecs.py} (100%) rename tests/v3/{test_group_v3.py => test_group.py} (100%) rename tests/v3/{test_storage_v3.py => test_storage.py} (100%) diff --git a/tests/v3/test_codecs_v3.py b/tests/v3/test_codecs.py similarity index 100% rename from tests/v3/test_codecs_v3.py rename to tests/v3/test_codecs.py diff --git a/tests/v3/test_group_v3.py b/tests/v3/test_group.py similarity index 100% rename from tests/v3/test_group_v3.py rename to tests/v3/test_group.py diff --git a/tests/v3/test_storage_v3.py b/tests/v3/test_storage.py similarity index 100% rename from tests/v3/test_storage_v3.py rename to tests/v3/test_storage.py From dacacc8e08d19887b49dcc2f5c00f6273fcd5550 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 5 Apr 2024 12:48:08 +0200 Subject: [PATCH 37/56] add a docstring to _get, and pass auto_mkdir to _put --- src/zarr/v3/store/local.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/zarr/v3/store/local.py b/src/zarr/v3/store/local.py index 5d22b30e9a..3d9595b28c 100644 --- a/src/zarr/v3/store/local.py +++ b/src/zarr/v3/store/local.py @@ -10,6 +10,20 @@ def _get(path: Path, byte_range: Optional[Tuple[int, Optional[int]]] = None) -> bytes: + """ + Fetch a contiguous region of bytes from a file. + + Parameters + ---------- + + path: Path + The file to read bytes from. + byte_range: Optional[Tuple[int, Optional[int]]] = None + The range of bytes to read. If `byte_range` is `None`, then the entire file will be read. + If `byte_range` is a tuple, the first value specifies the index of the first byte to read, + and the second value specifies the total number of bytes to read. If the total value is + `None`, then the entire file after the first byte will be read. + """ if byte_range is not None: start = byte_range[0] end = (start + byte_range[1]) if byte_range[1] is not None else None @@ -94,7 +108,7 @@ async def get_partial_values( async def set(self, key: str, value: BytesLike) -> None: assert isinstance(key, str) path = self.root / key - await to_thread(_put, path, value) + await to_thread(_put, path, value, auto_mkdir=self.auto_mkdir) async def set_partial_values(self, key_start_values: List[Tuple[str, int, bytes]]) -> None: args = [] From 5d2a532619d0bead92625f692608f150781be6cd Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 5 Apr 2024 13:42:55 +0200 Subject: [PATCH 38/56] fix: add docstring to LocalStore.get_partial_values; adjust body of LocalStore.get_partial_values to properly handle the byte_range parameter of LocalStore.get. --- src/zarr/v3/store/local.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/zarr/v3/store/local.py b/src/zarr/v3/store/local.py index 3d9595b28c..1a789141d9 100644 --- a/src/zarr/v3/store/local.py +++ b/src/zarr/v3/store/local.py @@ -95,14 +95,23 @@ async def get( async def get_partial_values( self, key_ranges: List[Tuple[str, Tuple[int, int]]] ) -> List[bytes]: + """ + Read byte ranges from multiple keys. + + Parameters + ---------- + + key_ranges: List[Tuple[str, Tuple[int, int]]] + A list of (key, (start, length)) tuples. The first element of the tuple is the name of + the key in storage to fetch bytes from. The second element the tuple defines the byte + range to retrieve. These values are arguments to `get`, as this method wraps + concurrent invocation of `get`. + """ args = [] for key, byte_range in key_ranges: assert isinstance(key, str) path = self.root / key - if byte_range is not None: - args.append((_get, path, byte_range[0], byte_range[1])) - else: - args.append((_get, path)) + args.append((_get, path, byte_range)) return await concurrent_map(args, to_thread, limit=None) # TODO: fix limit async def set(self, key: str, value: BytesLike) -> None: From 06d8b04963bbe9d2a569c75f05a22fe3edac818b Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 5 Apr 2024 15:49:49 +0200 Subject: [PATCH 39/56] test: add tests for localstore init, set, get, get_partial --- tests/v3/test_group.py | 2 +- tests/v3/test_storage.py | 101 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 101 insertions(+), 2 deletions(-) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 3e2779fa79..b9be73fcc8 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -258,4 +258,4 @@ async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: # check that asking for a nonexistent key raises KeyError with pytest.raises(KeyError): - agroup.getitem("foo") + await agroup.getitem("foo") diff --git a/tests/v3/test_storage.py b/tests/v3/test_storage.py index 3d8024de70..88d83c8e85 100644 --- a/tests/v3/test_storage.py +++ b/tests/v3/test_storage.py @@ -6,9 +6,108 @@ # import tempfile # import numpy as np +from __future__ import annotations +from zarr.v3.store.local import LocalStore +from pathlib import Path import pytest -pytest.skip("old v3 tests are disabled", allow_module_level=True) + +@pytest.mark.parametrize("auto_mkdir", (True, False)) +def test_local_store_init(tmpdir, auto_mkdir: bool) -> None: + tmpdir_str = str(tmpdir) + tmpdir_path = Path(tmpdir_str) + store = LocalStore(root=tmpdir_str, auto_mkdir=auto_mkdir) + + assert store.root == tmpdir_path + assert store.auto_mkdir == auto_mkdir + + # ensure that str and pathlib.Path get normalized to the same output + # a stronger test is to ensure that these two store instances are identical + # but LocalStore.__eq__ is not defined at this time. + assert store.root == LocalStore(root=tmpdir_path, auto_mkdir=auto_mkdir).root + + store_str = f"file://{tmpdir_str}" + assert str(store) == store_str + assert repr(store) == f"LocalStore({repr(store_str)})" + + +@pytest.mark.asyncio +@pytest.mark.parametrize("byte_range", (None, (0, None), (1, None), (1, 2), (None, 1))) +async def test_local_store_get( + local_store, byte_range: None | tuple[int | None, int | None] +) -> None: + payload = b"\x01\x02\x03\x04" + object_name = "foo" + (local_store.root / object_name).write_bytes(payload) + observed = await local_store.get(object_name, byte_range=byte_range) + + if byte_range is None: + start = 0 + length = len(payload) + else: + maybe_start, maybe_len = byte_range + if maybe_start is None: + start = 0 + else: + start = maybe_start + + if maybe_len is None: + length = len(payload) - start + else: + length = maybe_len + + expected = payload[start : start + length] + assert observed == expected + + # test that it's an error to get bytes from a file that doesn't exist + with pytest.raises(FileNotFoundError): + await local_store.get(object_name + "_absent", byte_range=byte_range) + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "key_ranges", + ( + [], + [("key_0", (0, 1))], + [("dir/key_0", (0, 1)), ("key_1", (0, 2))], + [("key_0", (0, 1)), ("key_1", (0, 2)), ("key_1", (0, 2))], + ), +) +async def test_local_store_get_partial( + tmpdir, key_ranges: tuple[list[tuple[str, tuple[int, int]]]] +) -> None: + store = LocalStore(str(tmpdir), auto_mkdir=True) + # use the utf-8 encoding of the key as the bytes + for key, _ in key_ranges: + payload = bytes(key, encoding="utf-8") + (store.root / key).write_bytes(payload) + + results = await store.get_partial_values(key_ranges) + for idx, observed in enumerate(results): + key, byte_range = key_ranges[idx] + expected = await store.get(key, byte_range=byte_range) + assert observed == expected + + +@pytest.mark.asyncio +@pytest.mark.parametrize("path", ("foo", "foo/bar")) +@pytest.mark.parametrize("auto_mkdir", (True, False)) +async def test_local_store_set(tmpdir, path: str, auto_mkdir: bool) -> None: + store = LocalStore(str(tmpdir), auto_mkdir=auto_mkdir) + payload = b"\x01\x02\x03\x04" + + if "/" in path and not auto_mkdir: + with pytest.raises(FileNotFoundError): + await store.set(path, payload) + else: + x = await store.set(path, payload) + + # this method should not return anything + assert x is None + + assert (store.root / path).read_bytes() == payload + # import zarr # from zarr._storage.store import _get_hierarchy_metadata, v3_api_available, StorageTransformer From d8749dea7bd4758970eb80455154177162aa6730 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Tue, 16 Apr 2024 10:51:34 +0200 Subject: [PATCH 40/56] fix: remove pre-emptive fetching from group.open --- src/zarr/v3/group.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index ff8ef53eba..b80348aff8 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -112,8 +112,8 @@ async def open( zarr_format: Literal[2, 3] = 3, ) -> AsyncGroup: store_path = make_store_path(store) - zarr_json_bytes = await (store_path / ZARR_JSON).get() - assert zarr_json_bytes is not None + + zarr_json_bytes: bytes | None # TODO: consider trying to autodiscover the zarr-format here if zarr_format == 3: @@ -129,12 +129,8 @@ async def open( zgroup_bytes, zattrs_bytes = await asyncio.gather( (store_path / ZGROUP_JSON).get(), (store_path / ZATTRS_JSON).get() ) - zgroup = ( - json.loads(json.loads(zgroup_bytes)) - if zgroup_bytes is not None - else {"zarr_format": 2} - ) - zattrs = json.loads(json.loads(zattrs_bytes)) if zattrs_bytes is not None else {} + zgroup = json.loads(zgroup_bytes) if zgroup_bytes is not None else {"zarr_format": 2} + zattrs = json.loads(zattrs_bytes) if zattrs_bytes is not None else {} zarr_json = {**zgroup, "attributes": zattrs} else: raise ValueError(f"unexpected zarr_format: {zarr_format}") @@ -160,12 +156,6 @@ async def getitem( ) -> AsyncArray | AsyncGroup: store_path = self.store_path / key - # Note: - # in zarr-python v2, we first check if `key` references an Array, else if `key` references - # a group,using standalone `contains_array` and `contains_group` functions. These functions - # are reusable, but for v3 they would perform redundant I/O operations. - # Not clear how much of that strategy we want to keep here. - # if `key` names an object in storage, it cannot be an array or group if await store_path.exists(): raise KeyError(key) From eed03c8c4e3b9d12264b385571d42590237b0844 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Tue, 16 Apr 2024 15:06:12 +0200 Subject: [PATCH 41/56] fix: use removeprefix (removes a substring) instead of strip (removes any member of a set); comment out / avoid tests that cannot pass right now; don't consider implicit groups for v2; check if prefix is present in storage before opening for Group.getitem --- src/zarr/v3/group.py | 14 +++++++++----- src/zarr/v3/store/memory.py | 2 +- tests/v3/test_group.py | 8 ++++---- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index ac81686e59..1764a7e039 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -126,11 +126,12 @@ async def open( ) elif zarr_format == 2: # V2 groups are comprised of a .zgroup and .zattrs objects - # (both are optional in the case of implicit groups) zgroup_bytes, zattrs_bytes = await asyncio.gather( (store_path / ZGROUP_JSON).get(), (store_path / ZATTRS_JSON).get() ) - zgroup = json.loads(zgroup_bytes) if zgroup_bytes is not None else {"zarr_format": 2} + if zgroup_bytes is None: + raise FileNotFoundError(f"No Zarr v2 group metadata found at {store_path}") + zgroup = json.loads(zgroup_bytes) zattrs = json.loads(zattrs_bytes) if zattrs_bytes is not None else {} zarr_json = {**zgroup, "attributes": zattrs} else: @@ -161,6 +162,12 @@ async def getitem( if await store_path.exists(): raise KeyError(key) + # calling list_dir here is a big performance loss. We should try to find a way around + # this. + # see https://github.com/zarr-developers/zarr-python/pull/1743#issuecomment-2058681807 + if key not in await store_path.store.list_dir(self.store_path.path): + raise KeyError(key) + if self.metadata.zarr_format == 3: zarr_json_bytes = await (store_path / ZARR_JSON).get() if zarr_json_bytes is None: @@ -202,9 +209,6 @@ async def getitem( store_path, zarray, runtime_configuration=self.runtime_configuration ) else: - if zgroup_bytes is None: - # implicit group? - logger.warning("group at %s is an implicit group", store_path) zgroup = ( json.loads(zgroup_bytes) if zgroup_bytes is not None diff --git a/src/zarr/v3/store/memory.py b/src/zarr/v3/store/memory.py index afacfa4321..e430c79f91 100644 --- a/src/zarr/v3/store/memory.py +++ b/src/zarr/v3/store/memory.py @@ -79,7 +79,7 @@ async def list_dir(self, prefix: str) -> List[str]: else: return list( { - key.strip(prefix + "/").split("/")[0] + key.removeprefix(prefix + "/").split("/")[0] for key in self._store_dict if (key.startswith(prefix + "/") and key != prefix) } diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index b9be73fcc8..6e5db356c2 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -206,9 +206,9 @@ async def test_asyncgroup_open( else: assert False - # todo: get more specific than this - with pytest.raises(ValueError): - await AsyncGroup.open(store=store, zarr_format=zarr_format_wrong) + # todo: uncomment this test when we get rid of implicit groups + # with pytest.raises(FileNotFoundError): + # await AsyncGroup.open(store=store, zarr_format=zarr_format_wrong) # todo: replace the dict[str, Any] type with something a bit more specific @@ -238,7 +238,7 @@ def test_asyncgroup_from_dict(store: MemoryStore | LocalStore, data: dict[str, A # todo: replace this with a declarative API where we model a full hierarchy @pytest.mark.asyncio @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) -@pytest.mark.parametrize("zarr_format", (2, 3)) +@pytest.mark.parametrize("zarr_format", (3,)) # todo: add testing for v2 when we support v2 arrays async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat): """ Create an `AsyncGroup`, then create members of that group, and ensure that we can access those From 75f75b1807c1531ab01cbf977a7d36e8a6cd2236 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Tue, 16 Apr 2024 16:24:24 +0200 Subject: [PATCH 42/56] xfail v2 tests that are sure to fail; add delitem tests; partition xfailing tests into subtests --- tests/v3/test_group.py | 111 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 102 insertions(+), 9 deletions(-) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 6e5db356c2..08735d68ed 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -147,7 +147,7 @@ async def test_asyncgroup_create( Test that `AsyncGroup.create` works as expected. """ attributes = {"foo": 100} - group = await AsyncGroup.create( + agroup = await AsyncGroup.create( store, attributes=attributes, exists_ok=exists_ok, @@ -155,13 +155,13 @@ async def test_asyncgroup_create( runtime_configuration=runtime_configuration, ) - assert group.metadata == GroupMetadata(zarr_format=zarr_format, attributes=attributes) - assert group.store_path == make_store_path(store) - assert group.runtime_configuration == runtime_configuration + assert agroup.metadata == GroupMetadata(zarr_format=zarr_format, attributes=attributes) + assert agroup.store_path == make_store_path(store) + assert agroup.runtime_configuration == runtime_configuration if not exists_ok: with pytest.raises(AssertionError): - group = await AsyncGroup.create( + agroup = await AsyncGroup.create( store, attributes=attributes, exists_ok=exists_ok, @@ -170,6 +170,28 @@ async def test_asyncgroup_create( ) +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (2, 3)) +async def test_asyncgroup_attrs(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: + attributes = {"foo": 100} + agroup = await AsyncGroup.create(store, zarr_format=zarr_format, attributes=attributes) + + assert agroup.attrs == agroup.metadata.attributes == attributes + + +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (2, 3)) +async def test_asyncgroup_info(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: + agroup = await AsyncGroup.create( # noqa + store, + zarr_format=zarr_format, + ) + pytest.xfail("Info is not implemented for metadata yet") + # assert agroup.info == agroup.metadata.info + + @pytest.mark.asyncio @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) @pytest.mark.parametrize("zarr_format", (2, 3)) @@ -198,6 +220,16 @@ async def test_asyncgroup_open( assert group_w.attrs == group_w.attrs == attributes assert group_w == group_r + +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (pytest.param(2, marks=pytest.mark.xfail), 3)) +async def test_asyncgroup_open_wrong_format( + store: LocalStore | MemoryStore, + zarr_format: ZarrFormat, +) -> None: + _ = await AsyncGroup.create(store=store, exists_ok=False, zarr_format=zarr_format) + # try opening with the wrong zarr format if zarr_format == 3: zarr_format_wrong = 2 @@ -206,9 +238,8 @@ async def test_asyncgroup_open( else: assert False - # todo: uncomment this test when we get rid of implicit groups - # with pytest.raises(FileNotFoundError): - # await AsyncGroup.open(store=store, zarr_format=zarr_format_wrong) + with pytest.raises(FileNotFoundError): + await AsyncGroup.open(store=store, zarr_format=zarr_format_wrong) # todo: replace the dict[str, Any] type with something a bit more specific @@ -238,7 +269,10 @@ def test_asyncgroup_from_dict(store: MemoryStore | LocalStore, data: dict[str, A # todo: replace this with a declarative API where we model a full hierarchy @pytest.mark.asyncio @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) -@pytest.mark.parametrize("zarr_format", (3,)) # todo: add testing for v2 when we support v2 arrays +@pytest.mark.parametrize( + "zarr_format", + (pytest.param(2, marks=pytest.mark.xfail), 3), +) async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat): """ Create an `AsyncGroup`, then create members of that group, and ensure that we can access those @@ -259,3 +293,62 @@ async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: # check that asking for a nonexistent key raises KeyError with pytest.raises(KeyError): await agroup.getitem("foo") + + +# todo: replace this with a declarative API where we model a full hierarchy +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (2, 3)) +async def test_asyncgroup_delitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat): + agroup = await AsyncGroup.create(store=store, zarr_format=zarr_format) + sub_array_path = "sub_array" + _ = await agroup.create_array( + path=sub_array_path, shape=(10,), dtype="uint8", chunk_shape=(2,), attributes={"foo": 100} + ) + await agroup.delitem(sub_array_path) + + # todo: clean up the code duplication here + if zarr_format == 2: + assert not await agroup.store_path.store.exists(sub_array_path + "/" + ".zarray") + assert not await agroup.store_path.store.exists(sub_array_path + "/" + ".zattrs") + elif zarr_format == 3: + assert not await agroup.store_path.store.exists(sub_array_path + "/" + "zarr.json") + else: + assert False + + sub_group_path = "sub_group" + _ = await agroup.create_group(sub_group_path, attributes={"foo": 100}) + await agroup.delitem(sub_group_path) + if zarr_format == 2: + assert not await agroup.store_path.store.exists(sub_array_path + "/" + ".zgroup") + assert not await agroup.store_path.store.exists(sub_array_path + "/" + ".zattrs") + elif zarr_format == 3: + assert not await agroup.store_path.store.exists(sub_array_path + "/" + "zarr.json") + else: + assert False + + +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (2, 3)) +async def test_asyncgroup_create_group(store: LocalStore | MemoryStore, zarr_format: ZarrFormat): + agroup = await AsyncGroup.create(store=store, zarr_format=zarr_format) + + shape = (10,) + dtype = "uint8" + chunk_shape = (4,) + attributes = {"foo": 100} + + sub_array_path = "sub_array" + array = await agroup.create_array( + path=sub_array_path, + shape=shape, + dtype=dtype, + chunk_shape=chunk_shape, + attributes=attributes, + ) + + assert array.shape == shape + assert array.dtype == dtype + # todo: fix this + assert array.metadata.chunk_grid.chunk_shape == chunk_shape From 8a14e3b879f803bcaccbaa27697f08b55fa40f1b Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Tue, 16 Apr 2024 17:30:00 +0200 Subject: [PATCH 43/56] fix: handle byte_range[0] being None --- src/zarr/v3/store/local.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/zarr/v3/store/local.py b/src/zarr/v3/store/local.py index 1a789141d9..0e8eed52aa 100644 --- a/src/zarr/v3/store/local.py +++ b/src/zarr/v3/store/local.py @@ -25,7 +25,11 @@ def _get(path: Path, byte_range: Optional[Tuple[int, Optional[int]]] = None) -> `None`, then the entire file after the first byte will be read. """ if byte_range is not None: - start = byte_range[0] + if byte_range[0] is None: + start = 0 + else: + start = byte_range[0] + end = (start + byte_range[1]) if byte_range[1] is not None else None else: return path.read_bytes() From 459bb42824e83145feb905f7e5a03277271efe62 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Tue, 16 Apr 2024 17:45:23 +0200 Subject: [PATCH 44/56] fix: adjust test for localstore.get to check that get on nonexistent keys returns None; correctly create intermediate directories when preparing test data in test_local_store_get_partial --- tests/v3/test_storage.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/v3/test_storage.py b/tests/v3/test_storage.py index 88d83c8e85..fd2af079c0 100644 --- a/tests/v3/test_storage.py +++ b/tests/v3/test_storage.py @@ -59,9 +59,8 @@ async def test_local_store_get( expected = payload[start : start + length] assert observed == expected - # test that it's an error to get bytes from a file that doesn't exist - with pytest.raises(FileNotFoundError): - await local_store.get(object_name + "_absent", byte_range=byte_range) + # test that getting from a file that doesn't exist returns None + assert await local_store.get(object_name + "_absent", byte_range=byte_range) is None @pytest.mark.asyncio @@ -81,7 +80,11 @@ async def test_local_store_get_partial( # use the utf-8 encoding of the key as the bytes for key, _ in key_ranges: payload = bytes(key, encoding="utf-8") - (store.root / key).write_bytes(payload) + target_path: Path = store.root / key + # create the parent directories + target_path.parent.mkdir(parents=True, exist_ok=True) + # write bytes + target_path.write_bytes(payload) results = await store.get_partial_values(key_ranges) for idx, observed in enumerate(results): From 8ef3fec909a68013396bebfaa8dbc9404beb9da9 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 17 Apr 2024 16:23:53 +0200 Subject: [PATCH 45/56] fix: add zarr_format parameter to array creation routines (which raises if zarr_format is not 3), and xfail the tests that will hit this condition. add tests for create_group, create_array, and update_attributes methods of asyncgroup. --- src/zarr/v3/array.py | 50 ++++++++++--------- src/zarr/v3/group.py | 2 + tests/v3/test_group.py | 109 ++++++++++++++++++++++++++++++++++------- 3 files changed, 121 insertions(+), 40 deletions(-) diff --git a/src/zarr/v3/array.py b/src/zarr/v3/array.py index c0a00a624e..64a73e64dc 100644 --- a/src/zarr/v3/array.py +++ b/src/zarr/v3/array.py @@ -27,6 +27,7 @@ ChunkCoords, Selection, SliceSelection, + ZarrFormat, concurrent_map, ) from zarr.v3.config import RuntimeConfiguration @@ -88,6 +89,7 @@ async def create( attributes: Optional[Dict[str, Any]] = None, runtime_configuration: RuntimeConfiguration = RuntimeConfiguration(), exists_ok: bool = False, + zarr_format: ZarrFormat = 3, ) -> AsyncArray: store_path = make_store_path(store) if not exists_ok: @@ -100,31 +102,33 @@ async def create( fill_value = False else: fill_value = 0 + if zarr_format == 3: + metadata = ArrayMetadata( + shape=shape, + data_type=dtype, + chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape), + chunk_key_encoding=( + V2ChunkKeyEncoding(separator=chunk_key_encoding[1]) + if chunk_key_encoding[0] == "v2" + else DefaultChunkKeyEncoding(separator=chunk_key_encoding[1]) + ), + fill_value=fill_value, + codecs=codecs, + dimension_names=tuple(dimension_names) if dimension_names else None, + attributes=attributes or {}, + ) + runtime_configuration = runtime_configuration or RuntimeConfiguration() - metadata = ArrayMetadata( - shape=shape, - data_type=dtype, - chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape), - chunk_key_encoding=( - V2ChunkKeyEncoding(separator=chunk_key_encoding[1]) - if chunk_key_encoding[0] == "v2" - else DefaultChunkKeyEncoding(separator=chunk_key_encoding[1]) - ), - fill_value=fill_value, - codecs=codecs, - dimension_names=tuple(dimension_names) if dimension_names else None, - attributes=attributes or {}, - ) - runtime_configuration = runtime_configuration or RuntimeConfiguration() - - array = cls( - metadata=metadata, - store_path=store_path, - runtime_configuration=runtime_configuration, - ) + array = cls( + metadata=metadata, + store_path=store_path, + runtime_configuration=runtime_configuration, + ) - await array._save_metadata() - return array + await array._save_metadata() + return array + else: + raise NotImplementedError("Zarr version 2 arrays cannot be created yet.") @classmethod def from_dict( diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index 1764a7e039..09e466fa99 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -249,6 +249,7 @@ async def create_group(self, path: str, **kwargs) -> AsyncGroup: return await type(self).create( self.store_path / path, runtime_configuration=runtime_configuration, + zarr_format=self.metadata.zarr_format, **kwargs, ) @@ -257,6 +258,7 @@ async def create_array(self, path: str, **kwargs) -> AsyncArray: return await AsyncArray.create( self.store_path / path, runtime_configuration=runtime_configuration, + zarr_format=self.metadata.zarr_format, **kwargs, ) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 08735d68ed..45df2bd276 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -1,6 +1,7 @@ from __future__ import annotations from typing import TYPE_CHECKING, Any +from zarr.v3.array import AsyncArray from zarr.v3.store.core import make_store_path if TYPE_CHECKING: @@ -18,7 +19,7 @@ # todo: put RemoteStore in here @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) -def test_group_children(store: MemoryStore | LocalStore): +def test_group_children(store: MemoryStore | LocalStore) -> None: """ Test that `Group.members` returns correct values, i.e. the arrays and groups (explicit and implicit) contained in that group. @@ -105,7 +106,7 @@ def test_group(store: MemoryStore | LocalStore) -> None: ) def test_group_create( store: MemoryStore | LocalStore, exists_ok: bool, runtime_configuration: RuntimeConfiguration -): +) -> None: """ Test that `Group.create` works as expected. """ @@ -142,7 +143,7 @@ async def test_asyncgroup_create( exists_ok: bool, zarr_format: ZarrFormat, runtime_configuration: RuntimeConfiguration, -): +) -> None: """ Test that `AsyncGroup.create` works as expected. """ @@ -252,7 +253,7 @@ async def test_asyncgroup_open_wrong_format( {"zarr_format": 2, "attributes": {"foo": 100}}, ), ) -def test_asyncgroup_from_dict(store: MemoryStore | LocalStore, data: dict[str, Any]): +def test_asyncgroup_from_dict(store: MemoryStore | LocalStore, data: dict[str, Any]) -> None: """ Test that we can create an AsyncGroup from a dict """ @@ -271,9 +272,9 @@ def test_asyncgroup_from_dict(store: MemoryStore | LocalStore, data: dict[str, A @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) @pytest.mark.parametrize( "zarr_format", - (pytest.param(2, marks=pytest.mark.xfail), 3), + (pytest.param(2, marks=pytest.mark.xfail(reason="V2 arrays cannot be created yet.")), 3), ) -async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat): +async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: """ Create an `AsyncGroup`, then create members of that group, and ensure that we can access those members via the `AsyncGroup.getitem` method. @@ -298,8 +299,11 @@ async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: # todo: replace this with a declarative API where we model a full hierarchy @pytest.mark.asyncio @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) -@pytest.mark.parametrize("zarr_format", (2, 3)) -async def test_asyncgroup_delitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat): +@pytest.mark.parametrize( + "zarr_format", + (pytest.param(2, marks=pytest.mark.xfail(reason="V2 arrays cannot be created yet.")), 3), +) +async def test_asyncgroup_delitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: agroup = await AsyncGroup.create(store=store, zarr_format=zarr_format) sub_array_path = "sub_array" _ = await agroup.create_array( @@ -330,25 +334,96 @@ async def test_asyncgroup_delitem(store: LocalStore | MemoryStore, zarr_format: @pytest.mark.asyncio @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize( + "runtime_configuration", (RuntimeConfiguration(), RuntimeConfiguration(order="F")) +) @pytest.mark.parametrize("zarr_format", (2, 3)) -async def test_asyncgroup_create_group(store: LocalStore | MemoryStore, zarr_format: ZarrFormat): - agroup = await AsyncGroup.create(store=store, zarr_format=zarr_format) +async def test_asyncgroup_create_group( + store: LocalStore | MemoryStore, + zarr_format: ZarrFormat, + runtime_configuration: RuntimeConfiguration, +) -> None: + agroup = await AsyncGroup.create( + store=store, zarr_format=zarr_format, runtime_configuration=RuntimeConfiguration + ) + sub_node_path = "sub_group" + attributes = {"foo": 999} + subnode = await agroup.create_group( + path=sub_node_path, attributes=attributes, runtime_configuration=runtime_configuration + ) + + assert isinstance(subnode, AsyncGroup) + assert subnode.runtime_configuration == runtime_configuration + assert subnode.attrs == attributes + assert subnode.store_path.path == sub_node_path + assert subnode.store_path.store == store + assert subnode.metadata.zarr_format == zarr_format + + +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize( + "runtime_configuration", (RuntimeConfiguration(), RuntimeConfiguration(order="F")) +) +@pytest.mark.parametrize( + "zarr_format", + (pytest.param(2, marks=pytest.mark.xfail(reason="V2 arrays cannot be created yet")), 3), +) +async def test_asyncgroup_create_array( + store: LocalStore | MemoryStore, + runtime_configuration: RuntimeConfiguration, + zarr_format: ZarrFormat, +) -> None: + """ + Test that the AsyncGroup.create_array method works correctly. We ensure that array properties + specified in create_array are present on the resulting array. + """ + + agroup = await AsyncGroup.create( + store=store, zarr_format=zarr_format, runtime_configuration=runtime_configuration + ) shape = (10,) dtype = "uint8" chunk_shape = (4,) attributes = {"foo": 100} - sub_array_path = "sub_array" - array = await agroup.create_array( - path=sub_array_path, + sub_node_path = "sub_array" + subnode = await agroup.create_array( + path=sub_node_path, shape=shape, dtype=dtype, chunk_shape=chunk_shape, attributes=attributes, + runtime_configuration=runtime_configuration, + ) + assert isinstance(subnode, AsyncArray) + assert subnode.runtime_configuration == runtime_configuration + assert subnode.attrs == attributes + assert subnode.store_path.path == sub_node_path + assert subnode.store_path.store == store + assert subnode.shape == shape + assert subnode.dtype == dtype + # todo: fix the type annotation of array.metadata.chunk_grid so that we get some autocomplete + # here. + assert subnode.metadata.chunk_grid.chunk_shape == chunk_shape + assert subnode.metadata.zarr_format == zarr_format + + +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (2, 3)) +async def test_asyncgroup_update_attributes( + store: LocalStore | MemoryStore, zarr_format: ZarrFormat +) -> None: + """ + Test that the AsyncGroup.update_attributes method works correctly. + """ + attributes_old = {"foo": 10} + attributes_new = {"baz": "new"} + agroup = await AsyncGroup.create( + store=store, zarr_format=zarr_format, attributes=attributes_old ) - assert array.shape == shape - assert array.dtype == dtype - # todo: fix this - assert array.metadata.chunk_grid.chunk_shape == chunk_shape + agroup_new_attributes = await agroup.update_attributes(attributes_new) + assert agroup_new_attributes.attrs == attributes_new From b5a76981c823b93dee4710eed2951ee4cd67f736 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 17 Apr 2024 16:56:19 +0200 Subject: [PATCH 46/56] test: add group init test --- tests/v3/test_group.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 45df2bd276..a7e7a10be8 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -13,7 +13,7 @@ from zarr.v3.group import AsyncGroup, Group, GroupMetadata from zarr.v3.store import StorePath -from zarr.v3.config import RuntimeConfiguration +from zarr.v3.config import RuntimeConfiguration, SyncConfiguration from zarr.v3.sync import sync @@ -427,3 +427,17 @@ async def test_asyncgroup_update_attributes( agroup_new_attributes = await agroup.update_attributes(attributes_new) assert agroup_new_attributes.attrs == attributes_new + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (2, 3)) +@pytest.mark.parametrize( + "sync_configuration", (SyncConfiguration(), SyncConfiguration(concurrency=2)) +) +async def test_group_init( + store: LocalStore | MemoryStore, zarr_format: ZarrFormat, sync_configuration: SyncConfiguration +) -> None: + agroup = sync(AsyncGroup.create(store=store, zarr_format=zarr_format)) + group = Group(_async_group=agroup, _sync_configuration=sync_configuration) + assert group._async_group == agroup + assert group._sync_configuration == sync_configuration From 49f1505ee8686fe612affdd709f0a49ab27a4f9c Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Fri, 19 Apr 2024 06:06:43 -0700 Subject: [PATCH 47/56] feature(store): make list_* methods async generators (#110) * feature(store): make list_* methods async generators * Update src/zarr/v3/store/memory.py * Apply suggestions from code review - simplify code comments - use `removeprefix` instead of `strip` --------- Co-authored-by: Davis Bennett --- src/zarr/v3/abc/store.py | 13 +++++----- src/zarr/v3/group.py | 37 +++++++++++++++-------------- src/zarr/v3/store/local.py | 47 +++++++++++++++++-------------------- src/zarr/v3/store/memory.py | 27 ++++++++++----------- 4 files changed, 61 insertions(+), 63 deletions(-) diff --git a/src/zarr/v3/abc/store.py b/src/zarr/v3/abc/store.py index ce5de279c4..c9845b7ae7 100644 --- a/src/zarr/v3/abc/store.py +++ b/src/zarr/v3/abc/store.py @@ -1,4 +1,5 @@ from abc import abstractmethod, ABC +from collections.abc import AsyncGenerator from typing import List, Tuple, Optional @@ -106,17 +107,17 @@ def supports_listing(self) -> bool: ... @abstractmethod - async def list(self) -> List[str]: + async def list(self) -> AsyncGenerator[str, None]: """Retrieve all keys in the store. Returns ------- - list[str] + AsyncGenerator[str, None] """ ... @abstractmethod - async def list_prefix(self, prefix: str) -> List[str]: + async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: """Retrieve all keys in the store with a given prefix. Parameters @@ -125,12 +126,12 @@ async def list_prefix(self, prefix: str) -> List[str]: Returns ------- - list[str] + AsyncGenerator[str, None] """ ... @abstractmethod - async def list_dir(self, prefix: str) -> List[str]: + async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: """ Retrieve all keys and prefixes with a given prefix and which do not contain the character “/” after the given prefix. @@ -141,6 +142,6 @@ async def list_dir(self, prefix: str) -> List[str]: Returns ------- - list[str] + AsyncGenerator[str, None] """ ... diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index 09e466fa99..fd8efa2f43 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -1,4 +1,5 @@ from __future__ import annotations +from collections.abc import AsyncGenerator from typing import TYPE_CHECKING from dataclasses import asdict, dataclass, field, replace @@ -9,7 +10,6 @@ if TYPE_CHECKING: from typing import ( Any, - AsyncGenerator, Literal, AsyncIterator, ) @@ -157,6 +157,7 @@ async def getitem( key: str, ) -> AsyncArray | AsyncGroup: store_path = self.store_path / key + logger.warning("key=%s, store_path=%s", key, store_path) # if `key` names an object in storage, it cannot be an array or group if await store_path.exists(): @@ -302,22 +303,24 @@ async def members(self) -> AsyncGenerator[tuple[str, AsyncArray | AsyncGroup], N ) raise ValueError(msg) - subkeys = await self.store_path.store.list_dir(self.store_path.path) - # would be nice to make these special keys accessible programmatically, - # and scoped to specific zarr versions - subkeys_filtered = filter(lambda v: v not in ("zarr.json", ".zgroup", ".zattrs"), subkeys) - # is there a better way to schedule this? - for subkey in subkeys_filtered: - try: - yield (subkey, await self.getitem(subkey)) - except KeyError: - # keyerror is raised when `subkey` names an object (in the object storage sense), - # as opposed to a prefix, in the store under the prefix associated with this group - # in which case `subkey` cannot be the name of a sub-array or sub-group. - logger.warning( - "Object at %s is not recognized as a component of a Zarr hierarchy.", subkey - ) - pass + + async for key in self.store_path.store.list_dir(self.store_path.path): + # these keys are not valid child names so we make sure to skip them + # TODO: it would be nice to make these special keys accessible programmatically, + # and scoped to specific zarr versions + if key not in ("zarr.json", ".zgroup", ".zattrs"): + try: + # TODO: performance optimization -- load children concurrently + child = await self.getitem(key) + yield key, child + except KeyError: + # keyerror is raised when `key` names an object (in the object storage sense), + # as opposed to a prefix, in the store under the prefix associated with this group. + # In this case, `key` cannot be the name of a sub-array or sub-group. + logger.warning( + "Object at %s is not recognized as a component of a Zarr hierarchy.", key + ) + pass async def contains(self, member: str) -> bool: raise NotImplementedError diff --git a/src/zarr/v3/store/local.py b/src/zarr/v3/store/local.py index 0e8eed52aa..ac883c35e6 100644 --- a/src/zarr/v3/store/local.py +++ b/src/zarr/v3/store/local.py @@ -2,6 +2,7 @@ import io import shutil +from collections.abc import AsyncGenerator from pathlib import Path from typing import Union, Optional, List, Tuple @@ -145,22 +146,19 @@ async def exists(self, key: str) -> bool: path = self.root / key return await to_thread(path.is_file) - async def list(self) -> List[str]: + async def list(self) -> AsyncGenerator[str, None]: """Retrieve all keys in the store. Returns ------- - list[str] + AsyncGenerator[str, None] """ + for p in self.root.rglob(""): + if p.is_file(): + yield str(p) - # Q: do we want to return strings or Paths? - def _list(root: Path) -> List[str]: - files = [str(p) for p in root.rglob("") if p.is_file()] - return files - return await to_thread(_list, self.root) - - async def list_prefix(self, prefix: str) -> List[str]: + async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: """Retrieve all keys in the store with a given prefix. Parameters @@ -169,16 +167,14 @@ async def list_prefix(self, prefix: str) -> List[str]: Returns ------- - list[str] + AsyncGenerator[str, None] """ + for p in (self.root / prefix).rglob("*"): + if p.is_file(): + yield str(p) - def _list_prefix(root: Path, prefix: str) -> List[str]: - files = [str(p) for p in (root / prefix).rglob("*") if p.is_file()] - return files - - return await to_thread(_list_prefix, self.root, prefix) - async def list_dir(self, prefix: str) -> List[str]: + async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: """ Retrieve all keys and prefixes with a given prefix and which do not contain the character “/” after the given prefix. @@ -189,15 +185,16 @@ async def list_dir(self, prefix: str) -> List[str]: Returns ------- - list[str] + AsyncGenerator[str, None] """ + base = self.root / prefix + to_strip = str(base) + "/" + + try: + key_iter = base.iterdir() + except (FileNotFoundError, NotADirectoryError): + key_iter = [] - def _list_dir(root: Path, prefix: str) -> List[str]: - base = root / prefix - to_strip = str(base) + "/" - try: - return [str(key).replace(to_strip, "") for key in base.iterdir()] - except (FileNotFoundError, NotADirectoryError): - return [] + for key in key_iter: + yield str(key).replace(to_strip, "") - return await to_thread(_list_dir, self.root, prefix) diff --git a/src/zarr/v3/store/memory.py b/src/zarr/v3/store/memory.py index e430c79f91..1a4864835e 100644 --- a/src/zarr/v3/store/memory.py +++ b/src/zarr/v3/store/memory.py @@ -1,5 +1,6 @@ from __future__ import annotations +from collections.abc import AsyncGenerator from typing import Optional, MutableMapping, List, Tuple from zarr.v3.common import BytesLike @@ -67,20 +68,16 @@ async def delete(self, key: str) -> None: async def set_partial_values(self, key_start_values: List[Tuple[str, int, bytes]]) -> None: raise NotImplementedError - async def list(self) -> List[str]: - return list(self._store_dict.keys()) + async def list(self) -> AsyncGenerator[str, None]: + for key in self._store_dict: + yield key - async def list_prefix(self, prefix: str) -> List[str]: - return [key for key in self._store_dict if key.startswith(prefix)] + async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: + for key in self._store_dict: + if key.startswith(prefix): + yield key - async def list_dir(self, prefix: str) -> List[str]: - if prefix == "": - return list({key.split("/", maxsplit=1)[0] for key in self._store_dict}) - else: - return list( - { - key.removeprefix(prefix + "/").split("/")[0] - for key in self._store_dict - if (key.startswith(prefix + "/") and key != prefix) - } - ) + async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + for key in self._store_dict: + if key.startswith(prefix + "/") and key != prefix: + yield key.removeprefix(prefix + "/").rsplit("/", maxsplit=1)[0] From b996aff13996fd172f85ddd5554737149221f7c2 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 10 May 2024 10:34:41 +0200 Subject: [PATCH 48/56] fix: define utility for converting asyncarray to array, and similar for group, largely to appease mypy --- src/zarr/group.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/src/zarr/group.py b/src/zarr/group.py index f9dbea3a73..055ef53278 100644 --- a/src/zarr/group.py +++ b/src/zarr/group.py @@ -20,6 +20,7 @@ from zarr.config import RuntimeConfiguration, SyncConfiguration from zarr.store import StoreLike, StorePath, make_store_path from zarr.sync import SyncMixin, sync +from typing import overload logger = logging.getLogger("zarr.group") @@ -41,6 +42,26 @@ def parse_attributes(data: Any) -> dict[str, Any]: raise TypeError(msg) +@overload +def _parse_async_node(node: AsyncArray) -> Array: ... + + +@overload +def _parse_async_node(node: AsyncGroup) -> Group: ... + + +def _parse_async_node(node: AsyncArray | AsyncGroup) -> Array | Group: + """ + Wrap an AsyncArray in an Array, or an AsyncGroup in a Group. + """ + if isinstance(node, AsyncArray): + return Array(node) + elif isinstance(node, Group): + return Group(node) + else: + assert False + + @dataclass(frozen=True) class GroupMetadata(Metadata): attributes: dict[str, Any] = field(default_factory=dict) @@ -509,11 +530,10 @@ def members(self) -> tuple[tuple[str, Array | Group], ...]: Return the sub-arrays and sub-groups of this group as a tuple of (name, array | group) pairs """ - _members: list[AsyncArray | AsyncGroup] = self._sync_iter(self._async_group.members()) - return tuple( - (key, Array(value)) if isinstance(value, AsyncArray) else (key, Group(value)) - for key, value in _members - ) + _members = self._sync_iter(self._async_group.members()) + + result = tuple(map(lambda kv: (kv[0], _parse_async_node(kv[1])), _members)) + return result def __contains__(self, member) -> bool: return self._sync(self._async_group.contains(member)) From 923fc18412d25cca141595ff6dfaadc707912752 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 10 May 2024 10:38:48 +0200 Subject: [PATCH 49/56] chore: remove checks that only existed because of implicit groups --- src/zarr/group.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/src/zarr/group.py b/src/zarr/group.py index 055ef53278..ed8e5aa487 100644 --- a/src/zarr/group.py +++ b/src/zarr/group.py @@ -56,7 +56,7 @@ def _parse_async_node(node: AsyncArray | AsyncGroup) -> Array | Group: """ if isinstance(node, AsyncArray): return Array(node) - elif isinstance(node, Group): + elif isinstance(node, AsyncGroup): return Group(node) else: assert False @@ -197,26 +197,12 @@ async def getitem( store_path = self.store_path / key logger.warning("key=%s, store_path=%s", key, store_path) - # if `key` names an object in storage, it cannot be an array or group - if await store_path.exists(): - raise KeyError(key) - - # calling list_dir here is a big performance loss. We should try to find a way around - # this. - # see https://github.com/zarr-developers/zarr-python/pull/1743#issuecomment-2058681807 - if key not in [item async for item in store_path.store.list_dir(self.store_path.path)]: - raise KeyError(key) - # Note: # in zarr-python v2, we first check if `key` references an Array, else if `key` references # a group,using standalone `contains_array` and `contains_group` functions. These functions # are reusable, but for v3 they would perform redundant I/O operations. # Not clear how much of that strategy we want to keep here. - # if `key` names an object in storage, it cannot be an array or group - if await store_path.exists(): - raise KeyError(key) - if self.metadata.zarr_format == 3: zarr_json_bytes = await (store_path / ZARR_JSON).get() if zarr_json_bytes is None: From 7c6cfb476e4dbea3b4391cedd61b835328f8543e Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 10 May 2024 10:54:19 +0200 Subject: [PATCH 50/56] chore: clean up docstring and modernize some type hints --- src/zarr/store/local.py | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/src/zarr/store/local.py b/src/zarr/store/local.py index fd73a8199a..4cff4428be 100644 --- a/src/zarr/store/local.py +++ b/src/zarr/store/local.py @@ -10,25 +10,19 @@ from zarr.common import BytesLike, concurrent_map, to_thread -def _get(path: Path, byte_range: Optional[Tuple[int, Optional[int]]] = None) -> bytes: +def _get(path: Path, byte_range: tuple[int, int | None] | None) -> bytes: """ - Fetch a contiguous region of bytes from a file. - <<<<<<< HEAD - - Parameters - ---------- - - ======= - Parameters - ---------- - >>>>>>> fcab6505eaf15c959a0093cefe1b5b1a31f6c3ed - path: Path - The file to read bytes from. - byte_range: Optional[Tuple[int, Optional[int]]] = None - The range of bytes to read. If `byte_range` is `None`, then the entire file will be read. - If `byte_range` is a tuple, the first value specifies the index of the first byte to read, - and the second value specifies the total number of bytes to read. If the total value is - `None`, then the entire file after the first byte will be read. + Fetch a contiguous region of bytes from a file. + + Parameters + ---------- + path: Path + The file to read bytes from. + byte_range: tuple[int, int | None] | None = None + The range of bytes to read. If `byte_range` is `None`, then the entire file will be read. + If `byte_range` is a tuple, the first value specifies the index of the first byte to read, + and the second value specifies the total number of bytes to read. If the total value is + `None`, then the entire file after the first byte will be read. """ if byte_range is not None: if byte_range[0] is None: From 17997f82ea1b2184a4adc39fedb45a3e5d241bd3 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 10 May 2024 10:57:58 +0200 Subject: [PATCH 51/56] chore: move imports to top-level --- tests/v3/test_storage.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/v3/test_storage.py b/tests/v3/test_storage.py index 449c505dc8..7e6d232b22 100644 --- a/tests/v3/test_storage.py +++ b/tests/v3/test_storage.py @@ -11,6 +11,9 @@ from pathlib import Path import pytest +from zarr.testing.store import StoreTests +from zarr.store.memory import MemoryStore + @pytest.mark.parametrize("auto_mkdir", (True, False)) def test_local_store_init(tmpdir, auto_mkdir: bool) -> None: @@ -792,10 +795,6 @@ async def test_local_store_set(tmpdir, path: str, auto_mkdir: bool) -> None: # storage_transformer_methods.discard("__init__") # storage_transformer_methods.discard("get_config") # assert storage_transformer_methods == store_v3_methods -import pytest - -from zarr.testing.store import StoreTests -from zarr.store.memory import MemoryStore class TestMemoryStore(StoreTests): From 22c0889a2f8ed02209d43c17c3fcc509a514e984 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 15 May 2024 17:01:14 +0200 Subject: [PATCH 52/56] remove fixture files --- src/zarr/fixture/.zgroup | 3 --- src/zarr/fixture/flat/.zarray | 23 ----------------------- src/zarr/fixture/flat/0.0 | Bin 48 -> 0 bytes src/zarr/fixture/flat_legacy/.zarray | 22 ---------------------- src/zarr/fixture/flat_legacy/0.0 | Bin 48 -> 0 bytes src/zarr/fixture/meta/.zarray | 23 ----------------------- src/zarr/fixture/meta/0.0 | Bin 48 -> 0 bytes src/zarr/fixture/nested/.zarray | 23 ----------------------- src/zarr/fixture/nested/0/0 | Bin 48 -> 0 bytes src/zarr/fixture/nested_legacy/.zarray | 23 ----------------------- src/zarr/fixture/nested_legacy/0/0 | Bin 48 -> 0 bytes 11 files changed, 117 deletions(-) delete mode 100644 src/zarr/fixture/.zgroup delete mode 100644 src/zarr/fixture/flat/.zarray delete mode 100644 src/zarr/fixture/flat/0.0 delete mode 100644 src/zarr/fixture/flat_legacy/.zarray delete mode 100644 src/zarr/fixture/flat_legacy/0.0 delete mode 100644 src/zarr/fixture/meta/.zarray delete mode 100644 src/zarr/fixture/meta/0.0 delete mode 100644 src/zarr/fixture/nested/.zarray delete mode 100644 src/zarr/fixture/nested/0/0 delete mode 100644 src/zarr/fixture/nested_legacy/.zarray delete mode 100644 src/zarr/fixture/nested_legacy/0/0 diff --git a/src/zarr/fixture/.zgroup b/src/zarr/fixture/.zgroup deleted file mode 100644 index 3b7daf227c..0000000000 --- a/src/zarr/fixture/.zgroup +++ /dev/null @@ -1,3 +0,0 @@ -{ - "zarr_format": 2 -} \ No newline at end of file diff --git a/src/zarr/fixture/flat/.zarray b/src/zarr/fixture/flat/.zarray deleted file mode 100644 index d1acce7665..0000000000 --- a/src/zarr/fixture/flat/.zarray +++ /dev/null @@ -1,23 +0,0 @@ -{ - "chunks": [ - 2, - 2 - ], - "compressor": { - "blocksize": 0, - "clevel": 5, - "cname": "lz4", - "id": "blosc", - "shuffle": 1 - }, - "dimension_separator": ".", - "dtype": " Date: Wed, 15 May 2024 17:03:09 +0200 Subject: [PATCH 53/56] remove commented imports --- tests/v3/test_storage.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/tests/v3/test_storage.py b/tests/v3/test_storage.py index 7e6d232b22..9f66d342ad 100644 --- a/tests/v3/test_storage.py +++ b/tests/v3/test_storage.py @@ -1,11 +1,3 @@ -# import array -# import atexit -# import copy -# import inspect -# import os -# import tempfile - -# import numpy as np from __future__ import annotations from zarr.store.local import LocalStore from pathlib import Path @@ -31,7 +23,7 @@ def test_local_store_init(tmpdir, auto_mkdir: bool) -> None: store_str = f"file://{tmpdir_str}" assert str(store) == store_str - assert repr(store) == f"LocalStore({repr(store_str)})" + assert repr(store) == f"LocalStore({store_str!r})" @pytest.mark.asyncio @@ -170,7 +162,7 @@ async def test_local_store_set(tmpdir, path: str, auto_mkdir: bool) -> None: # from .test_storage import TestSQLiteStore as _TestSQLiteStore # from .test_storage import TestSQLiteStoreInMemory as _TestSQLiteStoreInMemory # from .test_storage import TestZipStore as _TestZipStore -# from .test_storage import dimension_separator_fixture, s3, skip_if_nested_chunks # noqa +# from .test_storage import dimension_separator_fixture, s3, skip_if_nested_chunks # pytestmark = pytest.mark.skipif(not v3_api_available, reason="v3 api is not available") From a821808604074bffc8d5ac2eb3d651311fefa97d Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 15 May 2024 17:11:12 +0200 Subject: [PATCH 54/56] remove explicit asyncio marks; use __eq__ method of LocalStore for test --- tests/v3/test_codecs.py | 11 ----------- tests/v3/test_storage.py | 7 +------ 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/tests/v3/test_codecs.py b/tests/v3/test_codecs.py index e042c7f275..fc209bd5e6 100644 --- a/tests/v3/test_codecs.py +++ b/tests/v3/test_codecs.py @@ -233,7 +233,6 @@ def test_nested_sharding( @pytest.mark.parametrize("runtime_write_order", ["F", "C"]) @pytest.mark.parametrize("runtime_read_order", ["F", "C"]) @pytest.mark.parametrize("with_sharding", [True, False]) -@pytest.mark.asyncio async def test_order( store: Store, input_order: Literal["F", "C"], @@ -344,7 +343,6 @@ def test_order_implicit( @pytest.mark.parametrize("runtime_write_order", ["F", "C"]) @pytest.mark.parametrize("runtime_read_order", ["F", "C"]) @pytest.mark.parametrize("with_sharding", [True, False]) -@pytest.mark.asyncio async def test_transpose( store: Store, input_order: Literal["F", "C"], @@ -601,7 +599,6 @@ def test_write_partial_sharded_chunks(store: Store): assert np.array_equal(a[0:16, 0:16], data) -@pytest.mark.asyncio async def test_delete_empty_chunks(store: Store): data = np.ones((16, 16)) @@ -618,7 +615,6 @@ async def test_delete_empty_chunks(store: Store): assert await (store / "delete_empty_chunks/c0/0").get() is None -@pytest.mark.asyncio async def test_delete_empty_sharded_chunks(store: Store): a = await AsyncArray.create( store / "delete_empty_sharded_chunks", @@ -644,7 +640,6 @@ async def test_delete_empty_sharded_chunks(store: Store): assert chunk_bytes is not None and len(chunk_bytes) == 16 * 2 + 8 * 8 * 2 + 4 -@pytest.mark.asyncio async def test_zarr_compat(store: Store): data = np.zeros((16, 18), dtype="uint16") @@ -676,7 +671,6 @@ async def test_zarr_compat(store: Store): assert z2._store["1.1"] == await (store / "zarr_compat3/1.1").get() -@pytest.mark.asyncio async def test_zarr_compat_F(store: Store): data = np.zeros((16, 18), dtype="uint16", order="F") @@ -710,7 +704,6 @@ async def test_zarr_compat_F(store: Store): assert z2._store["1.1"] == await (store / "zarr_compatF3/1.1").get() -@pytest.mark.asyncio async def test_dimension_names(store: Store): data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) @@ -776,7 +769,6 @@ def test_zstd(store: Store, checksum: bool): @pytest.mark.parametrize("endian", ["big", "little"]) -@pytest.mark.asyncio async def test_endian(store: Store, endian: Literal["big", "little"]): data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) @@ -808,7 +800,6 @@ async def test_endian(store: Store, endian: Literal["big", "little"]): @pytest.mark.parametrize("dtype_input_endian", [">u2", "u2", " None: assert store.auto_mkdir == auto_mkdir # ensure that str and pathlib.Path get normalized to the same output - # a stronger test is to ensure that these two store instances are identical - # but LocalStore.__eq__ is not defined at this time. - assert store.root == LocalStore(root=tmpdir_path, auto_mkdir=auto_mkdir).root + assert store == LocalStore(root=tmpdir_path, auto_mkdir=auto_mkdir) store_str = f"file://{tmpdir_str}" assert str(store) == store_str assert repr(store) == f"LocalStore({store_str!r})" -@pytest.mark.asyncio @pytest.mark.parametrize("byte_range", (None, (0, None), (1, None), (1, 2), (None, 1))) async def test_local_store_get( local_store, byte_range: None | tuple[int | None, int | None] @@ -58,7 +55,6 @@ async def test_local_store_get( assert await local_store.get(object_name + "_absent", byte_range=byte_range) is None -@pytest.mark.asyncio @pytest.mark.parametrize( "key_ranges", ( @@ -88,7 +84,6 @@ async def test_local_store_get_partial( assert observed == expected -@pytest.mark.asyncio @pytest.mark.parametrize("path", ("foo", "foo/bar")) @pytest.mark.parametrize("auto_mkdir", (True, False)) async def test_local_store_set(tmpdir, path: str, auto_mkdir: bool) -> None: From 6803f26bab19f47d471c969150f155b04714b3b3 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 15 May 2024 17:14:16 +0200 Subject: [PATCH 55/56] rename test_storage to test_store --- tests/v3/{test_storage.py => test_store.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/v3/{test_storage.py => test_store.py} (100%) diff --git a/tests/v3/test_storage.py b/tests/v3/test_store.py similarity index 100% rename from tests/v3/test_storage.py rename to tests/v3/test_store.py From d236b24ba871e9f3a5e87fa8d1e42016c7c34735 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 15 May 2024 17:22:32 +0200 Subject: [PATCH 56/56] modern type hints --- src/zarr/store/local.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/zarr/store/local.py b/src/zarr/store/local.py index a2792f514f..a3dd65979b 100644 --- a/src/zarr/store/local.py +++ b/src/zarr/store/local.py @@ -4,7 +4,6 @@ import shutil from collections.abc import AsyncGenerator from pathlib import Path -from typing import Union, Optional, List, Tuple from zarr.abc.store import Store from zarr.common import BytesLike, concurrent_map, to_thread @@ -50,7 +49,7 @@ def _get(path: Path, byte_range: tuple[int, int | None] | None) -> bytes: def _put( path: Path, value: BytesLike, - start: Optional[int] = None, + start: int | None = None, auto_mkdir: bool = True, ) -> int | None: if auto_mkdir: @@ -72,7 +71,7 @@ class LocalStore(Store): root: Path auto_mkdir: bool - def __init__(self, root: Union[Path, str], auto_mkdir: bool = True): + def __init__(self, root: Path | str, auto_mkdir: bool = True): if isinstance(root, str): root = Path(root) assert isinstance(root, Path) @@ -89,9 +88,7 @@ def __repr__(self) -> str: def __eq__(self, other: object) -> bool: return isinstance(other, type(self)) and self.root == other.root - async def get( - self, key: str, byte_range: Optional[Tuple[int, Optional[int]]] = None - ) -> Optional[bytes]: + async def get(self, key: str, byte_range: tuple[int, int | None] | None = None) -> bytes | None: assert isinstance(key, str) path = self.root / key @@ -101,8 +98,8 @@ async def get( return None async def get_partial_values( - self, key_ranges: List[Tuple[str, Tuple[int, int]]] - ) -> List[Optional[bytes]]: + self, key_ranges: list[tuple[str, tuple[int, int]]] + ) -> list[bytes | None]: """ Read byte ranges from multiple keys. Parameters @@ -125,7 +122,7 @@ async def set(self, key: str, value: BytesLike) -> None: path = self.root / key await to_thread(_put, path, value, auto_mkdir=self.auto_mkdir) - async def set_partial_values(self, key_start_values: List[Tuple[str, int, bytes]]) -> None: + async def set_partial_values(self, key_start_values: list[tuple[str, int, bytes]]) -> None: args = [] for key, start, value in key_start_values: assert isinstance(key, str)