From 14afe30d099be9e86f61957ee2d5f6dd39e3f595 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sat, 10 Aug 2024 16:49:40 -0700 Subject: [PATCH 01/10] feature(store): add basic implementation of a zip store --- src/zarr/store/__init__.py | 11 +- src/zarr/store/zip.py | 216 ++++++++++++++++++++++++++++++++ tests/v3/test_store/test_zip.py | 68 ++++++++++ 3 files changed, 294 insertions(+), 1 deletion(-) create mode 100644 src/zarr/store/zip.py create mode 100644 tests/v3/test_store/test_zip.py diff --git a/src/zarr/store/__init__.py b/src/zarr/store/__init__.py index fbdcdb9255..599d5e47f2 100644 --- a/src/zarr/store/__init__.py +++ b/src/zarr/store/__init__.py @@ -2,5 +2,14 @@ from zarr.store.local import LocalStore from zarr.store.memory import MemoryStore from zarr.store.remote import RemoteStore +from zarr.store.zip import ZipStore -__all__ = ["StorePath", "StoreLike", "make_store_path", "RemoteStore", "LocalStore", "MemoryStore"] +__all__ = [ + "StorePath", + "StoreLike", + "make_store_path", + "RemoteStore", + "LocalStore", + "MemoryStore", + "ZipStore", +] diff --git a/src/zarr/store/zip.py b/src/zarr/store/zip.py new file mode 100644 index 0000000000..66733fc457 --- /dev/null +++ b/src/zarr/store/zip.py @@ -0,0 +1,216 @@ +from __future__ import annotations + +import os +import threading +import time +import zipfile +from collections.abc import AsyncGenerator +from pathlib import Path +from typing import Literal + +from zarr.abc.store import Store +from zarr.buffer import Buffer, BufferPrototype + +ZipStoreAccessModeLiteral = Literal["r", "w", "a"] + + +class ZipStore(Store): + supports_writes: bool = True + supports_partial_writes: bool = False + supports_listing: bool = True + + root: Path + compression: int + allowZip64: bool + + def __init__( + self, + path: Path | str, + *, + mode: ZipStoreAccessModeLiteral = "r", + compression: int = zipfile.ZIP_STORED, + allowZip64: bool = True, + ): + super().__init__(mode=mode) + + if isinstance(path, str): + path = 
Path(path) + assert isinstance(path, Path) + self.path = path + + self.compression = compression + self.allowZip64 = allowZip64 + + self._lock = threading.RLock() # TODO: evaluate if this is the lock we want or if we want an asyncio.Lock or something like that + + self._zf = zipfile.ZipFile(path, mode=mode, compression=compression, allowZip64=allowZip64) + + def close(self) -> None: + self._is_open = False + with self._lock: + self._zf.close() + + async def clear(self) -> None: + with self._lock: + self._check_writable() + self._zf.close() + os.remove(self.path) + self._zf = zipfile.ZipFile( + self.path, mode="w", compression=self.compression, allowZip64=self.allowZip64 + ) + + async def empty(self) -> bool: + async for _ in self.list(): + return False + return True + + def __str__(self) -> str: + return f"zip://{self.path}" + + def __repr__(self) -> str: + return f"ZipStore({str(self)!r})" + + def __eq__(self, other: object) -> bool: + return isinstance(other, type(self)) and self.root == other.root + + def _get( + self, + key: str, + prototype: BufferPrototype, + byte_range: tuple[int | None, int | None] | None = None, + ) -> Buffer | None: + try: + with self._zf.open(key) as f: # will raise KeyError + if byte_range is None: + return prototype.buffer.from_bytes(f.read()) + start, length = byte_range + if start: + f.seek(start or 0) + if length: + return prototype.buffer.from_bytes(f.read(length)) + else: + return prototype.buffer.from_bytes(f.read()) + except KeyError: + return None + + async def get( + self, + key: str, + prototype: BufferPrototype, + byte_range: tuple[int | None, int | None] | None = None, + ) -> Buffer | None: + assert isinstance(key, str) + + with self._lock: + return self._get(key, prototype=prototype, byte_range=byte_range) + + async def get_partial_values( + self, + prototype: BufferPrototype, + key_ranges: list[tuple[str, tuple[int | None, int | None]]], + ) -> list[Buffer | None]: + """ + Read byte ranges from multiple keys. 
+ + Parameters + ---------- + key_ranges: List[Tuple[str, Tuple[int, int]]] + A list of (key, (start, length)) tuples. The first element of the tuple is the name of + the key in storage to fetch bytes from. The second element the tuple defines the byte + range to retrieve. These values are arguments to `get`, as this method wraps + concurrent invocation of `get`. + """ + out = [] + with self._lock: + for key, byte_range in key_ranges: + out.append(self._get(key, prototype=prototype, byte_range=byte_range)) + return out + + def _set(self, key: str, value: Buffer) -> None: + # generally, this should be called inside a lock + keyinfo = zipfile.ZipInfo(filename=key, date_time=time.localtime(time.time())[:6]) + keyinfo.compress_type = self.compression + if keyinfo.filename[-1] == os.sep: + keyinfo.external_attr = 0o40775 << 16 # drwxrwxr-x + keyinfo.external_attr |= 0x10 # MS-DOS directory flag + else: + keyinfo.external_attr = 0o644 << 16 # ?rw-r--r-- + self._zf.writestr(keyinfo, value.to_bytes()) + + async def set(self, key: str, value: Buffer) -> None: + self._check_writable() + assert isinstance(key, str) + if not isinstance(value, Buffer): + raise TypeError("ZipStore.set(): `value` must a Buffer instance") + with self._lock: + self._set(key, value) + + async def set_partial_values(self, key_start_values: list[tuple[str, int, bytes]]) -> None: + self._check_writable() + # TODO: this actually seems possible! + raise NotImplementedError + + async def delete(self, key: str) -> None: + # TODO: decide if writing an empty file is the right thing to do + raise NotImplementedError + + async def exists(self, key: str) -> bool: + with self._lock: + try: + self._zf.getinfo(key) + except KeyError: + return False + else: + return True + + async def list(self) -> AsyncGenerator[str, None]: + """Retrieve all keys in the store. 
+ + Returns + ------- + AsyncGenerator[str, None] + """ + with self._lock: + for key in self._zf.namelist(): + yield key + + async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: + """Retrieve all keys in the store with a given prefix. + + Parameters + ---------- + prefix : str + + Returns + ------- + AsyncGenerator[str, None] + """ + + async for key in self.list(): + if key.startswith(prefix): + yield key + + async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + """ + Retrieve all keys and prefixes with a given prefix and which do not contain the character + “/” after the given prefix. + + Parameters + ---------- + prefix : str + + Returns + ------- + AsyncGenerator[str, None] + """ + + if prefix.endswith("/"): + prefix = prefix[:-1] + + known_keys = set() + async for key in self.list(): + if key.startswith(prefix + "/") and key != prefix: + k = key.removeprefix(prefix + "/").split("/")[0] + if k not in known_keys: + known_keys.add(k) + yield k diff --git a/tests/v3/test_store/test_zip.py b/tests/v3/test_store/test_zip.py new file mode 100644 index 0000000000..c09ee4abab --- /dev/null +++ b/tests/v3/test_store/test_zip.py @@ -0,0 +1,68 @@ +from __future__ import annotations + +import os +import tempfile +from collections.abc import Coroutine +from typing import Any + +import pytest + +from zarr.abc.store import AccessMode +from zarr.buffer import Buffer, default_buffer_prototype +from zarr.store.zip import ZipStore +from zarr.testing.store import StoreTests + + +class TestZipStore(StoreTests[ZipStore]): + store_cls = ZipStore + + @pytest.fixture(scope="function") + def store_kwargs(self, request) -> dict[str, str | bool]: + fd, temp_path = tempfile.mkstemp() + os.close(fd) + + return {"path": temp_path, "mode": "w"} + + def get(self, store: ZipStore, key: str) -> Buffer: + return store._get(key, prototype=default_buffer_prototype()) + + def set(self, store: ZipStore, key: str, value: Buffer) -> None: + return store._set(key, value) 
+ + def test_store_mode(self, store: ZipStore, store_kwargs: dict[str, Any]) -> None: + assert store.mode == AccessMode.from_literal(store_kwargs["mode"]) + assert not store.mode.readonly + + async def test_not_writable_store_raises(self, store_kwargs: dict[str, Any]) -> None: + # we need to create the zipfile in write mode before switching to read mode + store = await self.store_cls.open(**store_kwargs) + store.close() + + kwargs = {**store_kwargs, "mode": "r"} + store = await self.store_cls.open(**kwargs) + assert store.mode == AccessMode.from_literal("r") + assert store.mode.readonly + + # set + with pytest.raises(ValueError): + await store.set("foo", Buffer.from_bytes(b"bar")) + + # # delete + # TODO: uncomment once deletes are implemented + # with pytest.raises(ValueError): + # await store.delete("foo") + + def test_store_repr(self, store: ZipStore) -> None: + assert str(store) == f"zip://{store.path!s}" + + def test_store_supports_writes(self, store: ZipStore) -> None: + assert store.supports_writes + + def test_store_supports_partial_writes(self, store: ZipStore) -> None: + assert store.supports_partial_writes is False + + def test_store_supports_listing(self, store: ZipStore) -> None: + assert store.supports_listing + + def test_delete(self, store: ZipStore) -> Coroutine[Any, Any, None]: + pass From 0a20ed6b95736883d832ef76a4942d26ffbdd5e8 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sat, 10 Aug 2024 21:49:54 -0700 Subject: [PATCH 02/10] add zip store to array/group/sharding tests --- src/zarr/store/zip.py | 31 ++++++++++++++---------- tests/v3/conftest.py | 13 +++++++--- tests/v3/test_array.py | 6 ++--- tests/v3/test_codecs/test_sharding.py | 16 ++++++------ tests/v3/test_group.py | 35 +++++++++++++-------------- 5 files changed, 56 insertions(+), 45 deletions(-) diff --git a/src/zarr/store/zip.py b/src/zarr/store/zip.py index 66733fc457..33d480ec05 100644 --- a/src/zarr/store/zip.py +++ b/src/zarr/store/zip.py @@ -19,7 +19,7 @@ class 
ZipStore(Store): supports_partial_writes: bool = False supports_listing: bool = True - root: Path + path: Path compression: int allowZip64: bool @@ -36,7 +36,7 @@ def __init__( if isinstance(path, str): path = Path(path) assert isinstance(path, Path) - self.path = path + self.path = path # root? self.compression = compression self.allowZip64 = allowZip64 @@ -71,7 +71,7 @@ def __repr__(self) -> str: return f"ZipStore({str(self)!r})" def __eq__(self, other: object) -> bool: - return isinstance(other, type(self)) and self.root == other.root + return isinstance(other, type(self)) and self.path == other.path def _get( self, @@ -146,12 +146,9 @@ async def set(self, key: str, value: Buffer) -> None: self._set(key, value) async def set_partial_values(self, key_start_values: list[tuple[str, int, bytes]]) -> None: - self._check_writable() - # TODO: this actually seems possible! raise NotImplementedError async def delete(self, key: str) -> None: - # TODO: decide if writing an empty file is the right thing to do raise NotImplementedError async def exists(self, key: str) -> bool: @@ -207,10 +204,18 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: if prefix.endswith("/"): prefix = prefix[:-1] - known_keys = set() - async for key in self.list(): - if key.startswith(prefix + "/") and key != prefix: - k = key.removeprefix(prefix + "/").split("/")[0] - if k not in known_keys: - known_keys.add(k) - yield k + keys = self._zf.namelist() + seen = set() + if prefix == "": + keys_unique = set(k.split("/")[0] for k in keys) + for key in keys_unique: + if key not in seen: + seen.add(key) + yield key + else: + for key in keys: + if key.startswith(prefix + "/") and key != prefix: + k = key.removeprefix(prefix + "/").split("/")[0] + if k not in seen: + seen.add(k) + yield k diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py index 0a672d1f2e..785004f022 100644 --- a/tests/v3/conftest.py +++ b/tests/v3/conftest.py @@ -20,12 +20,12 @@ import pytest from hypothesis 
import HealthCheck, Verbosity, settings -from zarr.store import LocalStore, MemoryStore, StorePath +from zarr.store import LocalStore, MemoryStore, StorePath, ZipStore from zarr.store.remote import RemoteStore async def parse_store( - store: Literal["local", "memory", "remote"], path: str + store: Literal["local", "memory", "remote", "zip"], path: str ) -> LocalStore | MemoryStore | RemoteStore: if store == "local": return await LocalStore.open(path, mode="w") @@ -33,6 +33,8 @@ async def parse_store( return await MemoryStore.open(mode="w") if store == "remote": return await RemoteStore.open(url=path, mode="w") + if store == "zip": + return await ZipStore.open(path + "/zarr.zip", mode="w") raise AssertionError @@ -64,6 +66,11 @@ async def memory_store() -> MemoryStore: return await MemoryStore.open(mode="w") +@pytest.fixture(scope="function") +async def zip_store(tmpdir: LEGACY_PATH) -> ZipStore: + return await ZipStore.open(str(tmpdir / "zarr.zip"), mode="w") + + @pytest.fixture(scope="function") async def store(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> Store: param = request.param @@ -73,7 +80,7 @@ async def store(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> Store: @dataclass class AsyncGroupRequest: zarr_format: ZarrFormat - store: Literal["local", "remote", "memory"] + store: Literal["local", "remote", "memory", "zip"] attributes: dict[str, Any] = field(default_factory=dict) diff --git a/tests/v3/test_array.py b/tests/v3/test_array.py index 9fd135ad5c..ca0eb86838 100644 --- a/tests/v3/test_array.py +++ b/tests/v3/test_array.py @@ -11,7 +11,7 @@ from zarr.store.core import StorePath -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) @pytest.mark.parametrize("zarr_format", (2, 3)) @pytest.mark.parametrize("exists_ok", [True, False]) @pytest.mark.parametrize("extant_node", ["array", "group"]) @@ -60,7 +60,7 @@ def 
test_array_creation_existing_node( ) -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) @pytest.mark.parametrize("zarr_format", (2, 3)) def test_array_name_properties_no_group( store: LocalStore | MemoryStore, zarr_format: ZarrFormat @@ -71,7 +71,7 @@ def test_array_name_properties_no_group( assert arr.basename is None -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) @pytest.mark.parametrize("zarr_format", (2, 3)) def test_array_name_properties_with_group( store: LocalStore | MemoryStore, zarr_format: ZarrFormat diff --git a/tests/v3/test_codecs/test_sharding.py b/tests/v3/test_codecs/test_sharding.py index 27667ca9dd..b48c011925 100644 --- a/tests/v3/test_codecs/test_sharding.py +++ b/tests/v3/test_codecs/test_sharding.py @@ -19,7 +19,7 @@ from .test_codecs import _AsyncArrayProxy, order_from_dim -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) @pytest.mark.parametrize("index_location", ["start", "end"]) @pytest.mark.parametrize( "array_fixture", @@ -71,7 +71,7 @@ def test_sharding( @pytest.mark.parametrize("index_location", ["start", "end"]) -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) @pytest.mark.parametrize( "array_fixture", [ @@ -121,7 +121,7 @@ def test_sharding_partial( indirect=["array_fixture"], ) @pytest.mark.parametrize("index_location", ["start", "end"]) -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) def test_sharding_partial_read( store: Store, array_fixture: np.ndarray, index_location: 
ShardingCodecIndexLocation ) -> None: @@ -158,7 +158,7 @@ def test_sharding_partial_read( indirect=["array_fixture"], ) @pytest.mark.parametrize("index_location", ["start", "end"]) -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) def test_sharding_partial_overwrite( store: Store, array_fixture: np.ndarray, index_location: ShardingCodecIndexLocation ) -> None: @@ -209,7 +209,7 @@ def test_sharding_partial_overwrite( "inner_index_location", ["start", "end"], ) -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) def test_nested_sharding( store: Store, array_fixture: np.ndarray, @@ -242,7 +242,7 @@ def test_nested_sharding( assert np.array_equal(data, read_data) -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) def test_open_sharding(store: Store) -> None: path = "open_sharding" spath = StorePath(store, path) @@ -267,7 +267,7 @@ def test_open_sharding(store: Store) -> None: assert a.metadata == b.metadata -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) def test_write_partial_sharded_chunks(store: Store) -> None: data = np.arange(0, 16 * 16, dtype="uint16").reshape((16, 16)) spath = StorePath(store) @@ -291,7 +291,7 @@ def test_write_partial_sharded_chunks(store: Store) -> None: assert np.array_equal(a[0:16, 0:16], data) -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) async def test_delete_empty_shards(store: Store) -> None: path = "delete_empty_shards" spath = StorePath(store, path) diff --git a/tests/v3/test_group.py 
b/tests/v3/test_group.py index daa5979b27..f1d15f789f 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -6,6 +6,7 @@ import pytest from _pytest.compat import LEGACY_PATH +from zarr.abc.store import Store from zarr.array import Array, AsyncArray from zarr.buffer import Buffer from zarr.common import ZarrFormat @@ -18,10 +19,10 @@ from .conftest import parse_store -@pytest.fixture(params=["local", "memory"]) -async def store(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> LocalStore | MemoryStore: +@pytest.fixture(params=["local", "memory", "zip"]) +async def store(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> Store: result = await parse_store(request.param, str(tmpdir)) - if not isinstance(result, MemoryStore | LocalStore): + if not isinstance(result, Store): raise TypeError("Wrong store class returned by test fixture! got " + result + " instead") return result @@ -42,7 +43,7 @@ def zarr_format(request: pytest.FixtureRequest) -> ZarrFormat: return cast(ZarrFormat, result) -def test_group_init(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: +def test_group_init(store: Store, zarr_format: ZarrFormat) -> None: """ Test that initializing a group from an asyncgroup works. 
""" @@ -51,7 +52,7 @@ def test_group_init(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> assert group._async_group == agroup -def test_group_name_properties(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: +def test_group_name_properties(store: Store, zarr_format: ZarrFormat) -> None: """ Test basic properties of groups """ @@ -365,12 +366,12 @@ def test_group_create_array( assert np.array_equal(array[:], data) -@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) @pytest.mark.parametrize("zarr_format", (2, 3)) @pytest.mark.parametrize("exists_ok", [True, False]) @pytest.mark.parametrize("extant_node", ["array", "group"]) def test_group_creation_existing_node( - store: LocalStore | MemoryStore, + store: Store, zarr_format: ZarrFormat, exists_ok: bool, extant_node: Literal["array", "group"], @@ -454,14 +455,14 @@ async def test_asyncgroup_create( ) -async def test_asyncgroup_attrs(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: +async def test_asyncgroup_attrs(store: Store, zarr_format: ZarrFormat) -> None: attributes = {"foo": 100} agroup = await AsyncGroup.create(store, zarr_format=zarr_format, attributes=attributes) assert agroup.attrs == agroup.metadata.attributes == attributes -async def test_asyncgroup_info(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: +async def test_asyncgroup_info(store: Store, zarr_format: ZarrFormat) -> None: agroup = await AsyncGroup.create( # noqa store, zarr_format=zarr_format, @@ -471,7 +472,7 @@ async def test_asyncgroup_info(store: LocalStore | MemoryStore, zarr_format: Zar async def test_asyncgroup_open( - store: LocalStore | MemoryStore, + store: Store, zarr_format: ZarrFormat, ) -> None: """ @@ -492,7 +493,7 @@ async def test_asyncgroup_open( async def test_asyncgroup_open_wrong_format( - store: LocalStore | MemoryStore, + store: Store, zarr_format: 
ZarrFormat, ) -> None: _ = await AsyncGroup.create(store=store, exists_ok=False, zarr_format=zarr_format) @@ -533,7 +534,7 @@ def test_asyncgroup_from_dict(store: MemoryStore | LocalStore, data: dict[str, A # todo: replace this with a declarative API where we model a full hierarchy -async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: +async def test_asyncgroup_getitem(store: Store, zarr_format: ZarrFormat) -> None: """ Create an `AsyncGroup`, then create members of that group, and ensure that we can access those members via the `AsyncGroup.getitem` method. @@ -555,7 +556,7 @@ async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: await agroup.getitem("foo") -async def test_asyncgroup_delitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: +async def test_asyncgroup_delitem(store: Store, zarr_format: ZarrFormat) -> None: agroup = await AsyncGroup.create(store=store, zarr_format=zarr_format) sub_array_path = "sub_array" _ = await agroup.create_array( @@ -585,7 +586,7 @@ async def test_asyncgroup_delitem(store: LocalStore | MemoryStore, zarr_format: async def test_asyncgroup_create_group( - store: LocalStore | MemoryStore, + store: Store, zarr_format: ZarrFormat, ) -> None: agroup = await AsyncGroup.create(store=store, zarr_format=zarr_format) @@ -601,7 +602,7 @@ async def test_asyncgroup_create_group( async def test_asyncgroup_create_array( - store: LocalStore | MemoryStore, zarr_format: ZarrFormat, exists_ok: bool + store: Store, zarr_format: ZarrFormat, exists_ok: bool ) -> None: """ Test that the AsyncGroup.create_array method works correctly. 
We ensure that array properties @@ -639,9 +640,7 @@ async def test_asyncgroup_create_array( assert subnode.metadata.zarr_format == zarr_format -async def test_asyncgroup_update_attributes( - store: LocalStore | MemoryStore, zarr_format: ZarrFormat -) -> None: +async def test_asyncgroup_update_attributes(store: Store, zarr_format: ZarrFormat) -> None: """ Test that the AsyncGroup.update_attributes method works correctly. """ From 9aeebeab907da74b8a3db94e70d1151f0a4a69e7 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 12 Aug 2024 14:46:02 -0700 Subject: [PATCH 03/10] fix sharding and skip tests that require delete --- src/zarr/abc/store.py | 6 ++++ src/zarr/store/local.py | 1 + src/zarr/store/memory.py | 1 + src/zarr/store/remote.py | 1 + src/zarr/store/zip.py | 6 +++- tests/v3/test_codecs/test_sharding.py | 2 ++ tests/v3/test_group.py | 47 +++++++++++++-------------- 7 files changed, 39 insertions(+), 25 deletions(-) diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index 449816209b..43dda67dff 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -141,6 +141,12 @@ async def set(self, key: str, value: Buffer) -> None: """ ... + @property + @abstractmethod + def supports_deletes(self) -> bool: + """Does the store support deletes?""" + ... + @abstractmethod async def delete(self, key: str) -> None: """Remove a key from the store diff --git a/src/zarr/store/local.py b/src/zarr/store/local.py index 25fd9fc13a..4138f96c43 100644 --- a/src/zarr/store/local.py +++ b/src/zarr/store/local.py @@ -67,6 +67,7 @@ def _put( class LocalStore(Store): supports_writes: bool = True + supports_deletes: bool = True supports_partial_writes: bool = True supports_listing: bool = True diff --git a/src/zarr/store/memory.py b/src/zarr/store/memory.py index dd3e52e703..d2e233a773 100644 --- a/src/zarr/store/memory.py +++ b/src/zarr/store/memory.py @@ -12,6 +12,7 @@ # When that is done, the `MemoryStore` will just be a store that wraps a dict. 
class MemoryStore(Store): supports_writes: bool = True + supports_deletes: bool = True supports_partial_writes: bool = True supports_listing: bool = True diff --git a/src/zarr/store/remote.py b/src/zarr/store/remote.py index c742d9e567..db10cb7824 100644 --- a/src/zarr/store/remote.py +++ b/src/zarr/store/remote.py @@ -21,6 +21,7 @@ class RemoteStore(Store): # based on FSSpec supports_writes: bool = True + supports_deletes: bool = True supports_partial_writes: bool = False supports_listing: bool = True diff --git a/src/zarr/store/zip.py b/src/zarr/store/zip.py index 33d480ec05..02905367f3 100644 --- a/src/zarr/store/zip.py +++ b/src/zarr/store/zip.py @@ -16,6 +16,7 @@ class ZipStore(Store): supports_writes: bool = True + supports_deletes: bool = False supports_partial_writes: bool = False supports_listing: bool = True @@ -85,7 +86,10 @@ def _get( return prototype.buffer.from_bytes(f.read()) start, length = byte_range if start: - f.seek(start or 0) + if start < 0: + start = f.seek(start, os.SEEK_END) + start + else: + start = f.seek(start, os.SEEK_SET) if length: return prototype.buffer.from_bytes(f.read(length)) else: diff --git a/tests/v3/test_codecs/test_sharding.py b/tests/v3/test_codecs/test_sharding.py index b48c011925..5ed54bed9f 100644 --- a/tests/v3/test_codecs/test_sharding.py +++ b/tests/v3/test_codecs/test_sharding.py @@ -293,6 +293,8 @@ def test_write_partial_sharded_chunks(store: Store) -> None: @pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) async def test_delete_empty_shards(store: Store) -> None: + if not store.supports_deletes: + pytest.skip("store does not support deletes") path = "delete_empty_shards" spath = StorePath(store, path) a = await AsyncArray.create( diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index f1d15f789f..7b26ce78c6 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -12,7 +12,7 @@ from zarr.common import ZarrFormat from zarr.errors import ContainsArrayError, 
ContainsGroupError from zarr.group import AsyncGroup, Group, GroupMetadata -from zarr.store import LocalStore, MemoryStore, StorePath +from zarr.store import StorePath from zarr.store.core import make_store_path from zarr.sync import sync @@ -72,7 +72,7 @@ def test_group_name_properties(store: Store, zarr_format: ZarrFormat) -> None: assert bar.basename == "bar" -def test_group_members(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None: +def test_group_members(store: Store, zarr_format: ZarrFormat) -> None: """ Test that `Group.members` returns correct values, i.e. the arrays and groups (explicit and implicit) contained in that group. @@ -107,7 +107,7 @@ def test_group_members(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) assert sorted(dict(members_observed)) == sorted(members_expected) -def test_group(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None: +def test_group(store: Store, zarr_format: ZarrFormat) -> None: """ Test basic Group routines. """ @@ -149,9 +149,7 @@ def test_group(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None assert dict(bar3.attrs) == {"baz": "qux", "name": "bar"} -def test_group_create( - store: MemoryStore | LocalStore, exists_ok: bool, zarr_format: ZarrFormat -) -> None: +def test_group_create(store: Store, exists_ok: bool, zarr_format: ZarrFormat) -> None: """ Test that `Group.create` works as expected. """ @@ -167,9 +165,7 @@ def test_group_create( ) -def test_group_open( - store: MemoryStore | LocalStore, zarr_format: ZarrFormat, exists_ok: bool -) -> None: +def test_group_open(store: Store, zarr_format: ZarrFormat, exists_ok: bool) -> None: """ Test the `Group.open` method. """ @@ -201,7 +197,7 @@ def test_group_open( assert group_created_again.store_path == spath -def test_group_getitem(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None: +def test_group_getitem(store: Store, zarr_format: ZarrFormat) -> None: """ Test the `Group.__getitem__` method. 
""" @@ -216,10 +212,12 @@ def test_group_getitem(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) group["nope"] -def test_group_delitem(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None: +def test_group_delitem(store: Store, zarr_format: ZarrFormat) -> None: """ Test the `Group.__delitem__` method. """ + if not store.supports_deletes: + pytest.skip("store does not support deletes") group = Group.create(store, zarr_format=zarr_format) subgroup = group.create_group(name="subgroup") @@ -237,7 +235,7 @@ def test_group_delitem(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) group["subarray"] -def test_group_iter(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None: +def test_group_iter(store: Store, zarr_format: ZarrFormat) -> None: """ Test the `Group.__iter__` method. """ @@ -247,7 +245,7 @@ def test_group_iter(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> [x for x in group] # type: ignore -def test_group_len(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None: +def test_group_len(store: Store, zarr_format: ZarrFormat) -> None: """ Test the `Group.__len__` method. """ @@ -257,7 +255,7 @@ def test_group_len(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> len(group) # type: ignore -def test_group_setitem(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None: +def test_group_setitem(store: Store, zarr_format: ZarrFormat) -> None: """ Test the `Group.__setitem__` method. 
""" @@ -266,7 +264,7 @@ def test_group_setitem(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) group["key"] = 10 -def test_group_contains(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None: +def test_group_contains(store: Store, zarr_format: ZarrFormat) -> None: """ Test the `Group.__contains__` method """ @@ -276,7 +274,7 @@ def test_group_contains(store: MemoryStore | LocalStore, zarr_format: ZarrFormat assert "foo" in group -def test_group_subgroups(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None: +def test_group_subgroups(store: Store, zarr_format: ZarrFormat) -> None: """ Test the behavior of `Group` methods for accessing subgroups, namely `Group.group_keys` and `Group.groups` """ @@ -291,7 +289,7 @@ def test_group_subgroups(store: MemoryStore | LocalStore, zarr_format: ZarrForma assert all(a in subgroups_observed for a in subgroups_expected) -def test_group_subarrays(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None: +def test_group_subarrays(store: Store, zarr_format: ZarrFormat) -> None: """ Test the behavior of `Group` methods for accessing subgroups, namely `Group.group_keys` and `Group.groups` """ @@ -306,7 +304,7 @@ def test_group_subarrays(store: MemoryStore | LocalStore, zarr_format: ZarrForma assert all(a in subarrays_observed for a in subarrays_expected) -def test_group_update_attributes(store: MemoryStore | LocalStore, zarr_format: ZarrFormat) -> None: +def test_group_update_attributes(store: Store, zarr_format: ZarrFormat) -> None: """ Test the behavior of `Group.update_attributes` """ @@ -318,9 +316,7 @@ def test_group_update_attributes(store: MemoryStore | LocalStore, zarr_format: Z assert new_group.attrs == new_attrs -async def test_group_update_attributes_async( - store: MemoryStore | LocalStore, zarr_format: ZarrFormat -) -> None: +async def test_group_update_attributes_async(store: Store, zarr_format: ZarrFormat) -> None: """ Test the behavior of `Group.update_attributes_async` """ 
@@ -334,7 +330,7 @@ async def test_group_update_attributes_async( @pytest.mark.parametrize("method", ["create_array", "array"]) def test_group_create_array( - store: MemoryStore | LocalStore, + store: Store, zarr_format: ZarrFormat, exists_ok: bool, method: Literal["create_array", "array"], @@ -414,7 +410,7 @@ def test_group_creation_existing_node( async def test_asyncgroup_create( - store: MemoryStore | LocalStore, + store: Store, exists_ok: bool, zarr_format: ZarrFormat, ) -> None: @@ -519,7 +515,7 @@ async def test_asyncgroup_open_wrong_format( {"zarr_format": 2, "attributes": {"foo": 100}}, ), ) -def test_asyncgroup_from_dict(store: MemoryStore | LocalStore, data: dict[str, Any]) -> None: +def test_asyncgroup_from_dict(store: Store, data: dict[str, Any]) -> None: """ Test that we can create an AsyncGroup from a dict """ @@ -557,6 +553,9 @@ async def test_asyncgroup_getitem(store: Store, zarr_format: ZarrFormat) -> None async def test_asyncgroup_delitem(store: Store, zarr_format: ZarrFormat) -> None: + if not store.supports_deletes: + pytest.skip("store does not support deletes") + agroup = await AsyncGroup.create(store=store, zarr_format=zarr_format) sub_array_path = "sub_array" _ = await agroup.create_array( From c6ab43952faef1694810c9be1b1706e6e23b72d1 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Wed, 14 Aug 2024 21:36:34 -0700 Subject: [PATCH 04/10] store context managers --- src/zarr/abc/store.py | 11 ++++++++++- src/zarr/store/zip.py | 35 +++++++++++++++++++++++++-------- tests/v3/test_store/test_zip.py | 2 +- 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index 5044983ec4..70ac9adc17 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -11,6 +11,7 @@ class AccessMode(NamedTuple): + str: AccessModeLiteral readonly: bool overwrite: bool create: bool @@ -20,6 +21,7 @@ class AccessMode(NamedTuple): def from_literal(cls, mode: AccessModeLiteral) -> Self: if mode in ("r", "r+", 
"a", "w", "w-"): return cls( + str=mode, readonly=mode == "r", overwrite=mode == "w", create=mode in ("a", "w", "w-"), @@ -42,6 +44,14 @@ async def open(cls, *args: Any, **kwargs: Any) -> Self: await store._open() return store + def __enter__(self) -> Self: + """Enter a context manager that will close the store upon exiting.""" + return self + + def __exit__(self, *args: Any) -> None: + """Close the store.""" + self.close() + async def _open(self) -> None: if self._is_open: raise ValueError("store is already open") @@ -227,7 +237,6 @@ def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: def close(self) -> None: """Close the store.""" self._is_open = False - pass @runtime_checkable diff --git a/src/zarr/store/zip.py b/src/zarr/store/zip.py index 02905367f3..8150a4d0ec 100644 --- a/src/zarr/store/zip.py +++ b/src/zarr/store/zip.py @@ -6,10 +6,13 @@ import zipfile from collections.abc import AsyncGenerator from pathlib import Path -from typing import Literal +from typing import TYPE_CHECKING, Any, Literal from zarr.abc.store import Store -from zarr.buffer import Buffer, BufferPrototype +from zarr.core.buffer import Buffer, BufferPrototype + +if TYPE_CHECKING: + from typing_extensions import Self ZipStoreAccessModeLiteral = Literal["r", "w", "a"] @@ -24,6 +27,9 @@ class ZipStore(Store): compression: int allowZip64: bool + _zf: zipfile.ZipFile + _lock: threading.RLock + def __init__( self, path: Path | str, @@ -39,15 +45,26 @@ def __init__( assert isinstance(path, Path) self.path = path # root? 
+ self._zmode = mode self.compression = compression self.allowZip64 = allowZip64 - self._lock = threading.RLock() # TODO: evaluate if this is the lock we want or if we want an asyncio.Lock or something like that + @classmethod + async def open(cls, *args: Any, **kwargs: Any) -> Self: + store = cls(*args, **kwargs) + store._lock = threading.RLock() # TODO: evaluate if this is the lock we want or if we want an asyncio.Lock or something like that - self._zf = zipfile.ZipFile(path, mode=mode, compression=compression, allowZip64=allowZip64) + store._zf = zipfile.ZipFile( + store.path, + mode=store._zmode, + compression=store.compression, + allowZip64=store.allowZip64, + ) + store._is_open = True + return store def close(self) -> None: - self._is_open = False + super().close() with self._lock: self._zf.close() @@ -61,9 +78,11 @@ async def clear(self) -> None: ) async def empty(self) -> bool: - async for _ in self.list(): - return False - return True + with self._lock: + if self._zf.namelist(): + return False + else: + return True def __str__(self) -> str: return f"zip://{self.path}" diff --git a/tests/v3/test_store/test_zip.py b/tests/v3/test_store/test_zip.py index c09ee4abab..e79b86615b 100644 --- a/tests/v3/test_store/test_zip.py +++ b/tests/v3/test_store/test_zip.py @@ -8,7 +8,7 @@ import pytest from zarr.abc.store import AccessMode -from zarr.buffer import Buffer, default_buffer_prototype +from zarr.core.buffer import Buffer, default_buffer_prototype from zarr.store.zip import ZipStore from zarr.testing.store import StoreTests From 5862787cd0faf40b31de9a3764490c7ebad00fe5 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Thu, 12 Sep 2024 08:24:21 -0700 Subject: [PATCH 05/10] fix test typing --- tests/v3/test_store/test_zip.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/v3/test_store/test_zip.py b/tests/v3/test_store/test_zip.py index e79b86615b..ed7a38a278 100644 --- a/tests/v3/test_store/test_zip.py +++ 
b/tests/v3/test_store/test_zip.py @@ -2,18 +2,21 @@ import os import tempfile -from collections.abc import Coroutine -from typing import Any +from typing import TYPE_CHECKING import pytest from zarr.abc.store import AccessMode -from zarr.core.buffer import Buffer, default_buffer_prototype +from zarr.core.buffer import Buffer, cpu, default_buffer_prototype from zarr.store.zip import ZipStore from zarr.testing.store import StoreTests +if TYPE_CHECKING: + from collections.abc import Coroutine + from typing import Any -class TestZipStore(StoreTests[ZipStore]): + +class TestZipStore(StoreTests[ZipStore, cpu.Buffer]): store_cls = ZipStore @pytest.fixture(scope="function") From 569e560fcd767a2e8ed808fbc96d189bd05a34b0 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Thu, 12 Sep 2024 08:37:22 -0700 Subject: [PATCH 06/10] add buffer_cls to store test --- tests/v3/test_store/test_zip.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/v3/test_store/test_zip.py b/tests/v3/test_store/test_zip.py index ed7a38a278..f12614b7f6 100644 --- a/tests/v3/test_store/test_zip.py +++ b/tests/v3/test_store/test_zip.py @@ -18,6 +18,7 @@ class TestZipStore(StoreTests[ZipStore, cpu.Buffer]): store_cls = ZipStore + buffer_cls = cpu.Buffer @pytest.fixture(scope="function") def store_kwargs(self, request) -> dict[str, str | bool]: From 480220dfab16c8785f3efb77e3570bf0e33f1371 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Thu, 12 Sep 2024 10:20:07 -0700 Subject: [PATCH 07/10] clean up test failures --- src/zarr/store/zip.py | 28 ++++++++++++++-------------- tests/v3/test_store/test_zip.py | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/zarr/store/zip.py b/src/zarr/store/zip.py index 8150a4d0ec..d785b1950c 100644 --- a/src/zarr/store/zip.py +++ b/src/zarr/store/zip.py @@ -4,15 +4,14 @@ import threading import time import zipfile -from collections.abc import AsyncGenerator from pathlib import Path -from typing import TYPE_CHECKING, Any, Literal +from typing 
import TYPE_CHECKING, Literal from zarr.abc.store import Store from zarr.core.buffer import Buffer, BufferPrototype if TYPE_CHECKING: - from typing_extensions import Self + from collections.abc import AsyncGenerator ZipStoreAccessModeLiteral = Literal["r", "w", "a"] @@ -49,19 +48,20 @@ def __init__( self.compression = compression self.allowZip64 = allowZip64 - @classmethod - async def open(cls, *args: Any, **kwargs: Any) -> Self: - store = cls(*args, **kwargs) - store._lock = threading.RLock() # TODO: evaluate if this is the lock we want or if we want an asyncio.Lock or something like that + async def _open(self) -> None: + if self._is_open: + raise ValueError("store is already open") - store._zf = zipfile.ZipFile( - store.path, - mode=store._zmode, - compression=store.compression, - allowZip64=store.allowZip64, + self._lock = threading.RLock() + + self._zf = zipfile.ZipFile( + self.path, + mode=self._zmode, + compression=self.compression, + allowZip64=self.allowZip64, ) - store._is_open = True - return store + + self._is_open = True def close(self) -> None: super().close() diff --git a/tests/v3/test_store/test_zip.py b/tests/v3/test_store/test_zip.py index f12614b7f6..e5ad6710b6 100644 --- a/tests/v3/test_store/test_zip.py +++ b/tests/v3/test_store/test_zip.py @@ -49,7 +49,7 @@ async def test_not_writable_store_raises(self, store_kwargs: dict[str, Any]) -> # set with pytest.raises(ValueError): - await store.set("foo", Buffer.from_bytes(b"bar")) + await store.set("foo", cpu.Buffer.from_bytes(b"bar")) # # delete # TODO: uncomment once deletes are implemented From 7c8276326fd0d91451adc4d181ed1158fb66f35c Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Thu, 12 Sep 2024 10:26:06 -0700 Subject: [PATCH 08/10] class docstring --- src/zarr/store/zip.py | 61 ++++++++++++++----------------------------- 1 file changed, 20 insertions(+), 41 deletions(-) diff --git a/src/zarr/store/zip.py b/src/zarr/store/zip.py index d785b1950c..15473aa674 100644 --- a/src/zarr/store/zip.py 
+++ b/src/zarr/store/zip.py @@ -17,6 +17,26 @@ class ZipStore(Store): + """ + Storage class using a ZIP file. + + Parameters + ---------- + path : string or pathlib.Path + Location of file. + compression : integer, optional + Compression method to use when writing to the archive. + allowZip64 : bool, optional + If True (the default) will create ZIP files that use the ZIP64 + extensions when the zipfile is larger than 2 GiB. If False + will raise an exception when the ZIP file would require ZIP64 + extensions. + mode : string, optional + One of 'r' to read an existing file, 'w' to truncate and write a new + file, or 'a' to append to an existing file. The default is 'r', + which opens the archive read-only. + """ + supports_writes: bool = True supports_deletes: bool = False supports_partial_writes: bool = False @@ -132,17 +152,6 @@ async def get_partial_values( prototype: BufferPrototype, key_ranges: list[tuple[str, tuple[int | None, int | None]]], ) -> list[Buffer | None]: - """ - Read byte ranges from multiple keys. - - Parameters - ---------- - key_ranges: List[Tuple[str, Tuple[int, int]]] - A list of (key, (start, length)) tuples. The first element of the tuple is the name of - the key in storage to fetch bytes from. The second element the tuple defines the byte - range to retrieve. These values are arguments to `get`, as this method wraps - concurrent invocation of `get`. - """ out = [] with self._lock: for key, byte_range in key_ranges: @@ -184,46 +193,16 @@ async def exists(self, key: str) -> bool: return True async def list(self) -> AsyncGenerator[str, None]: - """Retrieve all keys in the store. - - Returns - ------- - AsyncGenerator[str, None] - """ with self._lock: for key in self._zf.namelist(): yield key async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: - """Retrieve all keys in the store with a given prefix. 
- - Parameters - ---------- - prefix : str - - Returns - ------- - AsyncGenerator[str, None] - """ - async for key in self.list(): if key.startswith(prefix): yield key async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: - """ - Retrieve all keys and prefixes with a given prefix and which do not contain the character - “/” after the given prefix. - - Parameters - ---------- - prefix : str - - Returns - ------- - AsyncGenerator[str, None] - """ - if prefix.endswith("/"): prefix = prefix[:-1] From ccff15fd3475b91fae8fbf376d777ffac1e4202f Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Thu, 12 Sep 2024 10:31:21 -0700 Subject: [PATCH 09/10] remove commented out check against zipstore.delete --- tests/v3/test_store/test_zip.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/v3/test_store/test_zip.py b/tests/v3/test_store/test_zip.py index e5ad6710b6..0842ff986a 100644 --- a/tests/v3/test_store/test_zip.py +++ b/tests/v3/test_store/test_zip.py @@ -51,11 +51,6 @@ async def test_not_writable_store_raises(self, store_kwargs: dict[str, Any]) -> with pytest.raises(ValueError): await store.set("foo", cpu.Buffer.from_bytes(b"bar")) - # # delete - # TODO: uncomment once deletes are implemented - # with pytest.raises(ValueError): - # await store.delete("foo") - def test_store_repr(self, store: ZipStore) -> None: assert str(store) == f"zip://{store.path!s}" From 42a46e19b58d26366dbf925d2876d0caea783e0f Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 13 Sep 2024 09:41:06 -0700 Subject: [PATCH 10/10] add api integration test --- tests/v3/test_store/test_zip.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tests/v3/test_store/test_zip.py b/tests/v3/test_store/test_zip.py index 0842ff986a..7c332e9a2e 100644 --- a/tests/v3/test_store/test_zip.py +++ b/tests/v3/test_store/test_zip.py @@ -4,8 +4,10 @@ import tempfile from typing import TYPE_CHECKING +import numpy as np import pytest +import zarr from 
zarr.abc.store import AccessMode from zarr.core.buffer import Buffer, cpu, default_buffer_prototype from zarr.store.zip import ZipStore @@ -65,3 +67,32 @@ def test_store_supports_listing(self, store: ZipStore) -> None: def test_delete(self, store: ZipStore) -> Coroutine[Any, Any, None]: pass + + def test_api_integration(self, store: ZipStore) -> None: + root = zarr.open_group(store=store) + + data = np.arange(10000, dtype=np.uint16).reshape(100, 100) + z = root.create_array( + shape=data.shape, chunks=(10, 10), name="foo", dtype=np.uint16, fill_value=99 + ) + z[:] = data + + assert np.array_equal(data, z[:]) + + # you can overwrite existing chunks but zipfile will issue a warning + with pytest.warns(UserWarning, match="Duplicate name: 'foo/c/0/0'"): + z[0, 0] = 100 + + # TODO: assigning an entire chunk to fill value ends up deleting the chunk, which is not supported + # a workaround will be needed here. + with pytest.raises(NotImplementedError): + z[0:10, 0:10] = 99 + + bar = root.create_group("bar", attributes={"hello": "world"}) + assert "hello" in dict(bar.attrs) + + # keys cannot be deleted + with pytest.raises(NotImplementedError): + del root["bar"] + + store.close()