From a43ef03eb771e96e403f33026bb5f18c6895f907 Mon Sep 17 00:00:00 2001 From: Alex Date: Sun, 8 Sep 2024 18:08:42 -0500 Subject: [PATCH 1/8] Default zarr.open to open_group if shape is not provided --- src/zarr/api/asynchronous.py | 4 +++- tests/v3/test_api.py | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 7f59517f39..02b6a72c76 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -224,7 +224,9 @@ async def open( if path is not None: store_path = store_path / path - + + if "shape" not in kwargs: + return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) try: return await open_array(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) except KeyError: diff --git a/tests/v3/test_api.py b/tests/v3/test_api.py index 239dd1c3e2..e5795b46be 100644 --- a/tests/v3/test_api.py +++ b/tests/v3/test_api.py @@ -117,6 +117,10 @@ def test_open_with_mode_r_plus(tmp_path: pathlib.Path) -> None: def test_open_with_mode_a(tmp_path: pathlib.Path) -> None: + # Open without shape argument should default to group + g = zarr.open(store=tmp_path, mode="a") + assert isinstance(g, Group) + # 'a' means read/write (create if doesn't exist) arr = zarr.open(store=tmp_path, mode="a", shape=(3, 3)) assert isinstance(arr, Array) From a4d8113af1c9847aa473068cf69cedbd69857771 Mon Sep 17 00:00:00 2001 From: Alex Date: Sun, 8 Sep 2024 18:57:35 -0500 Subject: [PATCH 2/8] linting --- src/zarr/api/asynchronous.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 02b6a72c76..0fe631edbe 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -224,7 +224,7 @@ async def open( if path is not None: store_path = store_path / path - + if "shape" not in kwargs: return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) try: From 8af66e738aff5bc903ff054a79473e198cd3a493 Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 10 Sep 2024 14:02:03 -0500 Subject: [PATCH 3/8] Address failing tests --- src/zarr/api/asynchronous.py | 7 +++++-- tests/v3/test_api.py | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 0fe631edbe..27c92ae1c1 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -225,8 +225,11 @@ async def open( if path is not None: store_path = store_path / path - if "shape" not in kwargs: - return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) + if "shape" not in kwargs and mode in {"a", "w", "w-"}: + try: + return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) + except AssertionError: + return await open_array(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) try: return await open_array(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) except KeyError: diff --git a/tests/v3/test_api.py b/tests/v3/test_api.py index e5795b46be..06ca946af9 100644 --- a/tests/v3/test_api.py +++ b/tests/v3/test_api.py @@ -116,10 +116,11 @@ def test_open_with_mode_r_plus(tmp_path: pathlib.Path) -> None: z2[:] = 3 -def test_open_with_mode_a(tmp_path: pathlib.Path) -> None: +async def test_open_with_mode_a(tmp_path: pathlib.Path) -> None: # Open without shape argument should default to group g = zarr.open(store=tmp_path, mode="a") assert isinstance(g, Group) + await g.store_path.delete() # 'a' means read/write (create if doesn't exist) arr = zarr.open(store=tmp_path, mode="a", shape=(3, 3)) From 2f7fed1268247f609f1dc9e0cd0242d2124f3bcc Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 13 Sep 2024 15:09:14 -0500 Subject: [PATCH 4/8] Add check if store_path contains array to open() --- src/zarr/api/asynchronous.py | 14 +++++- src/zarr/core/array.py | 94 +++++++++++++++++++----------------- 2 files changed, 63 insertions(+), 45 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 27c92ae1c1..0ed353894f 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -7,7 +7,7 @@ import numpy as np import numpy.typing as npt -from zarr.core.array import Array, AsyncArray +from zarr.core.array import Array, AsyncArray, get_array_metadata from zarr.core.common import JSON, AccessModeLiteral, ChunkCoords, MemoryOrder, ZarrFormat from zarr.core.group import AsyncGroup from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata @@ -227,8 +227,18 @@ async def open( if "shape" not in kwargs and mode in {"a", "w", "w-"}: try: + metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format) + zarr_format = metadata_dict["zarr_format"] + if zarr_format == 3: + is_array = metadata_dict.get("node_type") == "array" + else: + # for v2, the above would already have raised an exception if not an array + is_array = True + except (AssertionError, FileNotFoundError): + is_array = False + if not is_array: return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) - except AssertionError: + else: return await open_array(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) try: return await open_array(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index fbe0b19f6b..90011e3348 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -91,6 +91,53 @@ def create_codec_pipeline(metadata: ArrayV2Metadata | ArrayV3Metadata) -> CodecP raise TypeError +async def get_array_metadata( + store_path: StorePath, zarr_format: ZarrFormat | None = 3 +) -> dict[str, Any]: + if zarr_format == 2: + zarray_bytes, zattrs_bytes = await gather( + (store_path / ZARRAY_JSON).get(), (store_path / ZATTRS_JSON).get() + ) + if zarray_bytes is None: + raise FileNotFoundError(store_path) + elif zarr_format == 3: + zarr_json_bytes = await (store_path / ZARR_JSON).get() + if zarr_json_bytes is None: + raise FileNotFoundError(store_path) + elif zarr_format is None: + zarr_json_bytes, zarray_bytes, zattrs_bytes = await gather( + (store_path / ZARR_JSON).get(), + (store_path / ZARRAY_JSON).get(), + (store_path / ZATTRS_JSON).get(), + ) + if zarr_json_bytes is not None and zarray_bytes is not None: + # TODO: revisit this exception type + # alternatively, we could warn and favor v3 + raise ValueError("Both zarr.json and .zarray objects exist") + if zarr_json_bytes is None and zarray_bytes is None: + raise FileNotFoundError(store_path) + # set zarr_format based on which keys were found + if zarr_json_bytes is not None: + zarr_format = 3 + else: + zarr_format = 2 + else: + raise ValueError(f"unexpected zarr_format: {zarr_format}") + + metadata_dict: dict[str, Any] + if zarr_format == 2: + # V2 arrays are comprised of a .zarray and .zattrs objects + assert zarray_bytes is not None + metadata_dict = json.loads(zarray_bytes.to_bytes()) + zattrs_dict = json.loads(zattrs_bytes.to_bytes()) if zattrs_bytes is not None else {} + metadata_dict["attributes"] = zattrs_dict + else: + # V3 arrays are comprised of a zarr.json object + assert zarr_json_bytes is not None + metadata_dict = json.loads(zarr_json_bytes.to_bytes()) + return metadata_dict + + @dataclass(frozen=True) class AsyncArray: metadata: ArrayMetadata @@ -342,51 +389,12 @@ async def open( zarr_format: ZarrFormat | None = 3, ) -> AsyncArray: store_path = await make_store_path(store) - + metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format) + zarr_format = metadata_dict["zarr_format"] if zarr_format == 2: - zarray_bytes, zattrs_bytes = await gather( - (store_path / ZARRAY_JSON).get(), (store_path / ZATTRS_JSON).get() - ) - if zarray_bytes is None: - raise FileNotFoundError(store_path) - elif zarr_format == 3: - zarr_json_bytes = await (store_path / ZARR_JSON).get() - if zarr_json_bytes is None: - raise FileNotFoundError(store_path) - elif zarr_format is None: - zarr_json_bytes, zarray_bytes, zattrs_bytes = await gather( - (store_path / ZARR_JSON).get(), - (store_path / ZARRAY_JSON).get(), - (store_path / ZATTRS_JSON).get(), - ) - if zarr_json_bytes is not None and zarray_bytes is not None: - # TODO: revisit this exception type - # alternatively, we could warn and favor v3 - raise ValueError("Both zarr.json and .zarray objects exist") - if zarr_json_bytes is None and zarray_bytes is None: - raise FileNotFoundError(store_path) - # set zarr_format based on which keys were found - if zarr_json_bytes is not None: - zarr_format = 3 - else: - zarr_format = 2 + return cls(store_path=store_path, metadata=ArrayV2Metadata.from_dict(metadata_dict)) else: - raise ValueError(f"unexpected zarr_format: {zarr_format}") - - if zarr_format == 2: - # V2 arrays are comprised of a .zarray and .zattrs objects - assert zarray_bytes is not None - zarray_dict = json.loads(zarray_bytes.to_bytes()) - zattrs_dict = json.loads(zattrs_bytes.to_bytes()) if zattrs_bytes is not None else {} - zarray_dict["attributes"] = zattrs_dict - return cls(store_path=store_path, metadata=ArrayV2Metadata.from_dict(zarray_dict)) - else: - # V3 arrays are comprised of a zarr.json object - assert zarr_json_bytes is not None - return cls( - store_path=store_path, - metadata=ArrayV3Metadata.from_dict(json.loads(zarr_json_bytes.to_bytes())), - ) + return cls(store_path=store_path, metadata=ArrayV3Metadata.from_dict(metadata_dict)) @property def ndim(self) -> int: From 82b1742408d67efb38af93d15c2dc3d91a66f5fa Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 13 Sep 2024 19:49:15 -0500 Subject: [PATCH 5/8] Allow AsyncArray constructor to accept dictionary metadata --- src/zarr/core/array.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 90011e3348..6bf13425a1 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -147,10 +147,16 @@ class AsyncArray: def __init__( self, - metadata: ArrayMetadata, + metadata: ArrayMetadata | dict[str, Any], store_path: StorePath, order: Literal["C", "F"] | None = None, ): + if not isinstance(metadata, ArrayMetadata): + zarr_format = metadata["zarr_format"] + if zarr_format == 2: + metadata = ArrayV2Metadata.from_dict(metadata) + else: + metadata = ArrayV3Metadata.from_dict(metadata) metadata_parsed = parse_array_metadata(metadata) order_parsed = parse_indexing_order(order or config.get("array.order")) @@ -390,11 +396,7 @@ async def open( ) -> AsyncArray: store_path = await make_store_path(store) metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format) - zarr_format = metadata_dict["zarr_format"] - if zarr_format == 2: - return cls(store_path=store_path, metadata=ArrayV2Metadata.from_dict(metadata_dict)) - else: - return cls(store_path=store_path, metadata=ArrayV3Metadata.from_dict(metadata_dict)) + return cls(store_path=store_path, metadata=metadata_dict) @property def ndim(self) -> int: From 2ebc465c5688b7cb11145c95df1783833d5afe92 Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 13 Sep 2024 19:50:26 -0500 Subject: [PATCH 6/8] Explicitly construct array from metadata in open() --- src/zarr/api/asynchronous.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 0ed353894f..dab682ccf1 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -228,18 +228,15 @@ async def open( if "shape" not in kwargs and mode in {"a", "w", "w-"}: try: metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format) + # for v2, the above would already have raised an exception if not an array zarr_format = metadata_dict["zarr_format"] - if zarr_format == 3: - is_array = metadata_dict.get("node_type") == "array" - else: - # for v2, the above would already have raised an exception if not an array - is_array = True + is_v3_array = zarr_format == 3 and metadata_dict.get("node_type") == "array" + if is_v3_array or zarr_format == 2: + return AsyncArray(store_path=store_path, metadata=metadata_dict) except (AssertionError, FileNotFoundError): - is_array = False - if not is_array: - return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) - else: - return await open_array(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) + pass + return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) + try: return await open_array(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) except KeyError: From eef5f60771a568256176670ee5ab2b84c8d9342c Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 13 Sep 2024 20:04:40 -0500 Subject: [PATCH 7/8] Check if metadata input is dict rather than ArrayMetadata --- src/zarr/core/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index ccdf7cd306..08716ff210 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -156,7 +156,7 @@ def __init__( store_path: StorePath, order: Literal["C", "F"] | None = None, ): - if not isinstance(metadata, ArrayMetadata): + if isinstance(metadata, dict): zarr_format = metadata["zarr_format"] if zarr_format == 2: metadata = ArrayV2Metadata.from_dict(metadata) From 2ba10f276194dfb7c7d420eaaf79d69d1f7db4b4 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Fri, 27 Sep 2024 10:04:31 -0600 Subject: [PATCH 8/8] fixup --- src/zarr/core/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 4d42c8cc30..cc52dd3ac6 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -165,7 +165,7 @@ def __init__( metadata: ArrayMetadata | dict[str, Any], store_path: StorePath, order: Literal["C", "F"] | None = None, - ): -> None: + ) -> None: if isinstance(metadata, dict): zarr_format = metadata["zarr_format"] if zarr_format == 2: