From c6b6d55947b0f4841a54518c9c39dc1e778efcad Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 2 Jun 2024 15:46:26 +0200 Subject: [PATCH 1/8] use compressor, filters, post_compressor for Array v3 create --- src/zarr/array.py | 56 ++++----- src/zarr/group.py | 14 +-- src/zarr/metadata.py | 6 +- tests/v3/test_codecs.py | 245 ++++++++++++++++++---------------------- 4 files changed, 143 insertions(+), 178 deletions(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index 278e58d2d6..1c20fc2016 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -17,7 +17,7 @@ import numpy as np import numpy.typing as npt -from zarr.abc.codec import Codec +from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec from zarr.abc.store import set_or_delete from zarr.attributes import Attributes from zarr.buffer import Factory, NDArrayLike, NDBuffer @@ -90,14 +90,14 @@ async def create( | tuple[Literal["v2"], Literal[".", "/"]] | None ) = None, - codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: Iterable[str] | None = None, # v2 only chunks: ChunkCoords | None = None, dimension_separator: Literal[".", "/"] | None = None, order: Literal["C", "F"] | None = None, - filters: list[dict[str, JSON]] | None = None, - compressor: dict[str, JSON] | None = None, + compressor: dict[str, JSON] | ArrayBytesCodec | None = None, + filters: Iterable[dict[str, JSON] | Codec] = [], + post_compressors: Iterable[dict[str, JSON] | Codec] = [], # runtime exists_ok: bool = False, ) -> AsyncArray: @@ -119,14 +119,6 @@ async def create( raise ValueError( "order cannot be used for arrays with version 3. Use a transpose codec instead." ) - if filters is not None: - raise ValueError( - "filters cannot be used for arrays with version 3. Use array-to-array codecs instead." - ) - if compressor is not None: - raise ValueError( - "compressor cannot be used for arrays with version 3. Use bytes-to-bytes codecs instead." - ) return await cls._create_v3( store_path, shape=shape, @@ -134,16 +126,14 @@ async def create( chunk_shape=chunk_shape, fill_value=fill_value, chunk_key_encoding=chunk_key_encoding, - codecs=codecs, + filters=filters, + compressor=compressor, + post_compressors=post_compressors, dimension_names=dimension_names, attributes=attributes, exists_ok=exists_ok, ) elif zarr_format == 2: - if codecs is not None: - raise ValueError( - "codecs cannot be used for arrays with version 2. Use filters and compressor instead." - ) if chunk_key_encoding is not None: raise ValueError( "chunk_key_encoding cannot be used for arrays with version 2. Use dimension_separator instead." @@ -181,7 +171,9 @@ async def _create_v3( | tuple[Literal["v2"], Literal[".", "/"]] | None ) = None, - codecs: Iterable[Codec | dict[str, JSON]] | None = None, + compressor: dict[str, JSON] | ArrayBytesCodec | None = None, + filters: Iterable[dict[str, JSON] | Codec] = (), + post_compressors: Iterable[dict[str, JSON] | Codec] = (), dimension_names: Iterable[str] | None = None, attributes: dict[str, JSON] | None = None, exists_ok: bool = False, @@ -189,7 +181,13 @@ async def _create_v3( if not exists_ok: assert not await (store_path / ZARR_JSON).exists() - codecs = list(codecs) if codecs is not None else [BytesCodec()] + codecs: tuple[dict[str, JSON] | Codec, ...] + _compressor: dict[str, JSON] | ArrayBytesCodec + if compressor is None: + _compressor = BytesCodec() + else: + _compressor = compressor + codecs = (*filters, _compressor, *post_compressors) if fill_value is None: if dtype == np.dtype("bool"): @@ -235,8 +233,8 @@ async def _create_v2( dimension_separator: Literal[".", "/"] | None = None, fill_value: None | int | float = None, order: Literal["C", "F"] | None = None, - filters: list[dict[str, JSON]] | None = None, - compressor: dict[str, JSON] | None = None, + filters: Iterable[dict[str, JSON] | Codec] = [], + compressor: dict[str, JSON] | Codec | None = None, attributes: dict[str, JSON] | None = None, exists_ok: bool = False, ) -> AsyncArray: @@ -261,11 +259,7 @@ async def _create_v2( compressor=( numcodecs.get_codec(compressor).get_config() if compressor is not None else None ), - filters=( - [numcodecs.get_codec(filter).get_config() for filter in filters] - if filters is not None - else None - ), + filters=tuple(numcodecs.get_codec(filter).get_config() for filter in filters), attributes=attributes, ) array = cls(metadata=metadata, store_path=store_path) @@ -500,14 +494,14 @@ def create( | tuple[Literal["v2"], Literal[".", "/"]] | None ) = None, - codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: Iterable[str] | None = None, # v2 only chunks: ChunkCoords | None = None, dimension_separator: Literal[".", "/"] | None = None, order: Literal["C", "F"] | None = None, - filters: list[dict[str, JSON]] | None = None, - compressor: dict[str, JSON] | None = None, + compressor: dict[str, JSON] | ArrayBytesCodec | None = None, + filters: Iterable[dict[str, JSON] | ArrayArrayCodec] = [], + post_compressors: Iterable[dict[str, JSON] | BytesBytesCodec] = [], # runtime exists_ok: bool = False, ) -> Array: @@ -521,13 +515,13 @@ def create( fill_value=fill_value, chunk_shape=chunk_shape, chunk_key_encoding=chunk_key_encoding, - codecs=codecs, dimension_names=dimension_names, chunks=chunks, dimension_separator=dimension_separator, order=order, - filters=filters, compressor=compressor, + filters=filters, + post_compressors=post_compressors, exists_ok=exists_ok, ), ) diff --git a/src/zarr/group.py b/src/zarr/group.py index 88e7fd0922..84c0c87b39 100644 --- a/src/zarr/group.py +++ b/src/zarr/group.py @@ -9,7 +9,7 @@ import numpy.typing as npt -from zarr.abc.codec import Codec +from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec from zarr.abc.metadata import Metadata from zarr.abc.store import set_or_delete from zarr.array import Array, AsyncArray @@ -306,14 +306,14 @@ async def create_array( | tuple[Literal["v2"], Literal[".", "/"]] | None ) = None, - codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: Iterable[str] | None = None, # v2 only chunks: ChunkCoords | None = None, dimension_separator: Literal[".", "/"] | None = None, order: Literal["C", "F"] | None = None, - filters: list[dict[str, JSON]] | None = None, - compressor: dict[str, JSON] | None = None, + compressor: dict[str, JSON] | ArrayBytesCodec | None = None, + filters: Iterable[dict[str, JSON] | ArrayArrayCodec] = (), + post_compressors: Iterable[dict[str, JSON] | BytesBytesCodec] = (), # runtime exists_ok: bool = False, ) -> AsyncArray: @@ -324,14 +324,14 @@ async def create_array( chunk_shape=chunk_shape, fill_value=fill_value, chunk_key_encoding=chunk_key_encoding, - codecs=codecs, + compressor=compressor, + filters=filters, + post_compressors=post_compressors, dimension_names=dimension_names, attributes=attributes, chunks=chunks, dimension_separator=dimension_separator, order=order, - filters=filters, - compressor=compressor, exists_ok=exists_ok, zarr_format=self.metadata.zarr_format, ) diff --git a/src/zarr/metadata.py b/src/zarr/metadata.py index 39a1d53199..2fc0ef7b69 100644 --- a/src/zarr/metadata.py +++ b/src/zarr/metadata.py @@ -323,7 +323,7 @@ def __init__( order: Literal["C", "F"], dimension_separator: Literal[".", "/"] = ".", compressor: dict[str, JSON] | None = None, - filters: list[dict[str, JSON]] | None = None, + filters: Iterable[dict[str, JSON]] | None = None, attributes: dict[str, JSON] | None = None, ): """ @@ -471,8 +471,8 @@ def parse_node_type_array(data: Literal["array"]) -> Literal["array"]: # todo: real validation -def parse_filters(data: list[dict[str, JSON]] | None) -> list[dict[str, JSON]] | None: - return data +def parse_filters(data: Any) -> tuple[dict[str, JSON]] | None: + return cast(tuple[dict[str, JSON]], data) # todo: real validation diff --git a/tests/v3/test_codecs.py b/tests/v3/test_codecs.py index 251570f767..4acbe658ef 100644 --- a/tests/v3/test_codecs.py +++ b/tests/v3/test_codecs.py @@ -75,17 +75,15 @@ def test_sharding( chunk_shape=(64, 64, 64), dtype=sample_data.dtype, fill_value=0, - codecs=[ - ShardingCodec( - chunk_shape=(32, 32, 32), - codecs=[ - TransposeCodec(order=order_from_dim("F", sample_data.ndim)), - BytesCodec(), - BloscCodec(cname="lz4"), - ], - index_location=index_location, - ) - ], + compressor=ShardingCodec( + chunk_shape=(32, 32, 32), + codecs=[ + TransposeCodec(order=order_from_dim("F", sample_data.ndim)), + BytesCodec(), + BloscCodec(cname="lz4"), + ], + index_location=index_location, + ), ) a[:, :, :] = sample_data @@ -105,17 +103,15 @@ def test_sharding_partial( chunk_shape=(64, 64, 64), dtype=sample_data.dtype, fill_value=0, - codecs=[ - ShardingCodec( - chunk_shape=(32, 32, 32), - codecs=[ - TransposeCodec(order=order_from_dim("F", sample_data.ndim)), - BytesCodec(), - BloscCodec(cname="lz4"), - ], - index_location=index_location, - ) - ], + compressor=ShardingCodec( + chunk_shape=(32, 32, 32), + codecs=[ + TransposeCodec(order=order_from_dim("F", sample_data.ndim)), + BytesCodec(), + BloscCodec(cname="lz4"), + ], + index_location=index_location, + ), ) a[10:, 10:, 10:] = sample_data @@ -138,17 +134,15 @@ def test_sharding_partial_read( chunk_shape=(64, 64, 64), dtype=sample_data.dtype, fill_value=1, - codecs=[ - ShardingCodec( - chunk_shape=(32, 32, 32), - codecs=[ - TransposeCodec(order=order_from_dim("F", sample_data.ndim)), - BytesCodec(), - BloscCodec(cname="lz4"), - ], - index_location=index_location, - ) - ], + compressor=ShardingCodec( + chunk_shape=(32, 32, 32), + codecs=[ + TransposeCodec(order=order_from_dim("F", sample_data.ndim)), + BytesCodec(), + BloscCodec(cname="lz4"), + ], + index_location=index_location, + ), ) read_data = a[0:10, 0:10, 0:10] @@ -167,17 +161,15 @@ def test_sharding_partial_overwrite( chunk_shape=(64, 64, 64), dtype=data.dtype, fill_value=1, - codecs=[ - ShardingCodec( - chunk_shape=(32, 32, 32), - codecs=[ - TransposeCodec(order=order_from_dim("F", data.ndim)), - BytesCodec(), - BloscCodec(cname="lz4"), - ], - index_location=index_location, - ) - ], + compressor=ShardingCodec( + chunk_shape=(32, 32, 32), + codecs=[ + TransposeCodec(order=order_from_dim("F", data.ndim)), + BytesCodec(), + BloscCodec(cname="lz4"), + ], + index_location=index_location, + ), ) a[:10, :10, :10] = data @@ -211,15 +203,11 @@ def test_nested_sharding( chunk_shape=(64, 64, 64), dtype=sample_data.dtype, fill_value=0, - codecs=[ - ShardingCodec( - chunk_shape=(32, 32, 32), - codecs=[ - ShardingCodec(chunk_shape=(16, 16, 16), index_location=inner_index_location) - ], - index_location=outer_index_location, - ) - ], + compressor=ShardingCodec( + chunk_shape=(32, 32, 32), + codecs=[ShardingCodec(chunk_shape=(16, 16, 16), index_location=inner_index_location)], + index_location=outer_index_location, + ), ) a[:, :, :] = sample_data @@ -243,17 +231,16 @@ async def test_order( with_sharding: bool, ): data = np.arange(0, 256, dtype="uint16").reshape((32, 8), order=input_order) - - codecs_: list[Codec] = ( - [ - ShardingCodec( - chunk_shape=(16, 8), - codecs=[TransposeCodec(order=order_from_dim(store_order, data.ndim)), BytesCodec()], - ) - ] - if with_sharding - else [TransposeCodec(order=order_from_dim(store_order, data.ndim)), BytesCodec()] - ) + compressor: Codec + filters: tuple[Codec] = () + if with_sharding: + compressor = ShardingCodec( + chunk_shape=(16, 8), + codecs=[TransposeCodec(order=order_from_dim(store_order, data.ndim)), BytesCodec()], + ) + else: + compressor = BytesCodec() + filters = (TransposeCodec(order=order_from_dim(store_order, data.ndim)),) with config.set({"array.order": runtime_write_order}): a = await AsyncArray.create( @@ -263,7 +250,8 @@ async def test_order( dtype=data.dtype, fill_value=0, chunk_key_encoding=("v2", "."), - codecs=codecs_, + compressor=compressor, + filters=filters, ) await _AsyncArrayProxy(a)[:, :].set(data) @@ -311,7 +299,10 @@ def test_order_implicit( ): data = np.arange(0, 256, dtype="uint16").reshape((16, 16), order=input_order) - codecs_: list[Codec] | None = [ShardingCodec(chunk_shape=(8, 8))] if with_sharding else None + if with_sharding: + compressor = ShardingCodec(chunk_shape=(8, 8)) + else: + compressor = None with config.set({"array.order": runtime_write_order}): a = Array.create( @@ -320,7 +311,7 @@ def test_order_implicit( chunk_shape=(16, 16), dtype=data.dtype, fill_value=0, - codecs=codecs_, + compressor=compressor, ) a[:, :] = data @@ -353,16 +344,15 @@ async def test_transpose( ): data = np.arange(0, 256, dtype="uint16").reshape((1, 32, 8), order=input_order) - codecs_: list[Codec] = ( - [ - ShardingCodec( - chunk_shape=(1, 16, 8), - codecs=[TransposeCodec(order=(2, 1, 0)), BytesCodec()], - ) - ] - if with_sharding - else [TransposeCodec(order=(2, 1, 0)), BytesCodec()] - ) + if with_sharding: + compressor = ShardingCodec( + chunk_shape=(1, 16, 8), + codecs=[TransposeCodec(order=(2, 1, 0)), BytesCodec()], + ) + filters = () + else: + compressor = BytesCodec() + filters = (TransposeCodec(order=(2, 1, 0)),) with config.set({"array.order": runtime_write_order}): a = await AsyncArray.create( store / "transpose", @@ -371,7 +361,8 @@ async def test_transpose( dtype=data.dtype, fill_value=0, chunk_key_encoding=("v2", "."), - codecs=codecs_, + compressor=compressor, + filters=filters, ) await _AsyncArrayProxy(a)[:, :].set(data) @@ -420,7 +411,8 @@ def test_transpose_invalid( dtype=data.dtype, fill_value=0, chunk_key_encoding=("v2", "."), - codecs=[TransposeCodec(order=order), BytesCodec()], + compressor=BytesCodec(), + filters=(TransposeCodec(order=order),), ) @@ -443,16 +435,14 @@ def test_open_sharding(store: Store): chunk_shape=(16, 16), dtype="int32", fill_value=0, - codecs=[ - ShardingCodec( - chunk_shape=(8, 8), - codecs=[ - TransposeCodec(order=order_from_dim("F", 2)), - BytesCodec(), - BloscCodec(), - ], - ) - ], + compressor=ShardingCodec( + chunk_shape=(8, 8), + codecs=[ + TransposeCodec(order=order_from_dim("F", 2)), + BytesCodec(), + BloscCodec(), + ], + ), ) b = Array.open(store / "open_sharding") assert a.metadata == b.metadata @@ -586,15 +576,13 @@ def test_write_partial_sharded_chunks(store: Store): chunk_shape=(20, 20), dtype=data.dtype, fill_value=1, - codecs=[ - ShardingCodec( - chunk_shape=(10, 10), - codecs=[ - BytesCodec(), - BloscCodec(), - ], - ) - ], + compressor=ShardingCodec( + chunk_shape=(10, 10), + codecs=[ + BytesCodec(), + BloscCodec(), + ], + ), ) a[0:16, 0:16] = data assert np.array_equal(a[0:16, 0:16], data) @@ -623,7 +611,7 @@ async def test_delete_empty_shards(store: Store): chunk_shape=(8, 16), dtype="uint16", fill_value=1, - codecs=[ShardingCodec(chunk_shape=(8, 8))], + compressor=ShardingCodec(chunk_shape=(8, 8)), ) await _AsyncArrayProxy(a)[:, :].set(np.zeros((16, 16))) await _AsyncArrayProxy(a)[8:, :].set(np.ones((8, 16))) @@ -682,7 +670,8 @@ async def test_zarr_compat_F(store: Store): dtype=data.dtype, chunk_key_encoding=("v2", "."), fill_value=1, - codecs=[TransposeCodec(order=order_from_dim("F", data.ndim)), BytesCodec()], + compressor=BytesCodec(), + filters=(TransposeCodec(order=order_from_dim("F", data.ndim)),), ) z2 = zarr.v2.create( @@ -745,7 +734,8 @@ def test_gzip(store: Store): chunk_shape=(16, 16), dtype=data.dtype, fill_value=0, - codecs=[BytesCodec(), GzipCodec()], + compressor=BytesCodec(), + post_compressors=(GzipCodec(),), ) a[:, :] = data @@ -762,7 +752,8 @@ def test_zstd(store: Store, checksum: bool): chunk_shape=(16, 16), dtype=data.dtype, fill_value=0, - codecs=[BytesCodec(), ZstdCodec(level=0, checksum=checksum)], + compressor=BytesCodec(), + post_compressors=(ZstdCodec(level=0, checksum=checksum),), ) a[:, :] = data @@ -780,7 +771,7 @@ async def test_endian(store: Store, endian: Literal["big", "little"]): dtype=data.dtype, fill_value=0, chunk_key_encoding=("v2", "."), - codecs=[BytesCodec(endian=endian)], + compressor=BytesCodec(endian=endian), ) await _AsyncArrayProxy(a)[:, :].set(data) @@ -815,7 +806,7 @@ async def test_endian_write( dtype="uint16", fill_value=0, chunk_key_encoding=("v2", "."), - codecs=[BytesCodec(endian=dtype_store_endian)], + compressor=BytesCodec(endian=dtype_store_endian), ) await _AsyncArrayProxy(a)[:, :].set(data) @@ -851,10 +842,8 @@ def test_invalid_metadata(store: Store): chunk_shape=(16, 16), dtype=np.dtype("uint8"), fill_value=0, - codecs=[ - BytesCodec(endian="big"), - TransposeCodec(order=order_from_dim("F", 2)), - ], + compressor=BytesCodec(endian="big"), + post_compressors=(TransposeCodec(order=order_from_dim("F", 2)),), ) with pytest.raises(TypeError): @@ -864,22 +853,8 @@ def test_invalid_metadata(store: Store): chunk_shape=(16, 16), dtype=np.dtype("uint8"), fill_value=0, - codecs=[ - BytesCodec(), - TransposeCodec(order="F"), - ], - ) - - with pytest.raises(ValueError): - Array.create( - store / "invalid_missing_bytes_codec", - shape=(16, 16), - chunk_shape=(16, 16), - dtype=np.dtype("uint8"), - fill_value=0, - codecs=[ - TransposeCodec(order=order_from_dim("F", 2)), - ], + compressor=BytesCodec(), + post_compressors=(TransposeCodec(order="F"),), ) with pytest.raises(ValueError): @@ -889,9 +864,7 @@ def test_invalid_metadata(store: Store): chunk_shape=(16, 16), dtype=np.dtype("uint8"), fill_value=0, - codecs=[ - ShardingCodec(chunk_shape=(8,)), - ], + compressor=ShardingCodec(chunk_shape=(8,)), ) with pytest.raises(ValueError): Array.create( @@ -900,9 +873,7 @@ def test_invalid_metadata(store: Store): chunk_shape=(16, 16), dtype=np.dtype("uint8"), fill_value=0, - codecs=[ - ShardingCodec(chunk_shape=(8, 7)), - ], + compressor=ShardingCodec(chunk_shape=(8, 7)), ) with pytest.warns(UserWarning): @@ -912,10 +883,8 @@ def test_invalid_metadata(store: Store): chunk_shape=(16, 16), dtype=np.dtype("uint8"), fill_value=0, - codecs=[ - ShardingCodec(chunk_shape=(8, 8)), - GzipCodec(), - ], + compressor=ShardingCodec(chunk_shape=(8, 8)), + post_compressors=(GzipCodec(),), ) @@ -952,7 +921,8 @@ async def test_blosc_evolve(store: Store): chunk_shape=(16, 16), dtype="uint8", fill_value=0, - codecs=[BytesCodec(), BloscCodec()], + compressor=BytesCodec(), + post_compressors=(BloscCodec(),), ) zarr_json = json.loads((await (store / "blosc_evolve_u1" / "zarr.json").get()).to_bytes()) @@ -966,7 +936,8 @@ async def test_blosc_evolve(store: Store): chunk_shape=(16, 16), dtype="uint16", fill_value=0, - codecs=[BytesCodec(), BloscCodec()], + compressor=BytesCodec(), + post_compressors=(BloscCodec(),), ) zarr_json = json.loads((await (store / "blosc_evolve_u2" / "zarr.json").get()).to_bytes()) @@ -980,7 +951,7 @@ async def test_blosc_evolve(store: Store): chunk_shape=(16, 16), dtype="uint16", fill_value=0, - codecs=[ShardingCodec(chunk_shape=(16, 16), codecs=[BytesCodec(), BloscCodec()])], + compressor=ShardingCodec(chunk_shape=(16, 16), codecs=[BytesCodec(), BloscCodec()]), ) zarr_json = json.loads((await (store / "sharding_blosc_evolve" / "zarr.json").get()).to_bytes()) From 33f0df3a7c5cb0cf5752bff5e429be82928b546d Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 2 Jun 2024 15:56:48 +0200 Subject: [PATCH 2/8] narrow types of filters and post_compressors --- src/zarr/array.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index 1c20fc2016..a12f70deec 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -96,8 +96,8 @@ async def create( dimension_separator: Literal[".", "/"] | None = None, order: Literal["C", "F"] | None = None, compressor: dict[str, JSON] | ArrayBytesCodec | None = None, - filters: Iterable[dict[str, JSON] | Codec] = [], - post_compressors: Iterable[dict[str, JSON] | Codec] = [], + filters: Iterable[dict[str, JSON] | ArrayArrayCodec] = [], + post_compressors: Iterable[dict[str, JSON] | BytesBytesCodec] = [], # runtime exists_ok: bool = False, ) -> AsyncArray: @@ -172,8 +172,8 @@ async def _create_v3( | None ) = None, compressor: dict[str, JSON] | ArrayBytesCodec | None = None, - filters: Iterable[dict[str, JSON] | Codec] = (), - post_compressors: Iterable[dict[str, JSON] | Codec] = (), + filters: Iterable[dict[str, JSON] | ArrayArrayCodec] = (), + post_compressors: Iterable[dict[str, JSON] | BytesBytesCodec] = (), dimension_names: Iterable[str] | None = None, attributes: dict[str, JSON] | None = None, exists_ok: bool = False, @@ -234,7 +234,7 @@ async def _create_v2( fill_value: None | int | float = None, order: Literal["C", "F"] | None = None, filters: Iterable[dict[str, JSON] | Codec] = [], - compressor: dict[str, JSON] | Codec | None = None, + compressor: dict[str, JSON] | ArrayBytesCodec | None = None, attributes: dict[str, JSON] | None = None, exists_ok: bool = False, ) -> AsyncArray: From 0f75f1777acd8c20841f140bfaf62b9196a3ede4 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 2 Jun 2024 15:58:18 +0200 Subject: [PATCH 3/8] empty list -> empty tuple --- src/zarr/array.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index a12f70deec..aebdf75f73 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -96,8 +96,8 @@ async def create( dimension_separator: Literal[".", "/"] | None = None, order: Literal["C", "F"] | None = None, compressor: dict[str, JSON] | ArrayBytesCodec | None = None, - filters: Iterable[dict[str, JSON] | ArrayArrayCodec] = [], - post_compressors: Iterable[dict[str, JSON] | BytesBytesCodec] = [], + filters: Iterable[dict[str, JSON] | ArrayArrayCodec] = (), + post_compressors: Iterable[dict[str, JSON] | BytesBytesCodec] = (), # runtime exists_ok: bool = False, ) -> AsyncArray: @@ -233,7 +233,7 @@ async def _create_v2( dimension_separator: Literal[".", "/"] | None = None, fill_value: None | int | float = None, order: Literal["C", "F"] | None = None, - filters: Iterable[dict[str, JSON] | Codec] = [], + filters: Iterable[dict[str, JSON] | Codec] = (), compressor: dict[str, JSON] | ArrayBytesCodec | None = None, attributes: dict[str, JSON] | None = None, exists_ok: bool = False, @@ -500,8 +500,8 @@ def create( dimension_separator: Literal[".", "/"] | None = None, order: Literal["C", "F"] | None = None, compressor: dict[str, JSON] | ArrayBytesCodec | None = None, - filters: Iterable[dict[str, JSON] | ArrayArrayCodec] = [], - post_compressors: Iterable[dict[str, JSON] | BytesBytesCodec] = [], + filters: Iterable[dict[str, JSON] | ArrayArrayCodec] = (), + post_compressors: Iterable[dict[str, JSON] | BytesBytesCodec] = (), # runtime exists_ok: bool = False, ) -> Array: From 6c1c419da8787ace03b1eebd43077b878d911c80 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 2 Jun 2024 16:01:35 +0200 Subject: [PATCH 4/8] convert lists to tuples in parse_filters --- src/zarr/metadata.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/zarr/metadata.py b/src/zarr/metadata.py index 2fc0ef7b69..04675b4a08 100644 --- a/src/zarr/metadata.py +++ b/src/zarr/metadata.py @@ -472,7 +472,9 @@ def parse_node_type_array(data: Literal["array"]) -> Literal["array"]: # todo: real validation def parse_filters(data: Any) -> tuple[dict[str, JSON]] | None: - return cast(tuple[dict[str, JSON]], data) + if isinstance(data, Iterable): + result = tuple(data) + return cast(tuple[dict[str, JSON]], result) # todo: real validation From 7796194a6a0de42e0c13a5d7ae3373fb4c01492a Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 2 Jun 2024 16:11:05 +0200 Subject: [PATCH 5/8] restore deleted test, and update the test condition --- tests/v3/test_codecs.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/v3/test_codecs.py b/tests/v3/test_codecs.py index 4acbe658ef..762e074031 100644 --- a/tests/v3/test_codecs.py +++ b/tests/v3/test_codecs.py @@ -846,6 +846,16 @@ def test_invalid_metadata(store: Store): post_compressors=(TransposeCodec(order=order_from_dim("F", 2)),), ) + with pytest.raises(ValueError): + Array.create( + store / "invalid_missing_bytes_codec", + shape=(16, 16), + chunk_shape=(16, 16), + dtype=np.dtype("uint8"), + fill_value=0, + compressor=TransposeCodec(order=order_from_dim("F", 2)), + ) + with pytest.raises(TypeError): Array.create( store / "invalid_order", From 17de339c8b11da62e06d5e3dc6c28c567b118c39 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Tue, 4 Jun 2024 20:41:01 +0200 Subject: [PATCH 6/8] fix missing import and fix an additional test --- src/zarr/metadata.py | 5 ++++- tests/v3/test_codecs.py | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/zarr/metadata.py b/src/zarr/metadata.py index 6b5728d8e3..7cba3d9baf 100644 --- a/src/zarr/metadata.py +++ b/src/zarr/metadata.py @@ -5,7 +5,10 @@ from collections.abc import Iterable from dataclasses import dataclass, field, replace from enum import Enum -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, cast + +if TYPE_CHECKING: + from typing import Any import numpy as np import numpy.typing as npt diff --git a/tests/v3/test_codecs.py b/tests/v3/test_codecs.py index c38a4d5c25..a85df1d633 100644 --- a/tests/v3/test_codecs.py +++ b/tests/v3/test_codecs.py @@ -407,7 +407,8 @@ def test_transpose_non_self_inverse(store: Store, order): chunk_shape=data.shape, dtype=data.dtype, fill_value=0, - codecs=[TransposeCodec(order=order), BytesCodec()], + compressor=BytesCodec(), + filters=(TransposeCodec(order=order),), ) a[:, :] = data read_data = a[:, :] From 42e7d4c5f1ae35c3bb58ab5ae0dfb9997ef79490 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 14 Jun 2024 15:59:18 +0200 Subject: [PATCH 7/8] top-level api adjustments --- src/zarr/api/asynchronous.py | 22 +++++++++++++++++----- src/zarr/array.py | 28 ++++++++++++++-------------- src/zarr/group.py | 4 ++-- tests/v3/test_buffer.py | 11 +++-------- tests/v3/test_codecs.py | 22 +++++++++++----------- 5 files changed, 47 insertions(+), 40 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index e4de159c0a..298f874cf0 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -8,10 +8,11 @@ import numpy as np import numpy.typing as npt -from zarr.abc.codec import ArrayArrayCodec, Codec +from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec from zarr.array import Array, AsyncArray from zarr.buffer import NDArrayLike from zarr.chunk_key_encodings import ChunkKeyEncoding +from zarr.codecs.bytes import BytesCodec from zarr.common import JSON, ChunkCoords, MemoryOrder, OpenMode, ZarrFormat from zarr.group import AsyncGroup from zarr.metadata import ArrayV2Metadata, ArrayV3Metadata @@ -20,6 +21,8 @@ make_store_path, ) +default_pre_compressor = BytesCodec() + # TODO: this type could use some more thought, noqa to avoid "Variable "asynchronous.ArrayLike" is not valid as a type" ArrayLike = Union[AsyncArray | Array | npt.NDArray[Any]] # noqa PathLike = str @@ -532,7 +535,9 @@ async def create( *, # Note: this is a change from v2 chunks: ChunkCoords | None = None, # TODO: v2 allowed chunks=True dtype: npt.DTypeLike | None = None, - compressor: dict[str, JSON] | None = None, # TODO: default and type change + compressor: dict[str, JSON] | BytesBytesCodec | None = None, # TODO: default and type change + filters: Iterable[dict[str, JSON] | ArrayArrayCodec] = (), + pre_compressor: dict[str, JSON] | ArrayBytesCodec = default_pre_compressor, fill_value: Any = 0, # TODO: need type order: MemoryOrder | None = None, # TODO: default change store: str | StoreLike | None = None, @@ -540,7 +545,6 @@ async def create( overwrite: bool = False, path: PathLike | None = None, chunk_store: StoreLike | None = None, - filters: Iterable[dict[str, JSON] | ArrayArrayCodec] = (), cache_metadata: bool | None = None, cache_attrs: bool | None = None, read_only: bool | None = None, @@ -682,15 +686,23 @@ async def create( if path is not None: store_path = store_path / path + compressor_out: tuple[dict[str, JSON] | BytesBytesCodec, ...] + # normalize compressor to a tuple + if compressor is None: + compressor_out = () + else: + compressor_out = (compressor,) + return await AsyncArray.create( store_path, shape=shape, chunks=chunks, dtype=dtype, - compressor=compressor, + compressors=compressor_out, + filters=filters, + pre_compressor=pre_compressor, fill_value=fill_value, exists_ok=overwrite, # TODO: name change - filters=filters, dimension_separator=dimension_separator, zarr_format=zarr_format, chunk_shape=chunk_shape, diff --git a/src/zarr/array.py b/src/zarr/array.py index 7dd32d14d8..f63d83fc83 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -118,9 +118,9 @@ async def create( chunks: ChunkCoords | None = None, dimension_separator: Literal[".", "/"] | None = None, order: Literal["C", "F"] | None = None, - compressor: dict[str, JSON] | ArrayBytesCodec | None = None, filters: Iterable[dict[str, JSON] | ArrayArrayCodec] = (), - post_compressors: Iterable[dict[str, JSON] | BytesBytesCodec] = (), + pre_compressor: dict[str, JSON] | ArrayBytesCodec | None = None, + compressors: Iterable[dict[str, JSON] | BytesBytesCodec] = (), # runtime exists_ok: bool = False, ) -> AsyncArray: @@ -150,8 +150,8 @@ async def create( fill_value=fill_value, chunk_key_encoding=chunk_key_encoding, filters=filters, - compressor=compressor, - post_compressors=post_compressors, + pre_compressor=pre_compressor, + compressors=compressors, dimension_names=dimension_names, attributes=attributes, exists_ok=exists_ok, @@ -172,7 +172,7 @@ async def create( fill_value=fill_value, order=order, filters=filters, - compressor=compressor, + compressor=pre_compressor, attributes=attributes, exists_ok=exists_ok, ) @@ -194,9 +194,9 @@ async def _create_v3( | tuple[Literal["v2"], Literal[".", "/"]] | None ) = None, - compressor: dict[str, JSON] | ArrayBytesCodec | None = None, + pre_compressor: dict[str, JSON] | ArrayBytesCodec | None = None, filters: Iterable[dict[str, JSON] | ArrayArrayCodec] = (), - post_compressors: Iterable[dict[str, JSON] | BytesBytesCodec] = (), + compressors: Iterable[dict[str, JSON] | BytesBytesCodec] = (), dimension_names: Iterable[str] | None = None, attributes: dict[str, JSON] | None = None, exists_ok: bool = False, @@ -205,12 +205,12 @@ async def _create_v3( assert not await (store_path / ZARR_JSON).exists() codecs: tuple[dict[str, JSON] | Codec, ...] - _compressor: dict[str, JSON] | ArrayBytesCodec - if compressor is None: - _compressor = BytesCodec() + _pre_compressor: dict[str, JSON] | ArrayBytesCodec + if pre_compressor is None: + _pre_compressor = BytesCodec() else: - _compressor = compressor - codecs = (*filters, _compressor, *post_compressors) + _pre_compressor = pre_compressor + codecs = (*filters, _pre_compressor, *compressors) if fill_value is None: if dtype == np.dtype("bool"): @@ -615,9 +615,9 @@ def create( chunks=chunks, dimension_separator=dimension_separator, order=order, - compressor=compressor, + pre_compressor=compressor, filters=filters, - post_compressors=post_compressors, + compressors=post_compressors, exists_ok=exists_ok, ), ) diff --git a/src/zarr/group.py b/src/zarr/group.py index 65aa9eea28..ca7819df8b 100644 --- a/src/zarr/group.py +++ b/src/zarr/group.py @@ -345,9 +345,9 @@ async def create_array( chunk_shape=chunk_shape, fill_value=fill_value, chunk_key_encoding=chunk_key_encoding, - compressor=compressor, + pre_compressor=compressor, filters=filters, - post_compressors=post_compressors, + compressors=post_compressors, dimension_names=dimension_names, attributes=attributes, chunks=chunks, diff --git a/tests/v3/test_buffer.py b/tests/v3/test_buffer.py index e814afef15..2cc1aad82c 100644 --- a/tests/v3/test_buffer.py +++ b/tests/v3/test_buffer.py @@ -112,14 +112,9 @@ async def test_codecs_use_of_prototype(): chunk_shape=(5, 5), dtype=expect.dtype, fill_value=0, - codecs=[ - TransposeCodec(order=(1, 0)), - BytesCodec(), - BloscCodec(), - Crc32cCodec(), - GzipCodec(), - ZstdCodec(), - ], + filters=[TransposeCodec(order=(1, 0))], + pre_compressor=BytesCodec(), + compressors=(BloscCodec(), Crc32cCodec(), GzipCodec(), ZstdCodec()), ) expect[:] = np.arange(100).reshape(10, 10) diff --git a/tests/v3/test_codecs.py b/tests/v3/test_codecs.py index a85df1d633..31394eb02a 100644 --- a/tests/v3/test_codecs.py +++ b/tests/v3/test_codecs.py @@ -250,7 +250,7 @@ async def test_order( dtype=data.dtype, fill_value=0, chunk_key_encoding=("v2", "."), - compressor=compressor, + pre_compressor=compressor, filters=filters, ) @@ -361,7 +361,7 @@ async def test_transpose( dtype=data.dtype, fill_value=0, chunk_key_encoding=("v2", "."), - compressor=compressor, + pre_compressor=compressor, filters=filters, ) @@ -629,7 +629,7 @@ async def test_delete_empty_shards(store: Store): chunk_shape=(8, 16), dtype="uint16", fill_value=1, - compressor=ShardingCodec(chunk_shape=(8, 8)), + pre_compressor=ShardingCodec(chunk_shape=(8, 8)), ) await _AsyncArrayProxy(a)[:, :].set(np.zeros((16, 16))) await _AsyncArrayProxy(a)[8:, :].set(np.ones((8, 16))) @@ -688,7 +688,7 @@ async def test_zarr_compat_F(store: Store): dtype=data.dtype, chunk_key_encoding=("v2", "."), fill_value=1, - compressor=BytesCodec(), + pre_compressor=BytesCodec(), filters=(TransposeCodec(order=order_from_dim("F", data.ndim)),), ) @@ -789,7 +789,7 @@ async def test_endian(store: Store, endian: Literal["big", "little"]): dtype=data.dtype, fill_value=0, chunk_key_encoding=("v2", "."), - compressor=BytesCodec(endian=endian), + pre_compressor=BytesCodec(endian=endian), ) await _AsyncArrayProxy(a)[:, :].set(data) @@ -824,7 +824,7 @@ async def test_endian_write( dtype="uint16", fill_value=0, chunk_key_encoding=("v2", "."), - compressor=BytesCodec(endian=dtype_store_endian), + pre_compressor=BytesCodec(endian=dtype_store_endian), ) await _AsyncArrayProxy(a)[:, :].set(data) @@ -949,8 +949,8 @@ async def test_blosc_evolve(store: Store): chunk_shape=(16, 16), dtype="uint8", fill_value=0, - compressor=BytesCodec(), - post_compressors=(BloscCodec(),), + pre_compressor=BytesCodec(), + compressors=(BloscCodec(),), ) zarr_json = json.loads((await (store / "blosc_evolve_u1" / "zarr.json").get()).to_bytes()) @@ -964,8 +964,8 @@ async def test_blosc_evolve(store: Store): chunk_shape=(16, 16), dtype="uint16", fill_value=0, - compressor=BytesCodec(), - post_compressors=(BloscCodec(),), + pre_compressor=BytesCodec(), + compressors=(BloscCodec(),), ) zarr_json = json.loads((await (store / "blosc_evolve_u2" / "zarr.json").get()).to_bytes()) @@ -979,7 +979,7 @@ async def test_blosc_evolve(store: Store): chunk_shape=(16, 16), dtype="uint16", fill_value=0, - compressor=ShardingCodec(chunk_shape=(16, 16), codecs=[BytesCodec(), BloscCodec()]), + pre_compressor=ShardingCodec(chunk_shape=(16, 16), codecs=[BytesCodec(), BloscCodec()]), ) zarr_json = json.loads((await (store / "sharding_blosc_evolve" / "zarr.json").get()).to_bytes()) From e6014f1b67fb2ddabb921a8082c08a458bb3d26c Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 14 Jun 2024 16:01:37 +0200 Subject: [PATCH 8/8] style --- src/zarr/array.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/zarr/array.py b/src/zarr/array.py index f63d83fc83..a1dd3f20c9 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -206,10 +206,12 @@ async def _create_v3( codecs: tuple[dict[str, JSON] | Codec, ...] _pre_compressor: dict[str, JSON] | ArrayBytesCodec + if pre_compressor is None: _pre_compressor = BytesCodec() else: _pre_compressor = pre_compressor + codecs = (*filters, _pre_compressor, *compressors) if fill_value is None: