From f98eefc6925b7254e4e96cef0e20ff34374718a4 Mon Sep 17 00:00:00 2001 From: jmoore Date: Mon, 4 Apr 2022 17:03:26 +0200 Subject: [PATCH 1/3] Activate GHA for stable 2_11 branch --- .github/workflows/minimal.yml | 4 ++-- .github/workflows/python-package.yml | 4 ++-- .github/workflows/windows-testing.yml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/minimal.yml b/.github/workflows/minimal.yml index eb6ebd5d25..07c31b32b5 100644 --- a/.github/workflows/minimal.yml +++ b/.github/workflows/minimal.yml @@ -4,9 +4,9 @@ name: Minimal installation on: push: - branches: [ master ] + branches: [ master, 2_11 ] pull_request: - branches: [ master ] + branches: [ master, 2_11 ] jobs: minimum_build: diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index e0d404b1a0..c8c7a6ffe7 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -5,9 +5,9 @@ name: Linux Testing on: push: - branches: [ master ] + branches: [ master, 2_11 ] pull_request: - branches: [ master ] + branches: [ master, 2_11 ] jobs: build: diff --git a/.github/workflows/windows-testing.yml b/.github/workflows/windows-testing.yml index af656aa88d..0a1935cb23 100644 --- a/.github/workflows/windows-testing.yml +++ b/.github/workflows/windows-testing.yml @@ -5,9 +5,9 @@ name: Python package on: push: - branches: [ master ] + branches: [ master, 2_11 ] pull_request: - branches: [ master ] + branches: [ master, 2_11 ] jobs: windows: From f24794489fefa3fe03a87b6400d69f428a87e6a2 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 4 Apr 2022 07:18:21 -0700 Subject: [PATCH 2/3] Revert change to default write_empty_chunks. 
(#1001) --- docs/release.rst | 8 ++++++++ docs/tutorial.rst | 4 ++-- zarr/core.py | 2 +- zarr/creation.py | 22 +++++++++++----------- 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index ef8a396c0f..13c2f20d2c 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -6,6 +6,14 @@ Release notes Unreleased ---------- +Bug fixes +~~~~~~~~~ + +* Changes the default value of ``write_empty_chunks`` to ``True`` to prevent + unanticipated data losses when the data types do not have a proper default + value when empty chunks are read back in. + By :user:`Vyas Ramasubramani <vyasr>`; :issue:`965`. + .. _release_2.11.1: 2.11.1 diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 906d5d9f08..53ddddb0b9 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -1309,7 +1309,7 @@ Empty chunks As of version 2.11, it is possible to configure how Zarr handles the storage of chunks that are "empty" (i.e., every element in the chunk is equal to the array's fill value). -When creating an array with ``write_empty_chunks=False`` (the default), +When creating an array with ``write_empty_chunks=False``, Zarr will check whether a chunk is empty before compression and storage. If a chunk is empty, then Zarr does not store it, and instead deletes the chunk from storage if the chunk had been previously stored. @@ -1318,7 +1318,7 @@ This optimization prevents storing redundant objects and can speed up reads, but added computation during array writes, since the contents of each chunk must be compared to the fill value, and these advantages are contingent on the content of the array. If you know that your data will form chunks that are almost always non-empty, then there is no advantage to the optimization described above. -In this case, creating an array with ``write_empty_chunks=True`` will instruct Zarr to write every chunk without checking for emptiness. 
+In this case, creating an array with ``write_empty_chunks=True`` (the default) will instruct Zarr to write every chunk without checking for emptiness. The following example illustrates the effect of the ``write_empty_chunks`` flag on the time required to write an array with different values.:: diff --git a/zarr/core.py b/zarr/core.py index e0fe4eb0e9..16f3f346fe 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -154,7 +154,7 @@ def __init__( cache_metadata=True, cache_attrs=True, partial_decompress=False, - write_empty_chunks=False, + write_empty_chunks=True, ): # N.B., expect at this point store is fully initialized with all # configuration metadata fully specified and normalized diff --git a/zarr/creation.py b/zarr/creation.py index 9d6902a6e3..0be9c56df4 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -73,11 +73,11 @@ def create(shape, chunks=True, dtype=None, compressor='default', .. versionadded:: 2.8 write_empty_chunks : bool, optional - If True, all chunks will be stored regardless of their contents. If - False (default), each chunk is compared to the array's fill value prior - to storing. If a chunk is uniformly equal to the fill value, then that - chunk is not be stored, and the store entry for that chunk's key is - deleted. This setting enables sparser storage, as only chunks with + If True (default), all chunks will be stored regardless of their + contents. If False, each chunk is compared to the array's fill value + prior to storing. If a chunk is uniformly equal to the fill value, then + that chunk is not stored, and the store entry for that chunk's key + is deleted. This setting enables sparser storage, as only chunks with non-fill-value data are stored, at the expense of overhead associated with checking the data of each chunk. .. 
versionadded:: 2.11 @@ -391,7 +391,7 @@ def open_array( chunk_store=None, storage_options=None, partial_decompress=False, - write_empty_chunks=False, + write_empty_chunks=True, **kwargs ): """Open an array using file-mode-like semantics. @@ -447,11 +447,11 @@ def open_array( is Blosc, when getting data from the array chunks will be partially read and decompressed when possible. write_empty_chunks : bool, optional - If True, all chunks will be stored regardless of their contents. If - False (default), each chunk is compared to the array's fill value prior - to storing. If a chunk is uniformly equal to the fill value, then that - chunk is not be stored, and the store entry for that chunk's key is - deleted. This setting enables sparser storage, as only chunks with + If True (default), all chunks will be stored regardless of their + contents. If False, each chunk is compared to the array's fill value + prior to storing. If a chunk is uniformly equal to the fill value, then + that chunk is not stored, and the store entry for that chunk's key + is deleted. This setting enables sparser storage, as only chunks with non-fill-value data are stored, at the expense of overhead associated with checking the data of each chunk. .. versionadded:: 2.11 From 44ebdf13475f1e47ed9332f1f3ede50efb18b5dc Mon Sep 17 00:00:00 2001 From: jmoore Date: Mon, 4 Apr 2022 17:07:05 +0200 Subject: [PATCH 3/3] Prepare 2.11.2 release --- docs/release.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/release.rst b/docs/release.rst index 13c2f20d2c..2586dd0182 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -6,6 +6,11 @@ Release notes Unreleased ---------- +.. _release_2.11.2: + +2.11.2 +------ + Bug fixes ~~~~~~~~~