From 8153810c4918d3ed65d516c76b268397b7a4a761 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sun, 16 Dec 2018 01:29:53 -0500 Subject: [PATCH 1/2] Consolidate encode/store in _chunk_setitem_nosync Matches how these lines are written in `_set_basic_selection_zd`. --- zarr/core.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 80d1830c07..1c2013aec1 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -1716,10 +1716,8 @@ def _chunk_setitem_nosync(self, chunk_coords, chunk_selection, value, fields=Non else: chunk[chunk_selection] = value - # encode chunk + # encode and store cdata = self._encode_chunk(chunk) - - # store self.chunk_store[ckey] = cdata def _chunk_key(self, chunk_coords): From c84839e53374b39b3b6f18ae2ab3f71573e7e553 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sun, 16 Dec 2018 02:46:36 -0500 Subject: [PATCH 2/2] Clear key-value pair if chunk is just fill value Add a simple check to see if the key-value pair is just being set with a chunk equal to the fill value. If so, simply delete the key-value pair instead of storing a chunk that only contains the fill value. The Array will behave the same externally. However, this will cut down on the space required to store the Array. It will also make sure that copying one Array to another Array won't dramatically affect the storage size. 
--- zarr/core.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/zarr/core.py b/zarr/core.py index 1c2013aec1..33734aa304 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -1476,6 +1476,17 @@ def _set_basic_selection_zd(self, selection, value, fields=None): else: chunk[selection] = value + # clear chunk if it only contains the fill value + if np.all(np.equal(chunk, self._fill_value)): + try: + del self.chunk_store[ckey] + return + except KeyError: + return + except Exception: + # deleting failed, fallback to overwriting + pass + # encode and store cdata = self._encode_chunk(chunk) self.chunk_store[ckey] = cdata @@ -1716,6 +1727,17 @@ def _chunk_setitem_nosync(self, chunk_coords, chunk_selection, value, fields=Non else: chunk[chunk_selection] = value + # clear chunk if it only contains the fill value + if np.all(np.equal(chunk, self._fill_value)): + try: + del self.chunk_store[ckey] + return + except KeyError: + return + except Exception: + # deleting failed, fallback to overwriting + pass + # encode and store cdata = self._encode_chunk(chunk) self.chunk_store[ckey] = cdata