From 1d1d820db94bfb02637007fc5588821769710444 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Tue, 4 Dec 2018 21:51:54 -0500 Subject: [PATCH 1/8] Ensure `DictStore` gets `bytes` coercible data As the spec requires that the data in a store be a sequence of `bytes`, make sure that non-`DictStore` input meets this requirement when setting values. This effectively ensures that other `DictStore` meet this requirement as well. So we don't need to go through and check their values too. --- zarr/storage.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zarr/storage.py b/zarr/storage.py index d173acf735..db27938d19 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -544,6 +544,8 @@ def __getitem__(self, item): def __setitem__(self, item, value): with self.write_mutex: parent, key = self._require_parent(item) + if not isinstance(value, self.cls): + value = ensure_bytes(value) parent[key] = value def __delitem__(self, item): From b199f9def522a41e55265ef02fbad4d449b85570 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Tue, 4 Dec 2018 21:51:55 -0500 Subject: [PATCH 2/8] Drop undefined buffer size case from `DictStore` As everything in `DictStore` must either be another `DictStore` or `bytes`, there shouldn't be any cases where the size is undefined nor cases that this exception should need handling. Given this go ahead and drop the special casing for unknown sizes in `DictStore`. --- zarr/storage.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index db27938d19..ae455903a6 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -644,17 +644,11 @@ def getsize(self, path=None): size = 0 for v in value.values(): if not isinstance(v, self.cls): - try: - size += buffer_size(v) - except TypeError: - return -1 + size += buffer_size(v) return size else: - try: - return buffer_size(value) - except TypeError: - return -1 + return buffer_size(value) def clear(self): with self.write_mutex: From d86f2e0ede879a3b054344ba199b5b3dffef132b Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Tue, 4 Dec 2018 21:51:55 -0500 Subject: [PATCH 3/8] Drop `test_getsize_ext` While this test case does test a useful subset of the `getsize` API, the contents being added to the store here are non-conforming to our expectations of store contents. Namely the store should only contain values that are an "arbitrary sequence of bytes", which this test case is not. --- zarr/tests/test_storage.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 33c65f36c9..58c079b965 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -633,14 +633,6 @@ def test_setdel(self): store = self.create_store() setdel_hierarchy_checks(store) - def test_getsize_ext(self): - store = self.create_store() - store['a'] = list(range(10)) - store['b/c'] = list(range(10)) - assert -1 == store.getsize() - assert -1 == store.getsize('a') - assert -1 == store.getsize('b') - class TestDirectoryStore(StoreTests, unittest.TestCase): From 425e7c4afce44f03ce7d5b0bc6d13b277b719598 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Tue, 4 Dec 2018 21:51:56 -0500 Subject: [PATCH 4/8] Test `getsize` with an unknown size case This creates a non-conforming store to make sure that `getsize` handles its contents in the expected way. Namely that it returns `-1`. --- zarr/tests/test_storage.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 58c079b965..ba6164da55 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1088,6 +1088,10 @@ def test_getsize(): assert 7 == getsize(store) assert 5 == getsize(store, 'baz') + store = dict() + store['boo'] = None + assert -1 == getsize(store) + def test_migrate_1to2(): from zarr import meta_v1 From 726bdc0e049110d8757e40bda932085b6dac9aaa Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Tue, 4 Dec 2018 22:17:04 -0500 Subject: [PATCH 5/8] Note `DictStore` only contains `bytes` now [ci skip] --- docs/release.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/release.rst b/docs/release.rst index 45eb9c8a49..0c1440b9ba 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -35,6 +35,9 @@ Bug fixes * Failing tests related to pickling/unpickling have been fixed. By :user:`Ryan Williams `, :issue:`273`, :issue:`308`. +* Ensure ``DictStore`` contains only ``bytes`` to facilitate comparisons and protect against writes. + By :user:`John Kirkham `, :issue:`350` + Maintenance ~~~~~~~~~~~ From bc3fe3b0d974573b57c6f9309ba7df83dbb07f5f Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 6 Dec 2018 12:16:02 -0500 Subject: [PATCH 6/8] Check for `TypeError` for non-buffer objects Add a test to ensure that a non-buffer supporting object when stored into a valid store, will raise a `TypeError` instead of storing it. Disable this checking for generic `MappingStore`s (e.g. `dict`) as they do not perform this sort of checking on the data they accept as values. --- zarr/tests/test_storage.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index ba6164da55..0dfa760287 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -63,6 +63,12 @@ def test_get_set_del_contains(self): # noinspection PyStatementEffect del store['foo'] + def test_set_invalid_content(self): + store = self.create_store() + + with pytest.raises(TypeError): + store['baz'] = list(range(5)) + def test_clear(self): store = self.create_store() store['foo'] = b'bar' @@ -586,6 +592,10 @@ class TestMappingStore(StoreTests, unittest.TestCase): def create_store(self): return dict() + def test_set_invalid_content(self): + # Generic mappings support non-buffer types + pass + def setdel_hierarchy_checks(store): # these tests are for stores that are aware of hierarchy levels; this From 3976f963f1d1f8ddc08263552e30c2138cdf9b09 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 6 Dec 2018 12:20:15 -0500 Subject: [PATCH 7/8] Check that `DictStore` coerces all data to `bytes` Provide a simple test for `DictStore` to ensure that non-`bytes` is coerced to `bytes` before storing it and is retrieved as `bytes`. --- zarr/tests/test_storage.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 0dfa760287..0416470dd7 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -639,6 +639,11 @@ class TestDictStore(StoreTests, unittest.TestCase): def create_store(self): return DictStore() + def test_store_contains_bytes(self): + store = self.create_store() + store['foo'] = np.array([97, 98, 99, 100, 101], dtype=np.uint8) + assert store['foo'] == b'abcde' + def test_setdel(self): store = self.create_store() setdel_hierarchy_checks(store) From 845de7b0134b0ed6b332ffe6ea1418942b5017b1 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 7 Dec 2018 01:29:38 -0500 Subject: [PATCH 8/8] Disallow mutation of the internal `DictStore` Make sure that users are only able to add data to the `DictStore`. Disallow the storing of a nested `DictStore` though. --- zarr/storage.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index ae455903a6..e7d70ea7bc 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -544,8 +544,7 @@ def __getitem__(self, item): def __setitem__(self, item, value): with self.write_mutex: parent, key = self._require_parent(item) - if not isinstance(value, self.cls): - value = ensure_bytes(value) + value = ensure_bytes(value) parent[key] = value def __delitem__(self, item):