From 73d53d73dd1fc200cac54b1583828f4da0a354f9 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 25 Aug 2024 13:32:24 -0500 Subject: [PATCH 1/5] Fixed MemoryStore.list_dir Ensures that nested children are listed properly. --- src/zarr/store/memory.py | 14 +++++++++++--- src/zarr/testing/store.py | 7 +++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/zarr/store/memory.py b/src/zarr/store/memory.py index 999d750755..d474f18b28 100644 --- a/src/zarr/store/memory.py +++ b/src/zarr/store/memory.py @@ -117,6 +117,14 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: for key in keys_unique: yield key else: - for key in self._store_dict: - if key.startswith(prefix + "/") and key != prefix: - yield key.removeprefix(prefix + "/").split("/")[0] + # Our dictionary doesn't contain directory markers, but we want to include + # a pseudo directory when there's a nested item and we're listing an + # intermediate level. + n = prefix.count("/") + 2 + keys_unique = { + "/".join(k.split("/", n)[:n]) + for k in self._store_dict + if k.startswith(prefix + "/") + } + for key in keys_unique: + yield key.removeprefix(prefix + "/").split("/")[0] diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index ebef4824f7..e263cb38fd 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -192,6 +192,13 @@ async def test_list_dir(self, store: S) -> None: assert [k async for k in store.list_dir("foo")] == [] await store.set("foo/zarr.json", Buffer.from_bytes(b"bar")) await store.set("foo/c/1", Buffer.from_bytes(b"\x01")) + await store.set("foo/c/d/1", Buffer.from_bytes(b"\x01")) + await store.set("foo/c/d/2", Buffer.from_bytes(b"\x01")) + await store.set("foo/c/d/3", Buffer.from_bytes(b"\x01")) + + keys_expected = ["foo"] + keys_observed = [k async for k in store.list_dir("")] + assert set(keys_observed) == set(keys_expected), keys_observed keys_expected = ["zarr.json", "c"] keys_observed = [k async for k in store.list_dir("foo")] From 90940a0e01366a7339bf4abae5caca3fdcb73e30 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 25 Aug 2024 13:39:16 -0500 Subject: [PATCH 2/5] fixup s3 --- src/zarr/store/remote.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/store/remote.py b/src/zarr/store/remote.py index f5ea694b0a..83393e4dac 100644 --- a/src/zarr/store/remote.py +++ b/src/zarr/store/remote.py @@ -202,7 +202,7 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: except FileNotFoundError: return for onefile in (a.replace(prefix + "/", "") for a in allfiles): - yield onefile + yield onefile.removeprefix(self.path).removeprefix("/") async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: for onefile in await self._fs._ls(prefix, detail=False): From 3c845d93c5fd28b18867c4a0a6e6c4ffb089fc8e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 26 Aug 2024 12:41:20 -0500 Subject: [PATCH 3/5] simplify --- src/zarr/store/memory.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/zarr/store/memory.py b/src/zarr/store/memory.py index d474f18b28..117fd69ec0 100644 --- a/src/zarr/store/memory.py +++ b/src/zarr/store/memory.py @@ -114,17 +114,15 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: if prefix == "": keys_unique = set(k.split("/")[0] for k in self._store_dict.keys()) - for key in keys_unique: - yield key else: # Our dictionary doesn't contain directory markers, but we want to include # a pseudo directory when there's a nested item and we're listing an # intermediate level. - n = prefix.count("/") + 2 keys_unique = { - "/".join(k.split("/", n)[:n]) - for k in self._store_dict - if k.startswith(prefix + "/") + key.removeprefix(prefix + "/").split("/")[0] + for key in self._store_dict + if key.startswith(prefix + "/") and key != prefix } - for key in keys_unique: - yield key.removeprefix(prefix + "/").split("/")[0] + + for key in keys_unique: + yield key From e88f3d26256bc76543c7310b6e53b7e837f368ea Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 26 Aug 2024 12:54:14 -0500 Subject: [PATCH 4/5] Update src/zarr/testing/store.py Co-authored-by: David Stansby --- src/zarr/testing/store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index e263cb38fd..f70b00d365 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -198,7 +198,7 @@ async def test_list_dir(self, store: S) -> None: keys_expected = ["foo"] keys_observed = [k async for k in store.list_dir("")] - assert set(keys_observed) == set(keys_expected), keys_observed + assert set(keys_observed) == set(keys_expected) keys_expected = ["zarr.json", "c"] keys_observed = [k async for k in store.list_dir("foo")] From 7414b1064311fd17f41a40c97fcf96b2299ac29c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 26 Aug 2024 15:01:30 -0500 Subject: [PATCH 5/5] fixup tests --- src/zarr/testing/store.py | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index f70b00d365..11978b4121 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -191,16 +191,17 @@ async def test_list_dir(self, store: S) -> None: assert out == [] assert [k async for k in store.list_dir("foo")] == [] await store.set("foo/zarr.json", Buffer.from_bytes(b"bar")) - await store.set("foo/c/1", Buffer.from_bytes(b"\x01")) - await store.set("foo/c/d/1", Buffer.from_bytes(b"\x01")) - await store.set("foo/c/d/2", Buffer.from_bytes(b"\x01")) - await store.set("foo/c/d/3", Buffer.from_bytes(b"\x01")) + await store.set("group-0/zarr.json", Buffer.from_bytes(b"\x01")) # group + await store.set("group-0/group-1/zarr.json", Buffer.from_bytes(b"\x01")) # group + await store.set("group-0/group-1/a1/zarr.json", Buffer.from_bytes(b"\x01")) + await store.set("group-0/group-1/a2/zarr.json", Buffer.from_bytes(b"\x01")) + await store.set("group-0/group-1/a3/zarr.json", Buffer.from_bytes(b"\x01")) - keys_expected = ["foo"] + keys_expected = ["foo", "group-0"] keys_observed = [k async for k in store.list_dir("")] assert set(keys_observed) == set(keys_expected) - keys_expected = ["zarr.json", "c"] + keys_expected = ["zarr.json"] keys_observed = [k async for k in store.list_dir("foo")] assert len(keys_observed) == len(keys_expected), keys_observed @@ -209,3 +210,23 @@ async def test_list_dir(self, store: S) -> None: keys_observed = [k async for k in store.list_dir("foo/")] assert len(keys_expected) == len(keys_observed), keys_observed assert set(keys_observed) == set(keys_expected), keys_observed + + keys_observed = [k async for k in store.list_dir("group-0")] + keys_expected = ["zarr.json", "group-1"] + + assert len(keys_observed) == len(keys_expected), keys_observed + assert set(keys_observed) == set(keys_expected), keys_observed + + keys_observed = [k async for k in store.list_dir("group-0/")] + assert len(keys_expected) == len(keys_observed), keys_observed + assert set(keys_observed) == set(keys_expected), keys_observed + + keys_observed = [k async for k in store.list_dir("group-0/group-1")] + keys_expected = ["zarr.json", "a1", "a2", "a3"] + + assert len(keys_observed) == len(keys_expected), keys_observed + assert set(keys_observed) == set(keys_expected), keys_observed + + keys_observed = [k async for k in store.list_dir("group-0/group-1")] + assert len(keys_expected) == len(keys_observed), keys_observed + assert set(keys_observed) == set(keys_expected), keys_observed