From 1e8e89a4f7c997b696d47f230e3bb64a9ae4eae3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 20 Jan 2026 18:26:39 +0000 Subject: [PATCH 1/3] Initial plan From 869fcc9243cdfe2e312fbf1048f818dcb22d544c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 20 Jan 2026 18:33:40 +0000 Subject: [PATCH 2/3] Update documentation and fix RedisStorage import issue Co-authored-by: ekzhu <320302+ekzhu@users.noreply.github.com> --- datasketch/aio/storage.py | 10 ++++++++-- docs/documentation.rst | 2 +- docs/lsh.rst | 11 +++++------ 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/datasketch/aio/storage.py b/datasketch/aio/storage.py index 16b5dc56..893ee69c 100644 --- a/datasketch/aio/storage.py +++ b/datasketch/aio/storage.py @@ -10,7 +10,13 @@ from abc import ABCMeta from itertools import chain -from datasketch.storage import OrderedStorage, RedisStorage, Storage, UnorderedStorage, _random_name +from datasketch.storage import OrderedStorage, Storage, UnorderedStorage, _random_name + +# Try to import RedisStorage (only available when redis is installed) +try: + from datasketch.storage import RedisStorage +except ImportError: + RedisStorage = None ABC = ABCMeta("ABC", (object,), {}) @@ -301,7 +307,7 @@ async def remove_val(self, key, val, **kwargs): await self._collection.find_one_and_delete({"key": key, "vals": val}) -if redis is not None: +if redis is not None and RedisStorage is not None: class AsyncRedisBuffer(redis.client.Pipeline): def __init__(self, connection_pool, response_callbacks, transaction, buffer_size, shard_hint=None): diff --git a/docs/documentation.rst b/docs/documentation.rst index 202b4f01..869c11e6 100644 --- a/docs/documentation.rst +++ b/docs/documentation.rst @@ -38,7 +38,7 @@ MinHash LSH Asynchronous MinHash LSH ------------------------ -.. autoclass:: datasketch.experimental.aio.lsh.AsyncMinHashLSH +.. autoclass:: datasketch.aio.AsyncMinHashLSH :members: :special-members: diff --git a/docs/lsh.rst b/docs/lsh.rst index 7685213a..4fc827a5 100644 --- a/docs/lsh.rst +++ b/docs/lsh.rst @@ -237,8 +237,7 @@ Asynchronous MinHash LSH at scale --------------------------------- .. note:: - The module supports Python version >=3.6, and is currently experimental. - So the interface may change slightly in the future. + The module supports Python version >=3.6. This module may be useful if you want to process millions of text documents in streaming/batch mode using asynchronous RESTful API (for example, aiohttp) @@ -256,7 +255,7 @@ The Asynchronous MongoDB storage option can be configured using: .. code:: python - from datasketch.experimental.aio.lsh import AsyncMinHashLSH + from datasketch.aio import AsyncMinHashLSH from datasketch import MinHash _storage = {'type': 'aiomongo', 'mongo': {'host': 'localhost', 'port': 27017, 'db': 'lsh_test'}} @@ -277,7 +276,7 @@ The Asynchronous MongoDB storage option can be configured using: .. code:: python - from datasketch.experimental.aio.lsh import AsyncMinHashLSH + from datasketch.aio import AsyncMinHashLSH from datasketch import MinHash _storage = {'type': 'aiomongo', 'mongo': {'host': 'localhost', 'port': 27017, 'db': 'lsh_test'}} @@ -329,7 +328,7 @@ To create index for a large number of MinHashes using asynchronous MinHash LSH. .. code:: python - from datasketch.experimental.aio.lsh import AsyncMinHashLSH + from datasketch.aio import AsyncMinHashLSH from datasketch import MinHash def chunk(it, size): @@ -355,7 +354,7 @@ To bulk remove keys from LSH index using asynchronous MinHash LSH. .. code:: python - from datasketch.experimental.aio.lsh import AsyncMinHashLSH + from datasketch.aio import AsyncMinHashLSH from datasketch import MinHash def chunk(it, size): From 3f60b3ab0cac96577c1d0493565d0ab1bebd1e4a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 20 Jan 2026 18:34:44 +0000 Subject: [PATCH 3/3] Add clarifying comments for optional Redis dependency handling Co-authored-by: ekzhu <320302+ekzhu@users.noreply.github.com> --- datasketch/aio/storage.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/datasketch/aio/storage.py b/datasketch/aio/storage.py index 893ee69c..3b42da72 100644 --- a/datasketch/aio/storage.py +++ b/datasketch/aio/storage.py @@ -12,7 +12,8 @@ from datasketch.storage import OrderedStorage, Storage, UnorderedStorage, _random_name -# Try to import RedisStorage (only available when redis is installed) +# RedisStorage is only available when redis package is installed (optional dependency) +# Import it conditionally to avoid ImportError when redis is not installed try: from datasketch.storage import RedisStorage except ImportError: @@ -307,6 +308,8 @@ async def remove_val(self, key, val, **kwargs): await self._collection.find_one_and_delete({"key": key, "vals": val}) +# Redis-based async storage classes are only defined when both redis package +# and RedisStorage are available (optional dependencies) if redis is not None and RedisStorage is not None: class AsyncRedisBuffer(redis.client.Pipeline):