From 81690850e4ff081fb12af13ef39a91fe2df6c0f3 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Thu, 16 Apr 2026 12:56:39 +0300 Subject: [PATCH 1/2] feat(langchain): add CoordinodeGraph.keyword_search() Wraps CoordinodeClient.text_search() to expose BM25 full-text search via the LangChain GraphStore adapter. Returns list[dict] with node_id/score/snippet keys, consistent with similarity_search() output. Gracefully returns [] when the injected client has no text_search method (e.g. bare coordinode-embedded LocalClient) to maintain parity with the existing similarity_search() fallback pattern. Closes #22 --- .../langchain_coordinode/graph.py | 54 +++++++ tests/unit/test_langchain_graph.py | 150 ++++++++++++++++++ 2 files changed, 204 insertions(+) create mode 100644 tests/unit/test_langchain_graph.py diff --git a/langchain-coordinode/langchain_coordinode/graph.py b/langchain-coordinode/langchain_coordinode/graph.py index d94a63f..480c0e1 100644 --- a/langchain-coordinode/langchain_coordinode/graph.py +++ b/langchain-coordinode/langchain_coordinode/graph.py @@ -261,6 +261,60 @@ def query( # cypher() returns List[Dict[str, Any]] directly — column name → value. return self._client.cypher(query, params=params or {}) + def keyword_search( + self, + query: str, + k: int = 5, + label: str = "Chunk", + *, + fuzzy: bool = False, + language: str = "", + ) -> list[dict[str, Any]]: + """Find nodes matching a full-text BM25 query. + + Wraps ``CoordinodeClient.text_search()``. The returned list contains + one dict per result with the keys ``node_id`` (integer internal ID), + ``score`` (BM25 relevance score, higher = more relevant), and + ``snippet`` (HTML-highlighted excerpt, may be empty). + + A full-text index must exist on *label* before calling this method. + Create one via the Cypher DDL statement:: + + CREATE TEXT INDEX my_index ON :Chunk(text) + + or via ``CoordinodeClient.create_text_index()``. + + Args: + query: Full-text query string. Supports boolean operators + (``AND``, ``OR``, ``NOT``), phrase search + (``"exact phrase"``), prefix wildcards (``term*``), + and per-term boosting (``term^N``). + k: Maximum number of results to return (default 5). + label: Node label to search (default ``"Chunk"``). + fuzzy: If ``True``, apply Levenshtein-1 fuzzy matching to + individual terms. Increases recall at the cost of precision. + language: Tokenization/stemming language (e.g. ``"english"``, + ``"russian"``). Empty string uses the index's default language. + + Returns: + List of result dicts sorted by descending BM25 score. + Returns ``[]`` if the client does not support ``text_search`` + (e.g. bare injected clients without the full SDK) or if no + text index exists for *label*. + """ + if not callable(getattr(self._client, "text_search", None)): + # Injected clients (e.g. bare coordinode-embedded LocalClient) may + # not implement text_search — return empty rather than AttributeError. + return [] + results = self._client.text_search( + label, + query, + limit=k, + fuzzy=fuzzy, + language=language, + ) + return [{"node_id": r.node_id, "score": r.score, "snippet": r.snippet} for r in results] + def similarity_search( self, query_vector: Sequence[float], diff --git a/tests/unit/test_langchain_graph.py b/tests/unit/test_langchain_graph.py new file mode 100644 index 0000000..a6573d9 --- /dev/null +++ b/tests/unit/test_langchain_graph.py @@ -0,0 +1,150 @@ +"""Unit tests for CoordinodeGraph (langchain-coordinode). + +All tests use mock clients — no proto stubs or running server required. +""" + +from __future__ import annotations + +from typing import Any + +from langchain_coordinode import CoordinodeGraph + +# ── Fake client helpers ─────────────────────────────────────────────────────── + + +class _FakeTextResult: + """Matches coordinode.client.TextResult shape.""" + + def __init__(self, node_id: int, score: float, snippet: str = "") -> None: + self.node_id = node_id + self.score = score + self.snippet = snippet + + +class _ClientWithTextSearch: + """Minimal fake client that implements text_search().""" + + def __init__(self, results: list[_FakeTextResult]) -> None: + self._results = results + self.last_call: dict[str, Any] = {} + + def cypher(self, query: str, params: dict | None = None) -> list[dict]: + return [] + + def text_search( + self, + label: str, + query: str, + *, + limit: int = 10, + fuzzy: bool = False, + language: str = "", + ) -> list[_FakeTextResult]: + self.last_call = { + "label": label, + "query": query, + "limit": limit, + "fuzzy": fuzzy, + "language": language, + } + return self._results + + def close(self) -> None: + pass + + +class _ClientWithoutTextSearch: + """Fake client that does NOT implement text_search (e.g. bare LocalClient).""" + + def cypher(self, query: str, params: dict | None = None) -> list[dict]: + return [] + + def close(self) -> None: + pass + + +# ── Tests: keyword_search ───────────────────────────────────────────────────── + + +class TestKeywordSearch: + def test_returns_list_of_dicts(self) -> None: + """keyword_search returns list[dict] with node_id/score/snippet keys.""" + results = [ + _FakeTextResult(node_id=1, score=0.95, snippet="machine learning"), + _FakeTextResult(node_id=2, score=0.72, snippet=""), + ] + client = _ClientWithTextSearch(results) + graph = CoordinodeGraph(client=client) + + out = graph.keyword_search("machine learning", k=5, label="Article") + + assert len(out) == 2 + assert out[0] == {"node_id": 1, "score": 0.95, "snippet": "machine learning"} + assert out[1] == {"node_id": 2, "score": 0.72, "snippet": ""} + + def test_passes_params_to_client(self) -> None: + """keyword_search forwards label, query, k, fuzzy, language to client.text_search.""" + client = _ClientWithTextSearch([]) + graph = CoordinodeGraph(client=client) + + graph.keyword_search( + "deep learning", + k=3, + label="Paper", + fuzzy=True, + language="english", + ) + + assert client.last_call == { + "label": "Paper", + "query": "deep learning", + "limit": 3, + "fuzzy": True, + "language": "english", + } + + def test_default_label_is_chunk(self) -> None: + """Default label is 'Chunk' (mirrors similarity_search default).""" + client = _ClientWithTextSearch([]) + graph = CoordinodeGraph(client=client) + + graph.keyword_search("query") + + assert client.last_call["label"] == "Chunk" + + def test_default_k_is_5(self) -> None: + """Default k is 5.""" + client = _ClientWithTextSearch([]) + graph = CoordinodeGraph(client=client) + + graph.keyword_search("query") + + assert client.last_call["limit"] == 5 + + def test_returns_empty_for_client_without_text_search(self) -> None: + """Returns [] gracefully when the injected client has no text_search method.""" + client = _ClientWithoutTextSearch() + graph = CoordinodeGraph(client=client) + + out = graph.keyword_search("query") + + assert out == [] + + def test_returns_empty_list_when_no_results(self) -> None: + """Returns [] when text_search returns no results (e.g. no matching index).""" + client = _ClientWithTextSearch([]) + graph = CoordinodeGraph(client=client) + + out = graph.keyword_search("no match", label="Ghost") + + assert out == [] + + def test_empty_snippet_preserved(self) -> None: + """snippet key is always present even when the server returns empty string.""" + results = [_FakeTextResult(node_id=42, score=0.5)] # snippet defaults to "" + client = _ClientWithTextSearch(results) + graph = CoordinodeGraph(client=client) + + out = graph.keyword_search("test") + + assert out[0]["snippet"] == "" From 81633f9bf5ea2273ebdcf508ce44ebfc8673ca1e Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Thu, 16 Apr 2026 13:09:28 +0300 Subject: [PATCH 2/2] fix(langchain): use "id" key in keyword_search() output, matching similarity_search() Both search methods now return dicts with "id" as the node identifier key. Docstring updated to document the cross-method consistency guarantee. --- langchain-coordinode/langchain_coordinode/graph.py | 11 +++++++---- tests/unit/test_langchain_graph.py | 12 +++++++----- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/langchain-coordinode/langchain_coordinode/graph.py b/langchain-coordinode/langchain_coordinode/graph.py index 480c0e1..b97ce78 100644 --- a/langchain-coordinode/langchain_coordinode/graph.py +++ b/langchain-coordinode/langchain_coordinode/graph.py @@ -273,9 +273,10 @@ def keyword_search( """Find nodes matching a full-text BM25 query. Wraps ``CoordinodeClient.text_search()``. The returned list contains - one dict per result with the keys ``node_id`` (integer internal ID), - ``score`` (BM25 relevance score, higher = more relevant), and - ``snippet`` (HTML-highlighted excerpt, may be empty). + one dict per result with the keys ``id`` (integer internal node ID, + matches the key used by :meth:`similarity_search`), ``score`` (BM25 + relevance score, higher = more relevant), and ``snippet`` + (HTML-highlighted excerpt, may be empty). A full-text index must exist on *label* before calling this method. Create one via the Cypher DDL statement:: @@ -313,7 +314,9 @@ def keyword_search( fuzzy=fuzzy, language=language, ) - return [{"node_id": r.node_id, "score": r.score, "snippet": r.snippet} for r in results] + # Use "id" (not "node_id") for consistency with similarity_search() return + # format, so callers can write generic code over both methods. + return [{"id": r.node_id, "score": r.score, "snippet": r.snippet} for r in results] def similarity_search( self, diff --git a/tests/unit/test_langchain_graph.py b/tests/unit/test_langchain_graph.py index a6573d9..65442f6 100644 --- a/tests/unit/test_langchain_graph.py +++ b/tests/unit/test_langchain_graph.py @@ -50,7 +50,8 @@ def text_search( return self._results def close(self) -> None: - pass + # No-op: keeps interface parity with real CoordinodeClient. + return None class _ClientWithoutTextSearch: @@ -60,7 +61,8 @@ def cypher(self, query: str, params: dict | None = None) -> list[dict]: return [] def close(self) -> None: - pass + # No-op: keeps interface parity with real CoordinodeClient. + return None # ── Tests: keyword_search ───────────────────────────────────────────────────── @@ -68,7 +70,7 @@ def close(self) -> None: class TestKeywordSearch: def test_returns_list_of_dicts(self) -> None: - """keyword_search returns list[dict] with node_id/score/snippet keys.""" + """keyword_search returns list[dict] with id/score/snippet keys.""" results = [ _FakeTextResult(node_id=1, score=0.95, snippet="machine learning"), _FakeTextResult(node_id=2, score=0.72, snippet=""), @@ -79,8 +81,8 @@ def test_returns_list_of_dicts(self) -> None: out = graph.keyword_search("machine learning", k=5, label="Article") assert len(out) == 2 - assert out[0] == {"node_id": 1, "score": 0.95, "snippet": "machine learning"} - assert out[1] == {"node_id": 2, "score": 0.72, "snippet": ""} + assert out[0] == {"id": 1, "score": 0.95, "snippet": "machine learning"} + assert out[1] == {"id": 2, "score": 0.72, "snippet": ""} def test_passes_params_to_client(self) -> None: """keyword_search forwards label, query, k, fuzzy, language to client.text_search."""