def keyword_search(
    self,
    query: str,
    k: int = 5,
    label: str = "Chunk",
    *,
    fuzzy: bool = False,
    language: str = "",
) -> list[dict[str, Any]]:
    """Find nodes matching a full-text BM25 query.

    Wraps ``CoordinodeClient.text_search()``. The returned list contains
    one dict per result with the keys ``id`` (integer internal node ID,
    matches the key used by :meth:`similarity_search`), ``score`` (BM25
    relevance score, higher = more relevant), and ``snippet``
    (HTML-highlighted excerpt, may be empty).

    A full-text index must exist on *label* before calling this method.
    Create one via the Cypher DDL statement::

        CREATE TEXT INDEX my_index ON :Chunk(text)

    or via ``CoordinodeClient.create_text_index()``.

    Args:
        query: Full-text query string. Supports boolean operators
            (``AND``, ``OR``, ``NOT``), phrase search
            (``"exact phrase"``), prefix wildcards (``term*``),
            and per-term boosting (``term^N``).
        k: Maximum number of results to return (default 5). A value of
            zero or less yields ``[]`` without contacting the client.
        label: Node label to search (default ``"Chunk"``).
        fuzzy: If ``True``, apply Levenshtein-1 fuzzy matching to
            individual terms. Increases recall at the cost of precision.
        language: Tokenization/stemming language (e.g. ``"english"``,
            ``"russian"``). Empty string uses the index's default language.

    Returns:
        List of result dicts in the order the client returned them (the
        client is expected to rank by descending BM25 score; this method
        does not re-sort). Returns ``[]`` if *k* is not positive, if the
        client does not support ``text_search`` (e.g. bare injected
        clients without the full SDK), or if the client finds nothing
        (for instance because no text index exists for *label*).
    """
    if k <= 0:
        # "At most k results" with k <= 0 can only mean no results. Answer
        # locally instead of forwarding the value: how the backend treats a
        # zero or negative limit is not visible here (NOTE(review): 0 may be
        # read as "unset" by the wire protocol — confirm).
        return []

    # Look the method up once; injected clients (e.g. bare
    # coordinode-embedded LocalClient) may not implement text_search, in
    # which case we degrade to an empty result rather than AttributeError.
    text_search = getattr(self._client, "text_search", None)
    if not callable(text_search):
        return []

    hits = text_search(
        label,
        query,
        limit=k,
        fuzzy=fuzzy,
        language=language,
    )
    # Use "id" (not "node_id") for consistency with similarity_search() return
    # format, so callers can write generic code over both methods.
    return [
        {"id": hit.node_id, "score": hit.score, "snippet": hit.snippet}
        for hit in hits
    ]


# ── Fake client helpers (tests/unit/test_langchain_graph.py) ─────────────────


class _FakeTextResult:
    """Test double with the attributes keyword_search reads from a hit.

    Mirrors the shape of ``coordinode.client.TextResult``: ``node_id``,
    ``score`` and ``snippet`` (which defaults to the empty string).
    """

    def __init__(self, node_id: int, score: float, snippet: str = "") -> None:
        self.node_id = node_id
        self.score = score
        self.snippet = snippet


class _ClientWithTextSearch:
    """Minimal fake client that implements ``text_search()``.

    It returns the canned results given at construction and records the
    arguments of the most recent ``text_search`` call in ``last_call`` so
    tests can assert on what was forwarded.
    """

    def __init__(self, results: list[_FakeTextResult]) -> None:
        self._results = results
        # Stays empty until text_search() is called at least once.
        self.last_call: dict[str, Any] = {}

    def cypher(self, query: str, params: dict | None = None) -> list[dict]:
        """Return no rows for any Cypher query."""
        return []

    def text_search(
        self,
        label: str,
        query: str,
        *,
        limit: int = 10,
        fuzzy: bool = False,
        language: str = "",
    ) -> list[_FakeTextResult]:
        """Record the call arguments and return the canned results."""
        self.last_call = {
            "label": label,
            "query": query,
            "limit": limit,
            "fuzzy": fuzzy,
            "language": language,
        }
        return self._results

    def close(self) -> None:
        # No-op: keeps interface parity with real CoordinodeClient.
        return None
class _ClientWithoutTextSearch:
    """Fake client lacking ``text_search`` (e.g. a bare LocalClient).

    Used to exercise the graceful-degradation path of keyword_search.
    """

    def cypher(self, query: str, params: dict | None = None) -> list[dict]:
        """Return no rows for any Cypher query."""
        return []

    def close(self) -> None:
        # No-op: keeps interface parity with real CoordinodeClient.
        return None


# ── Tests: keyword_search ─────────────────────────────────────────────────────


class TestKeywordSearch:
    """Checks for ``CoordinodeGraph.keyword_search`` driven by fake clients."""

    def test_returns_list_of_dicts(self) -> None:
        """Every hit is converted to a dict with id/score/snippet keys."""
        fake = _ClientWithTextSearch(
            [
                _FakeTextResult(node_id=1, score=0.95, snippet="machine learning"),
                _FakeTextResult(node_id=2, score=0.72, snippet=""),
            ]
        )

        rows = CoordinodeGraph(client=fake).keyword_search(
            "machine learning", k=5, label="Article"
        )

        assert len(rows) == 2
        first, second = rows
        assert first == {"id": 1, "score": 0.95, "snippet": "machine learning"}
        assert second == {"id": 2, "score": 0.72, "snippet": ""}

    def test_passes_params_to_client(self) -> None:
        """label, query, k, fuzzy and language all reach client.text_search."""
        fake = _ClientWithTextSearch([])
        expected_call = {
            "label": "Paper",
            "query": "deep learning",
            "limit": 3,
            "fuzzy": True,
            "language": "english",
        }

        CoordinodeGraph(client=fake).keyword_search(
            "deep learning",
            k=3,
            label="Paper",
            fuzzy=True,
            language="english",
        )

        assert fake.last_call == expected_call

    def test_default_label_is_chunk(self) -> None:
        """Omitting label searches 'Chunk' (mirrors similarity_search default)."""
        fake = _ClientWithTextSearch([])

        CoordinodeGraph(client=fake).keyword_search("query")

        assert fake.last_call["label"] == "Chunk"

    def test_default_k_is_5(self) -> None:
        """Omitting k forwards a limit of 5."""
        fake = _ClientWithTextSearch([])

        CoordinodeGraph(client=fake).keyword_search("query")

        assert fake.last_call["limit"] == 5

    def test_returns_empty_for_client_without_text_search(self) -> None:
        """A client with no text_search method yields [] instead of raising."""
        graph = CoordinodeGraph(client=_ClientWithoutTextSearch())

        assert graph.keyword_search("query") == []

    def test_returns_empty_list_when_no_results(self) -> None:
        """An empty result set from text_search (e.g. no matching index) gives []."""
        graph = CoordinodeGraph(client=_ClientWithTextSearch([]))

        assert graph.keyword_search("no match", label="Ghost") == []

    def test_empty_snippet_preserved(self) -> None:
        """The snippet key is present even when the hit carries an empty string."""
        # snippet is left at its "" default on purpose.
        fake = _ClientWithTextSearch([_FakeTextResult(node_id=42, score=0.5)])

        rows = CoordinodeGraph(client=fake).keyword_search("test")

        assert rows[0]["snippet"] == ""