Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions langchain-coordinode/langchain_coordinode/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,63 @@ def query(
# cypher() returns List[Dict[str, Any]] directly — column name → value.
return self._client.cypher(query, params=params or {})

def keyword_search(
    self,
    query: str,
    k: int = 5,
    label: str = "Chunk",
    *,
    fuzzy: bool = False,
    language: str = "",
) -> list[dict[str, Any]]:
    """Run a full-text (BM25) search and return plain result dicts.

    Thin adapter over ``CoordinodeClient.text_search()``. Every hit is
    flattened into a dict with three keys:

    * ``id`` -- integer internal node ID (the same key that
      :meth:`similarity_search` uses),
    * ``score`` -- BM25 relevance score, higher means more relevant,
    * ``snippet`` -- HTML-highlighted excerpt, possibly an empty string.

    The searched *label* needs a full-text index. One can be created with
    the Cypher DDL statement::

        CREATE TEXT INDEX my_index ON :Chunk(text)

    or through ``CoordinodeClient.create_text_index()``.

    Args:
        query: Full-text query string. Boolean operators (``AND``, ``OR``,
            ``NOT``), phrase search (``"exact phrase"``), prefix wildcards
            (``term*``) and per-term boosting (``term^N``) are supported.
        k: Upper bound on the number of results (default 5). Forwarded to
            the client as ``limit``.
        label: Node label to search (default ``"Chunk"``).
        fuzzy: When ``True``, individual terms are matched with
            Levenshtein-1 fuzziness: better recall, lower precision.
        language: Tokenization/stemming language such as ``"english"`` or
            ``"russian"``. An empty string selects the index's default.

    Returns:
        Result dicts in the order produced by the client (descending BM25
        score). ``[]`` when the client has no callable ``text_search``
        attribute (e.g. a bare injected client without the full SDK) or
        when no text index exists for *label*.
    """
    text_search = getattr(self._client, "text_search", None)
    if not callable(text_search):
        # Injected clients (for instance a bare coordinode-embedded
        # LocalClient) may lack text_search; degrade to "no hits" instead of
        # surfacing an AttributeError to the caller.
        return []

    hits = text_search(label, query, limit=k, fuzzy=fuzzy, language=language)

    # The key is spelled "id" rather than "node_id" so the output lines up with
    # similarity_search() and callers can treat both result lists alike.
    rows: list[dict[str, Any]] = []
    for hit in hits:
        rows.append({"id": hit.node_id, "score": hit.score, "snippet": hit.snippet})
    return rows

def similarity_search(
self,
query_vector: Sequence[float],
Expand Down
152 changes: 152 additions & 0 deletions tests/unit/test_langchain_graph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
"""Unit tests for CoordinodeGraph (langchain-coordinode).

All tests use mock clients — no proto stubs or running server required.
"""

from __future__ import annotations

from typing import Any

from langchain_coordinode import CoordinodeGraph

# ── Fake client helpers ───────────────────────────────────────────────────────


class _FakeTextResult:
"""Matches coordinode.client.TextResult shape."""

def __init__(self, node_id: int, score: float, snippet: str = "") -> None:
self.node_id = node_id
self.score = score
self.snippet = snippet


class _ClientWithTextSearch:
"""Minimal fake client that implements text_search()."""

def __init__(self, results: list[_FakeTextResult]) -> None:
self._results = results
self.last_call: dict[str, Any] = {}

def cypher(self, query: str, params: dict | None = None) -> list[dict]:
return []

def text_search(
self,
label: str,
query: str,
*,
limit: int = 10,
fuzzy: bool = False,
language: str = "",
) -> list[_FakeTextResult]:
self.last_call = {
"label": label,
"query": query,
"limit": limit,
"fuzzy": fuzzy,
"language": language,
}
return self._results

def close(self) -> None:
# No-op: keeps interface parity with real CoordinodeClient.
return None


class _ClientWithoutTextSearch:
"""Fake client that does NOT implement text_search (e.g. bare LocalClient)."""

def cypher(self, query: str, params: dict | None = None) -> list[dict]:
return []

def close(self) -> None:
# No-op: keeps interface parity with real CoordinodeClient.
return None


# ── Tests: keyword_search ─────────────────────────────────────────────────────


class TestKeywordSearch:
    """Checks for CoordinodeGraph.keyword_search, driven entirely by fake clients."""

    def test_returns_list_of_dicts(self) -> None:
        """keyword_search returns list[dict] with id/score/snippet keys."""
        fake_client = _ClientWithTextSearch(
            [
                _FakeTextResult(node_id=1, score=0.95, snippet="<b>machine</b> learning"),
                _FakeTextResult(node_id=2, score=0.72, snippet=""),
            ]
        )
        graph = CoordinodeGraph(client=fake_client)

        hits = graph.keyword_search("machine learning", k=5, label="Article")

        assert len(hits) == 2
        assert hits[0] == {"id": 1, "score": 0.95, "snippet": "<b>machine</b> learning"}
        assert hits[1] == {"id": 2, "score": 0.72, "snippet": ""}

    def test_passes_params_to_client(self) -> None:
        """keyword_search forwards label, query, k, fuzzy, language to client.text_search."""
        fake_client = _ClientWithTextSearch([])
        graph = CoordinodeGraph(client=fake_client)

        graph.keyword_search("deep learning", k=3, label="Paper", fuzzy=True, language="english")

        expected_call = {
            "label": "Paper",
            "query": "deep learning",
            "limit": 3,
            "fuzzy": True,
            "language": "english",
        }
        assert fake_client.last_call == expected_call

    def test_default_label_is_chunk(self) -> None:
        """Default label is 'Chunk' (mirrors similarity_search default)."""
        fake_client = _ClientWithTextSearch([])

        CoordinodeGraph(client=fake_client).keyword_search("query")

        assert fake_client.last_call["label"] == "Chunk"

    def test_default_k_is_5(self) -> None:
        """Default k is 5."""
        fake_client = _ClientWithTextSearch([])

        CoordinodeGraph(client=fake_client).keyword_search("query")

        assert fake_client.last_call["limit"] == 5

    def test_returns_empty_for_client_without_text_search(self) -> None:
        """Returns [] gracefully when the injected client has no text_search method."""
        graph = CoordinodeGraph(client=_ClientWithoutTextSearch())

        hits = graph.keyword_search("query")

        assert hits == []

    def test_returns_empty_list_when_no_results(self) -> None:
        """Returns [] when text_search returns no results (e.g. no matching index)."""
        graph = CoordinodeGraph(client=_ClientWithTextSearch([]))

        hits = graph.keyword_search("no match", label="Ghost")

        assert hits == []

    def test_empty_snippet_preserved(self) -> None:
        """snippet key is always present even when the server returns empty string."""
        # snippet is omitted on purpose so the fake falls back to its "" default.
        lone_hit = _FakeTextResult(node_id=42, score=0.5)
        graph = CoordinodeGraph(client=_ClientWithTextSearch([lone_hit]))

        hits = graph.keyword_search("test")

        assert hits[0]["snippet"] == ""
Loading