From 3442f3fc6966b6ecc66514eff7ea10adb5012f75 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 12 Apr 2026 14:32:08 +0300 Subject: [PATCH 1/5] test(llama-index): add upsert_relations() idempotency test Verifies that calling upsert_relations() twice with the same Relation produces exactly one edge (MERGE semantics, not CREATE). Closes #21 --- tests/integration/adapters/test_llama_index.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/integration/adapters/test_llama_index.py b/tests/integration/adapters/test_llama_index.py index 971a2f2..759d48f 100644 --- a/tests/integration/adapters/test_llama_index.py +++ b/tests/integration/adapters/test_llama_index.py @@ -72,6 +72,23 @@ def test_upsert_nodes_idempotent(store, tag): assert len(found) == 1 +def test_upsert_relations_idempotent(store, tag): + """Upserting the same relation twice must produce exactly one edge (MERGE idempotent).""" + src = EntityNode(label="LIIdempRel", name=f"IdempSrc-{tag}") + dst = EntityNode(label="LIIdempRel", name=f"IdempDst-{tag}") + store.upsert_nodes([src, dst]) + + rel = Relation(label="LI_IDEMP_REL", source_id=src.id, target_id=dst.id) + store.upsert_relations([rel]) + store.upsert_relations([rel]) # second call must not duplicate + + rows = store._client.cypher( + "MATCH (a {id: $src})-[r:LI_IDEMP_REL]->(b {id: $dst}) RETURN count(r) AS cnt", + params={"src": src.id, "dst": dst.id}, + ) + assert rows[0]["cnt"] == 1, f"expected exactly 1 edge after double upsert, got: {rows}" + + def test_get_by_id(store, tag): node = EntityNode(label="LIGetById", name=f"ById-{tag}") node_id = node.id From 7c4d4c0b88fc934b83a6bee85ed8db04c5b69b6c Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 12 Apr 2026 14:33:49 +0300 Subject: [PATCH 2/5] feat(langchain): add similarity_search() to CoordinodeGraph Wraps CoordinodeClient.vector_search() with label/property defaults, returning [{id, node, distance}, ...] sorted by ascending distance. 
Guards against empty query_vector to match server validation behaviour. Adds two integration tests: one seeding a :LCSim node and verifying the seeded node appears in top-k results, one verifying empty-vector returns []. Closes #20 --- .../langchain_coordinode/graph.py | 33 ++++++++++++++++ tests/integration/adapters/test_langchain.py | 38 +++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/langchain-coordinode/langchain_coordinode/graph.py b/langchain-coordinode/langchain_coordinode/graph.py index 8a19ff6..1493d60 100644 --- a/langchain-coordinode/langchain_coordinode/graph.py +++ b/langchain-coordinode/langchain_coordinode/graph.py @@ -194,6 +194,39 @@ def query( # cypher() returns List[Dict[str, Any]] directly — column name → value. return self._client.cypher(query, params=params or {}) + def similarity_search( + self, + query_vector: list[float], + k: int = 10, + label: str = "Chunk", + property: str = "embedding", + ) -> list[dict[str, Any]]: + """Find nodes whose ``property`` vector is closest to ``query_vector``. + + Wraps ``CoordinodeClient.vector_search()``. The returned list contains + one dict per result with the keys ``node`` (node properties), ``id`` + (internal integer node ID), and ``distance`` (cosine distance, lower = + more similar). + + Args: + query_vector: Embedding vector to search for. + k: Maximum number of results to return. + label: Node label to search (default ``"Chunk"``). + property: Embedding property name (default ``"embedding"``). + + Returns: + List of result dicts sorted by ascending distance. 
+ """ + if not query_vector: + return [] + results = self._client.vector_search( + label=label, + property=property, + vector=query_vector, + top_k=k, + ) + return [{"id": r.node.id, "node": r.node.properties, "distance": r.distance} for r in results] + # ── Lifecycle ───────────────────────────────────────────────────────── def close(self) -> None: diff --git a/tests/integration/adapters/test_langchain.py b/tests/integration/adapters/test_langchain.py index 799e831..7dc08f1 100644 --- a/tests/integration/adapters/test_langchain.py +++ b/tests/integration/adapters/test_langchain.py @@ -133,6 +133,44 @@ def test_add_graph_documents_idempotent(graph, unique_tag): assert result[0]["cnt"] == 1 +# ── similarity_search ───────────────────────────────────────────────────────── + + +def test_similarity_search_returns_results(graph, unique_tag): + """similarity_search() returns node dicts with id, node, and distance keys. + + Seeds a :LCSim node with a known embedding, then searches for the closest + vector. The seeded node must appear in the top-k results. + """ + # Derive a unique embedding from the test tag (same technique as llama-index + # test) to avoid collisions with other :LCSim nodes in the shared DB. 
+ seed = list(bytes.fromhex(unique_tag)) + vec = [float(seed[i % len(seed)]) / 255.0 for i in range(16)] + + try: + seed_rows = graph.query( + "CREATE (n:LCSim {id: $id, embedding: $vec}) RETURN n AS nid", + params={"id": f"lcsim-{unique_tag}", "vec": vec}, + ) + seeded_internal_id = seed_rows[0]["nid"] + + results = graph.similarity_search(vec, k=5, label="LCSim", property="embedding") + + assert isinstance(results, list) + assert len(results) >= 1 + assert all("id" in r and "node" in r and "distance" in r for r in results) + assert any(r["id"] == seeded_internal_id for r in results) + assert results[0]["distance"] >= 0.0 + finally: + graph.query("MATCH (n:LCSim {id: $id}) DELETE n", params={"id": f"lcsim-{unique_tag}"}) + + +def test_similarity_search_empty_vector_returns_empty(graph): + """similarity_search() with an empty vector list returns an empty list without error.""" + results = graph.similarity_search([], k=5) + assert isinstance(results, list) + + def test_schema_refreshes_after_add(graph, unique_tag): """structured_schema is invalidated and re-fetched after add_graph_documents.""" graph._schema = None # force refresh From c9246acc1ce7ea54fae40e02a58e34a1e44b5f28 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 12 Apr 2026 14:43:02 +0300 Subject: [PATCH 3/5] fix(langchain): sort similarity_search() results by distance + tighten test - Sort vector_search results client-side by r.distance (ascending) to match the public contract promised in the docstring - Replace isinstance(results, list) with results == [] assertion to explicitly enforce the empty-vector guard contract --- langchain-coordinode/langchain_coordinode/graph.py | 13 ++++++++----- tests/integration/adapters/test_langchain.py | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/langchain-coordinode/langchain_coordinode/graph.py b/langchain-coordinode/langchain_coordinode/graph.py index 1493d60..f004369 100644 --- a/langchain-coordinode/langchain_coordinode/graph.py 
+++ b/langchain-coordinode/langchain_coordinode/graph.py @@ -219,11 +219,14 @@ def similarity_search( """ if not query_vector: return [] - results = self._client.vector_search( - label=label, - property=property, - vector=query_vector, - top_k=k, + results = sorted( + self._client.vector_search( + label=label, + property=property, + vector=query_vector, + top_k=k, + ), + key=lambda r: r.distance, ) return [{"id": r.node.id, "node": r.node.properties, "distance": r.distance} for r in results] diff --git a/tests/integration/adapters/test_langchain.py b/tests/integration/adapters/test_langchain.py index 7dc08f1..3b75a56 100644 --- a/tests/integration/adapters/test_langchain.py +++ b/tests/integration/adapters/test_langchain.py @@ -168,7 +168,7 @@ def test_similarity_search_returns_results(graph, unique_tag): def test_similarity_search_empty_vector_returns_empty(graph): """similarity_search() with an empty vector list returns an empty list without error.""" results = graph.similarity_search([], k=5) - assert isinstance(results, list) + assert results == [] def test_schema_refreshes_after_add(graph, unique_tag): From 951b48739be1f242b8638e9e9302a7a049f6ec79 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 12 Apr 2026 15:00:26 +0300 Subject: [PATCH 4/5] fix(langchain): guard empty query_vector via len() for Sequence compatibility similarity_search() now uses len(query_vector) == 0 instead of a truthiness check. Truthiness raises ValueError on numpy.ndarray (and similar array types), whereas len() works for any sized sequence, including list and tuple. 
--- langchain-coordinode/langchain_coordinode/graph.py | 5 ++++- tests/integration/adapters/test_langchain.py | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/langchain-coordinode/langchain_coordinode/graph.py b/langchain-coordinode/langchain_coordinode/graph.py index f004369..d5cb13e 100644 --- a/langchain-coordinode/langchain_coordinode/graph.py +++ b/langchain-coordinode/langchain_coordinode/graph.py @@ -217,7 +217,10 @@ def similarity_search( Returns: List of result dicts sorted by ascending distance. """ - if not query_vector: + # Use len() instead of a truthiness check: numpy.ndarray (and similar array + # types) raises ValueError("The truth value of an array is ambiguous") when + # used in a boolean context. len() == 0 works for any sized sequence. + if len(query_vector) == 0: return [] results = sorted( self._client.vector_search( diff --git a/tests/integration/adapters/test_langchain.py b/tests/integration/adapters/test_langchain.py index 3b75a56..d439acc 100644 --- a/tests/integration/adapters/test_langchain.py +++ b/tests/integration/adapters/test_langchain.py @@ -152,6 +152,10 @@ def test_similarity_search_returns_results(graph, unique_tag): "CREATE (n:LCSim {id: $id, embedding: $vec}) RETURN n AS nid", params={"id": f"lcsim-{unique_tag}", "vec": vec}, ) + # graph.query() wraps CoordinodeClient.cypher() which returns raw dict values. + # CoordiNode: CREATE ... RETURN n yields the internal integer node ID directly + # (NOT a node object). similarity_search() also returns {"id": r.node.id, ...} + # where r.node.id is the same integer. Direct equality comparison is correct. 
seeded_internal_id = seed_rows[0]["nid"] results = graph.similarity_search(vec, k=5, label="LCSim", property="embedding") From ab3559e84d6dfee411e1cac45aac22f251e71abb Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 12 Apr 2026 15:46:50 +0300 Subject: [PATCH 5/5] fix(langchain): align similarity_search() signature with Sequence protocol and issue spec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Change query_vector type annotation from list[float] to Sequence[float] (matches existing code comment that explicitly documents numpy.ndarray support and aligns with CoordinodeClient.vector_search() signature) - Change default k from 10 to 5 (matches issue #20 acceptance criteria) - Strengthen test_similarity_search_returns_results: replace non-negativity check with full ascending-order assertion on adjacent distances - Use store.structured_query(param_map=...) in test_upsert_relations_idempotent instead of store._client.cypher() — keeps integration test stable vs internal API --- langchain-coordinode/langchain_coordinode/graph.py | 5 +++-- tests/integration/adapters/test_langchain.py | 2 +- tests/integration/adapters/test_llama_index.py | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/langchain-coordinode/langchain_coordinode/graph.py b/langchain-coordinode/langchain_coordinode/graph.py index d5cb13e..cc0f2e7 100644 --- a/langchain-coordinode/langchain_coordinode/graph.py +++ b/langchain-coordinode/langchain_coordinode/graph.py @@ -5,6 +5,7 @@ import hashlib import json import re +from collections.abc import Sequence from typing import Any from langchain_community.graphs.graph_store import GraphStore @@ -196,8 +197,8 @@ def query( def similarity_search( self, - query_vector: list[float], - k: int = 10, + query_vector: Sequence[float], + k: int = 5, label: str = "Chunk", property: str = "embedding", ) -> list[dict[str, Any]]: diff --git a/tests/integration/adapters/test_langchain.py 
b/tests/integration/adapters/test_langchain.py index d439acc..1ac62a1 100644 --- a/tests/integration/adapters/test_langchain.py +++ b/tests/integration/adapters/test_langchain.py @@ -164,7 +164,7 @@ def test_similarity_search_returns_results(graph, unique_tag): assert len(results) >= 1 assert all("id" in r and "node" in r and "distance" in r for r in results) assert any(r["id"] == seeded_internal_id for r in results) - assert results[0]["distance"] >= 0.0 + assert all(results[i]["distance"] <= results[i + 1]["distance"] for i in range(len(results) - 1)) finally: graph.query("MATCH (n:LCSim {id: $id}) DELETE n", params={"id": f"lcsim-{unique_tag}"}) diff --git a/tests/integration/adapters/test_llama_index.py b/tests/integration/adapters/test_llama_index.py index 759d48f..140d106 100644 --- a/tests/integration/adapters/test_llama_index.py +++ b/tests/integration/adapters/test_llama_index.py @@ -82,9 +82,9 @@ def test_upsert_relations_idempotent(store, tag): store.upsert_relations([rel]) store.upsert_relations([rel]) # second call must not duplicate - rows = store._client.cypher( + rows = store.structured_query( "MATCH (a {id: $src})-[r:LI_IDEMP_REL]->(b {id: $dst}) RETURN count(r) AS cnt", - params={"src": src.id, "dst": dst.id}, + param_map={"src": src.id, "dst": dst.id}, ) assert rows[0]["cnt"] == 1, f"expected exactly 1 edge after double upsert, got: {rows}"