From 3442f3fc6966b6ecc66514eff7ea10adb5012f75 Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sun, 12 Apr 2026 14:32:08 +0300
Subject: [PATCH 1/5] test(llama-index): add upsert_relations() idempotency
 test

Verifies that calling upsert_relations() twice with the same Relation
produces exactly one edge (MERGE semantics, not CREATE).

Closes #21
---
 tests/integration/adapters/test_llama_index.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tests/integration/adapters/test_llama_index.py b/tests/integration/adapters/test_llama_index.py
index 971a2f2..759d48f 100644
--- a/tests/integration/adapters/test_llama_index.py
+++ b/tests/integration/adapters/test_llama_index.py
@@ -72,6 +72,23 @@ def test_upsert_nodes_idempotent(store, tag):
     assert len(found) == 1
 
 
+def test_upsert_relations_idempotent(store, tag):
+    """Upserting the same relation twice must produce exactly one edge (MERGE idempotent)."""
+    src = EntityNode(label="LIIdempRel", name=f"IdempSrc-{tag}")
+    dst = EntityNode(label="LIIdempRel", name=f"IdempDst-{tag}")
+    store.upsert_nodes([src, dst])
+
+    rel = Relation(label="LI_IDEMP_REL", source_id=src.id, target_id=dst.id)
+    store.upsert_relations([rel])
+    store.upsert_relations([rel])  # second call must not duplicate
+
+    rows = store._client.cypher(
+        "MATCH (a {id: $src})-[r:LI_IDEMP_REL]->(b {id: $dst}) RETURN count(r) AS cnt",
+        params={"src": src.id, "dst": dst.id},
+    )
+    assert rows[0]["cnt"] == 1, f"expected exactly 1 edge after double upsert, got: {rows}"
+
+
 def test_get_by_id(store, tag):
     node = EntityNode(label="LIGetById", name=f"ById-{tag}")
     node_id = node.id

From 7c4d4c0b88fc934b83a6bee85ed8db04c5b69b6c Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sun, 12 Apr 2026 14:33:49 +0300
Subject: [PATCH 2/5] feat(langchain): add similarity_search() to
 CoordinodeGraph

Wraps CoordinodeClient.vector_search() with label/property defaults,
returning [{id, node, distance}, ...] sorted by ascending distance.
Guards against empty query_vector to match server validation behaviour.
Adds two integration tests: one seeding a :LCSim node and verifying the
seeded node appears in top-k results, one verifying empty-vector returns [].

Closes #20
---
 .../langchain_coordinode/graph.py             | 33 ++++++++++++++++
 tests/integration/adapters/test_langchain.py  | 38 +++++++++++++++++++
 2 files changed, 71 insertions(+)

diff --git a/langchain-coordinode/langchain_coordinode/graph.py b/langchain-coordinode/langchain_coordinode/graph.py
index 8a19ff6..1493d60 100644
--- a/langchain-coordinode/langchain_coordinode/graph.py
+++ b/langchain-coordinode/langchain_coordinode/graph.py
@@ -194,6 +194,39 @@ def query(
         # cypher() returns List[Dict[str, Any]] directly — column name → value.
         return self._client.cypher(query, params=params or {})
 
+    def similarity_search(
+        self,
+        query_vector: list[float],
+        k: int = 10,
+        label: str = "Chunk",
+        property: str = "embedding",
+    ) -> list[dict[str, Any]]:
+        """Find nodes whose ``property`` vector is closest to ``query_vector``.
+
+        Wraps ``CoordinodeClient.vector_search()``.  The returned list contains
+        one dict per result with the keys ``node`` (node properties), ``id``
+        (internal integer node ID), and ``distance`` (cosine distance, lower =
+        more similar).
+
+        Args:
+            query_vector: Embedding vector to search for.
+            k: Maximum number of results to return.
+            label: Node label to search (default ``"Chunk"``).
+            property: Embedding property name (default ``"embedding"``).
+
+        Returns:
+            List of result dicts sorted by ascending distance.
+        """
+        if not query_vector:
+            return []
+        results = self._client.vector_search(
+            label=label,
+            property=property,
+            vector=query_vector,
+            top_k=k,
+        )
+        return [{"id": r.node.id, "node": r.node.properties, "distance": r.distance} for r in results]
+
     # ── Lifecycle ─────────────────────────────────────────────────────────
 
     def close(self) -> None:
diff --git a/tests/integration/adapters/test_langchain.py b/tests/integration/adapters/test_langchain.py
index 799e831..7dc08f1 100644
--- a/tests/integration/adapters/test_langchain.py
+++ b/tests/integration/adapters/test_langchain.py
@@ -133,6 +133,44 @@ def test_add_graph_documents_idempotent(graph, unique_tag):
     assert result[0]["cnt"] == 1
 
 
+# ── similarity_search ─────────────────────────────────────────────────────────
+
+
+def test_similarity_search_returns_results(graph, unique_tag):
+    """similarity_search() returns node dicts with id, node, and distance keys.
+
+    Seeds a :LCSim node with a known embedding, then searches for the closest
+    vector. The seeded node must appear in the top-k results.
+    """
+    # Derive a unique embedding from the test tag (same technique as llama-index
+    # test) to avoid collisions with other :LCSim nodes in the shared DB.
+    seed = list(bytes.fromhex(unique_tag))
+    vec = [float(seed[i % len(seed)]) / 255.0 for i in range(16)]
+
+    try:
+        seed_rows = graph.query(
+            "CREATE (n:LCSim {id: $id, embedding: $vec}) RETURN n AS nid",
+            params={"id": f"lcsim-{unique_tag}", "vec": vec},
+        )
+        seeded_internal_id = seed_rows[0]["nid"]
+
+        results = graph.similarity_search(vec, k=5, label="LCSim", property="embedding")
+
+        assert isinstance(results, list)
+        assert len(results) >= 1
+        assert all("id" in r and "node" in r and "distance" in r for r in results)
+        assert any(r["id"] == seeded_internal_id for r in results)
+        assert results[0]["distance"] >= 0.0
+    finally:
+        graph.query("MATCH (n:LCSim {id: $id}) DELETE n", params={"id": f"lcsim-{unique_tag}"})
+
+
+def test_similarity_search_empty_vector_returns_empty(graph):
+    """similarity_search() with an empty vector list returns an empty list without error."""
+    results = graph.similarity_search([], k=5)
+    assert isinstance(results, list)
+
+
 def test_schema_refreshes_after_add(graph, unique_tag):
     """structured_schema is invalidated and re-fetched after add_graph_documents."""
     graph._schema = None  # force refresh

From c9246acc1ce7ea54fae40e02a58e34a1e44b5f28 Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sun, 12 Apr 2026 14:43:02 +0300
Subject: [PATCH 3/5] fix(langchain): sort similarity_search() results by
 distance + tighten test

- Sort vector_search results client-side by r.distance (ascending) to
  match the public contract promised in the docstring
- Replace isinstance(results, list) with results == [] assertion to
  explicitly enforce the empty-vector guard contract
---
 langchain-coordinode/langchain_coordinode/graph.py | 13 ++++++++-----
 tests/integration/adapters/test_langchain.py       |  2 +-
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/langchain-coordinode/langchain_coordinode/graph.py b/langchain-coordinode/langchain_coordinode/graph.py
index 1493d60..f004369 100644
--- a/langchain-coordinode/langchain_coordinode/graph.py
+++ b/langchain-coordinode/langchain_coordinode/graph.py
@@ -219,11 +219,14 @@ def similarity_search(
         """
         if not query_vector:
             return []
-        results = self._client.vector_search(
-            label=label,
-            property=property,
-            vector=query_vector,
-            top_k=k,
+        results = sorted(
+            self._client.vector_search(
+                label=label,
+                property=property,
+                vector=query_vector,
+                top_k=k,
+            ),
+            key=lambda r: r.distance,
         )
         return [{"id": r.node.id, "node": r.node.properties, "distance": r.distance} for r in results]
 
diff --git a/tests/integration/adapters/test_langchain.py b/tests/integration/adapters/test_langchain.py
index 7dc08f1..3b75a56 100644
--- a/tests/integration/adapters/test_langchain.py
+++ b/tests/integration/adapters/test_langchain.py
@@ -168,7 +168,7 @@ def test_similarity_search_returns_results(graph, unique_tag):
 def test_similarity_search_empty_vector_returns_empty(graph):
     """similarity_search() with an empty vector list returns an empty list without error."""
     results = graph.similarity_search([], k=5)
-    assert isinstance(results, list)
+    assert results == []
 
 
 def test_schema_refreshes_after_add(graph, unique_tag):

From 951b48739be1f242b8638e9e9302a7a049f6ec79 Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sun, 12 Apr 2026 15:00:26 +0300
Subject: [PATCH 4/5] fix(langchain): guard empty query_vector via len() for
 Sequence compatibility

similarity_search() now uses len(query_vector) == 0 instead of truthiness
to avoid the ValueError that numpy.ndarray (and similar array-like types)
raises when evaluated in a boolean context.
---
 langchain-coordinode/langchain_coordinode/graph.py | 5 ++++-
 tests/integration/adapters/test_langchain.py       | 4 ++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/langchain-coordinode/langchain_coordinode/graph.py b/langchain-coordinode/langchain_coordinode/graph.py
index f004369..d5cb13e 100644
--- a/langchain-coordinode/langchain_coordinode/graph.py
+++ b/langchain-coordinode/langchain_coordinode/graph.py
@@ -217,7 +217,10 @@ def similarity_search(
         Returns:
             List of result dicts sorted by ascending distance.
         """
-        if not query_vector:
+        # Use len() instead of a truthiness check: array-like inputs such as
+        # numpy.ndarray raise ValueError ("truth value of an array ... is ambiguous")
+        # in a boolean context, whereas len() == 0 works for any sized sequence.
+        if len(query_vector) == 0:
             return []
         results = sorted(
             self._client.vector_search(
diff --git a/tests/integration/adapters/test_langchain.py b/tests/integration/adapters/test_langchain.py
index 3b75a56..d439acc 100644
--- a/tests/integration/adapters/test_langchain.py
+++ b/tests/integration/adapters/test_langchain.py
@@ -152,6 +152,10 @@ def test_similarity_search_returns_results(graph, unique_tag):
             "CREATE (n:LCSim {id: $id, embedding: $vec}) RETURN n AS nid",
             params={"id": f"lcsim-{unique_tag}", "vec": vec},
         )
+        # graph.query() wraps CoordinodeClient.cypher() which returns raw dict values.
+        # CoordiNode: CREATE ... RETURN n yields the internal integer node ID directly
+        # (NOT a node object). similarity_search() also returns {"id": r.node.id, ...}
+        # where r.node.id is the same integer. Direct equality comparison is correct.
         seeded_internal_id = seed_rows[0]["nid"]
 
         results = graph.similarity_search(vec, k=5, label="LCSim", property="embedding")

From ab3559e84d6dfee411e1cac45aac22f251e71abb Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sun, 12 Apr 2026 15:46:50 +0300
Subject: [PATCH 5/5] fix(langchain): align similarity_search() signature with
 Sequence protocol and issue spec
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Change query_vector type annotation from list[float] to Sequence[float]
  (matches existing code comment that explicitly documents numpy.ndarray support
  and aligns with CoordinodeClient.vector_search() signature)
- Change default k from 10 to 5 (matches issue #20 acceptance criteria)
- Strengthen test_similarity_search_returns_results: replace non-negativity
  check with full ascending-order assertion on adjacent distances
- Use store.structured_query(param_map=...) in test_upsert_relations_idempotent
  instead of store._client.cypher(), so the integration test goes through the
  public store API rather than depending on the private _client attribute
---
 langchain-coordinode/langchain_coordinode/graph.py | 5 +++--
 tests/integration/adapters/test_langchain.py       | 2 +-
 tests/integration/adapters/test_llama_index.py     | 4 ++--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/langchain-coordinode/langchain_coordinode/graph.py b/langchain-coordinode/langchain_coordinode/graph.py
index d5cb13e..cc0f2e7 100644
--- a/langchain-coordinode/langchain_coordinode/graph.py
+++ b/langchain-coordinode/langchain_coordinode/graph.py
@@ -5,6 +5,7 @@
 import hashlib
 import json
 import re
+from collections.abc import Sequence
 from typing import Any
 
 from langchain_community.graphs.graph_store import GraphStore
@@ -196,8 +197,8 @@ def query(
 
     def similarity_search(
         self,
-        query_vector: list[float],
-        k: int = 10,
+        query_vector: Sequence[float],
+        k: int = 5,
         label: str = "Chunk",
         property: str = "embedding",
     ) -> list[dict[str, Any]]:
diff --git a/tests/integration/adapters/test_langchain.py b/tests/integration/adapters/test_langchain.py
index d439acc..1ac62a1 100644
--- a/tests/integration/adapters/test_langchain.py
+++ b/tests/integration/adapters/test_langchain.py
@@ -164,7 +164,7 @@ def test_similarity_search_returns_results(graph, unique_tag):
         assert len(results) >= 1
         assert all("id" in r and "node" in r and "distance" in r for r in results)
         assert any(r["id"] == seeded_internal_id for r in results)
-        assert results[0]["distance"] >= 0.0
+        assert all(results[i]["distance"] <= results[i + 1]["distance"] for i in range(len(results) - 1))
     finally:
         graph.query("MATCH (n:LCSim {id: $id}) DELETE n", params={"id": f"lcsim-{unique_tag}"})
 
diff --git a/tests/integration/adapters/test_llama_index.py b/tests/integration/adapters/test_llama_index.py
index 759d48f..140d106 100644
--- a/tests/integration/adapters/test_llama_index.py
+++ b/tests/integration/adapters/test_llama_index.py
@@ -82,9 +82,9 @@ def test_upsert_relations_idempotent(store, tag):
     store.upsert_relations([rel])
     store.upsert_relations([rel])  # second call must not duplicate
 
-    rows = store._client.cypher(
+    rows = store.structured_query(
         "MATCH (a {id: $src})-[r:LI_IDEMP_REL]->(b {id: $dst}) RETURN count(r) AS cnt",
-        params={"src": src.id, "dst": dst.id},
+        param_map={"src": src.id, "dst": dst.id},
     )
     assert rows[0]["cnt"] == 1, f"expected exactly 1 edge after double upsert, got: {rows}"