From 6d009a714a90c3a915deb84d618a15ea35830a20 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 12 Apr 2026 12:23:55 +0300 Subject: [PATCH 1/7] feat: use MERGE for edges, wildcard patterns, type()/labels() functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CoordiNode v0.3.1+ supports wildcard [r] in MATCH patterns, type(r), labels(n), and MERGE for relationship patterns. Update adapters to use these instead of the old workarounds. LangChain adapter: - refresh_schema(): wildcard MATCH (a)-[r]->(b) with labels()/type() - _create_edge(): CREATE → MERGE (idempotent edge upsert) - _link_document_to_entities(): CREATE → MERGE for MENTIONS edges LlamaIndex adapter: - get_triplets(): wildcard [r] pattern; type(r) instead of r.__type__ - get_rel_map(): wildcard [r]; ignore_rels pushed into Cypher WHERE so LIMIT applies only to non-ignored edges - upsert_relations(): CREATE → MERGE (idempotent) - Remove _parse_edge_types_from_schema() (no longer needed) Tests: - count(*) → count(r) for relationship counting - cnt >= 1 → cnt == 1 for idempotent edge assertions - get_triplets() test uses wildcard (no relation_names) Closes #24 --- .gitignore | 3 +- .../langchain_coordinode/graph.py | 61 +++++++------ .../graph_stores/coordinode/base.py | 86 ++++--------------- tests/integration/adapters/test_langchain.py | 18 ++-- .../integration/adapters/test_llama_index.py | 4 +- 5 files changed, 57 insertions(+), 115 deletions(-) diff --git a/.gitignore b/.gitignore index f19cdd2..8f8786e 100644 --- a/.gitignore +++ b/.gitignore @@ -17,7 +17,8 @@ venv/ env/ # Version files generated by hatch-vcs -coordinode/coordinode/_version.py +coordinode/_version.py langchain-coordinode/langchain_coordinode/_version.py llama-index-coordinode/llama_index/graph_stores/coordinode/_version.py GAPS.md +CLAUDE.md diff --git a/langchain-coordinode/langchain_coordinode/graph.py b/langchain-coordinode/langchain_coordinode/graph.py index 6814c95..2c97d26 
100644 --- a/langchain-coordinode/langchain_coordinode/graph.py +++ b/langchain-coordinode/langchain_coordinode/graph.py @@ -72,19 +72,18 @@ def refresh_schema(self) -> None: structured = _parse_schema(text) # Augment with relationship triples (start_label, type, end_label) via # Cypher — get_schema_text() only lists edge types without direction. - # CoordiNode: wildcard [r] returns no results; build typed pattern from - # the rel_props keys returned by _parse_schema(). - rel_types = list(structured.get("rel_props", {}).keys()) - if rel_types: - rel_filter = "|".join(_cypher_ident(t) for t in rel_types) - rows = self._client.cypher( - f"MATCH (a)-[r:{rel_filter}]->(b) " - "RETURN DISTINCT a.__label__ AS src, r.__type__ AS rel, b.__label__ AS dst" - ) + rows = self._client.cypher( + "MATCH (a)-[r]->(b) RETURN DISTINCT labels(a) AS src_labels, type(r) AS rel, labels(b) AS dst_labels" + ) + if rows: structured["relationships"] = [ - {"start": row["src"], "type": row["rel"], "end": row["dst"]} + { + "start": _first_label(row.get("src_labels")), + "type": row["rel"], + "end": _first_label(row.get("dst_labels")), + } for row in rows - if row.get("src") and row.get("rel") and row.get("dst") + if _first_label(row.get("src_labels")) and row.get("rel") and _first_label(row.get("dst_labels")) ] self._structured_schema = structured @@ -95,18 +94,14 @@ def add_graph_documents( ) -> None: """Store nodes and relationships extracted from ``GraphDocument`` objects. - Nodes are upserted by ``id`` (used as the ``name`` property) via - ``MERGE``, so repeated calls are safe for nodes. - - Relationships are created with unconditional ``CREATE`` because - CoordiNode does not yet support ``MERGE`` for edge patterns. Re-ingesting - the same ``GraphDocument`` will therefore produce duplicate edges. + Both nodes and relationships are upserted via ``MERGE``, so repeated + calls with the same data are idempotent. 
Args: graph_documents: List of ``langchain_community.graphs.graph_document.GraphDocument``. include_source: If ``True``, also store the source ``Document`` as a ``__Document__`` node linked to every extracted entity via - ``MENTIONS`` edges (also unconditional ``CREATE``). + ``MENTIONS`` edges. """ for doc in graph_documents: for node in doc.nodes: @@ -133,12 +128,10 @@ def _upsert_node(self, node: Any) -> None: ) def _create_edge(self, rel: Any) -> None: - """Create a relationship via unconditional CREATE. + """Upsert a relationship via MERGE (idempotent). - CoordiNode does not support MERGE for edge patterns. Re-ingesting the - same relationship will create a duplicate edge. SET r += $props is - skipped when props is empty because SET r += {} is not supported by all - server versions. + SET r += $props is skipped when props is empty because + SET r += {} is not supported by all server versions. """ src_label = _cypher_ident(rel.source.type or "Entity") dst_label = _cypher_ident(rel.target.type or "Entity") @@ -148,19 +141,19 @@ def _create_edge(self, rel: Any) -> None: self._client.cypher( f"MATCH (src:{src_label} {{name: $src}}) " f"MATCH (dst:{dst_label} {{name: $dst}}) " - f"CREATE (src)-[r:{rel_type}]->(dst) SET r += $props", + f"MERGE (src)-[r:{rel_type}]->(dst) SET r += $props", params={"src": rel.source.id, "dst": rel.target.id, "props": props}, ) else: self._client.cypher( f"MATCH (src:{src_label} {{name: $src}}) " f"MATCH (dst:{dst_label} {{name: $dst}}) " - f"CREATE (src)-[r:{rel_type}]->(dst)", + f"MERGE (src)-[r:{rel_type}]->(dst)", params={"src": rel.source.id, "dst": rel.target.id}, ) def _link_document_to_entities(self, doc: Any) -> None: - """Upsert a ``__Document__`` node and CREATE ``MENTIONS`` edges to all entities.""" + """Upsert a ``__Document__`` node and MERGE ``MENTIONS`` edges to all entities.""" src_id = getattr(doc.source, "id", None) or _stable_document_id(doc.source) self._client.cypher( "MERGE (d:__Document__ {id: $id}) SET 
d.page_content = $text", @@ -169,7 +162,7 @@ def _link_document_to_entities(self, doc: Any) -> None: for node in doc.nodes: label = _cypher_ident(node.type or "Entity") self._client.cypher( - f"MATCH (d:__Document__ {{id: $doc_id}}) MATCH (n:{label} {{name: $name}}) CREATE (d)-[:MENTIONS]->(n)", + f"MATCH (d:__Document__ {{id: $doc_id}}) MATCH (n:{label} {{name: $name}}) MERGE (d)-[:MENTIONS]->(n)", params={"doc_id": src_id, "name": node.id}, ) @@ -211,10 +204,7 @@ def _stable_document_id(source: Any) -> str: Combines ``page_content`` and sorted ``metadata`` items so the same document produces the same ``__Document__`` node ID across different - Python processes. This makes document-node creation stable when - ``include_source=True`` is used, but does not make re-ingest fully - idempotent because ``MENTIONS`` edges are not deduplicated until edge - ``MERGE``/dedup support is added to CoordiNode. + Python processes. """ content = getattr(source, "page_content", "") or "" metadata = getattr(source, "metadata", {}) or {} @@ -232,6 +222,15 @@ def _stable_document_id(source: Any) -> str: return hashlib.sha256(canonical.encode()).hexdigest()[:32] +def _first_label(labels: Any) -> str | None: + """Extract the first label from a labels() result (list of strings).""" + if isinstance(labels, list) and labels: + return str(labels[0]) + if isinstance(labels, str): + return labels + return None + + def _cypher_ident(name: str) -> str: """Escape a label/type name for use as a Cypher identifier.""" # ASCII-only word characters: letter/digit/underscore, not starting with digit. 
diff --git a/llama-index-coordinode/llama_index/graph_stores/coordinode/base.py b/llama-index-coordinode/llama_index/graph_stores/coordinode/base.py index 2d43788..1a6b3e1 100644 --- a/llama-index-coordinode/llama_index/graph_stores/coordinode/base.py +++ b/llama-index-coordinode/llama_index/graph_stores/coordinode/base.py @@ -117,13 +117,7 @@ def get_triplets( properties: dict[str, Any] | None = None, ids: list[str] | None = None, ) -> list[list[LabelledNode]]: - """Retrieve triplets (subject, predicate, object) as node triples. - - Note: - ``relation_names`` is **required**. CoordiNode does not support - untyped wildcard ``[r]`` relationship patterns — they silently return - no rows. Omitting ``relation_names`` raises ``NotImplementedError``. - """ + """Retrieve triplets (subject, predicate, object) as node triples.""" conditions: list[str] = [] params: dict[str, Any] = {} @@ -133,22 +127,15 @@ def get_triplets( conditions.append("(n.name IN $entity_names OR m.name IN $entity_names)") params["entity_names"] = entity_names if relation_names: - # Escape each type name to prevent Cypher injection rel_filter = "|".join(_cypher_ident(t) for t in relation_names) rel_pattern = f"[r:{rel_filter}]" else: - # CoordiNode: wildcard [r] pattern returns no results. - # Callers must supply relation_names for the query to work. - raise NotImplementedError( - "CoordinodePropertyGraphStore.get_triplets() requires relation_names — " - "CoordiNode does not support untyped wildcard [r] patterns" - ) + rel_pattern = "[r]" where = f"WHERE {' AND '.join(conditions)}" if conditions else "" - # CoordiNode: use r.__type__ instead of type(r) — type() returns null. 
cypher = ( f"MATCH (n)-{rel_pattern}->(m) {where} " - "RETURN n, r.__type__ AS rel_type, m, n.id AS _src_id, m.id AS _dst_id " + "RETURN n, type(r) AS rel_type, m, n.id AS _src_id, m.id AS _dst_id " "LIMIT 1000" ) result = self._client.cypher(cypher, params=params) @@ -189,27 +176,23 @@ def get_rel_map( if not graph_nodes: return [] - # CoordiNode: wildcard [r] pattern returns no results. Fetch all - # known edge types from the schema and build a typed pattern instead, - # e.g. [r:TYPE_A|TYPE_B|...]. - schema_text = self._client.get_schema_text() - edge_types = _parse_edge_types_from_schema(schema_text) - ignored = set(ignore_rels) if ignore_rels else set() - active_types = [t for t in edge_types if t not in ignored] - - if not active_types: - return [] - - rel_filter = "|".join(_cypher_ident(t) for t in active_types) node_ids = [n.id for n in graph_nodes] - safe_limit = int(limit) # coerce to int to prevent Cypher injection via non-integer input + safe_limit = int(limit) params: dict[str, object] = {"ids": node_ids} + # Push ignore_rels filter into the WHERE clause so LIMIT applies only + # to non-ignored edges and callers receive up to `limit` visible results. 
+ if ignore_rels: + params["ignored"] = list(ignore_rels) + ignore_clause = "AND type(r) NOT IN $ignored " + else: + ignore_clause = "" + cypher = ( - f"MATCH (n)-[r:{rel_filter}]->(m) " - f"WHERE n.id IN $ids " - f"RETURN n, r.__type__ AS _rel_type, m, n.id AS _src_id, m.id AS _dst_id " + "MATCH (n)-[r]->(m) " + f"WHERE n.id IN $ids {ignore_clause}" + f"RETURN n, type(r) AS _rel_type, m, n.id AS _src_id, m.id AS _dst_id " + f"LIMIT {safe_limit}" ) result = self._client.cypher(cypher, params=params) @@ -237,28 +220,21 @@ def upsert_nodes(self, nodes: list[LabelledNode]) -> None: self._client.cypher(cypher, params={"id": node.id, "props": props}) def upsert_relations(self, relations: list[Relation]) -> None: - """Upsert relationships into the graph.""" + """Upsert relationships into the graph (idempotent via MERGE).""" for rel in relations: props = rel.properties or {} label = _cypher_ident(rel.label) - # CoordiNode does not yet support MERGE for edge patterns; use CREATE. - # A WHERE NOT (src)-[:TYPE]->(dst) guard was tested but returns 0 - # rows silently in CoordiNode, making all CREATE statements no-ops. - # Until server-side MERGE or pattern predicates are supported, - # repeated calls will create duplicate edges. - # SET r += $props is skipped when props is empty — SET r += {} is - # not supported by all server versions. 
if props: cypher = ( f"MATCH (src {{id: $src_id}}) MATCH (dst {{id: $dst_id}}) " - f"CREATE (src)-[r:{label}]->(dst) SET r += $props" + f"MERGE (src)-[r:{label}]->(dst) SET r += $props" ) self._client.cypher( cypher, params={"src_id": rel.source_id, "dst_id": rel.target_id, "props": props}, ) else: - cypher = f"MATCH (src {{id: $src_id}}) MATCH (dst {{id: $dst_id}}) CREATE (src)-[r:{label}]->(dst)" + cypher = f"MATCH (src {{id: $src_id}}) MATCH (dst {{id: $dst_id}}) MERGE (src)-[r:{label}]->(dst)" self._client.cypher( cypher, params={"src_id": rel.source_id, "dst_id": rel.target_id}, @@ -376,29 +352,3 @@ def _node_label(node: LabelledNode) -> str: if isinstance(node, EntityNode): return node.label or "Entity" return "Node" - - -def _parse_edge_types_from_schema(schema_text: str) -> list[str]: - """Extract edge type names from CoordiNode schema text. - - Parses the "Edge types:" section produced by ``get_schema_text()``. - """ - edge_types: list[str] = [] - lines = iter(schema_text.splitlines()) - - # Advance to the "Edge types:" header. - for line in lines: - if line.strip().lower().startswith("edge types"): - break - - # Collect bullet items until the first blank line. - for line in lines: - stripped = line.strip() - if not stripped: - break - if stripped.startswith(("-", "*")): - name = stripped.lstrip("-* ").split("(")[0].strip() - if name: - edge_types.append(name) - - return edge_types diff --git a/tests/integration/adapters/test_langchain.py b/tests/integration/adapters/test_langchain.py index 2d5e274..799e831 100644 --- a/tests/integration/adapters/test_langchain.py +++ b/tests/integration/adapters/test_langchain.py @@ -97,21 +97,15 @@ def test_add_graph_documents_creates_relationship(graph, unique_tag): graph.add_graph_documents([doc]) # Verify the relationship was created, not just the source node. 
- # count(*) instead of count(r): CoordiNode returns 0 for relationship-variable counts result = graph.query( - "MATCH (a:LCPerson2 {name: $src})-[r:LC_RESEARCHES]->(b:LCConcept {name: $dst}) RETURN count(*) AS cnt", + "MATCH (a:LCPerson2 {name: $src})-[r:LC_RESEARCHES]->(b:LCConcept {name: $dst}) RETURN count(r) AS cnt", params={"src": f"Charlie-{unique_tag}", "dst": f"GraphRAG-{unique_tag}"}, ) - assert result[0]["cnt"] >= 1, f"relationship not found: {result}" + assert result[0]["cnt"] == 1, f"expected exactly 1 relationship: {result}" def test_add_graph_documents_idempotent(graph, unique_tag): - """Calling add_graph_documents twice must not raise. - - Nodes are idempotent (MERGE). Edges are NOT — CoordiNode does not yet - support MERGE for edges, so unconditional CREATE is used and duplicate - edges are expected after two ingests. - """ + """Calling add_graph_documents twice produces exactly one edge (MERGE idempotent).""" node_a = Node(id=f"Idempotent-{unique_tag}", type="LCIdempotent") node_b = Node(id=f"IdempTarget-{unique_tag}", type="LCIdempotent") rel = Relationship(source=node_a, target=node_b, type="LC_IDEMP_REL") @@ -131,12 +125,12 @@ def test_add_graph_documents_idempotent(graph, unique_tag): ) assert result[0]["cnt"] == 1 - # Edges: unconditional CREATE → count >= 1 (may be > 1 due to CoordiNode limitation) + # Edges: MERGE keeps count at 1 (idempotent) result = graph.query( - "MATCH (a:LCIdempotent {name: $src})-[r:LC_IDEMP_REL]->(b:LCIdempotent {name: $dst}) RETURN count(*) AS cnt", + "MATCH (a:LCIdempotent {name: $src})-[r:LC_IDEMP_REL]->(b:LCIdempotent {name: $dst}) RETURN count(r) AS cnt", params={"src": f"Idempotent-{unique_tag}", "dst": f"IdempTarget-{unique_tag}"}, ) - assert result[0]["cnt"] >= 1 + assert result[0]["cnt"] == 1 def test_schema_refreshes_after_add(graph, unique_tag): diff --git a/tests/integration/adapters/test_llama_index.py b/tests/integration/adapters/test_llama_index.py index bf90e74..771a324 100644 --- 
a/tests/integration/adapters/test_llama_index.py +++ b/tests/integration/adapters/test_llama_index.py @@ -96,11 +96,9 @@ def test_upsert_and_get_triplets(store, tag): ) store.upsert_relations([rel]) - # CoordiNode does not support wildcard [r] patterns yet — must pass relation_names. - # See: get_triplets() implementation note. + # Wildcard [r] works — no need to specify relation_names. triplets = store.get_triplets( entity_names=[f"Src-{tag}"], - relation_names=["LI_RESEARCHES"], ) assert isinstance(triplets, list) assert len(triplets) >= 1 From ab1ea64b1934a3b42a439cb46b00abcefed7a003 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 12 Apr 2026 12:38:21 +0300 Subject: [PATCH 2/7] docs(langchain): explain why refresh_schema uses no LIMIT on DISTINCT query --- langchain-coordinode/langchain_coordinode/graph.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/langchain-coordinode/langchain_coordinode/graph.py b/langchain-coordinode/langchain_coordinode/graph.py index 2c97d26..c9b2e51 100644 --- a/langchain-coordinode/langchain_coordinode/graph.py +++ b/langchain-coordinode/langchain_coordinode/graph.py @@ -72,6 +72,11 @@ def refresh_schema(self) -> None: structured = _parse_schema(text) # Augment with relationship triples (start_label, type, end_label) via # Cypher — get_schema_text() only lists edge types without direction. + # No LIMIT here intentionally: RETURN DISTINCT already collapses all edges + # to unique (src_label, rel_type, dst_label) combinations, so the result + # is bounded by the number of distinct relationship type triples, not by + # total edge count. Adding a LIMIT would silently drop relationship types + # that happen to appear beyond the limit, producing an incomplete schema. 
rows = self._client.cypher( "MATCH (a)-[r]->(b) RETURN DISTINCT labels(a) AS src_labels, type(r) AS rel, labels(b) AS dst_labels" ) From 69fa9916b29959d82d9fffd3518ad8326eaeeaac Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 12 Apr 2026 12:39:55 +0300 Subject: [PATCH 3/7] test(llama-index): add vector_query() integration tests --- .../integration/adapters/test_llama_index.py | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/integration/adapters/test_llama_index.py b/tests/integration/adapters/test_llama_index.py index 771a324..635fe1d 100644 --- a/tests/integration/adapters/test_llama_index.py +++ b/tests/integration/adapters/test_llama_index.py @@ -12,6 +12,7 @@ import pytest from llama_index.core.graph_stores.types import EntityNode, Relation +from llama_index.core.vector_stores.types import VectorStoreQuery from llama_index.graph_stores.coordinode import CoordinodePropertyGraphStore ADDR = os.environ.get("COORDINODE_ADDR", "localhost:7080") @@ -150,3 +151,40 @@ def test_delete_by_entity_name(store, tag): found = store.get(properties={"name": f"DelNamed-{tag}"}) assert len(found) == 0 + + +# ── Vector query ────────────────────────────────────────────────────────────── + + +def test_vector_query_returns_results(store, tag): + """vector_query() returns nodes and scores for an embedding that matches stored data. + + vector_query() without filters defaults to label="Chunk", so the seed node must use + that label to be found by the underlying vector_search() call. + """ + vec = [float(i) / 16 for i in range(16)] + # Seed a Chunk node with an embedding directly via Cypher. + # vector_query() defaults label to "Chunk" when no MetadataFilters are provided. 
+ store._client.cypher( + "CREATE (n:Chunk {id: $id, text: $text, embedding: $vec})", + params={"id": f"vec-{tag}", "text": "test chunk", "vec": vec}, + ) + try: + query = VectorStoreQuery(query_embedding=vec, similarity_top_k=1) + nodes, scores = store.vector_query(query) + + assert isinstance(nodes, list) + assert isinstance(scores, list) + assert len(nodes) >= 1 + assert len(scores) == len(nodes) + assert scores[0] >= 0.0 + finally: + store._client.cypher("MATCH (n:Chunk {id: $id}) DELETE n", params={"id": f"vec-{tag}"}) + + +def test_vector_query_empty_embedding_returns_empty(store): + """vector_query() with no query_embedding returns empty lists without error.""" + query = VectorStoreQuery(query_embedding=None, similarity_top_k=5) + nodes, scores = store.vector_query(query) + assert nodes == [] + assert scores == [] From 778e8c336588b13465993312c50f48cf9b0f8e63 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 12 Apr 2026 12:54:56 +0300 Subject: [PATCH 4/7] fix(langchain): use min() in _first_label for deterministic label selection openCypher does not guarantee a stable ordering for labels(), so using labels[0] produced nondeterministic schema entries across refresh_schema() calls. Replace with min(labels) to always select the lexicographically smallest label consistently. Also strengthen the vector_query() integration test: capture the seeded node's internal CoordiNode ID from CREATE RETURN and assert it appears in the returned ChunkNode list, proving the specific seeded node was found rather than any pre-existing Chunk. 
--- langchain-coordinode/langchain_coordinode/graph.py | 9 +++++++-- tests/integration/adapters/test_llama_index.py | 10 ++++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/langchain-coordinode/langchain_coordinode/graph.py b/langchain-coordinode/langchain_coordinode/graph.py index c9b2e51..9fa96fd 100644 --- a/langchain-coordinode/langchain_coordinode/graph.py +++ b/langchain-coordinode/langchain_coordinode/graph.py @@ -228,9 +228,14 @@ def _stable_document_id(source: Any) -> str: def _first_label(labels: Any) -> str | None: - """Extract the first label from a labels() result (list of strings).""" + """Extract a stable label from a labels() result (list of strings). + + openCypher does not guarantee a stable ordering for labels(), so using + labels[0] would produce nondeterministic schema entries across calls. + We return the lexicographically smallest label as a deterministic rule. + """ if isinstance(labels, list) and labels: - return str(labels[0]) + return str(min(labels)) if isinstance(labels, str): return labels return None diff --git a/tests/integration/adapters/test_llama_index.py b/tests/integration/adapters/test_llama_index.py index 635fe1d..5083f00 100644 --- a/tests/integration/adapters/test_llama_index.py +++ b/tests/integration/adapters/test_llama_index.py @@ -165,10 +165,13 @@ def test_vector_query_returns_results(store, tag): vec = [float(i) / 16 for i in range(16)] # Seed a Chunk node with an embedding directly via Cypher. # vector_query() defaults label to "Chunk" when no MetadataFilters are provided. - store._client.cypher( - "CREATE (n:Chunk {id: $id, text: $text, embedding: $vec})", + # Capture the internal CoordiNode node ID (returned as integer by RETURN n) so we + # can assert the specific seeded node is retrieved — not just any pre-existing Chunk. 
+ seed_rows = store._client.cypher( + "CREATE (n:Chunk {id: $id, text: $text, embedding: $vec}) RETURN n AS nid", params={"id": f"vec-{tag}", "text": "test chunk", "vec": vec}, ) + seeded_internal_id = str(seed_rows[0]["nid"]) try: query = VectorStoreQuery(query_embedding=vec, similarity_top_k=1) nodes, scores = store.vector_query(query) @@ -176,6 +179,9 @@ def test_vector_query_returns_results(store, tag): assert isinstance(nodes, list) assert isinstance(scores, list) assert len(nodes) >= 1 + # vector_search returns CoordiNode internal node IDs (ChunkNode.id_); + # verify our seeded node is the one found. + assert any(str(getattr(node, "id_", "")) == seeded_internal_id for node in nodes) assert len(scores) == len(nodes) assert scores[0] >= 0.0 finally: From f31a6976504ac9482aa3f5bbc591c7dba3079bb8 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 12 Apr 2026 13:06:30 +0300 Subject: [PATCH 5/7] test(llama-index): move seed creation into try block for reliable cleanup Ensures the finally DELETE runs even if seeded_internal_id extraction fails after a successful CREATE. Addresses CodeRabbit nitpick on test robustness. --- .../integration/adapters/test_llama_index.py | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/tests/integration/adapters/test_llama_index.py b/tests/integration/adapters/test_llama_index.py index 5083f00..fb4fcdc 100644 --- a/tests/integration/adapters/test_llama_index.py +++ b/tests/integration/adapters/test_llama_index.py @@ -163,16 +163,18 @@ def test_vector_query_returns_results(store, tag): that label to be found by the underlying vector_search() call. """ vec = [float(i) / 16 for i in range(16)] - # Seed a Chunk node with an embedding directly via Cypher. - # vector_query() defaults label to "Chunk" when no MetadataFilters are provided. 
- # Capture the internal CoordiNode node ID (returned as integer by RETURN n) so we - # can assert the specific seeded node is retrieved — not just any pre-existing Chunk. - seed_rows = store._client.cypher( - "CREATE (n:Chunk {id: $id, text: $text, embedding: $vec}) RETURN n AS nid", - params={"id": f"vec-{tag}", "text": "test chunk", "vec": vec}, - ) - seeded_internal_id = str(seed_rows[0]["nid"]) + # Seeding is inside the try block so that the finally cleanup always runs even if + # the CREATE succeeds but extracting seeded_internal_id raises (e.g., unexpected + # response format). vector_query() defaults label to "Chunk" when no + # MetadataFilters are provided. try: + # Capture the internal CoordiNode node ID (returned as integer by RETURN n) so + # we can assert the specific seeded node is retrieved — not just any Chunk. + seed_rows = store._client.cypher( + "CREATE (n:Chunk {id: $id, text: $text, embedding: $vec}) RETURN n AS nid", + params={"id": f"vec-{tag}", "text": "test chunk", "vec": vec}, + ) + seeded_internal_id = str(seed_rows[0]["nid"]) query = VectorStoreQuery(query_embedding=vec, similarity_top_k=1) nodes, scores = store.vector_query(query) From 543816f07b5ccd3212aa949fe52b363e5d89a2ee Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 12 Apr 2026 13:30:08 +0300 Subject: [PATCH 6/7] test(llama-index): clarify CoordiNode RETURN n and vector_search behaviour MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add detailed comments explaining: - In CoordiNode, CREATE ... 
RETURN n yields the internal integer node ID, not a property map — verified empirically (seed_rows[0]["nid"] == int) - vector_search returns Node(properties={}) so node.properties.get("id") is always None and cannot be used for node identification - ChunkNode.id_ == str(r.node.id) is the correct comparison target --- tests/integration/adapters/test_llama_index.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tests/integration/adapters/test_llama_index.py b/tests/integration/adapters/test_llama_index.py index fb4fcdc..8496257 100644 --- a/tests/integration/adapters/test_llama_index.py +++ b/tests/integration/adapters/test_llama_index.py @@ -168,8 +168,16 @@ def test_vector_query_returns_results(store, tag): # response format). vector_query() defaults label to "Chunk" when no # MetadataFilters are provided. try: - # Capture the internal CoordiNode node ID (returned as integer by RETURN n) so - # we can assert the specific seeded node is retrieved — not just any Chunk. + # In CoordiNode, `CREATE (n:...) RETURN n` returns the internal integer node ID, + # NOT a property map. This is CoordiNode-specific behaviour verified empirically: + # seed_rows[0]["nid"] → 90 (int) + # ChunkNode.id_ is set from vector_search's r.node.id (same internal integer), + # so comparing str(node.id_) == str(seed_rows[0]["nid"]) correctly identifies + # the specific seeded node. + # + # NOTE: vector_search returns Node(id=N, properties={}) — the properties dict is + # always EMPTY, so node.properties.get("id") would always be None and cannot be + # used for identification. 
seed_rows = store._client.cypher( "CREATE (n:Chunk {id: $id, text: $text, embedding: $vec}) RETURN n AS nid", params={"id": f"vec-{tag}", "text": "test chunk", "vec": vec}, @@ -181,8 +189,7 @@ def test_vector_query_returns_results(store, tag): assert isinstance(nodes, list) assert isinstance(scores, list) assert len(nodes) >= 1 - # vector_search returns CoordiNode internal node IDs (ChunkNode.id_); - # verify our seeded node is the one found. + # ChunkNode.id_ == str(r.node.id) == internal CoordiNode node ID captured above. assert any(str(getattr(node, "id_", "")) == seeded_internal_id for node in nodes) assert len(scores) == len(nodes) assert scores[0] >= 0.0 From f0e1ff3f954e83f11671fb385c058af514c72c6e Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 12 Apr 2026 13:57:04 +0300 Subject: [PATCH 7/7] fix(langchain): deduplicate relationship triples after _first_label normalization RETURN DISTINCT operates on raw label lists, but min()-based _first_label() can collapse different multi-label combos (e.g. ['Employee','Person'] and ['Person','Employee']) into the same (start, type, end) triple. Use a set for deduplication after normalization so each relationship triple appears once. Also make the vector_query() integration test embedding unique per test tag (derived from tag bytes) and increase similarity_top_k to 5 to prevent flaky results in shared integration DBs where another :Chunk may share the same vector. 
--- .../langchain_coordinode/graph.py | 20 ++++++++++++------- .../integration/adapters/test_llama_index.py | 10 ++++++++-- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/langchain-coordinode/langchain_coordinode/graph.py b/langchain-coordinode/langchain_coordinode/graph.py index 9fa96fd..8a19ff6 100644 --- a/langchain-coordinode/langchain_coordinode/graph.py +++ b/langchain-coordinode/langchain_coordinode/graph.py @@ -81,14 +81,20 @@ def refresh_schema(self) -> None: "MATCH (a)-[r]->(b) RETURN DISTINCT labels(a) AS src_labels, type(r) AS rel, labels(b) AS dst_labels" ) if rows: + # Deduplicate after _first_label() normalization: RETURN DISTINCT operates on + # raw label lists, but _first_label(min()) can collapse different multi-label + # combinations to the same (start, type, end) triple (e.g. ['Employee','Person'] + # and ['Person','Employee'] both min-normalize to 'Employee'). Use a set to + # ensure each relationship triple appears at most once. + triples: set[tuple[str, str, str]] = set() + for row in rows: + start = _first_label(row.get("src_labels")) + end = _first_label(row.get("dst_labels")) + rel = row.get("rel") + if start and rel and end: + triples.add((start, rel, end)) structured["relationships"] = [ - { - "start": _first_label(row.get("src_labels")), - "type": row["rel"], - "end": _first_label(row.get("dst_labels")), - } - for row in rows - if _first_label(row.get("src_labels")) and row.get("rel") and _first_label(row.get("dst_labels")) + {"start": start, "type": rel, "end": end} for start, rel, end in sorted(triples) ] self._structured_schema = structured diff --git a/tests/integration/adapters/test_llama_index.py b/tests/integration/adapters/test_llama_index.py index 8496257..971a2f2 100644 --- a/tests/integration/adapters/test_llama_index.py +++ b/tests/integration/adapters/test_llama_index.py @@ -162,7 +162,11 @@ def test_vector_query_returns_results(store, tag): vector_query() without filters defaults to label="Chunk", so the 
seed node must use that label to be found by the underlying vector_search() call. """ - vec = [float(i) / 16 for i in range(16)] + # Derive a unique embedding from the test tag so that no other :Chunk in the shared + # integration DB can have the same or closer vector, preventing flaky top-k results. + # tag is uuid4().hex[:8] → 8 hex chars → 4 bytes of entropy. + seed = list(bytes.fromhex(tag)) + vec = [float(seed[i % len(seed)]) / 255.0 for i in range(16)] # Seeding is inside the try block so that the finally cleanup always runs even if # the CREATE succeeds but extracting seeded_internal_id raises (e.g., unexpected # response format). vector_query() defaults label to "Chunk" when no @@ -183,7 +187,9 @@ def test_vector_query_returns_results(store, tag): params={"id": f"vec-{tag}", "text": "test chunk", "vec": vec}, ) seeded_internal_id = str(seed_rows[0]["nid"]) - query = VectorStoreQuery(query_embedding=vec, similarity_top_k=1) + # top_k=5: even if other :Chunk nodes exist with similar vectors, the unique + # tag-based embedding ensures ours is among the closest results. + query = VectorStoreQuery(query_embedding=vec, similarity_top_k=5) nodes, scores = store.vector_query(query) assert isinstance(nodes, list)