Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions langchain-coordinode/langchain_coordinode/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import hashlib
import json
import re
from collections.abc import Sequence
from typing import Any

from langchain_community.graphs.graph_store import GraphStore
Expand Down Expand Up @@ -194,6 +195,45 @@ def query(
# cypher() returns List[Dict[str, Any]] directly — column name → value.
return self._client.cypher(query, params=params or {})

def similarity_search(
    self,
    query_vector: Sequence[float],
    k: int = 5,
    label: str = "Chunk",
    property: str = "embedding",
) -> list[dict[str, Any]]:
    """Find nodes whose ``property`` vector is closest to ``query_vector``.

    Wraps ``CoordinodeClient.vector_search()``. The returned list contains
    one dict per result with the keys ``node`` (node properties), ``id``
    (internal integer node ID), and ``distance`` (cosine distance, lower =
    more similar).

    Args:
        query_vector: Embedding vector to search for. An empty vector
            yields an empty result without contacting the backend.
        k: Maximum number of results to return. A value of zero or less
            yields an empty result without contacting the backend.
        label: Node label to search (default ``"Chunk"``).
        property: Embedding property name (default ``"embedding"``).

    Returns:
        List of result dicts sorted by ascending distance.
    """
    # Use len() instead of a truthiness check: numpy.ndarray (and other
    # Sequence types) raise ValueError("The truth value of an array is
    # ambiguous") in a boolean context. len() == 0 works for all sequence
    # types. ``k`` is tested first so a non-positive limit never reaches
    # the backend either.
    if k <= 0 or len(query_vector) == 0:
        return []

    # Sort locally so the ascending-distance contract holds regardless of
    # the order in which the client hands results back.
    results = sorted(
        self._client.vector_search(
            label=label,
            property=property,
            vector=query_vector,
            top_k=k,
        ),
        key=lambda r: r.distance,
    )
    return [{"id": r.node.id, "node": r.node.properties, "distance": r.distance} for r in results]

# ── Lifecycle ─────────────────────────────────────────────────────────

def close(self) -> None:
Expand Down
42 changes: 42 additions & 0 deletions tests/integration/adapters/test_langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,48 @@ def test_add_graph_documents_idempotent(graph, unique_tag):
assert result[0]["cnt"] == 1


# ── similarity_search ─────────────────────────────────────────────────────────


def test_similarity_search_returns_results(graph, unique_tag):
    """similarity_search() yields dicts carrying ``id``, ``node`` and ``distance``.

    A single :LCSim node is created with an embedding derived from
    ``unique_tag``; searching for that same vector must return the node
    among the top-k hits, with hits ordered by non-decreasing distance.
    The seeded node is removed afterwards even if an assertion fails.
    """
    # The embedding is built from the tag bytes (same technique as the
    # llama-index test) so it does not collide with other :LCSim nodes in
    # the shared DB.
    tag_bytes = bytes.fromhex(unique_tag)
    vec = [tag_bytes[i % len(tag_bytes)] / 255.0 for i in range(16)]
    node_key = f"lcsim-{unique_tag}"

    try:
        created = graph.query(
            "CREATE (n:LCSim {id: $id, embedding: $vec}) RETURN n AS nid",
            params={"id": node_key, "vec": vec},
        )
        # graph.query() wraps CoordinodeClient.cypher(), which returns raw
        # dict values. In CoordiNode, CREATE ... RETURN n yields the internal
        # integer node ID directly (NOT a node object), and
        # similarity_search() reports that same integer under "id", so the
        # two can be compared with plain equality.
        seeded_internal_id = created[0]["nid"]

        results = graph.similarity_search(vec, k=5, label="LCSim", property="embedding")

        assert isinstance(results, list)
        assert len(results) >= 1
        for hit in results:
            assert "id" in hit and "node" in hit and "distance" in hit
        assert any(hit["id"] == seeded_internal_id for hit in results)
        # Each hit must be no farther than the one that follows it.
        distances = [hit["distance"] for hit in results]
        assert all(nearer <= farther for nearer, farther in zip(distances, distances[1:]))
    finally:
        graph.query("MATCH (n:LCSim {id: $id}) DELETE n", params={"id": node_key})


def test_similarity_search_empty_vector_returns_empty(graph):
    """An empty query vector produces an empty result list and raises no error."""
    assert graph.similarity_search([], k=5) == []

Comment thread
coderabbitai[bot] marked this conversation as resolved.

def test_schema_refreshes_after_add(graph, unique_tag):
"""structured_schema is invalidated and re-fetched after add_graph_documents."""
graph._schema = None # force refresh
Expand Down
17 changes: 17 additions & 0 deletions tests/integration/adapters/test_llama_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,23 @@ def test_upsert_nodes_idempotent(store, tag):
assert len(found) == 1


def test_upsert_relations_idempotent(store, tag):
    """Writing an identical relation twice must leave exactly one edge behind.

    Two endpoint nodes are upserted, the same ``Relation`` is upserted two
    times in a row, and the edges between the endpoints are then counted
    (the upsert is expected to be MERGE-idempotent).
    """
    head = EntityNode(label="LIIdempRel", name=f"IdempSrc-{tag}")
    tail = EntityNode(label="LIIdempRel", name=f"IdempDst-{tag}")
    store.upsert_nodes([head, tail])

    edge = Relation(label="LI_IDEMP_REL", source_id=head.id, target_id=tail.id)
    # The second pass repeats the first write verbatim; it must not add a
    # duplicate edge.
    for _ in range(2):
        store.upsert_relations([edge])

    count_query = "MATCH (a {id: $src})-[r:LI_IDEMP_REL]->(b {id: $dst}) RETURN count(r) AS cnt"
    rows = store.structured_query(
        count_query,
        param_map={"src": head.id, "dst": tail.id},
    )
    edge_count = rows[0]["cnt"]
    assert edge_count == 1, f"expected exactly 1 edge after double upsert, got: {rows}"


def test_get_by_id(store, tag):
node = EntityNode(label="LIGetById", name=f"ById-{tag}")
node_id = node.id
Expand Down
Loading