Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
253 changes: 253 additions & 0 deletions integration/test_collection_query_profile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
import re
from typing import Any

import pytest

from weaviate.collections import Collection
from weaviate.collections.classes.config import DataType, Property
from weaviate.collections.classes.data import DataObject
from weaviate.collections.classes.grpc import GroupBy, MetadataQuery
from weaviate.collections.classes.internal import SearchProfileReturn
from integration.conftest import CollectionFactory

# One "<number><unit>" component of a Go duration string. The number is an
# integer with an optional fractional part. `\u00b5s` (micro sign) is the
# spelling produced by Go's Duration.String(); `\u03bcs` (Greek mu) and `us`
# are the alternative spellings Go's time.ParseDuration also accepts.
_GO_DURATION_COMPONENT = r"\d+(?:\.\d+)?(?:ns|\u00b5s|\u03bcs|us|ms|s|m|h)"

# Go renders durations of a minute or more as several consecutive components
# (e.g. "1m30.5s", "1h0m0s"), so one or more components are allowed.
GO_DURATION_RE = re.compile(rf"(?:{_GO_DURATION_COMPONENT})+")


def assert_go_duration(value: str, label: str = "") -> None:
    """Assert that a string looks like a Go duration (e.g. '1.234ms', '5.458µs', '1m30.5s').

    Args:
        value: The string to validate. The whole string must be a duration.
        label: Name of the field being checked; only used in the failure message.

    Raises:
        AssertionError: If `value` is not a string or is not a well-formed Go duration.
    """
    # Fail with a readable message instead of a TypeError from the regex engine.
    assert isinstance(value, str), (
        f"Expected Go duration string for {label!r}, got {type(value).__name__}"
    )
    assert GO_DURATION_RE.fullmatch(value), (
        f"Expected Go duration format for {label!r}, got {value!r}"
    )


def assert_common_profile(profile: SearchProfileReturn) -> None:
    """Check the invariants that hold for every search profile, whatever its search type.

    The profile must have at least one detail entry, must report a
    `total_took` entry formatted as a Go duration, and every detail key and
    value must be a non-empty string.
    """
    details = profile.details
    assert len(details) > 0, "Profile details should not be empty"

    assert "total_took" in details
    assert_go_duration(details["total_took"], "total_took")

    for detail_name, detail_value in details.items():
        assert isinstance(detail_name, str) and detail_name != ""
        assert isinstance(detail_value, str) and detail_value != ""


def _create_and_populate(collection_factory: CollectionFactory) -> Collection[Any, Any]:
    """Build a collection with a single TEXT property and insert three objects.

    Each object carries its own 3-dimensional vector. The calling test is
    skipped when the connected server reports a version lower than 1.36.9.
    """
    text_property = Property(name="text", data_type=DataType.TEXT)
    collection = collection_factory(properties=[text_property])

    server_version = collection._connection._weaviate_version
    if server_version.is_lower_than(1, 36, 9):
        pytest.skip("Query profiling requires Weaviate >= 1.36.9")

    # (text, vector) pairs; the three vectors are the unit axes.
    seed_rows = (
        ("hello world", [1.0, 0.0, 0.0]),
        ("goodbye world", [0.0, 1.0, 0.0]),
        ("foo bar baz", [0.0, 0.0, 1.0]),
    )
    collection.data.insert_many(
        [DataObject(properties={"text": text}, vector=vector) for text, vector in seed_rows]
    )
    return collection


def test_fetch_objects_with_query_profile(collection_factory: CollectionFactory) -> None:
    """A plain `fetch_objects` call with profiling reports only an 'object' search profile."""
    collection = _create_and_populate(collection_factory)
    metadata = MetadataQuery(query_profile=True)
    result = collection.query.fetch_objects(return_metadata=metadata)

    assert len(result.objects) == 3
    query_profile = result.query_profile
    assert query_profile is not None
    assert len(query_profile.shards) > 0

    first_shard = query_profile.shards[0]
    assert first_shard.name != ""
    assert first_shard.node != ""

    # Only the object-lookup path should have been profiled.
    searches = first_shard.searches
    assert "object" in searches
    assert "vector" not in searches
    assert "keyword" not in searches
    assert_common_profile(searches["object"])


def test_near_vector_with_query_profile(collection_factory: CollectionFactory) -> None:
    """A `near_vector` search with profiling reports only a 'vector' profile with its timings."""
    collection = _create_and_populate(collection_factory)
    metadata = MetadataQuery(query_profile=True, distance=True)
    result = collection.query.near_vector(
        near_vector=[1.0, 0.0, 0.0],
        return_metadata=metadata,
        limit=2,
    )

    assert len(result.objects) == 2
    query_profile = result.query_profile
    assert query_profile is not None
    assert len(query_profile.shards) > 0

    searches = query_profile.shards[0].searches
    assert "vector" in searches
    assert "keyword" not in searches
    assert "object" not in searches
    profile = searches["vector"]
    assert_common_profile(profile)
    details = profile.details

    assert "vector_search_took" in details
    assert_go_duration(details["vector_search_took"], "vector_search_took")

    assert "hnsw_flat_search" in details
    assert details["hnsw_flat_search"] in ("true", "false")

    # Per-layer timings: at least one must be present and each must be a duration.
    layer_keys = [name for name in details if name.startswith("knn_search_layer_")]
    assert len(layer_keys) > 0, "Expected at least one knn_search_layer_*_took key"
    for layer_key in layer_keys:
        assert_go_duration(details[layer_key], layer_key)

    assert "objects_took" in details
    assert_go_duration(details["objects_took"], "objects_took")


def test_bm25_with_query_profile(collection_factory: CollectionFactory) -> None:
    """A BM25 search with profiling reports only a 'keyword' profile with its details."""
    collection = _create_and_populate(collection_factory)
    metadata = MetadataQuery(query_profile=True, score=True)
    result = collection.query.bm25(query="hello", return_metadata=metadata)

    query_profile = result.query_profile
    assert query_profile is not None
    assert len(query_profile.shards) > 0

    searches = query_profile.shards[0].searches
    assert "keyword" in searches
    assert "vector" not in searches
    assert "object" not in searches
    profile = searches["keyword"]
    assert_common_profile(profile)
    details = profile.details

    assert "kwd_method" in details
    assert details["kwd_method"] != ""

    assert "kwd_time" in details
    assert_go_duration(details["kwd_time"], "kwd_time")

    assert "kwd_1_tok_time" in details
    assert_go_duration(details["kwd_1_tok_time"], "kwd_1_tok_time")

    # The result count is a string and must parse as a non-negative integer.
    assert "kwd_6_res_count" in details
    result_count = details["kwd_6_res_count"]
    assert result_count.isdigit()
    assert int(result_count) >= 0


def test_hybrid_with_query_profile(collection_factory: CollectionFactory) -> None:
    """A hybrid search with profiling reports both a 'vector' and a 'keyword' profile."""
    collection = _create_and_populate(collection_factory)
    metadata = MetadataQuery(query_profile=True)
    result = collection.query.hybrid(
        query="hello",
        vector=[1.0, 0.0, 0.0],
        return_metadata=metadata,
        limit=2,
    )

    query_profile = result.query_profile
    assert query_profile is not None
    assert len(query_profile.shards) > 0

    searches = query_profile.shards[0].searches
    assert "vector" in searches, "Hybrid should produce a 'vector' profile"
    assert "keyword" in searches, "Hybrid should produce a 'keyword' profile"
    assert "object" not in searches

    vector_profile = searches["vector"]
    assert_common_profile(vector_profile)
    assert "vector_search_took" in vector_profile.details

    keyword_profile = searches["keyword"]
    assert_common_profile(keyword_profile)
    assert "kwd_method" in keyword_profile.details


def test_near_vector_group_by_with_query_profile(
    collection_factory: CollectionFactory,
) -> None:
    """A grouped `near_vector` search still returns a 'vector' profile."""
    collection = _create_and_populate(collection_factory)
    grouping = GroupBy(prop="text", objects_per_group=1, number_of_groups=3)
    result = collection.query.near_vector(
        near_vector=[1.0, 0.0, 0.0],
        return_metadata=MetadataQuery(query_profile=True),
        group_by=grouping,
    )

    query_profile = result.query_profile
    assert query_profile is not None
    assert len(query_profile.shards) > 0

    searches = query_profile.shards[0].searches
    assert "vector" in searches
    assert_common_profile(searches["vector"])


def test_full_with_profile(collection_factory: CollectionFactory) -> None:
    """`MetadataQuery.full_with_profile()` yields the query profile plus the other metadata."""
    collection = _create_and_populate(collection_factory)
    result = collection.query.near_vector(
        near_vector=[1.0, 0.0, 0.0],
        return_metadata=MetadataQuery.full_with_profile(),
        limit=1,
    )

    assert len(result.objects) == 1
    metadata = result.objects[0].metadata
    assert metadata.distance is not None
    assert metadata.creation_time is not None
    assert metadata.last_update_time is not None
    assert metadata.score is not None
    assert metadata.explain_score is not None

    query_profile = result.query_profile
    assert query_profile is not None
    assert len(query_profile.shards) > 0
    first_shard = query_profile.shards[0]
    assert_common_profile(first_shard.searches["vector"])


def test_full_excludes_query_profile(collection_factory: CollectionFactory) -> None:
    """`MetadataQuery.full()` must not switch query profiling on."""
    collection = _create_and_populate(collection_factory)
    full_metadata = MetadataQuery.full()
    result = collection.query.fetch_objects(return_metadata=full_metadata)
    assert result.query_profile is None


def test_no_query_profile_when_not_requested(
    collection_factory: CollectionFactory,
) -> None:
    """No query profile is returned when the metadata request does not ask for one."""
    collection = _create_and_populate(collection_factory)
    metadata_without_profile = MetadataQuery(distance=True)
    result = collection.query.fetch_objects(return_metadata=metadata_without_profile)
    assert result.query_profile is None


def test_query_profile_with_metadata_list(
    collection_factory: CollectionFactory,
) -> None:
    """Profiling can also be requested through the list-of-names metadata form."""
    collection = _create_and_populate(collection_factory)
    requested_metadata = ["query_profile", "distance"]
    result = collection.query.near_vector(
        near_vector=[1.0, 0.0, 0.0],
        return_metadata=requested_metadata,
        limit=2,
    )

    query_profile = result.query_profile
    assert query_profile is not None
    assert len(query_profile.shards) > 0

    searches = query_profile.shards[0].searches
    assert "vector" in searches
    assert_common_profile(searches["vector"])


def test_query_profile_details_are_strings(
    collection_factory: CollectionFactory,
) -> None:
    """Every shard, search type and detail entry in a profile is a non-empty string."""
    collection = _create_and_populate(collection_factory)
    result = collection.query.near_vector(
        near_vector=[1.0, 0.0, 0.0],
        return_metadata=MetadataQuery(query_profile=True),
        limit=1,
    )

    query_profile = result.query_profile
    assert query_profile is not None
    for shard in query_profile.shards:
        searches = shard.searches
        assert len(searches) > 0, "Shard should have at least one search profile"
        for search_type, profile in searches.items():
            assert isinstance(search_type, str) and search_type != ""
            details = profile.details
            assert len(details) > 0
            for detail_name, detail_value in details.items():
                assert isinstance(detail_name, str) and detail_name != ""
                assert isinstance(detail_value, str) and detail_value != ""
28 changes: 27 additions & 1 deletion weaviate/collections/classes/grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,32 @@ class MetadataQuery(_WeaviateInput):
score: bool = Field(default=False)
explain_score: bool = Field(default=False)
is_consistent: bool = Field(default=False)
query_profile: bool = Field(default=False)

@classmethod
def full(cls) -> "MetadataQuery":
    """Return a MetadataQuery with every field except `query_profile` set to True.

    NOTE: `query_profile` is excluded because it adds performance overhead.
    Use `full_with_profile()` to include it.
    """
    return cls(
        creation_time=True,
        last_update_time=True,
        distance=True,
        certainty=True,
        score=True,
        explain_score=True,
        is_consistent=True,
    )

@classmethod
def full_with_profile(cls) -> "MetadataQuery":
"""Return a MetadataQuery with all fields set to True, including query profiling.

Query profiling adds per-shard execution timing breakdowns to the response
but has performance overhead. Requires Weaviate >= 1.36.9.
"""
return cls(
creation_time=True,
last_update_time=True,
Expand All @@ -102,6 +124,7 @@ def full(cls) -> "MetadataQuery":
score=True,
explain_score=True,
is_consistent=True,
query_profile=True,
)


Expand All @@ -117,6 +140,7 @@ class _MetadataQuery:
explain_score: bool = False
is_consistent: bool = False
vectors: Optional[List[str]] = None
query_profile: bool = False

@classmethod
def from_public(
Expand All @@ -138,6 +162,7 @@ def from_public(
score=public.score,
explain_score=public.explain_score,
is_consistent=public.is_consistent,
query_profile=public.query_profile,
)
)

Expand All @@ -152,6 +177,7 @@ def from_public(
"score",
"explain_score",
"is_consistent",
"query_profile",
]
],
MetadataQuery,
Expand Down
29 changes: 29 additions & 0 deletions weaviate/collections/classes/internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,29 @@ def _is_empty(self) -> bool:
)


@dataclass
class SearchProfileReturn:
    """Profiling output for one search type executed on a shard.

    Attributes:
        details: Mapping from a profiling detail name to its string value.
    """

    details: Dict[str, str]


@dataclass
class ShardProfileReturn:
    """Profiling output collected for one shard.

    Attributes:
        name: Name of the shard.
        node: Node reported for the shard.
        searches: Mapping from a search-type key to that search's profile.
    """

    name: str
    node: str
    searches: Dict[str, SearchProfileReturn]


@dataclass
class QueryProfileReturn:
    """Query profiling data, one entry per shard, returned when `query_profile=True` is set in metadata.

    Attributes:
        shards: The per-shard profiles.
    """

    shards: List[ShardProfileReturn]


@dataclass
class GroupByMetadataReturn:
"""Metadata of an object returned by a group by query."""
Expand Down Expand Up @@ -210,17 +233,20 @@ class GenerativeReturn(Generic[P, R]):
__generated: Optional[str]
objects: List[GenerativeObject[P, R]]
generative: Optional[GenerativeGrouped]
query_profile: Optional[QueryProfileReturn]

# An explicit __init__ is required: `generated` is exposed through a @property backed by
# the private `__generated` variable, which does not fit dataclass field handling.
def __init__(
    self,
    generated: Optional[str],
    objects: List[GenerativeObject[P, R]],
    generative: Optional[GenerativeGrouped],
    query_profile: Optional[QueryProfileReturn] = None,
) -> None:
    """Store the given values on the instance; `query_profile` defaults to None."""
    self.objects = objects
    self.generative = generative
    self.query_profile = query_profile
    # Kept name-mangled so the public `generated` name stays free for the property.
    self.__generated = generated

@property
@deprecated(
Expand Down Expand Up @@ -257,6 +283,7 @@ class GenerativeGroupByReturn(Generic[P, R]):
objects: List[GroupByObject[P, R]]
groups: Dict[str, GenerativeGroup[P, R]]
generated: Optional[str]
query_profile: Optional[QueryProfileReturn] = None


@dataclass
Expand All @@ -265,13 +292,15 @@ class GroupByReturn(Generic[P, R]):

objects: List[GroupByObject[P, R]]
groups: Dict[str, Group[P, R]]
query_profile: Optional[QueryProfileReturn] = None


@dataclass
class QueryReturn(Generic[P, R]):
    """The return type of a query within the `.query` namespace of a collection.

    Attributes:
        objects: The objects returned by the query.
        query_profile: Query profiling data, or None (the default) when none is attached.
    """

    objects: List[Object[P, R]]
    query_profile: Optional[QueryProfileReturn] = None


_GQLEntryReturnType: TypeAlias = Dict[str, List[Dict[str, Any]]]
Expand Down
Loading
Loading