diff --git a/codegen/apis b/codegen/apis index 827d26f48..bbad89bd5 160000 --- a/codegen/apis +++ b/codegen/apis @@ -1 +1 @@ -Subproject commit 827d26f4825902994a099595d49779d16fea3a0a +Subproject commit bbad89bd51d792534a9ba06a44ed1f2259f7f89f diff --git a/codegen/buf.yaml b/codegen/buf.yaml index 988170bdb..9df88119f 100644 --- a/codegen/buf.yaml +++ b/codegen/buf.yaml @@ -9,4 +9,4 @@ breaking: deps: - buf.build/googleapis/googleapis modules: - - path: apis/_build/2025-04 + - path: apis/_build/2025-10 diff --git a/docs/db_data/index-usage-byov.md b/docs/db_data/index-usage-byov.md index 85277c4f1..a3831dbdc 100644 --- a/docs/db_data/index-usage-byov.md +++ b/docs/db_data/index-usage-byov.md @@ -95,6 +95,41 @@ index = pc.Index(host=os.environ.get('INDEX_HOST')) fetch_response = index.fetch(ids=["vec1", "vec2"], namespace="example-namespace") ``` +## Fetch vectors by metadata + +The following example fetches vectors by metadata filter. + +```python +import os +from pinecone import Pinecone + +pc = Pinecone(api_key='<<PINECONE_API_KEY>>') + +# Find your index host by calling describe_index +# or through the Pinecone web console +index = pc.Index(host=os.environ.get('INDEX_HOST')) + +# Fetch vectors matching a metadata filter +fetch_response = index.fetch_by_metadata( + filter={"genre": {"$in": ["comedy", "drama"]}, "year": {"$eq": 2019}}, + namespace="example-namespace", + limit=50 +) + +# Iterate over the fetched vectors +for vec_id, vector in fetch_response.vectors.items(): + print(f"Vector ID: {vector.id}") + print(f"Metadata: {vector.metadata}") + +# Handle pagination if there are more results +if fetch_response.pagination: + next_page = index.fetch_by_metadata( + filter={"genre": {"$in": ["comedy", "drama"]}, "year": {"$eq": 2019}}, + namespace="example-namespace", + pagination_token=fetch_response.pagination.next + ) +``` + ## Update vectors The following example updates vectors by ID. 
diff --git a/pinecone/__init__.py b/pinecone/__init__.py index 3b9dde4f6..e41825808 100644 --- a/pinecone/__init__.py +++ b/pinecone/__init__.py @@ -46,6 +46,7 @@ "SearchQueryVector": ("pinecone.db_data.dataclasses", "SearchQueryVector"), "SearchRerank": ("pinecone.db_data.dataclasses", "SearchRerank"), "FetchResponse": ("pinecone.db_data.dataclasses", "FetchResponse"), + "FetchByMetadataResponse": ("pinecone.db_data.dataclasses", "FetchByMetadataResponse"), "DeleteRequest": ("pinecone.db_data.models", "DeleteRequest"), "DescribeIndexStatsRequest": ("pinecone.db_data.models", "DescribeIndexStatsRequest"), "DescribeIndexStatsResponse": ("pinecone.db_data.models", "IndexDescription"), diff --git a/pinecone/core/grpc/protos/db_data_2025_04_pb2.py b/pinecone/core/grpc/protos/db_data_2025_04_pb2.py deleted file mode 100644 index caf7aa594..000000000 --- a/pinecone/core/grpc/protos/db_data_2025_04_pb2.py +++ /dev/null @@ -1,146 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# NO CHECKED-IN PROTOBUF GENCODE -# source: db_data_2025-04.proto -# Protobuf Python Version: 5.29.0 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import runtime_version as _runtime_version -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -_runtime_version.ValidateProtobufRuntimeVersion( - _runtime_version.Domain.PUBLIC, - 5, - 29, - 0, - '', - 'db_data_2025-04.proto' -) -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2 -from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 -from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x15\x64\x62_data_2025-04.proto\x1a\x1cgoogle/protobuf/struct.proto\x1a\x1cgoogle/api/annotations.proto\x1a\x1fgoogle/api/field_behavior.proto\"J\n\x0cSparseValues\x12\x1d\n\x07indices\x18\x01 \x03(\rB\x03\xe0\x41\x02R\x07indices\x12\x1b\n\x06values\x18\x02 \x03(\x02\x42\x03\xe0\x41\x02R\x06values\"\x9e\x01\n\x06Vector\x12\x13\n\x02id\x18\x01 \x01(\tB\x03\xe0\x41\x02R\x02id\x12\x16\n\x06values\x18\x02 \x03(\x02R\x06values\x12\x32\n\rsparse_values\x18\x04 \x01(\x0b\x32\r.SparseValuesR\x0csparseValues\x12\x33\n\x08metadata\x18\x03 \x01(\x0b\x32\x17.google.protobuf.StructR\x08metadata\"\xba\x01\n\x0cScoredVector\x12\x13\n\x02id\x18\x01 \x01(\tB\x03\xe0\x41\x02R\x02id\x12\x14\n\x05score\x18\x02 \x01(\x02R\x05score\x12\x16\n\x06values\x18\x03 \x03(\x02R\x06values\x12\x32\n\rsparse_values\x18\x05 \x01(\x0b\x32\r.SparseValuesR\x0csparseValues\x12\x33\n\x08metadata\x18\x04 \x01(\x0b\x32\x17.google.protobuf.StructR\x08metadata\"\xa1\x01\n\x0cRequestUnion\x12(\n\x06upsert\x18\x01 
\x01(\x0b\x32\x0e.UpsertRequestH\x00R\x06upsert\x12(\n\x06\x64\x65lete\x18\x02 \x01(\x0b\x32\x0e.DeleteRequestH\x00R\x06\x64\x65lete\x12(\n\x06update\x18\x03 \x01(\x0b\x32\x0e.UpdateRequestH\x00R\x06updateB\x13\n\x11RequestUnionInner\"U\n\rUpsertRequest\x12&\n\x07vectors\x18\x01 \x03(\x0b\x32\x07.VectorB\x03\xe0\x41\x02R\x07vectors\x12\x1c\n\tnamespace\x18\x02 \x01(\tR\tnamespace\"7\n\x0eUpsertResponse\x12%\n\x0eupserted_count\x18\x01 \x01(\rR\rupsertedCount\"\x8f\x01\n\rDeleteRequest\x12\x10\n\x03ids\x18\x01 \x03(\tR\x03ids\x12\x1d\n\ndelete_all\x18\x02 \x01(\x08R\tdeleteAll\x12\x1c\n\tnamespace\x18\x03 \x01(\tR\tnamespace\x12/\n\x06\x66ilter\x18\x04 \x01(\x0b\x32\x17.google.protobuf.StructR\x06\x66ilter\"\x10\n\x0e\x44\x65leteResponse\"C\n\x0c\x46\x65tchRequest\x12\x15\n\x03ids\x18\x01 \x03(\tB\x03\xe0\x41\x02R\x03ids\x12\x1c\n\tnamespace\x18\x02 \x01(\tR\tnamespace\"\xd6\x01\n\rFetchResponse\x12\x35\n\x07vectors\x18\x01 \x03(\x0b\x32\x1b.FetchResponse.VectorsEntryR\x07vectors\x12\x1c\n\tnamespace\x18\x02 \x01(\tR\tnamespace\x12!\n\x05usage\x18\x03 \x01(\x0b\x32\x06.UsageH\x00R\x05usage\x88\x01\x01\x1a\x43\n\x0cVectorsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x1d\n\x05value\x18\x02 \x01(\x0b\x32\x07.VectorR\x05value:\x02\x38\x01\x42\x08\n\x06_usage\"\xbd\x01\n\x0bListRequest\x12\x1b\n\x06prefix\x18\x01 \x01(\tH\x00R\x06prefix\x88\x01\x01\x12\x19\n\x05limit\x18\x02 \x01(\rH\x01R\x05limit\x88\x01\x01\x12.\n\x10pagination_token\x18\x03 \x01(\tH\x02R\x0fpaginationToken\x88\x01\x01\x12\x1c\n\tnamespace\x18\x04 \x01(\tR\tnamespaceB\t\n\x07_prefixB\x08\n\x06_limitB\x13\n\x11_pagination_token\" \n\nPagination\x12\x12\n\x04next\x18\x01 \x01(\tR\x04next\"\x1a\n\x08ListItem\x12\x0e\n\x02id\x18\x01 \x01(\tR\x02id\"\xbf\x01\n\x0cListResponse\x12#\n\x07vectors\x18\x01 \x03(\x0b\x32\t.ListItemR\x07vectors\x12\x30\n\npagination\x18\x02 \x01(\x0b\x32\x0b.PaginationH\x00R\npagination\x88\x01\x01\x12\x1c\n\tnamespace\x18\x03 \x01(\tR\tnamespace\x12!\n\x05usage\x18\x04 
\x01(\x0b\x32\x06.UsageH\x01R\x05usage\x88\x01\x01\x42\r\n\x0b_paginationB\x08\n\x06_usage\"\xbd\x01\n\x0bQueryVector\x12\x16\n\x06values\x18\x01 \x03(\x02R\x06values\x12\x32\n\rsparse_values\x18\x05 \x01(\x0b\x32\r.SparseValuesR\x0csparseValues\x12\x13\n\x05top_k\x18\x02 \x01(\rR\x04topK\x12\x1c\n\tnamespace\x18\x03 \x01(\tR\tnamespace\x12/\n\x06\x66ilter\x18\x04 \x01(\x0b\x32\x17.google.protobuf.StructR\x06\x66ilter\"\xd1\x02\n\x0cQueryRequest\x12\x1c\n\tnamespace\x18\x01 \x01(\tR\tnamespace\x12\x18\n\x05top_k\x18\x02 \x01(\rB\x03\xe0\x41\x02R\x04topK\x12/\n\x06\x66ilter\x18\x03 \x01(\x0b\x32\x17.google.protobuf.StructR\x06\x66ilter\x12%\n\x0einclude_values\x18\x04 \x01(\x08R\rincludeValues\x12)\n\x10include_metadata\x18\x05 \x01(\x08R\x0fincludeMetadata\x12*\n\x07queries\x18\x06 \x03(\x0b\x32\x0c.QueryVectorB\x02\x18\x01R\x07queries\x12\x16\n\x06vector\x18\x07 \x03(\x02R\x06vector\x12\x32\n\rsparse_vector\x18\t \x01(\x0b\x32\r.SparseValuesR\x0csparseVector\x12\x0e\n\x02id\x18\x08 \x01(\tR\x02id\"[\n\x12SingleQueryResults\x12\'\n\x07matches\x18\x01 \x03(\x0b\x32\r.ScoredVectorR\x07matches\x12\x1c\n\tnamespace\x18\x02 \x01(\tR\tnamespace\"\xb6\x01\n\rQueryResponse\x12\x31\n\x07results\x18\x01 \x03(\x0b\x32\x13.SingleQueryResultsB\x02\x18\x01R\x07results\x12\'\n\x07matches\x18\x02 \x03(\x0b\x32\r.ScoredVectorR\x07matches\x12\x1c\n\tnamespace\x18\x03 \x01(\tR\tnamespace\x12!\n\x05usage\x18\x04 \x01(\x0b\x32\x06.UsageH\x00R\x05usage\x88\x01\x01\x42\x08\n\x06_usage\":\n\x05Usage\x12\"\n\nread_units\x18\x01 \x01(\rH\x00R\treadUnits\x88\x01\x01\x42\r\n\x0b_read_units\"\xca\x01\n\rUpdateRequest\x12\x13\n\x02id\x18\x01 \x01(\tB\x03\xe0\x41\x02R\x02id\x12\x16\n\x06values\x18\x02 \x03(\x02R\x06values\x12\x32\n\rsparse_values\x18\x05 \x01(\x0b\x32\r.SparseValuesR\x0csparseValues\x12:\n\x0cset_metadata\x18\x03 \x01(\x0b\x32\x17.google.protobuf.StructR\x0bsetMetadata\x12\x1c\n\tnamespace\x18\x04 
\x01(\tR\tnamespace\"\x10\n\x0eUpdateResponse\"L\n\x19\x44\x65scribeIndexStatsRequest\x12/\n\x06\x66ilter\x18\x01 \x01(\x0b\x32\x17.google.protobuf.StructR\x06\x66ilter\"5\n\x10NamespaceSummary\x12!\n\x0cvector_count\x18\x01 \x01(\rR\x0bvectorCount\"\x81\x01\n\x15ListNamespacesRequest\x12.\n\x10pagination_token\x18\x01 \x01(\tH\x00R\x0fpaginationToken\x88\x01\x01\x12\x19\n\x05limit\x18\x02 \x01(\rH\x01R\x05limit\x88\x01\x01\x42\x13\n\x11_pagination_tokenB\x08\n\x06_limit\"\x90\x01\n\x16ListNamespacesResponse\x12\x35\n\nnamespaces\x18\x01 \x03(\x0b\x32\x15.NamespaceDescriptionR\nnamespaces\x12\x30\n\npagination\x18\x02 \x01(\x0b\x32\x0b.PaginationH\x00R\npagination\x88\x01\x01\x42\r\n\x0b_pagination\"8\n\x18\x44\x65scribeNamespaceRequest\x12\x1c\n\tnamespace\x18\x01 \x01(\tR\tnamespace\"M\n\x14NamespaceDescription\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12!\n\x0crecord_count\x18\x02 \x01(\x04R\x0brecordCount\"6\n\x16\x44\x65leteNamespaceRequest\x12\x1c\n\tnamespace\x18\x01 \x01(\tR\tnamespace\"\x9f\x03\n\x1a\x44\x65scribeIndexStatsResponse\x12K\n\nnamespaces\x18\x01 \x03(\x0b\x32+.DescribeIndexStatsResponse.NamespacesEntryR\nnamespaces\x12!\n\tdimension\x18\x02 \x01(\rH\x00R\tdimension\x88\x01\x01\x12%\n\x0eindex_fullness\x18\x03 \x01(\x02R\rindexFullness\x12,\n\x12total_vector_count\x18\x04 \x01(\rR\x10totalVectorCount\x12\x1b\n\x06metric\x18\x05 \x01(\tH\x01R\x06metric\x88\x01\x01\x12$\n\x0bvector_type\x18\x06 \x01(\tH\x02R\nvectorType\x88\x01\x01\x1aP\n\x0fNamespacesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\'\n\x05value\x18\x02 
\x01(\x0b\x32\x11.NamespaceSummaryR\x05value:\x02\x38\x01\x42\x0c\n\n_dimensionB\t\n\x07_metricB\x0e\n\x0c_vector_type2\xd7\x06\n\rVectorService\x12\x45\n\x06Upsert\x12\x0e.UpsertRequest\x1a\x0f.UpsertResponse\"\x1a\x82\xd3\xe4\x93\x02\x14\"\x0f/vectors/upsert:\x01*\x12X\n\x06\x44\x65lete\x12\x0e.DeleteRequest\x1a\x0f.DeleteResponse\"-\x82\xd3\xe4\x93\x02\'\"\x0f/vectors/delete:\x01*Z\x11*\x0f/vectors/delete\x12>\n\x05\x46\x65tch\x12\r.FetchRequest\x1a\x0e.FetchResponse\"\x16\x82\xd3\xe4\x93\x02\x10\x12\x0e/vectors/fetch\x12:\n\x04List\x12\x0c.ListRequest\x1a\r.ListResponse\"\x15\x82\xd3\xe4\x93\x02\x0f\x12\r/vectors/list\x12\x39\n\x05Query\x12\r.QueryRequest\x1a\x0e.QueryResponse\"\x11\x82\xd3\xe4\x93\x02\x0b\"\x06/query:\x01*\x12\x45\n\x06Update\x12\x0e.UpdateRequest\x1a\x0f.UpdateResponse\"\x1a\x82\xd3\xe4\x93\x02\x14\"\x0f/vectors/update:\x01*\x12\x88\x01\n\x12\x44\x65scribeIndexStats\x12\x1a.DescribeIndexStatsRequest\x1a\x1b.DescribeIndexStatsResponse\"9\x82\xd3\xe4\x93\x02\x33\"\x15/describe_index_stats:\x01*Z\x17\x12\x15/describe_index_stats\x12V\n\x0eListNamespaces\x12\x16.ListNamespacesRequest\x1a\x17.ListNamespacesResponse\"\x13\x82\xd3\xe4\x93\x02\r\x12\x0b/namespaces\x12\x66\n\x11\x44\x65scribeNamespace\x12\x19.DescribeNamespaceRequest\x1a\x15.NamespaceDescription\"\x1f\x82\xd3\xe4\x93\x02\x19\x12\x17/namespaces/{namespace}\x12\\\n\x0f\x44\x65leteNamespace\x12\x17.DeleteNamespaceRequest\x1a\x0f.DeleteResponse\"\x1f\x82\xd3\xe4\x93\x02\x19*\x17/namespaces/{namespace}BS\n\x11io.pinecone.protoP\x01Z\n\x05\x46\x65tch\x12\r.FetchRequest\x1a\x0e.FetchResponse\"\x16\x82\xd3\xe4\x93\x02\x10\x12\x0e/vectors/fetch\x12:\n\x04List\x12\x0c.ListRequest\x1a\r.ListResponse\"\x15\x82\xd3\xe4\x93\x02\x0f\x12\r/vectors/list\x12\x39\n\x05Query\x12\r.QueryRequest\x1a\x0e.QueryResponse\"\x11\x82\xd3\xe4\x93\x02\x0b\"\x06/query:\x01*\x12\x45\n\x06Update\x12\x0e.UpdateRequest\x1a\x0f.UpdateResponse\"\x1a\x82\xd3\xe4\x93\x02\x14\"\x0f/vectors/update:\x01*\x12\x88\x01\n\x12\x44\x
65scribeIndexStats\x12\x1a.DescribeIndexStatsRequest\x1a\x1b.DescribeIndexStatsResponse\"9\x82\xd3\xe4\x93\x02\x33\"\x15/describe_index_stats:\x01*Z\x17\x12\x15/describe_index_stats\x12V\n\x0eListNamespaces\x12\x16.ListNamespacesRequest\x1a\x17.ListNamespacesResponse\"\x13\x82\xd3\xe4\x93\x02\r\x12\x0b/namespaces\x12\x66\n\x11\x44\x65scribeNamespace\x12\x19.DescribeNamespaceRequest\x1a\x15.NamespaceDescription\"\x1f\x82\xd3\xe4\x93\x02\x19\x12\x17/namespaces/{namespace}\x12\\\n\x0f\x44\x65leteNamespace\x12\x17.DeleteNamespaceRequest\x1a\x0f.DeleteResponse\"\x1f\x82\xd3\xe4\x93\x02\x19*\x17/namespaces/{namespace}\x12V\n\x0f\x43reateNamespace\x12\x17.CreateNamespaceRequest\x1a\x15.NamespaceDescription\"\x13\x82\xd3\xe4\x93\x02\r\"\x0b/namespaces\x12k\n\x0f\x46\x65tchByMetadata\x12\x17.FetchByMetadataRequest\x1a\x18.FetchByMetadataResponse\"%\x82\xd3\xe4\x93\x02\x1f\"\x1a/vectors/fetch_by_metadata:\x01*BS\n\x11io.pinecone.protoP\x01Z None: ... +class FetchByMetadataRequest(_message.Message): + __slots__ = ("namespace", "filter", "limit", "pagination_token") + NAMESPACE_FIELD_NUMBER: _ClassVar[int] + FILTER_FIELD_NUMBER: _ClassVar[int] + LIMIT_FIELD_NUMBER: _ClassVar[int] + PAGINATION_TOKEN_FIELD_NUMBER: _ClassVar[int] + namespace: str + filter: _struct_pb2.Struct + limit: int + pagination_token: str + def __init__(self, namespace: _Optional[str] = ..., filter: _Optional[_Union[_struct_pb2.Struct, _Mapping]] = ..., limit: _Optional[int] = ..., pagination_token: _Optional[str] = ...) -> None: ... + +class FetchByMetadataResponse(_message.Message): + __slots__ = ("vectors", "namespace", "usage", "pagination") + class VectorsEntry(_message.Message): + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: Vector + def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[Vector, _Mapping]] = ...) -> None: ... 
+ VECTORS_FIELD_NUMBER: _ClassVar[int] + NAMESPACE_FIELD_NUMBER: _ClassVar[int] + USAGE_FIELD_NUMBER: _ClassVar[int] + PAGINATION_FIELD_NUMBER: _ClassVar[int] + vectors: _containers.MessageMap[str, Vector] + namespace: str + usage: Usage + pagination: Pagination + def __init__(self, vectors: _Optional[_Mapping[str, Vector]] = ..., namespace: _Optional[str] = ..., usage: _Optional[_Union[Usage, _Mapping]] = ..., pagination: _Optional[_Union[Pagination, _Mapping]] = ...) -> None: ... + class FetchResponse(_message.Message): __slots__ = ("vectors", "namespace", "usage") class VectorsEntry(_message.Message): @@ -206,22 +237,28 @@ class Usage(_message.Message): def __init__(self, read_units: _Optional[int] = ...) -> None: ... class UpdateRequest(_message.Message): - __slots__ = ("id", "values", "sparse_values", "set_metadata", "namespace") + __slots__ = ("id", "values", "sparse_values", "set_metadata", "namespace", "filter", "dry_run") ID_FIELD_NUMBER: _ClassVar[int] VALUES_FIELD_NUMBER: _ClassVar[int] SPARSE_VALUES_FIELD_NUMBER: _ClassVar[int] SET_METADATA_FIELD_NUMBER: _ClassVar[int] NAMESPACE_FIELD_NUMBER: _ClassVar[int] + FILTER_FIELD_NUMBER: _ClassVar[int] + DRY_RUN_FIELD_NUMBER: _ClassVar[int] id: str values: _containers.RepeatedScalarFieldContainer[float] sparse_values: SparseValues set_metadata: _struct_pb2.Struct namespace: str - def __init__(self, id: _Optional[str] = ..., values: _Optional[_Iterable[float]] = ..., sparse_values: _Optional[_Union[SparseValues, _Mapping]] = ..., set_metadata: _Optional[_Union[_struct_pb2.Struct, _Mapping]] = ..., namespace: _Optional[str] = ...) -> None: ... 
+ filter: _struct_pb2.Struct + dry_run: bool + def __init__(self, id: _Optional[str] = ..., values: _Optional[_Iterable[float]] = ..., sparse_values: _Optional[_Union[SparseValues, _Mapping]] = ..., set_metadata: _Optional[_Union[_struct_pb2.Struct, _Mapping]] = ..., namespace: _Optional[str] = ..., filter: _Optional[_Union[_struct_pb2.Struct, _Mapping]] = ..., dry_run: bool = ...) -> None: ... class UpdateResponse(_message.Message): - __slots__ = () - def __init__(self) -> None: ... + __slots__ = ("matched_records",) + MATCHED_RECORDS_FIELD_NUMBER: _ClassVar[int] + matched_records: int + def __init__(self, matched_records: _Optional[int] = ...) -> None: ... class DescribeIndexStatsRequest(_message.Message): __slots__ = ("filter",) @@ -236,20 +273,24 @@ class NamespaceSummary(_message.Message): def __init__(self, vector_count: _Optional[int] = ...) -> None: ... class ListNamespacesRequest(_message.Message): - __slots__ = ("pagination_token", "limit") + __slots__ = ("pagination_token", "limit", "prefix") PAGINATION_TOKEN_FIELD_NUMBER: _ClassVar[int] LIMIT_FIELD_NUMBER: _ClassVar[int] + PREFIX_FIELD_NUMBER: _ClassVar[int] pagination_token: str limit: int - def __init__(self, pagination_token: _Optional[str] = ..., limit: _Optional[int] = ...) -> None: ... + prefix: str + def __init__(self, pagination_token: _Optional[str] = ..., limit: _Optional[int] = ..., prefix: _Optional[str] = ...) -> None: ... class ListNamespacesResponse(_message.Message): - __slots__ = ("namespaces", "pagination") + __slots__ = ("namespaces", "pagination", "total_count") NAMESPACES_FIELD_NUMBER: _ClassVar[int] PAGINATION_FIELD_NUMBER: _ClassVar[int] + TOTAL_COUNT_FIELD_NUMBER: _ClassVar[int] namespaces: _containers.RepeatedCompositeFieldContainer[NamespaceDescription] pagination: Pagination - def __init__(self, namespaces: _Optional[_Iterable[_Union[NamespaceDescription, _Mapping]]] = ..., pagination: _Optional[_Union[Pagination, _Mapping]] = ...) -> None: ... 
+ total_count: int + def __init__(self, namespaces: _Optional[_Iterable[_Union[NamespaceDescription, _Mapping]]] = ..., pagination: _Optional[_Union[Pagination, _Mapping]] = ..., total_count: _Optional[int] = ...) -> None: ... class DescribeNamespaceRequest(_message.Message): __slots__ = ("namespace",) @@ -257,13 +298,31 @@ class DescribeNamespaceRequest(_message.Message): namespace: str def __init__(self, namespace: _Optional[str] = ...) -> None: ... +class CreateNamespaceRequest(_message.Message): + __slots__ = ("name", "schema") + NAME_FIELD_NUMBER: _ClassVar[int] + SCHEMA_FIELD_NUMBER: _ClassVar[int] + name: str + schema: MetadataSchema + def __init__(self, name: _Optional[str] = ..., schema: _Optional[_Union[MetadataSchema, _Mapping]] = ...) -> None: ... + +class IndexedFields(_message.Message): + __slots__ = ("fields",) + FIELDS_FIELD_NUMBER: _ClassVar[int] + fields: _containers.RepeatedScalarFieldContainer[str] + def __init__(self, fields: _Optional[_Iterable[str]] = ...) -> None: ... + class NamespaceDescription(_message.Message): - __slots__ = ("name", "record_count") + __slots__ = ("name", "record_count", "schema", "indexed_fields") NAME_FIELD_NUMBER: _ClassVar[int] RECORD_COUNT_FIELD_NUMBER: _ClassVar[int] + SCHEMA_FIELD_NUMBER: _ClassVar[int] + INDEXED_FIELDS_FIELD_NUMBER: _ClassVar[int] name: str record_count: int - def __init__(self, name: _Optional[str] = ..., record_count: _Optional[int] = ...) -> None: ... + schema: MetadataSchema + indexed_fields: IndexedFields + def __init__(self, name: _Optional[str] = ..., record_count: _Optional[int] = ..., schema: _Optional[_Union[MetadataSchema, _Mapping]] = ..., indexed_fields: _Optional[_Union[IndexedFields, _Mapping]] = ...) -> None: ... class DeleteNamespaceRequest(_message.Message): __slots__ = ("namespace",) @@ -272,7 +331,7 @@ class DeleteNamespaceRequest(_message.Message): def __init__(self, namespace: _Optional[str] = ...) -> None: ... 
class DescribeIndexStatsResponse(_message.Message): - __slots__ = ("namespaces", "dimension", "index_fullness", "total_vector_count", "metric", "vector_type") + __slots__ = ("namespaces", "dimension", "index_fullness", "total_vector_count", "metric", "vector_type", "memory_fullness", "storage_fullness") class NamespacesEntry(_message.Message): __slots__ = ("key", "value") KEY_FIELD_NUMBER: _ClassVar[int] @@ -286,10 +345,33 @@ class DescribeIndexStatsResponse(_message.Message): TOTAL_VECTOR_COUNT_FIELD_NUMBER: _ClassVar[int] METRIC_FIELD_NUMBER: _ClassVar[int] VECTOR_TYPE_FIELD_NUMBER: _ClassVar[int] + MEMORY_FULLNESS_FIELD_NUMBER: _ClassVar[int] + STORAGE_FULLNESS_FIELD_NUMBER: _ClassVar[int] namespaces: _containers.MessageMap[str, NamespaceSummary] dimension: int index_fullness: float total_vector_count: int metric: str vector_type: str - def __init__(self, namespaces: _Optional[_Mapping[str, NamespaceSummary]] = ..., dimension: _Optional[int] = ..., index_fullness: _Optional[float] = ..., total_vector_count: _Optional[int] = ..., metric: _Optional[str] = ..., vector_type: _Optional[str] = ...) -> None: ... + memory_fullness: float + storage_fullness: float + def __init__(self, namespaces: _Optional[_Mapping[str, NamespaceSummary]] = ..., dimension: _Optional[int] = ..., index_fullness: _Optional[float] = ..., total_vector_count: _Optional[int] = ..., metric: _Optional[str] = ..., vector_type: _Optional[str] = ..., memory_fullness: _Optional[float] = ..., storage_fullness: _Optional[float] = ...) -> None: ... + +class MetadataFieldProperties(_message.Message): + __slots__ = ("filterable",) + FILTERABLE_FIELD_NUMBER: _ClassVar[int] + filterable: bool + def __init__(self, filterable: bool = ...) -> None: ... 
+ +class MetadataSchema(_message.Message): + __slots__ = ("fields",) + class FieldsEntry(_message.Message): + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: MetadataFieldProperties + def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[MetadataFieldProperties, _Mapping]] = ...) -> None: ... + FIELDS_FIELD_NUMBER: _ClassVar[int] + fields: _containers.MessageMap[str, MetadataFieldProperties] + def __init__(self, fields: _Optional[_Mapping[str, MetadataFieldProperties]] = ...) -> None: ... diff --git a/pinecone/core/grpc/protos/db_data_2025_04_pb2_grpc.py b/pinecone/core/grpc/protos/db_data_2025_10_pb2_grpc.py similarity index 63% rename from pinecone/core/grpc/protos/db_data_2025_04_pb2_grpc.py rename to pinecone/core/grpc/protos/db_data_2025_10_pb2_grpc.py index d733efcf3..2350d1352 100644 --- a/pinecone/core/grpc/protos/db_data_2025_04_pb2_grpc.py +++ b/pinecone/core/grpc/protos/db_data_2025_10_pb2_grpc.py @@ -2,7 +2,7 @@ """Client and server classes corresponding to protobuf-defined services.""" import grpc -import pinecone.core.grpc.protos.db_data_2025_04_pb2 as db__data__2025__04__pb2 +import pinecone.core.grpc.protos.db_data_2025_10_pb2 as db__data__2025__10__pb2 class VectorServiceStub(object): @@ -18,53 +18,63 @@ def __init__(self, channel): """ self.Upsert = channel.unary_unary( '/VectorService/Upsert', - request_serializer=db__data__2025__04__pb2.UpsertRequest.SerializeToString, - response_deserializer=db__data__2025__04__pb2.UpsertResponse.FromString, + request_serializer=db__data__2025__10__pb2.UpsertRequest.SerializeToString, + response_deserializer=db__data__2025__10__pb2.UpsertResponse.FromString, ) self.Delete = channel.unary_unary( '/VectorService/Delete', - request_serializer=db__data__2025__04__pb2.DeleteRequest.SerializeToString, - response_deserializer=db__data__2025__04__pb2.DeleteResponse.FromString, + 
request_serializer=db__data__2025__10__pb2.DeleteRequest.SerializeToString, + response_deserializer=db__data__2025__10__pb2.DeleteResponse.FromString, ) self.Fetch = channel.unary_unary( '/VectorService/Fetch', - request_serializer=db__data__2025__04__pb2.FetchRequest.SerializeToString, - response_deserializer=db__data__2025__04__pb2.FetchResponse.FromString, + request_serializer=db__data__2025__10__pb2.FetchRequest.SerializeToString, + response_deserializer=db__data__2025__10__pb2.FetchResponse.FromString, ) self.List = channel.unary_unary( '/VectorService/List', - request_serializer=db__data__2025__04__pb2.ListRequest.SerializeToString, - response_deserializer=db__data__2025__04__pb2.ListResponse.FromString, + request_serializer=db__data__2025__10__pb2.ListRequest.SerializeToString, + response_deserializer=db__data__2025__10__pb2.ListResponse.FromString, ) self.Query = channel.unary_unary( '/VectorService/Query', - request_serializer=db__data__2025__04__pb2.QueryRequest.SerializeToString, - response_deserializer=db__data__2025__04__pb2.QueryResponse.FromString, + request_serializer=db__data__2025__10__pb2.QueryRequest.SerializeToString, + response_deserializer=db__data__2025__10__pb2.QueryResponse.FromString, ) self.Update = channel.unary_unary( '/VectorService/Update', - request_serializer=db__data__2025__04__pb2.UpdateRequest.SerializeToString, - response_deserializer=db__data__2025__04__pb2.UpdateResponse.FromString, + request_serializer=db__data__2025__10__pb2.UpdateRequest.SerializeToString, + response_deserializer=db__data__2025__10__pb2.UpdateResponse.FromString, ) self.DescribeIndexStats = channel.unary_unary( '/VectorService/DescribeIndexStats', - request_serializer=db__data__2025__04__pb2.DescribeIndexStatsRequest.SerializeToString, - response_deserializer=db__data__2025__04__pb2.DescribeIndexStatsResponse.FromString, + request_serializer=db__data__2025__10__pb2.DescribeIndexStatsRequest.SerializeToString, + 
response_deserializer=db__data__2025__10__pb2.DescribeIndexStatsResponse.FromString, ) self.ListNamespaces = channel.unary_unary( '/VectorService/ListNamespaces', - request_serializer=db__data__2025__04__pb2.ListNamespacesRequest.SerializeToString, - response_deserializer=db__data__2025__04__pb2.ListNamespacesResponse.FromString, + request_serializer=db__data__2025__10__pb2.ListNamespacesRequest.SerializeToString, + response_deserializer=db__data__2025__10__pb2.ListNamespacesResponse.FromString, ) self.DescribeNamespace = channel.unary_unary( '/VectorService/DescribeNamespace', - request_serializer=db__data__2025__04__pb2.DescribeNamespaceRequest.SerializeToString, - response_deserializer=db__data__2025__04__pb2.NamespaceDescription.FromString, + request_serializer=db__data__2025__10__pb2.DescribeNamespaceRequest.SerializeToString, + response_deserializer=db__data__2025__10__pb2.NamespaceDescription.FromString, ) self.DeleteNamespace = channel.unary_unary( '/VectorService/DeleteNamespace', - request_serializer=db__data__2025__04__pb2.DeleteNamespaceRequest.SerializeToString, - response_deserializer=db__data__2025__04__pb2.DeleteResponse.FromString, + request_serializer=db__data__2025__10__pb2.DeleteNamespaceRequest.SerializeToString, + response_deserializer=db__data__2025__10__pb2.DeleteResponse.FromString, + ) + self.CreateNamespace = channel.unary_unary( + '/VectorService/CreateNamespace', + request_serializer=db__data__2025__10__pb2.CreateNamespaceRequest.SerializeToString, + response_deserializer=db__data__2025__10__pb2.NamespaceDescription.FromString, + ) + self.FetchByMetadata = channel.unary_unary( + '/VectorService/FetchByMetadata', + request_serializer=db__data__2025__10__pb2.FetchByMetadataRequest.SerializeToString, + response_deserializer=db__data__2025__10__pb2.FetchByMetadataResponse.FromString, ) @@ -157,9 +167,13 @@ def DescribeIndexStats(self, request, context): def ListNamespaces(self, request, context): """List namespaces - Get a list of all 
[namespaces](https://docs.pinecone.io/guides/index-data/indexing-overview#namespaces) in a serverless index. + List all namespaces in a serverless index. + + Up to 100 namespaces are returned at a time by default, in sorted order (bitwise "C" collation). If the `limit` parameter is set, up to that number of namespaces are returned instead. Whenever there are additional namespaces to return, the response also includes a `pagination_token` that you can use to get the next batch of namespaces. When the response does not include a `pagination_token`, there are no more namespaces to return. - Up to 100 namespaces are returned at a time by default, in sorted order (bitwise ā€œCā€ collation). If the `limit` parameter is set, up to that number of namespaces are returned instead. Whenever there are additional namespaces to return, the response also includes a `pagination_token` that you can use to get the next batch of namespaces. When the response does not include a `pagination_token`, there are no more namespaces to return. + For guidance and examples, see [Manage namespaces](https://docs.pinecone.io/guides/manage-data/manage-namespaces). + + **Note:** This operation is not supported for pod-based indexes. """ context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') @@ -168,7 +182,11 @@ def ListNamespaces(self, request, context): def DescribeNamespace(self, request, context): """Describe a namespace - Describe a [namespace](https://docs.pinecone.io/guides/index-data/indexing-overview#namespaces) in a serverless index, including the total number of vectors in the namespace. + Describe a namespace in a serverless index, including the total number of vectors in the namespace. + + For guidance and examples, see [Manage namespaces](https://docs.pinecone.io/guides/manage-data/manage-namespaces). + + **Note:** This operation is not supported for pod-based indexes. 
""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') @@ -177,64 +195,97 @@ def DescribeNamespace(self, request, context): def DeleteNamespace(self, request, context): """Delete a namespace - Delete a namespace from an index. + Delete a namespace from a serverless index. Deleting a namespace is irreversible; all data in the namespace is permanently deleted. + + For guidance and examples, see [Manage namespaces](https://docs.pinecone.io/guides/manage-data/manage-namespaces). + + **Note:** This operation is not supported for pod-based indexes. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def CreateNamespace(self, request, context): + """Create a namespace + + Create a namespace in a serverless index. + + For guidance and examples, see [Manage namespaces](https://docs.pinecone.io/guides/manage-data/manage-namespaces). + + **Note:** This operation is not supported for pod-based indexes. 
""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') + def FetchByMetadata(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + def add_VectorServiceServicer_to_server(servicer, server): rpc_method_handlers = { 'Upsert': grpc.unary_unary_rpc_method_handler( servicer.Upsert, - request_deserializer=db__data__2025__04__pb2.UpsertRequest.FromString, - response_serializer=db__data__2025__04__pb2.UpsertResponse.SerializeToString, + request_deserializer=db__data__2025__10__pb2.UpsertRequest.FromString, + response_serializer=db__data__2025__10__pb2.UpsertResponse.SerializeToString, ), 'Delete': grpc.unary_unary_rpc_method_handler( servicer.Delete, - request_deserializer=db__data__2025__04__pb2.DeleteRequest.FromString, - response_serializer=db__data__2025__04__pb2.DeleteResponse.SerializeToString, + request_deserializer=db__data__2025__10__pb2.DeleteRequest.FromString, + response_serializer=db__data__2025__10__pb2.DeleteResponse.SerializeToString, ), 'Fetch': grpc.unary_unary_rpc_method_handler( servicer.Fetch, - request_deserializer=db__data__2025__04__pb2.FetchRequest.FromString, - response_serializer=db__data__2025__04__pb2.FetchResponse.SerializeToString, + request_deserializer=db__data__2025__10__pb2.FetchRequest.FromString, + response_serializer=db__data__2025__10__pb2.FetchResponse.SerializeToString, ), 'List': grpc.unary_unary_rpc_method_handler( servicer.List, - request_deserializer=db__data__2025__04__pb2.ListRequest.FromString, - response_serializer=db__data__2025__04__pb2.ListResponse.SerializeToString, + request_deserializer=db__data__2025__10__pb2.ListRequest.FromString, + response_serializer=db__data__2025__10__pb2.ListResponse.SerializeToString, ), 'Query': 
grpc.unary_unary_rpc_method_handler( servicer.Query, - request_deserializer=db__data__2025__04__pb2.QueryRequest.FromString, - response_serializer=db__data__2025__04__pb2.QueryResponse.SerializeToString, + request_deserializer=db__data__2025__10__pb2.QueryRequest.FromString, + response_serializer=db__data__2025__10__pb2.QueryResponse.SerializeToString, ), 'Update': grpc.unary_unary_rpc_method_handler( servicer.Update, - request_deserializer=db__data__2025__04__pb2.UpdateRequest.FromString, - response_serializer=db__data__2025__04__pb2.UpdateResponse.SerializeToString, + request_deserializer=db__data__2025__10__pb2.UpdateRequest.FromString, + response_serializer=db__data__2025__10__pb2.UpdateResponse.SerializeToString, ), 'DescribeIndexStats': grpc.unary_unary_rpc_method_handler( servicer.DescribeIndexStats, - request_deserializer=db__data__2025__04__pb2.DescribeIndexStatsRequest.FromString, - response_serializer=db__data__2025__04__pb2.DescribeIndexStatsResponse.SerializeToString, + request_deserializer=db__data__2025__10__pb2.DescribeIndexStatsRequest.FromString, + response_serializer=db__data__2025__10__pb2.DescribeIndexStatsResponse.SerializeToString, ), 'ListNamespaces': grpc.unary_unary_rpc_method_handler( servicer.ListNamespaces, - request_deserializer=db__data__2025__04__pb2.ListNamespacesRequest.FromString, - response_serializer=db__data__2025__04__pb2.ListNamespacesResponse.SerializeToString, + request_deserializer=db__data__2025__10__pb2.ListNamespacesRequest.FromString, + response_serializer=db__data__2025__10__pb2.ListNamespacesResponse.SerializeToString, ), 'DescribeNamespace': grpc.unary_unary_rpc_method_handler( servicer.DescribeNamespace, - request_deserializer=db__data__2025__04__pb2.DescribeNamespaceRequest.FromString, - response_serializer=db__data__2025__04__pb2.NamespaceDescription.SerializeToString, + request_deserializer=db__data__2025__10__pb2.DescribeNamespaceRequest.FromString, + 
response_serializer=db__data__2025__10__pb2.NamespaceDescription.SerializeToString, ), 'DeleteNamespace': grpc.unary_unary_rpc_method_handler( servicer.DeleteNamespace, - request_deserializer=db__data__2025__04__pb2.DeleteNamespaceRequest.FromString, - response_serializer=db__data__2025__04__pb2.DeleteResponse.SerializeToString, + request_deserializer=db__data__2025__10__pb2.DeleteNamespaceRequest.FromString, + response_serializer=db__data__2025__10__pb2.DeleteResponse.SerializeToString, + ), + 'CreateNamespace': grpc.unary_unary_rpc_method_handler( + servicer.CreateNamespace, + request_deserializer=db__data__2025__10__pb2.CreateNamespaceRequest.FromString, + response_serializer=db__data__2025__10__pb2.NamespaceDescription.SerializeToString, + ), + 'FetchByMetadata': grpc.unary_unary_rpc_method_handler( + servicer.FetchByMetadata, + request_deserializer=db__data__2025__10__pb2.FetchByMetadataRequest.FromString, + response_serializer=db__data__2025__10__pb2.FetchByMetadataResponse.SerializeToString, ), } generic_handler = grpc.method_handlers_generic_handler( @@ -260,8 +311,8 @@ def Upsert(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/VectorService/Upsert', - db__data__2025__04__pb2.UpsertRequest.SerializeToString, - db__data__2025__04__pb2.UpsertResponse.FromString, + db__data__2025__10__pb2.UpsertRequest.SerializeToString, + db__data__2025__10__pb2.UpsertResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -277,8 +328,8 @@ def Delete(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/VectorService/Delete', - db__data__2025__04__pb2.DeleteRequest.SerializeToString, - db__data__2025__04__pb2.DeleteResponse.FromString, + db__data__2025__10__pb2.DeleteRequest.SerializeToString, + db__data__2025__10__pb2.DeleteResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, 
wait_for_ready, timeout, metadata) @@ -294,8 +345,8 @@ def Fetch(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/VectorService/Fetch', - db__data__2025__04__pb2.FetchRequest.SerializeToString, - db__data__2025__04__pb2.FetchResponse.FromString, + db__data__2025__10__pb2.FetchRequest.SerializeToString, + db__data__2025__10__pb2.FetchResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -311,8 +362,8 @@ def List(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/VectorService/List', - db__data__2025__04__pb2.ListRequest.SerializeToString, - db__data__2025__04__pb2.ListResponse.FromString, + db__data__2025__10__pb2.ListRequest.SerializeToString, + db__data__2025__10__pb2.ListResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -328,8 +379,8 @@ def Query(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/VectorService/Query', - db__data__2025__04__pb2.QueryRequest.SerializeToString, - db__data__2025__04__pb2.QueryResponse.FromString, + db__data__2025__10__pb2.QueryRequest.SerializeToString, + db__data__2025__10__pb2.QueryResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -345,8 +396,8 @@ def Update(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/VectorService/Update', - db__data__2025__04__pb2.UpdateRequest.SerializeToString, - db__data__2025__04__pb2.UpdateResponse.FromString, + db__data__2025__10__pb2.UpdateRequest.SerializeToString, + db__data__2025__10__pb2.UpdateResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -362,8 +413,8 @@ def DescribeIndexStats(request, timeout=None, 
metadata=None): return grpc.experimental.unary_unary(request, target, '/VectorService/DescribeIndexStats', - db__data__2025__04__pb2.DescribeIndexStatsRequest.SerializeToString, - db__data__2025__04__pb2.DescribeIndexStatsResponse.FromString, + db__data__2025__10__pb2.DescribeIndexStatsRequest.SerializeToString, + db__data__2025__10__pb2.DescribeIndexStatsResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -379,8 +430,8 @@ def ListNamespaces(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/VectorService/ListNamespaces', - db__data__2025__04__pb2.ListNamespacesRequest.SerializeToString, - db__data__2025__04__pb2.ListNamespacesResponse.FromString, + db__data__2025__10__pb2.ListNamespacesRequest.SerializeToString, + db__data__2025__10__pb2.ListNamespacesResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -396,8 +447,8 @@ def DescribeNamespace(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/VectorService/DescribeNamespace', - db__data__2025__04__pb2.DescribeNamespaceRequest.SerializeToString, - db__data__2025__04__pb2.NamespaceDescription.FromString, + db__data__2025__10__pb2.DescribeNamespaceRequest.SerializeToString, + db__data__2025__10__pb2.NamespaceDescription.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -413,7 +464,41 @@ def DeleteNamespace(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/VectorService/DeleteNamespace', - db__data__2025__04__pb2.DeleteNamespaceRequest.SerializeToString, - db__data__2025__04__pb2.DeleteResponse.FromString, + db__data__2025__10__pb2.DeleteNamespaceRequest.SerializeToString, + db__data__2025__10__pb2.DeleteResponse.FromString, + options, channel_credentials, + 
insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def CreateNamespace(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/VectorService/CreateNamespace', + db__data__2025__10__pb2.CreateNamespaceRequest.SerializeToString, + db__data__2025__10__pb2.NamespaceDescription.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def FetchByMetadata(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/VectorService/FetchByMetadata', + db__data__2025__10__pb2.FetchByMetadataRequest.SerializeToString, + db__data__2025__10__pb2.FetchByMetadataResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/pinecone/db_data/dataclasses/__init__.py b/pinecone/db_data/dataclasses/__init__.py index 2434890dc..f31e5c3c2 100644 --- a/pinecone/db_data/dataclasses/__init__.py +++ b/pinecone/db_data/dataclasses/__init__.py @@ -1,6 +1,7 @@ from .sparse_values import SparseValues from .vector import Vector from .fetch_response import FetchResponse +from .fetch_by_metadata_response import FetchByMetadataResponse, Pagination from .search_query import SearchQuery from .search_query_vector import SearchQueryVector from .search_rerank import SearchRerank @@ -9,6 +10,8 @@ "SparseValues", "Vector", "FetchResponse", + "FetchByMetadataResponse", + "Pagination", "SearchQuery", "SearchQueryVector", "SearchRerank", diff --git a/pinecone/db_data/dataclasses/fetch_by_metadata_response.py 
b/pinecone/db_data/dataclasses/fetch_by_metadata_response.py new file mode 100644 index 000000000..c47595252 --- /dev/null +++ b/pinecone/db_data/dataclasses/fetch_by_metadata_response.py @@ -0,0 +1,17 @@ +from dataclasses import dataclass +from typing import Dict, Optional + +from .vector import Vector + + +@dataclass +class Pagination: + next: str + + +@dataclass +class FetchByMetadataResponse: + namespace: str + vectors: Dict[str, Vector] + usage: Dict[str, int] + pagination: Optional[Pagination] = None diff --git a/pinecone/db_data/index.py b/pinecone/db_data/index.py index 37f9ca565..29e19b699 100644 --- a/pinecone/db_data/index.py +++ b/pinecone/db_data/index.py @@ -18,7 +18,15 @@ ListNamespacesResponse, NamespaceDescription, ) -from .dataclasses import Vector, SparseValues, FetchResponse, SearchQuery, SearchRerank +from .dataclasses import ( + Vector, + SparseValues, + FetchResponse, + FetchByMetadataResponse, + Pagination, + SearchQuery, + SearchRerank, +) from .interfaces import IndexInterface from .request_factory import IndexRequestFactory from .types import ( @@ -328,6 +336,66 @@ def fetch(self, ids: List[str], namespace: Optional[str] = None, **kwargs) -> Fe usage=result.usage, ) + @validate_and_convert_errors + def fetch_by_metadata( + self, + filter: FilterTypedDict, + namespace: Optional[str] = None, + limit: Optional[int] = None, + pagination_token: Optional[str] = None, + **kwargs, + ) -> FetchByMetadataResponse: + """Fetch vectors by metadata filter. + + Look up and return vectors by metadata filter from a single namespace. + The returned vectors include the vector data and/or metadata. + + Examples: + + .. code-block:: python + + >>> index.fetch_by_metadata( + ... filter={'genre': {'$in': ['comedy', 'drama']}, 'year': {'$eq': 2019}}, + ... namespace='my_namespace', + ... limit=50 + ... ) + >>> index.fetch_by_metadata( + ... filter={'status': 'active'}, + ... pagination_token='token123' + ... 
) + + Args: + filter (Dict[str, Union[str, float, int, bool, List, dict]]): + Metadata filter expression to select vectors. + See `metadata filtering _` + namespace (str): The namespace to fetch vectors from. + If not specified, the default namespace is used. [optional] + limit (int): Max number of vectors to return. Defaults to 100. [optional] + pagination_token (str): Pagination token to continue a previous listing operation. [optional] + + Returns: + FetchByMetadataResponse: Object containing the fetched vectors, namespace, usage, and pagination token. + """ + request = IndexRequestFactory.fetch_by_metadata_request( + filter=filter, + namespace=namespace, + limit=limit, + pagination_token=pagination_token, + **kwargs, + ) + result = self._vector_api.fetch_vectors_by_metadata(request, **self._openapi_kwargs(kwargs)) + + pagination = None + if result.pagination and result.pagination.next: + pagination = Pagination(next=result.pagination.next) + + return FetchByMetadataResponse( + namespace=result.namespace or "", + vectors={k: Vector.from_dict(v) for k, v in result.vectors.items()}, + usage=result.usage, + pagination=pagination, + ) + @validate_and_convert_errors def query( self, diff --git a/pinecone/db_data/index_asyncio.py b/pinecone/db_data/index_asyncio.py index 65fe66438..a46573e10 100644 --- a/pinecone/db_data/index_asyncio.py +++ b/pinecone/db_data/index_asyncio.py @@ -43,7 +43,15 @@ SearchQueryTypedDict, SearchRerankTypedDict, ) -from .dataclasses import Vector, SparseValues, FetchResponse, SearchQuery, SearchRerank +from .dataclasses import ( + Vector, + SparseValues, + FetchResponse, + FetchByMetadataResponse, + Pagination, + SearchQuery, + SearchRerank, +) from pinecone.openapi_support import OPENAPI_ENDPOINT_PARAMS from .index import IndexRequestFactory @@ -356,7 +364,82 @@ async def fetch( self, ids: List[str], namespace: Optional[str] = None, **kwargs ) -> FetchResponse: args_dict = parse_non_empty_args([("namespace", namespace)]) - return await 
self._vector_api.fetch_vectors(ids=ids, **args_dict, **kwargs) + result = await self._vector_api.fetch_vectors(ids=ids, **args_dict, **kwargs) + return FetchResponse( + namespace=result.namespace, + vectors={k: Vector.from_dict(v) for k, v in result.vectors.items()}, + usage=result.usage, + ) + + @validate_and_convert_errors + async def fetch_by_metadata( + self, + filter: FilterTypedDict, + namespace: Optional[str] = None, + limit: Optional[int] = None, + pagination_token: Optional[str] = None, + **kwargs, + ) -> FetchByMetadataResponse: + """Fetch vectors by metadata filter. + + Look up and return vectors by metadata filter from a single namespace. + The returned vectors include the vector data and/or metadata. + + Examples: + + .. code-block:: python + + import asyncio + from pinecone import Pinecone + + async def main(): + pc = Pinecone() + async with pc.IndexAsyncio(host="example-host") as idx: + result = await idx.fetch_by_metadata( + filter={'genre': {'$in': ['comedy', 'drama']}, 'year': {'$eq': 2019}}, + namespace='my_namespace', + limit=50 + ) + for vec_id in result.vectors: + vector = result.vectors[vec_id] + print(vector.id) + print(vector.metadata) + + asyncio.run(main()) + + Args: + filter (Dict[str, Union[str, float, int, bool, List, dict]]): + Metadata filter expression to select vectors. + See `metadata filtering _` + namespace (str): The namespace to fetch vectors from. + If not specified, the default namespace is used. [optional] + limit (int): Max number of vectors to return. Defaults to 100. [optional] + pagination_token (str): Pagination token to continue a previous listing operation. [optional] + + Returns: + FetchByMetadataResponse: Object containing the fetched vectors, namespace, usage, and pagination token. 
+ """ + request = IndexRequestFactory.fetch_by_metadata_request( + filter=filter, + namespace=namespace, + limit=limit, + pagination_token=pagination_token, + **kwargs, + ) + result = await self._vector_api.fetch_vectors_by_metadata( + request, **{k: v for k, v in kwargs.items() if k in _OPENAPI_ENDPOINT_PARAMS} + ) + + pagination = None + if result.pagination and result.pagination.next: + pagination = Pagination(next=result.pagination.next) + + return FetchByMetadataResponse( + namespace=result.namespace or "", + vectors={k: Vector.from_dict(v) for k, v in result.vectors.items()}, + usage=result.usage, + pagination=pagination, + ) @validate_and_convert_errors async def query( diff --git a/pinecone/db_data/index_asyncio_interface.py b/pinecone/db_data/index_asyncio_interface.py index af8841fb1..50e4d1f65 100644 --- a/pinecone/db_data/index_asyncio_interface.py +++ b/pinecone/db_data/index_asyncio_interface.py @@ -24,7 +24,7 @@ SearchQueryTypedDict, SearchRerankTypedDict, ) -from .dataclasses import SearchQuery, SearchRerank +from .dataclasses import SearchQuery, SearchRerank, FetchByMetadataResponse from pinecone.utils import require_kwargs @@ -287,6 +287,55 @@ async def main(): """ pass + @abstractmethod + async def fetch_by_metadata( + self, + filter: FilterTypedDict, + namespace: Optional[str] = None, + limit: Optional[int] = None, + pagination_token: Optional[str] = None, + **kwargs, + ) -> FetchByMetadataResponse: + """ + Fetch vectors by metadata filter. + + Look up and return vectors by metadata filter from a single namespace. + The returned vectors include the vector data and/or metadata. + + .. 
code-block:: python + + import asyncio + from pinecone import Pinecone + + async def main(): + pc = Pinecone() + async with pc.IndexAsyncio(host="example-host") as idx: + result = await idx.fetch_by_metadata( + filter={'genre': {'$in': ['comedy', 'drama']}, 'year': {'$eq': 2019}}, + namespace='my_namespace', + limit=50 + ) + for vec_id in result.vectors: + vector = result.vectors[vec_id] + print(vector.id) + print(vector.metadata) + + asyncio.run(main()) + + Args: + filter (Dict[str, Union[str, float, int, bool, List, dict]]): + Metadata filter expression to select vectors. + See `metadata filtering _` + namespace (str): The namespace to fetch vectors from. + If not specified, the default namespace is used. [optional] + limit (int): Max number of vectors to return. Defaults to 100. [optional] + pagination_token (str): Pagination token to continue a previous listing operation. [optional] + + Returns: + FetchByMetadataResponse: Object containing the fetched vectors, namespace, usage, and pagination token. + """ + pass + @abstractmethod async def query( self, diff --git a/pinecone/db_data/interfaces.py b/pinecone/db_data/interfaces.py index 4cc473646..263de553a 100644 --- a/pinecone/db_data/interfaces.py +++ b/pinecone/db_data/interfaces.py @@ -25,7 +25,7 @@ SearchQueryTypedDict, SearchRerankTypedDict, ) -from .dataclasses import SearchQuery, SearchRerank +from .dataclasses import SearchQuery, SearchRerank, FetchByMetadataResponse from pinecone.utils import require_kwargs @@ -524,6 +524,49 @@ def fetch(self, ids: List[str], namespace: Optional[str] = None, **kwargs) -> Fe """ pass + @abstractmethod + def fetch_by_metadata( + self, + filter: FilterTypedDict, + namespace: Optional[str] = None, + limit: Optional[int] = None, + pagination_token: Optional[str] = None, + **kwargs, + ) -> FetchByMetadataResponse: + """ + Fetch vectors by metadata filter. + + Look up and return vectors by metadata filter from a single namespace. 
+ The returned vectors include the vector data and/or metadata. + + Examples: + + .. code-block:: python + + >>> index.fetch_by_metadata( + ... filter={'genre': {'$in': ['comedy', 'drama']}, 'year': {'$eq': 2019}}, + ... namespace='my_namespace', + ... limit=50 + ... ) + >>> index.fetch_by_metadata( + ... filter={'status': 'active'}, + ... pagination_token='token123' + ... ) + + Args: + filter (Dict[str, Union[str, float, int, bool, List, dict]]): + Metadata filter expression to select vectors. + See `metadata filtering _` + namespace (str): The namespace to fetch vectors from. + If not specified, the default namespace is used. [optional] + limit (int): Max number of vectors to return. Defaults to 100. [optional] + pagination_token (str): Pagination token to continue a previous listing operation. [optional] + + Returns: + FetchByMetadataResponse: Object containing the fetched vectors, namespace, usage, and pagination token. + """ + pass + @abstractmethod def query( self, diff --git a/pinecone/db_data/request_factory.py b/pinecone/db_data/request_factory.py index 8233a85eb..780a3fa0b 100644 --- a/pinecone/db_data/request_factory.py +++ b/pinecone/db_data/request_factory.py @@ -7,6 +7,7 @@ DeleteRequest, UpdateRequest, DescribeIndexStatsRequest, + FetchByMetadataRequest, SearchRecordsRequest, SearchRecordsRequestQuery, SearchRecordsRequestRerank, @@ -110,6 +111,27 @@ def delete_request( ) return DeleteRequest(**args_dict, **non_openapi_kwargs(kwargs), _check_type=_check_type) + @staticmethod + def fetch_by_metadata_request( + filter: FilterTypedDict, + namespace: Optional[str] = None, + limit: Optional[int] = None, + pagination_token: Optional[str] = None, + **kwargs, + ) -> FetchByMetadataRequest: + _check_type = kwargs.pop("_check_type", False) + args_dict = parse_non_empty_args( + [ + ("namespace", namespace), + ("filter", filter), + ("limit", limit), + ("pagination_token", pagination_token), + ] + ) + return FetchByMetadataRequest( + **args_dict, 
**non_openapi_kwargs(kwargs), _check_type=_check_type + ) + @staticmethod def update_request( id: str, diff --git a/pinecone/db_data/types/query_filter.py b/pinecone/db_data/types/query_filter.py index 927382f61..cb4669d47 100644 --- a/pinecone/db_data/types/query_filter.py +++ b/pinecone/db_data/types/query_filter.py @@ -15,7 +15,7 @@ InFilter = Dict[Literal["$in"], List[FieldValue]] NinFilter = Dict[Literal["$nin"], List[FieldValue]] - +ExistsFilter = Dict[Literal["$exists"], bool] SimpleFilter = Union[ ExactMatchFilter, @@ -27,7 +27,9 @@ LteFilter, InFilter, NinFilter, + ExistsFilter, ] AndFilter = Dict[Literal["$and"], List[SimpleFilter]] +OrFilter = Dict[Literal["$or"], List[SimpleFilter]] -FilterTypedDict = Union[SimpleFilter, AndFilter] +FilterTypedDict = Union[SimpleFilter, AndFilter, OrFilter] diff --git a/pinecone/grpc/__init__.py b/pinecone/grpc/__init__.py index 92c10d0f5..13ca6d26a 100644 --- a/pinecone/grpc/__init__.py +++ b/pinecone/grpc/__init__.py @@ -51,7 +51,7 @@ from pinecone.db_data.dataclasses import Vector, SparseValues -from pinecone.core.grpc.protos.db_data_2025_04_pb2 import ( +from pinecone.core.grpc.protos.db_data_2025_10_pb2 import ( Vector as GRPCVector, SparseValues as GRPCSparseValues, DeleteResponse as GRPCDeleteResponse, diff --git a/pinecone/grpc/index_grpc.py b/pinecone/grpc/index_grpc.py index ef51a8d23..adf6cc4e7 100644 --- a/pinecone/grpc/index_grpc.py +++ b/pinecone/grpc/index_grpc.py @@ -11,6 +11,7 @@ from .utils import ( dict_to_proto_struct, parse_fetch_response, + parse_fetch_by_metadata_response, parse_query_response, parse_stats_response, parse_upsert_response, @@ -29,8 +30,9 @@ NamespaceDescription, ListNamespacesResponse, ) +from pinecone.db_data.dataclasses import FetchByMetadataResponse from pinecone.db_control.models.list_response import ListResponse as SimpleListResponse, Pagination -from pinecone.core.grpc.protos.db_data_2025_04_pb2 import ( +from pinecone.core.grpc.protos.db_data_2025_10_pb2 import ( Vector as 
GRPCVector, QueryVector as GRPCQueryVector, UpsertRequest, @@ -38,6 +40,7 @@ DeleteRequest, QueryRequest, FetchRequest, + FetchByMetadataRequest, UpdateRequest, ListRequest, DescribeIndexStatsRequest, @@ -48,9 +51,9 @@ DeleteNamespaceRequest, ListNamespacesRequest, ) +from pinecone.core.grpc.protos.db_data_2025_10_pb2_grpc import VectorServiceStub from pinecone import Vector, SparseValues from pinecone.db_data.query_results_aggregator import QueryNamespacesResults, QueryResultsAggregator -from pinecone.core.grpc.protos.db_data_2025_04_pb2_grpc import VectorServiceStub from .base import GRPCIndexBase from .future import PineconeGrpcFuture from ..db_data.types import ( @@ -363,6 +366,76 @@ def fetch( response = self.runner.run(self.stub.Fetch, request, timeout=timeout) return parse_fetch_response(response) + def fetch_by_metadata( + self, + filter: FilterTypedDict, + namespace: Optional[str] = None, + limit: Optional[int] = None, + pagination_token: Optional[str] = None, + async_req: Optional[bool] = False, + **kwargs, + ) -> Union[FetchByMetadataResponse, PineconeGrpcFuture]: + """ + Fetch vectors by metadata filter. + + Look up and return vectors by metadata filter from a single namespace. + The returned vectors include the vector data and/or metadata. + + Examples: + + .. code-block:: python + + >>> index.fetch_by_metadata( + ... filter={'genre': {'$in': ['comedy', 'drama']}, 'year': {'$eq': 2019}}, + ... namespace='my_namespace', + ... limit=50 + ... ) + >>> index.fetch_by_metadata( + ... filter={'status': 'active'}, + ... pagination_token='token123' + ... ) + + Args: + filter (Dict[str, Union[str, float, int, bool, List, dict]]): + Metadata filter expression to select vectors. + See `metadata filtering _` + namespace (str): The namespace to fetch vectors from. + If not specified, the default namespace is used. [optional] + limit (int): Max number of vectors to return. Defaults to 100. 
[optional] + pagination_token (str): Pagination token to continue a previous listing operation. [optional] + async_req (bool): If True, the fetch operation will be performed asynchronously. + Defaults to False. [optional] + + Returns: + FetchByMetadataResponse: Object containing the fetched vectors, namespace, usage, and pagination token. + """ + timeout = kwargs.pop("timeout", None) + + if filter is not None: + filter_struct = dict_to_proto_struct(filter) + else: + filter_struct = None + + args_dict = self._parse_non_empty_args( + [ + ("namespace", namespace), + ("filter", filter_struct), + ("limit", limit), + ("pagination_token", pagination_token), + ] + ) + + request = FetchByMetadataRequest(**args_dict, **kwargs) + + if async_req: + future = self.runner.run(self.stub.FetchByMetadata.future, request, timeout=timeout) + return PineconeGrpcFuture( + future, result_transformer=parse_fetch_by_metadata_response, timeout=timeout + ) + else: + response = self.runner.run(self.stub.FetchByMetadata, request, timeout=timeout) + return parse_fetch_by_metadata_response(response) + def query( self, vector: Optional[List[float]] = None, diff --git a/pinecone/grpc/sparse_values_factory.py b/pinecone/grpc/sparse_values_factory.py index 06aa8e678..85e85e0b7 100644 --- a/pinecone/grpc/sparse_values_factory.py +++ b/pinecone/grpc/sparse_values_factory.py @@ -6,7 +6,7 @@ from ..db_data import SparseValuesTypeError, SparseValuesMissingKeysError from ..db_data.types import SparseVectorTypedDict -from pinecone.core.grpc.protos.db_data_2025_04_pb2 import SparseValues as GRPCSparseValues +from pinecone.core.grpc.protos.db_data_2025_10_pb2 import SparseValues as GRPCSparseValues from pinecone.core.openapi.db_data.models import SparseValues as OpenApiSparseValues from pinecone import SparseValues diff --git a/pinecone/grpc/utils.py b/pinecone/grpc/utils.py index e741809aa..263da0c6f 100644 --- a/pinecone/grpc/utils.py +++ b/pinecone/grpc/utils.py @@ -15,9 +15,9 @@ NamespaceSummary, 
NamespaceDescription, ListNamespacesResponse, - Pagination, + Pagination as OpenApiPagination, ) -from pinecone.db_data.dataclasses import FetchResponse +from pinecone.db_data.dataclasses import FetchResponse, FetchByMetadataResponse, Pagination from google.protobuf.struct_pb2 import Struct @@ -63,6 +63,34 @@ def parse_fetch_response(response: Message): ) +def parse_fetch_by_metadata_response(response: Message): + json_response = json_format.MessageToDict(response) + + vd = {} + vectors = json_response.get("vectors", {}) + namespace = json_response.get("namespace", "") + + for id, vec in vectors.items(): + vd[id] = _Vector( + id=vec["id"], + values=vec.get("values", None), + sparse_values=parse_sparse_values(vec.get("sparseValues", None)), + metadata=vec.get("metadata", None), + _check_type=False, + ) + + pagination = None + if json_response.get("pagination") and json_response["pagination"].get("next"): + pagination = Pagination(next=json_response["pagination"]["next"]) + + return FetchByMetadataResponse( + vectors=vd, + namespace=namespace, + usage=parse_usage(json_response.get("usage", {})), + pagination=pagination, + ) + + def parse_usage(usage: dict): return Usage(read_units=int(usage.get("readUnits", 0))) @@ -153,6 +181,8 @@ def parse_list_namespaces_response(response: Message) -> ListNamespacesResponse: pagination = None if "pagination" in json_response and json_response["pagination"]: - pagination = Pagination(next=json_response["pagination"].get("next", ""), _check_type=False) + pagination = OpenApiPagination( + next=json_response["pagination"].get("next", ""), _check_type=False + ) return ListNamespacesResponse(namespaces=namespaces, pagination=pagination, _check_type=False) diff --git a/pinecone/grpc/vector_factory_grpc.py b/pinecone/grpc/vector_factory_grpc.py index 3af3add82..f40249dd6 100644 --- a/pinecone/grpc/vector_factory_grpc.py +++ b/pinecone/grpc/vector_factory_grpc.py @@ -17,7 +17,7 @@ from ..db_data.types import VectorTuple, VectorTypedDict 
from .sparse_values_factory import SparseValuesFactory -from pinecone.core.grpc.protos.db_data_2025_04_pb2 import ( +from pinecone.core.grpc.protos.db_data_2025_10_pb2 import ( Vector as GRPCVector, SparseValues as GRPCSparseValues, ) diff --git a/tests/integration/data/test_fetch_by_metadata.py b/tests/integration/data/test_fetch_by_metadata.py new file mode 100644 index 000000000..7a84f2f2f --- /dev/null +++ b/tests/integration/data/test_fetch_by_metadata.py @@ -0,0 +1,227 @@ +import logging +import pytest +from ..helpers import poll_fetch_for_ids_in_namespace, embedding_values, random_string + +from pinecone import Vector, FetchByMetadataResponse + +logger = logging.getLogger(__name__) + + +@pytest.fixture(scope="session") +def fetch_by_metadata_namespace(): + return random_string(10) + + +def seed_for_fetch_by_metadata(idx, namespace): + """Seed vectors with various metadata for testing fetch_by_metadata.""" + logger.info(f"Seeding vectors with metadata into namespace '{namespace}'") + + # Upsert vectors with different metadata + idx.upsert( + vectors=[ + Vector( + id="genre-action-1", + values=embedding_values(2), + metadata={"genre": "action", "year": 2020, "rating": 8.5}, + ), + Vector( + id="genre-action-2", + values=embedding_values(2), + metadata={"genre": "action", "year": 2021, "rating": 7.5}, + ), + Vector( + id="genre-comedy-1", + values=embedding_values(2), + metadata={"genre": "comedy", "year": 2020, "rating": 9.0}, + ), + Vector( + id="genre-comedy-2", + values=embedding_values(2), + metadata={"genre": "comedy", "year": 2022, "rating": 8.0}, + ), + Vector( + id="genre-drama-1", + values=embedding_values(2), + metadata={"genre": "drama", "year": 2020, "rating": 9.5}, + ), + Vector( + id="genre-romance-1", + values=embedding_values(2), + metadata={"genre": "romance", "year": 2021, "rating": 7.0}, + ), + Vector(id="no-metadata-1", values=embedding_values(2), metadata=None), + ], + namespace=namespace, + ) + + poll_fetch_for_ids_in_namespace( + idx, + 
ids=[ + "genre-action-1", + "genre-action-2", + "genre-comedy-1", + "genre-comedy-2", + "genre-drama-1", + "genre-romance-1", + "no-metadata-1", + ], + namespace=namespace, + ) + + +@pytest.fixture(scope="class") +def seed_for_fetch_by_metadata_fixture(idx, fetch_by_metadata_namespace): + seed_for_fetch_by_metadata(idx, fetch_by_metadata_namespace) + seed_for_fetch_by_metadata(idx, "") + yield + + +@pytest.mark.usefixtures("seed_for_fetch_by_metadata_fixture") +class TestFetchByMetadata: + def setup_method(self): + self.expected_dimension = 2 + + @pytest.mark.parametrize("use_nondefault_namespace", [True, False]) + def test_fetch_by_metadata_simple_filter( + self, idx, fetch_by_metadata_namespace, use_nondefault_namespace + ): + target_namespace = fetch_by_metadata_namespace if use_nondefault_namespace else "" + + results = idx.fetch_by_metadata( + filter={"genre": {"$eq": "action"}}, namespace=target_namespace + ) + assert isinstance(results, FetchByMetadataResponse) + assert results.namespace == target_namespace + # Check that we have at least the vectors we seeded + assert len(results.vectors) >= 2 + assert "genre-action-1" in results.vectors + assert "genre-action-2" in results.vectors + + # Verify metadata + assert results.vectors["genre-action-1"].metadata["genre"] == "action" + assert results.vectors["genre-action-2"].metadata["genre"] == "action" + + # Verify usage + assert results.usage is not None + assert results.usage["read_units"] is not None + assert results.usage["read_units"] > 0 + + @pytest.mark.parametrize("use_nondefault_namespace", [True, False]) + def test_fetch_by_metadata_with_limit( + self, idx, fetch_by_metadata_namespace, use_nondefault_namespace + ): + target_namespace = fetch_by_metadata_namespace if use_nondefault_namespace else "" + + results = idx.fetch_by_metadata( + filter={"genre": {"$eq": "action"}}, namespace=target_namespace, limit=1 + ) + assert isinstance(results, FetchByMetadataResponse) + assert results.namespace == 
target_namespace + assert len(results.vectors) == 1 + + @pytest.mark.parametrize("use_nondefault_namespace", [True, False]) + def test_fetch_by_metadata_with_in_operator( + self, idx, fetch_by_metadata_namespace, use_nondefault_namespace + ): + target_namespace = fetch_by_metadata_namespace if use_nondefault_namespace else "" + + results = idx.fetch_by_metadata( + filter={"genre": {"$in": ["comedy", "drama"]}}, namespace=target_namespace + ) + assert isinstance(results, FetchByMetadataResponse) + assert results.namespace == target_namespace + # Check that we have at least the vectors we seeded + assert len(results.vectors) >= 3 # comedy-1, comedy-2, drama-1 + assert "genre-comedy-1" in results.vectors + assert "genre-comedy-2" in results.vectors + assert "genre-drama-1" in results.vectors + + @pytest.mark.parametrize("use_nondefault_namespace", [True, False]) + def test_fetch_by_metadata_with_multiple_conditions( + self, idx, fetch_by_metadata_namespace, use_nondefault_namespace + ): + target_namespace = fetch_by_metadata_namespace if use_nondefault_namespace else "" + + results = idx.fetch_by_metadata( + filter={"genre": {"$eq": "action"}, "year": {"$eq": 2020}}, namespace=target_namespace + ) + assert isinstance(results, FetchByMetadataResponse) + assert results.namespace == target_namespace + assert len(results.vectors) == 1 + assert "genre-action-1" in results.vectors + assert results.vectors["genre-action-1"].metadata["year"] == 2020 + + @pytest.mark.parametrize("use_nondefault_namespace", [True, False]) + def test_fetch_by_metadata_with_numeric_filter( + self, idx, fetch_by_metadata_namespace, use_nondefault_namespace + ): + target_namespace = fetch_by_metadata_namespace if use_nondefault_namespace else "" + + results = idx.fetch_by_metadata(filter={"year": {"$gte": 2021}}, namespace=target_namespace) + assert isinstance(results, FetchByMetadataResponse) + assert results.namespace == target_namespace + # Should return action-2, comedy-2, romance-1 (all year 
>= 2021) + assert len(results.vectors) >= 3 + assert "genre-action-2" in results.vectors + assert "genre-comedy-2" in results.vectors + assert "genre-romance-1" in results.vectors + + @pytest.mark.parametrize("use_nondefault_namespace", [True, False]) + def test_fetch_by_metadata_no_results( + self, idx, fetch_by_metadata_namespace, use_nondefault_namespace + ): + target_namespace = fetch_by_metadata_namespace if use_nondefault_namespace else "" + + results = idx.fetch_by_metadata( + filter={"genre": {"$eq": "horror"}}, namespace=target_namespace + ) + assert isinstance(results, FetchByMetadataResponse) + assert results.namespace == target_namespace + assert len(results.vectors) == 0 + + def test_fetch_by_metadata_nonexistent_namespace(self, idx): + target_namespace = "nonexistent-namespace" + + results = idx.fetch_by_metadata( + filter={"genre": {"$eq": "action"}}, namespace=target_namespace + ) + assert isinstance(results, FetchByMetadataResponse) + assert results.namespace == target_namespace + assert len(results.vectors) == 0 + + def test_fetch_by_metadata_unspecified_namespace(self, idx): + # Fetch without specifying namespace gives default namespace results + results = idx.fetch_by_metadata(filter={"genre": {"$eq": "action"}}) + assert isinstance(results, FetchByMetadataResponse) + assert results.namespace == "" + # Check that we have at least the vectors we seeded + assert len(results.vectors) >= 2 + assert "genre-action-1" in results.vectors + assert "genre-action-2" in results.vectors + + def test_fetch_by_metadata_pagination(self, idx, fetch_by_metadata_namespace): + # First page + results1 = idx.fetch_by_metadata( + filter={"genre": {"$in": ["action", "comedy", "drama", "romance"]}}, + namespace=fetch_by_metadata_namespace, + limit=2, + ) + assert isinstance(results1, FetchByMetadataResponse) + assert len(results1.vectors) == 2 + + # Check if pagination token exists (if more results available) + if results1.pagination and results1.pagination.next: + # 
Second page + results2 = idx.fetch_by_metadata( + filter={"genre": {"$in": ["action", "comedy", "drama", "romance"]}}, + namespace=fetch_by_metadata_namespace, + limit=2, + pagination_token=results1.pagination.next, + ) + assert isinstance(results2, FetchByMetadataResponse) + assert len(results2.vectors) >= 0 # Could be 0 if no more results + + # Verify no overlap between pages + page1_ids = set(results1.vectors.keys()) + page2_ids = set(results2.vectors.keys()) + assert len(page1_ids.intersection(page2_ids)) == 0 diff --git a/tests/integration/data_asyncio/conftest.py b/tests/integration/data_asyncio/conftest.py index 9769a5e90..b60811868 100644 --- a/tests/integration/data_asyncio/conftest.py +++ b/tests/integration/data_asyncio/conftest.py @@ -1,4 +1,5 @@ import pytest +import pytest_asyncio import json import asyncio from ..helpers import get_environment_var, generate_index_name @@ -50,16 +51,20 @@ def build_asyncioindex_client(index_host) -> _IndexAsyncio: return Pinecone().IndexAsyncio(host=index_host) -@pytest.fixture(scope="session") -def idx(client, index_name, index_host): - print("Building client for {}".format(index_name)) - return build_asyncioindex_client(index_host) +@pytest_asyncio.fixture(scope="function") +async def idx(index_host): + print("Building client for async index") + client = build_asyncioindex_client(index_host) + yield client + await client.close() -@pytest.fixture(scope="session") -def sparse_idx(client, sparse_index_name, sparse_index_host): - print("Building client for {}".format(sparse_index_name)) - return build_asyncioindex_client(sparse_index_host) +@pytest_asyncio.fixture(scope="function") +async def sparse_idx(sparse_index_host): + print("Building client for async sparse index") + client = build_asyncioindex_client(sparse_index_host) + yield client + await client.close() @pytest.fixture(scope="session") diff --git a/tests/integration/data_asyncio/test_fetch_by_metadata.py 
b/tests/integration/data_asyncio/test_fetch_by_metadata.py new file mode 100644 index 000000000..8a72bb36a --- /dev/null +++ b/tests/integration/data_asyncio/test_fetch_by_metadata.py @@ -0,0 +1,242 @@ +import logging +import pytest +import pytest_asyncio +import asyncio +from ..helpers import embedding_values, random_string +from pinecone import Vector, FetchByMetadataResponse + +logger = logging.getLogger(__name__) + + +@pytest.fixture(scope="session") +def fetch_by_metadata_namespace(): + return random_string(10) + + +async def seed_for_fetch_by_metadata(idx, namespace): + """Seed vectors with various metadata for testing fetch_by_metadata.""" + logger.info(f"Seeding vectors with metadata into namespace '{namespace}'") + + # Upsert vectors with different metadata + await idx.upsert( + vectors=[ + Vector( + id="genre-action-1", + values=embedding_values(2), + metadata={"genre": "action", "year": 2020, "rating": 8.5}, + ), + Vector( + id="genre-action-2", + values=embedding_values(2), + metadata={"genre": "action", "year": 2021, "rating": 7.5}, + ), + Vector( + id="genre-comedy-1", + values=embedding_values(2), + metadata={"genre": "comedy", "year": 2020, "rating": 9.0}, + ), + Vector( + id="genre-comedy-2", + values=embedding_values(2), + metadata={"genre": "comedy", "year": 2022, "rating": 8.0}, + ), + Vector( + id="genre-drama-1", + values=embedding_values(2), + metadata={"genre": "drama", "year": 2020, "rating": 9.5}, + ), + Vector( + id="genre-romance-1", + values=embedding_values(2), + metadata={"genre": "romance", "year": 2021, "rating": 7.0}, + ), + Vector(id="no-metadata-1", values=embedding_values(2), metadata=None), + ], + namespace=namespace, + ) + + # Wait for vectors to be available by polling fetch_by_metadata + max_wait = 60 + wait_time = 0 + while wait_time < max_wait: + try: + results = await idx.fetch_by_metadata( + filter={"genre": {"$in": ["action", "comedy", "drama", "romance"]}}, + namespace=namespace, + limit=10, + ) + if 
len(results.vectors) >= 6: # At least 6 vectors with genre metadata + break + except Exception: + pass + await asyncio.sleep(2) + wait_time += 2 + + +@pytest_asyncio.fixture(scope="function") +async def seed_for_fetch_by_metadata_fixture(idx, fetch_by_metadata_namespace): + await seed_for_fetch_by_metadata(idx, fetch_by_metadata_namespace) + await seed_for_fetch_by_metadata(idx, "") + yield + + +@pytest.mark.usefixtures("seed_for_fetch_by_metadata_fixture") +class TestFetchByMetadataAsyncio: + def setup_method(self): + self.expected_dimension = 2 + + @pytest.mark.parametrize("use_nondefault_namespace", [True, False]) + @pytest.mark.asyncio + async def test_fetch_by_metadata_simple_filter( + self, idx, fetch_by_metadata_namespace, use_nondefault_namespace + ): + target_namespace = fetch_by_metadata_namespace if use_nondefault_namespace else "" + + results = await idx.fetch_by_metadata( + filter={"genre": {"$eq": "action"}}, namespace=target_namespace + ) + assert isinstance(results, FetchByMetadataResponse) + assert results.namespace == target_namespace + # Check that we have at least the vectors we seeded + assert len(results.vectors) >= 2 + assert "genre-action-1" in results.vectors + assert "genre-action-2" in results.vectors + + # Verify metadata + assert results.vectors["genre-action-1"].metadata["genre"] == "action" + assert results.vectors["genre-action-2"].metadata["genre"] == "action" + + # Verify usage + assert results.usage is not None + assert results.usage["read_units"] is not None + assert results.usage["read_units"] > 0 + + @pytest.mark.parametrize("use_nondefault_namespace", [True, False]) + @pytest.mark.asyncio + async def test_fetch_by_metadata_with_limit( + self, idx, fetch_by_metadata_namespace, use_nondefault_namespace + ): + target_namespace = fetch_by_metadata_namespace if use_nondefault_namespace else "" + + results = await idx.fetch_by_metadata( + filter={"genre": {"$eq": "action"}}, namespace=target_namespace, limit=1 + ) + assert 
isinstance(results, FetchByMetadataResponse) + assert results.namespace == target_namespace + assert len(results.vectors) == 1 + + @pytest.mark.parametrize("use_nondefault_namespace", [True, False]) + @pytest.mark.asyncio + async def test_fetch_by_metadata_with_in_operator( + self, idx, fetch_by_metadata_namespace, use_nondefault_namespace + ): + target_namespace = fetch_by_metadata_namespace if use_nondefault_namespace else "" + + results = await idx.fetch_by_metadata( + filter={"genre": {"$in": ["comedy", "drama"]}}, namespace=target_namespace + ) + assert isinstance(results, FetchByMetadataResponse) + assert results.namespace == target_namespace + # Check that we have at least the vectors we seeded + assert len(results.vectors) >= 3 # comedy-1, comedy-2, drama-1 + assert "genre-comedy-1" in results.vectors + assert "genre-comedy-2" in results.vectors + assert "genre-drama-1" in results.vectors + + @pytest.mark.parametrize("use_nondefault_namespace", [True, False]) + @pytest.mark.asyncio + async def test_fetch_by_metadata_with_multiple_conditions( + self, idx, fetch_by_metadata_namespace, use_nondefault_namespace + ): + target_namespace = fetch_by_metadata_namespace if use_nondefault_namespace else "" + + results = await idx.fetch_by_metadata( + filter={"genre": {"$eq": "action"}, "year": {"$eq": 2020}}, namespace=target_namespace + ) + assert isinstance(results, FetchByMetadataResponse) + assert results.namespace == target_namespace + assert len(results.vectors) == 1 + assert "genre-action-1" in results.vectors + assert results.vectors["genre-action-1"].metadata["year"] == 2020 + + @pytest.mark.parametrize("use_nondefault_namespace", [True, False]) + @pytest.mark.asyncio + async def test_fetch_by_metadata_with_numeric_filter( + self, idx, fetch_by_metadata_namespace, use_nondefault_namespace + ): + target_namespace = fetch_by_metadata_namespace if use_nondefault_namespace else "" + + results = await idx.fetch_by_metadata( + filter={"year": {"$gte": 2021}}, 
namespace=target_namespace + ) + assert isinstance(results, FetchByMetadataResponse) + assert results.namespace == target_namespace + # Should return action-2, comedy-2, romance-1 (all year >= 2021) + assert len(results.vectors) >= 3 + assert "genre-action-2" in results.vectors + assert "genre-comedy-2" in results.vectors + assert "genre-romance-1" in results.vectors + + @pytest.mark.parametrize("use_nondefault_namespace", [True, False]) + @pytest.mark.asyncio + async def test_fetch_by_metadata_no_results( + self, idx, fetch_by_metadata_namespace, use_nondefault_namespace + ): + target_namespace = fetch_by_metadata_namespace if use_nondefault_namespace else "" + + results = await idx.fetch_by_metadata( + filter={"genre": {"$eq": "horror"}}, namespace=target_namespace + ) + assert isinstance(results, FetchByMetadataResponse) + assert results.namespace == target_namespace + assert len(results.vectors) == 0 + + @pytest.mark.asyncio + async def test_fetch_by_metadata_nonexistent_namespace(self, idx): + target_namespace = "nonexistent-namespace" + + results = await idx.fetch_by_metadata( + filter={"genre": {"$eq": "action"}}, namespace=target_namespace + ) + assert isinstance(results, FetchByMetadataResponse) + assert results.namespace == target_namespace + assert len(results.vectors) == 0 + + @pytest.mark.asyncio + async def test_fetch_by_metadata_unspecified_namespace(self, idx): + # Fetch without specifying namespace gives default namespace results + results = await idx.fetch_by_metadata(filter={"genre": {"$eq": "action"}}) + assert isinstance(results, FetchByMetadataResponse) + assert results.namespace == "" + # Check that we have at least the vectors we seeded + assert len(results.vectors) >= 2 + assert "genre-action-1" in results.vectors + assert "genre-action-2" in results.vectors + + @pytest.mark.asyncio + async def test_fetch_by_metadata_pagination(self, idx, fetch_by_metadata_namespace): + # First page + results1 = await idx.fetch_by_metadata( + 
filter={"genre": {"$in": ["action", "comedy", "drama", "romance"]}}, + namespace=fetch_by_metadata_namespace, + limit=2, + ) + assert isinstance(results1, FetchByMetadataResponse) + assert len(results1.vectors) == 2 + + # Check if pagination token exists (if more results available) + if results1.pagination and results1.pagination.next: + # Second page + results2 = await idx.fetch_by_metadata( + filter={"genre": {"$in": ["action", "comedy", "drama", "romance"]}}, + namespace=fetch_by_metadata_namespace, + limit=2, + pagination_token=results1.pagination.next, + ) + assert isinstance(results2, FetchByMetadataResponse) + assert len(results2.vectors) >= 0 # Could be 0 if no more results + + # Verify no overlap between pages + page1_ids = set(results1.vectors.keys()) + page2_ids = set(results2.vectors.keys()) + assert len(page1_ids.intersection(page2_ids)) == 0 diff --git a/tests/integration/data_grpc_futures/stub_backend.py b/tests/integration/data_grpc_futures/stub_backend.py index 85f400eae..bb5efff34 100644 --- a/tests/integration/data_grpc_futures/stub_backend.py +++ b/tests/integration/data_grpc_futures/stub_backend.py @@ -2,8 +2,8 @@ import grpc import logging from concurrent import futures -import pinecone.core.grpc.protos.db_data_2025_04_pb2 as pb2 -import pinecone.core.grpc.protos.db_data_2025_04_pb2_grpc as pb2_grpc +import pinecone.core.grpc.protos.db_data_2025_10_pb2 as pb2 +import pinecone.core.grpc.protos.db_data_2025_10_pb2_grpc as pb2_grpc logger = logging.getLogger(__name__) diff --git a/tests/integration/data_grpc_futures/test_fetch_by_metadata_future.py b/tests/integration/data_grpc_futures/test_fetch_by_metadata_future.py new file mode 100644 index 000000000..5fa5d3aae --- /dev/null +++ b/tests/integration/data_grpc_futures/test_fetch_by_metadata_future.py @@ -0,0 +1,165 @@ +import pytest +from ..helpers import poll_fetch_for_ids_in_namespace, embedding_values, generate_name +from pinecone import Vector +import logging +from pinecone.grpc import 
PineconeGrpcFuture + +logger = logging.getLogger(__name__) + + +@pytest.fixture(scope="session") +def fetch_by_metadata_namespace_future(): + return generate_name("TestFetchByMetadataFuture", "fetch-by-metadata-namespace") + + +def seed_for_fetch_by_metadata(idx, namespace): + # Upsert vectors with different metadata for filtering tests + logger.info("Seeding vectors with metadata to namespace '%s'", namespace) + idx.upsert( + vectors=[ + Vector( + id="meta1", values=embedding_values(2), metadata={"genre": "action", "year": 2020} + ), + Vector( + id="meta2", values=embedding_values(2), metadata={"genre": "comedy", "year": 2021} + ), + Vector( + id="meta3", values=embedding_values(2), metadata={"genre": "action", "year": 2022} + ), + Vector( + id="meta4", values=embedding_values(2), metadata={"genre": "drama", "year": 2020} + ), + Vector( + id="meta5", values=embedding_values(2), metadata={"genre": "action", "year": 2021} + ), + ], + namespace=namespace, + ) + + poll_fetch_for_ids_in_namespace( + idx, ids=["meta1", "meta2", "meta3", "meta4", "meta5"], namespace=namespace + ) + + +@pytest.mark.usefixtures("fetch_by_metadata_namespace_future") +@pytest.fixture(scope="class") +def seed_for_fetch_by_metadata_future(idx, fetch_by_metadata_namespace_future): + seed_for_fetch_by_metadata(idx, fetch_by_metadata_namespace_future) + seed_for_fetch_by_metadata(idx, "") + yield + + +@pytest.mark.usefixtures("seed_for_fetch_by_metadata_future") +class TestFetchByMetadataFuture: + def setup_method(self): + self.expected_dimension = 2 + + def test_fetch_by_metadata_simple_filter(self, idx, fetch_by_metadata_namespace_future): + target_namespace = fetch_by_metadata_namespace_future + + future = idx.fetch_by_metadata( + filter={"genre": {"$eq": "action"}}, namespace=target_namespace, async_req=True + ) + assert isinstance(future, PineconeGrpcFuture) + + from concurrent.futures import wait, FIRST_COMPLETED + + done, _ = wait([future], return_when=FIRST_COMPLETED) + results = 
done.pop().result() + + assert results.usage is not None + assert results.usage["read_units"] is not None + assert results.usage["read_units"] > 0 + + assert results.namespace == target_namespace + assert len(results.vectors) == 3 + assert "meta1" in results.vectors + assert "meta3" in results.vectors + assert "meta5" in results.vectors + assert results.vectors["meta1"].metadata["genre"] == "action" + assert results.vectors["meta1"].values is not None + assert len(results.vectors["meta1"].values) == self.expected_dimension + + def test_fetch_by_metadata_with_limit(self, idx, fetch_by_metadata_namespace_future): + target_namespace = fetch_by_metadata_namespace_future + + future = idx.fetch_by_metadata( + filter={"genre": {"$eq": "action"}}, namespace=target_namespace, limit=2, async_req=True + ) + + from concurrent.futures import wait, FIRST_COMPLETED + + done, _ = wait([future], return_when=FIRST_COMPLETED) + results = done.pop().result() + + assert results.namespace == target_namespace + assert len(results.vectors) <= 2 + + def test_fetch_by_metadata_with_complex_filter(self, idx, fetch_by_metadata_namespace_future): + target_namespace = fetch_by_metadata_namespace_future + + future = idx.fetch_by_metadata( + filter={"genre": {"$eq": "action"}, "year": {"$eq": 2020}}, + namespace=target_namespace, + async_req=True, + ) + + from concurrent.futures import wait, FIRST_COMPLETED + + done, _ = wait([future], return_when=FIRST_COMPLETED) + results = done.pop().result() + + assert results.namespace == target_namespace + assert len(results.vectors) == 1 + assert "meta1" in results.vectors + assert results.vectors["meta1"].metadata["genre"] == "action" + assert results.vectors["meta1"].metadata["year"] == 2020 + + def test_fetch_by_metadata_with_in_operator(self, idx, fetch_by_metadata_namespace_future): + target_namespace = fetch_by_metadata_namespace_future + + future = idx.fetch_by_metadata( + filter={"genre": {"$in": ["comedy", "drama"]}}, + namespace=target_namespace, 
+ async_req=True, + ) + + from concurrent.futures import wait, FIRST_COMPLETED + + done, _ = wait([future], return_when=FIRST_COMPLETED) + results = done.pop().result() + + assert results.namespace == target_namespace + assert len(results.vectors) == 2 + assert "meta2" in results.vectors + assert "meta4" in results.vectors + + def test_fetch_by_metadata_no_results(self, idx, fetch_by_metadata_namespace_future): + target_namespace = fetch_by_metadata_namespace_future + + future = idx.fetch_by_metadata( + filter={"genre": {"$eq": "horror"}}, namespace=target_namespace, async_req=True + ) + + from concurrent.futures import wait, FIRST_COMPLETED + + done, _ = wait([future], return_when=FIRST_COMPLETED) + results = done.pop().result() + + assert results.namespace == target_namespace + assert len(results.vectors) == 0 + + def test_fetch_by_metadata_unspecified_namespace(self, idx): + # Fetch from default namespace + future = idx.fetch_by_metadata(filter={"genre": {"$eq": "action"}}, async_req=True) + + from concurrent.futures import wait, FIRST_COMPLETED + + done, _ = wait([future], return_when=FIRST_COMPLETED) + results = done.pop().result() + + assert results.namespace == "" + assert len(results.vectors) == 3 + assert "meta1" in results.vectors + assert "meta3" in results.vectors + assert "meta5" in results.vectors diff --git a/tests/integration/data_grpc_futures/test_query_future.py b/tests/integration/data_grpc_futures/test_query_future.py index 3e4a1c569..e2fbb1d64 100644 --- a/tests/integration/data_grpc_futures/test_query_future.py +++ b/tests/integration/data_grpc_futures/test_query_future.py @@ -150,7 +150,9 @@ def test_query_by_vector_include_metadata(self, idx, query_namespace, use_nondef for match in query_result.matches if match.metadata is not None and match.metadata != {} ] - assert len(matches_with_metadata) == 3 + # Check that we have at least the vectors we seeded + assert len(matches_with_metadata) >= 3 + assert find_by_id(query_result.matches, "4") is 
not None assert find_by_id(query_result.matches, "4").metadata["genre"] == "action" def test_query_by_vector_include_values_and_metadata( @@ -174,7 +176,9 @@ def test_query_by_vector_include_values_and_metadata( for match in query_result.matches if match.metadata is not None and match.metadata != {} ] - assert len(matches_with_metadata) == 3 + # Check that we have at least the vectors we seeded + assert len(matches_with_metadata) >= 3 + assert find_by_id(query_result.matches, "4") is not None assert find_by_id(query_result.matches, "4").metadata["genre"] == "action" assert len(query_result.matches[0].values) == self.expected_dimension @@ -198,8 +202,9 @@ def test_query_by_id_with_filter(self, idx, query_namespace, use_nondefault_name ).result() assert isinstance(query_result, QueryResponse) == True assert query_result.namespace == target_namespace - assert len(query_result.matches) == 1 - assert query_result.matches[0].id == "4" + # Check that we have at least the vector we seeded + assert len(query_result.matches) >= 1 + assert find_by_id(query_result.matches, "4") is not None def test_query_by_id_with_filter_gt(self, idx, query_namespace, use_nondefault_namespace): target_namespace = query_namespace if use_nondefault_namespace else "" @@ -318,7 +323,8 @@ def test_query_by_id_with_filter_nin(self, idx, query_namespace, use_nondefault_ for match in query_result.matches if match.metadata is not None and match.metadata != {} ] - assert len(matches_with_metadata) == 2 + # Check that we have at least the vectors we seeded + assert len(matches_with_metadata) >= 2 for match in matches_with_metadata: assert match.metadata["genre"] != "romance" @@ -347,9 +353,11 @@ def test_query_by_id_with_filter_eq(self, idx, query_namespace, use_nondefault_n for match in query_result.matches if match.metadata is not None and match.metadata != {} ] - assert len(matches_with_metadata) == 1 - for match in matches_with_metadata: - assert match.metadata["genre"] == "action" + # Check that we 
have at least the vector we seeded + assert len(matches_with_metadata) >= 1 + # Verify that vector "4" is in the results + assert find_by_id(query_result.matches, "4") is not None + assert find_by_id(query_result.matches, "4").metadata["genre"] == "action" def test_query_by_id_with_filter_ne(self, idx, query_namespace, use_nondefault_namespace): target_namespace = query_namespace if use_nondefault_namespace else "" @@ -375,7 +383,11 @@ def test_query_by_id_with_filter_ne(self, idx, query_namespace, use_nondefault_n for match in query_result.matches if match.metadata is not None and match.metadata != {} ] - assert len(matches_with_metadata) == 2 + # Check that we have at least the vectors we seeded + assert len(matches_with_metadata) >= 2 + # Verify that vectors "5" and "6" are in the results + assert find_by_id(query_result.matches, "5") is not None + assert find_by_id(query_result.matches, "6") is not None for match in matches_with_metadata: assert match.metadata["genre"] != "action" assert match.id != "4" diff --git a/tests/unit/data/test_request_factory.py b/tests/unit/data/test_request_factory.py index ea04acdff..0092bc921 100644 --- a/tests/unit/data/test_request_factory.py +++ b/tests/unit/data/test_request_factory.py @@ -12,6 +12,7 @@ SearchRecordsVector, VectorValues, SearchRecordsRequest, + FetchByMetadataRequest, ) from pinecone import RerankModel @@ -399,3 +400,50 @@ def test_search_request_with_no_rerank(self): ), fields=["*"], ) + + def test_fetch_by_metadata_request_with_filter(self): + request = IndexRequestFactory.fetch_by_metadata_request(filter={"genre": {"$eq": "action"}}) + assert request == FetchByMetadataRequest(filter={"genre": {"$eq": "action"}}) + + def test_fetch_by_metadata_request_with_filter_and_namespace(self): + request = IndexRequestFactory.fetch_by_metadata_request( + filter={"genre": {"$in": ["comedy", "drama"]}}, namespace="my_namespace" + ) + assert request == FetchByMetadataRequest( + filter={"genre": {"$in": ["comedy", 
"drama"]}}, namespace="my_namespace" + ) + + def test_fetch_by_metadata_request_with_limit(self): + request = IndexRequestFactory.fetch_by_metadata_request( + filter={"year": {"$gte": 2020}}, limit=50 + ) + assert request == FetchByMetadataRequest(filter={"year": {"$gte": 2020}}, limit=50) + + def test_fetch_by_metadata_request_with_pagination_token(self): + request = IndexRequestFactory.fetch_by_metadata_request( + filter={"status": "active"}, pagination_token="token123" + ) + assert request == FetchByMetadataRequest( + filter={"status": "active"}, pagination_token="token123" + ) + + def test_fetch_by_metadata_request_with_all_params(self): + request = IndexRequestFactory.fetch_by_metadata_request( + filter={"genre": {"$eq": "action"}, "year": {"$eq": 2020}}, + namespace="my_namespace", + limit=100, + pagination_token="token456", + ) + assert request == FetchByMetadataRequest( + filter={"genre": {"$eq": "action"}, "year": {"$eq": 2020}}, + namespace="my_namespace", + limit=100, + pagination_token="token456", + ) + + def test_fetch_by_metadata_request_without_optional_params(self): + request = IndexRequestFactory.fetch_by_metadata_request(filter={"genre": {"$eq": "action"}}) + assert request.filter == {"genre": {"$eq": "action"}} + assert request.namespace is None + assert request.limit is None + assert request.pagination_token is None diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index 6e8800166..300638115 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -445,6 +445,50 @@ def test_fetch_byIdsAndNS_fetchByIdsAndNS(self, mocker): ids=["vec1", "vec2"], namespace="ns" ) + def test_fetch_by_metadata_with_filter(self, mocker): + mocker.patch.object(self.index._vector_api, "fetch_vectors_by_metadata", autospec=True) + filter_dict = {"genre": {"$eq": "action"}} + self.index.fetch_by_metadata(filter=filter_dict) + call_args = self.index._vector_api.fetch_vectors_by_metadata.call_args + assert call_args is not None + request = 
call_args[0][0] + assert isinstance(request, oai.FetchByMetadataRequest) + assert request.filter == filter_dict + + def test_fetch_by_metadata_with_filter_and_namespace(self, mocker): + mocker.patch.object(self.index._vector_api, "fetch_vectors_by_metadata", autospec=True) + filter_dict = {"genre": {"$in": ["comedy", "drama"]}} + self.index.fetch_by_metadata(filter=filter_dict, namespace="ns") + call_args = self.index._vector_api.fetch_vectors_by_metadata.call_args + assert call_args is not None + request = call_args[0][0] + assert isinstance(request, oai.FetchByMetadataRequest) + assert request.filter == filter_dict + assert request.namespace == "ns" + + def test_fetch_by_metadata_with_limit(self, mocker): + mocker.patch.object(self.index._vector_api, "fetch_vectors_by_metadata", autospec=True) + filter_dict = {"year": {"$gte": 2020}} + self.index.fetch_by_metadata(filter=filter_dict, limit=50) + call_args = self.index._vector_api.fetch_vectors_by_metadata.call_args + assert call_args is not None + request = call_args[0][0] + assert isinstance(request, oai.FetchByMetadataRequest) + assert request.filter == filter_dict + assert request.limit == 50 + + def test_fetch_by_metadata_with_pagination_token(self, mocker): + mocker.patch.object(self.index._vector_api, "fetch_vectors_by_metadata", autospec=True) + filter_dict = {"status": "active"} + pagination_token = "token123" + self.index.fetch_by_metadata(filter=filter_dict, pagination_token=pagination_token) + call_args = self.index._vector_api.fetch_vectors_by_metadata.call_args + assert call_args is not None + request = call_args[0][0] + assert isinstance(request, oai.FetchByMetadataRequest) + assert request.filter == filter_dict + assert request.pagination_token == pagination_token + # endregion # region: update tests diff --git a/tests/unit_grpc/test_grpc_index_describe_index_stats.py b/tests/unit_grpc/test_grpc_index_describe_index_stats.py index 554fbd402..fcd01b81f 100644 --- 
a/tests/unit_grpc/test_grpc_index_describe_index_stats.py +++ b/tests/unit_grpc/test_grpc_index_describe_index_stats.py @@ -1,6 +1,6 @@ from pinecone import Config from pinecone.grpc import GRPCIndex -from pinecone.core.grpc.protos.db_data_2025_04_pb2 import DescribeIndexStatsRequest +from pinecone.core.grpc.protos.db_data_2025_10_pb2 import DescribeIndexStatsRequest from pinecone.grpc.utils import dict_to_proto_struct diff --git a/tests/unit_grpc/test_grpc_index_fetch.py b/tests/unit_grpc/test_grpc_index_fetch.py index 97291fe0f..df56161b7 100644 --- a/tests/unit_grpc/test_grpc_index_fetch.py +++ b/tests/unit_grpc/test_grpc_index_fetch.py @@ -1,6 +1,6 @@ from pinecone import Config from pinecone.grpc import GRPCIndex -from pinecone.core.grpc.protos.db_data_2025_04_pb2 import FetchRequest +from pinecone.core.grpc.protos.db_data_2025_10_pb2 import FetchRequest class TestGrpcIndexFetch: diff --git a/tests/unit_grpc/test_grpc_index_namespace.py b/tests/unit_grpc/test_grpc_index_namespace.py index 427585d92..e36a3b030 100644 --- a/tests/unit_grpc/test_grpc_index_namespace.py +++ b/tests/unit_grpc/test_grpc_index_namespace.py @@ -1,6 +1,6 @@ from pinecone import Config from pinecone.grpc import GRPCIndex -from pinecone.core.grpc.protos.db_data_2025_04_pb2 import ( +from pinecone.core.grpc.protos.db_data_2025_10_pb2 import ( DescribeNamespaceRequest, DeleteNamespaceRequest, ListNamespacesRequest, diff --git a/tests/unit_grpc/test_grpc_index_query.py b/tests/unit_grpc/test_grpc_index_query.py index d237aa98a..4c5fc72da 100644 --- a/tests/unit_grpc/test_grpc_index_query.py +++ b/tests/unit_grpc/test_grpc_index_query.py @@ -2,7 +2,7 @@ from pinecone import Config from pinecone.grpc import GRPCIndex -from pinecone.core.grpc.protos.db_data_2025_04_pb2 import QueryRequest +from pinecone.core.grpc.protos.db_data_2025_10_pb2 import QueryRequest from pinecone.grpc.utils import dict_to_proto_struct diff --git a/tests/unit_grpc/test_grpc_index_update.py 
b/tests/unit_grpc/test_grpc_index_update.py index 207cd09eb..1d5e7bd76 100644 --- a/tests/unit_grpc/test_grpc_index_update.py +++ b/tests/unit_grpc/test_grpc_index_update.py @@ -1,6 +1,6 @@ from pinecone import Config from pinecone.grpc import GRPCIndex -from pinecone.core.grpc.protos.db_data_2025_04_pb2 import UpdateRequest +from pinecone.core.grpc.protos.db_data_2025_10_pb2 import UpdateRequest from pinecone.grpc.utils import dict_to_proto_struct diff --git a/tests/unit_grpc/test_grpc_index_upsert.py b/tests/unit_grpc/test_grpc_index_upsert.py index 1a65da1ca..cb9eccb6f 100644 --- a/tests/unit_grpc/test_grpc_index_upsert.py +++ b/tests/unit_grpc/test_grpc_index_upsert.py @@ -6,7 +6,7 @@ from pinecone import Config from pinecone.grpc import GRPCIndex -from pinecone.core.grpc.protos.db_data_2025_04_pb2 import ( +from pinecone.core.grpc.protos.db_data_2025_10_pb2 import ( Vector, UpsertRequest, UpsertResponse,