Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pinecone/db_data/dataclasses/update_response.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dataclasses import dataclass, field
from typing import cast
from typing import Optional, cast

from .utils import DictLike
from pinecone.utils.response_info import ResponseInfo
Expand All @@ -10,9 +10,11 @@ class UpdateResponse(DictLike):
"""Response from an update operation.

Attributes:
matched_records: The number of records that matched the filter (if a filter was provided).
_response_info: Response metadata including LSN headers.
"""

matched_records: Optional[int] = None
_response_info: ResponseInfo = field(
default_factory=lambda: cast(ResponseInfo, {"raw_headers": {}}), repr=True, compare=False
)
26 changes: 24 additions & 2 deletions pinecone/db_data/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,26 +652,38 @@ def query_namespaces(
@validate_and_convert_errors
def update(
self,
id: str,
id: Optional[str] = None,
values: Optional[List[float]] = None,
set_metadata: Optional[VectorMetadataTypedDict] = None,
namespace: Optional[str] = None,
sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
filter: Optional[FilterTypedDict] = None,
dry_run: Optional[bool] = None,
**kwargs,
) -> UpdateResponse:
# Validate that exactly one of id or filter is provided
if id is None and filter is None:
raise ValueError("Either 'id' or 'filter' must be provided to update vectors.")
if id is not None and filter is not None:
raise ValueError(
"Cannot provide both 'id' and 'filter' in the same update call. Use 'id' for single vector updates or 'filter' for bulk updates."
)
result = self._vector_api.update_vector(
IndexRequestFactory.update_request(
id=id,
values=values,
set_metadata=set_metadata,
namespace=namespace,
sparse_values=sparse_values,
filter=filter,
dry_run=dry_run,
**kwargs,
),
**self._openapi_kwargs(kwargs),
)
# Extract response info from result if it's an OpenAPI model with _response_info
response_info = None
matched_records = None
if hasattr(result, "_response_info"):
response_info = result._response_info
else:
Expand All @@ -680,7 +692,17 @@ def update(

response_info = extract_response_info({})

return UpdateResponse(_response_info=response_info)
# Extract matched_records from the OpenAPI model when it exposes the attribute.
if hasattr(result, "matched_records"):
    matched_records = result.matched_records
# Fall back to _data_store for fields that are not in the OpenAPI spec.
# Each lookup is compared against None explicitly: chaining the lookups with
# `or` would throw away a legitimate count of 0 (0 is falsy) and report None.
if matched_records is None and hasattr(result, "_data_store"):
    matched_records = result._data_store.get("matchedRecords")
    if matched_records is None:
        matched_records = result._data_store.get("matched_records")

return UpdateResponse(matched_records=matched_records, _response_info=response_info)

@validate_and_convert_errors
def describe_index_stats(
Expand Down
26 changes: 24 additions & 2 deletions pinecone/db_data/index_asyncio.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,26 +623,38 @@ async def query_namespaces(
@validate_and_convert_errors
async def update(
self,
id: str,
id: Optional[str] = None,
values: Optional[List[float]] = None,
set_metadata: Optional[VectorMetadataTypedDict] = None,
namespace: Optional[str] = None,
sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
filter: Optional[FilterTypedDict] = None,
dry_run: Optional[bool] = None,
**kwargs,
) -> UpdateResponse:
# Validate that exactly one of id or filter is provided
if id is None and filter is None:
raise ValueError("Either 'id' or 'filter' must be provided to update vectors.")
if id is not None and filter is not None:
raise ValueError(
"Cannot provide both 'id' and 'filter' in the same update call. Use 'id' for single vector updates or 'filter' for bulk updates."
)
result = await self._vector_api.update_vector(
IndexRequestFactory.update_request(
id=id,
values=values,
set_metadata=set_metadata,
namespace=namespace,
sparse_values=sparse_values,
filter=filter,
dry_run=dry_run,
**kwargs,
),
**self._openapi_kwargs(kwargs),
)
# Extract response info from result if it's an OpenAPI model with _response_info
response_info = None
matched_records = None
if hasattr(result, "_response_info"):
response_info = result._response_info
else:
Expand All @@ -651,7 +663,17 @@ async def update(

response_info = extract_response_info({})

return UpdateResponse(_response_info=response_info)
# Extract matched_records from the OpenAPI model when it exposes the attribute.
if hasattr(result, "matched_records"):
    matched_records = result.matched_records
# Fall back to _data_store for fields that are not in the OpenAPI spec.
# Each lookup is compared against None explicitly: chaining the lookups with
# `or` would throw away a legitimate count of 0 (0 is falsy) and report None.
if matched_records is None and hasattr(result, "_data_store"):
    matched_records = result._data_store.get("matchedRecords")
    if matched_records is None:
        matched_records = result._data_store.get("matched_records")

return UpdateResponse(matched_records=matched_records, _response_info=response_info)

@validate_and_convert_errors
async def describe_index_stats(
Expand Down
78 changes: 64 additions & 14 deletions pinecone/db_data/index_asyncio_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,33 +525,41 @@ async def main():
@abstractmethod
async def update(
self,
id: str,
id: Optional[str] = None,
values: Optional[List[float]] = None,
set_metadata: Optional[VectorMetadataTypedDict] = None,
namespace: Optional[str] = None,
sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
filter: Optional[FilterTypedDict] = None,
dry_run: Optional[bool] = None,
**kwargs,
) -> UpdateResponse:
"""
The Update operation updates vector in a namespace.
The Update operation updates vectors in a namespace.

Args:
id (str): Vector's unique id.
values (List[float]): vector values to set. [optional]
set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]):
metadata to set for vector. [optional]
namespace (str): Namespace name where to update the vector.. [optional]
sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector.
Expected to be either a SparseValues object or a dict of the form:
{'indices': List[int], 'values': List[float]} where the lists each have the same length.
This method supports two update modes:

If a value is included, it will overwrite the previous value.
If a set_metadata is included,
the values of the fields specified in it will be added or overwrite the previous value.
1. **Single vector update by ID**: Provide `id` to update a specific vector.
- Updates the vector with the given ID
- If `values` is included, it will overwrite the previous vector values
- If `set_metadata` is included, the metadata will be merged with existing metadata on the vector.
Fields specified in `set_metadata` will overwrite existing fields with the same key, while
fields not in `set_metadata` will remain unchanged.

2. **Bulk update by metadata filter**: Provide `filter` to update all vectors matching the filter criteria.
- Updates all vectors in the namespace that match the filter expression
- Useful for updating metadata across multiple vectors at once
- If `set_metadata` is included, the metadata will be merged with existing metadata on each vector.
Fields specified in `set_metadata` will overwrite existing fields with the same key, while
fields not in `set_metadata` will remain unchanged.
- The response includes `matched_records` indicating how many vectors were updated

Either `id` or `filter` must be provided (but not both in the same call).

Examples:

**Single vector update by ID:**

.. code-block:: python

import asyncio
Expand Down Expand Up @@ -588,8 +596,50 @@ async def main():
namespace='my_namespace'
)

**Bulk update by metadata filter:**

.. code-block:: python

# Update metadata for all vectors matching the filter
response = await idx.update(
set_metadata={'status': 'active'},
filter={'genre': {'$eq': 'drama'}},
namespace='my_namespace'
)
print(f"Updated {response.matched_records} vectors")
# Preview how many vectors would be updated (dry run)
response = await idx.update(
set_metadata={'status': 'active'},
filter={'genre': {'$eq': 'drama'}},
namespace='my_namespace',
dry_run=True
)
print(f"Would update {response.matched_records} vectors")

asyncio.run(main())

Args:
id (str): Vector's unique id. Required for single vector updates. Must not be provided when using filter. [optional]
values (List[float]): Vector values to set. [optional]
set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]):
Metadata to merge with existing metadata on the vector(s). Fields specified will overwrite
existing fields with the same key, while fields not specified will remain unchanged. [optional]
namespace (str): Namespace name where to update the vector(s). [optional]
sparse_values: (Dict[str, Union[List[float], List[int]]]): Sparse values to update for the vector.
Expected to be either a SparseValues object or a dict of the form:
{'indices': List[int], 'values': List[float]} where the lists each have the same length. [optional]
filter (Dict[str, Union[str, float, int, bool, List, dict]]): A metadata filter expression.
When provided, updates all vectors in the namespace that match the filter criteria.
See `metadata filtering <https://www.pinecone.io/docs/metadata-filtering/>`_.
Must not be provided when using id. Either `id` or `filter` must be provided. [optional]
dry_run (bool): If `True`, return the number of records that match the `filter` without executing
the update. Only meaningful when using `filter` (not with `id`). Useful for previewing
the impact of a bulk update before applying changes. Defaults to `False`. [optional]

Returns:
UpdateResponse: An UpdateResponse object. When using filter-based updates, the response includes
`matched_records` indicating the number of vectors that were updated (or would be updated if
`dry_run=True`).
"""
pass

Expand Down
72 changes: 59 additions & 13 deletions pinecone/db_data/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,42 +710,88 @@ def query_namespaces(
@abstractmethod
def update(
    self,
    id: Optional[str] = None,
    values: Optional[List[float]] = None,
    set_metadata: Optional[VectorMetadataTypedDict] = None,
    namespace: Optional[str] = None,
    sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
    filter: Optional[FilterTypedDict] = None,
    dry_run: Optional[bool] = None,
    **kwargs,
) -> UpdateResponse:
    """
    The Update operation updates vectors in a namespace.

    This method supports two update modes:

    1. **Single vector update by ID**: Provide `id` to update a specific vector.
       - Updates the vector with the given ID
       - If `values` is included, it will overwrite the previous vector values
       - If `set_metadata` is included, the metadata will be merged with existing metadata on the vector.
         Fields specified in `set_metadata` will overwrite existing fields with the same key, while
         fields not in `set_metadata` will remain unchanged.

    2. **Bulk update by metadata filter**: Provide `filter` to update all vectors matching the filter criteria.
       - Updates all vectors in the namespace that match the filter expression
       - Useful for updating metadata across multiple vectors at once
       - If `set_metadata` is included, the metadata will be merged with existing metadata on each vector.
         Fields specified in `set_metadata` will overwrite existing fields with the same key, while
         fields not in `set_metadata` will remain unchanged.
       - The response includes `matched_records` indicating how many vectors were updated

    Either `id` or `filter` must be provided (but not both in the same call).

    Examples:

    **Single vector update by ID:**

    .. code-block:: python

        >>> # Update vector values
        >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace')
        >>> # Update vector metadata
        >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace')
        >>> # Update vector values and sparse values
        >>> index.update(id='id1', values=[1, 2, 3], sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]},
        ...              namespace='my_namespace')
        >>> index.update(id='id1', values=[1, 2, 3], sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4]),
        ...              namespace='my_namespace')

    **Bulk update by metadata filter:**

    .. code-block:: python

        >>> # Update metadata for all vectors matching the filter
        >>> response = index.update(set_metadata={'status': 'active'}, filter={'genre': {'$eq': 'drama'}},
        ...                         namespace='my_namespace')
        >>> print(f"Updated {response.matched_records} vectors")
        >>> # Preview how many vectors would be updated (dry run)
        >>> response = index.update(set_metadata={'status': 'active'}, filter={'genre': {'$eq': 'drama'}},
        ...                         namespace='my_namespace', dry_run=True)
        >>> print(f"Would update {response.matched_records} vectors")

    Args:
        id (str): Vector's unique id. Required for single vector updates. Must not be provided when using filter. [optional]
        values (List[float]): Vector values to set. [optional]
        set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]):
            Metadata to merge with existing metadata on the vector(s). Fields specified will overwrite
            existing fields with the same key, while fields not specified will remain unchanged. [optional]
        namespace (str): Namespace name where to update the vector(s). [optional]
        sparse_values: (Dict[str, Union[List[float], List[int]]]): Sparse values to update for the vector.
            Expected to be either a SparseValues object or a dict of the form:
            {'indices': List[int], 'values': List[float]} where the lists each have the same length. [optional]
        filter (Dict[str, Union[str, float, int, bool, List, dict]]): A metadata filter expression.
            When provided, updates all vectors in the namespace that match the filter criteria.
            See `metadata filtering <https://www.pinecone.io/docs/metadata-filtering/>`_.
            Must not be provided when using id. Either `id` or `filter` must be provided. [optional]
        dry_run (bool): If `True`, return the number of records that match the `filter` without executing
            the update. Only meaningful when using `filter` (not with `id`). Useful for previewing
            the impact of a bulk update before applying changes. Defaults to `False`. [optional]

    Returns:
        UpdateResponse: An UpdateResponse object. When using filter-based updates, the response includes
        `matched_records` indicating the number of vectors that were updated (or would be updated if
        `dry_run=True`).
    """
    pass

Expand Down
11 changes: 7 additions & 4 deletions pinecone/db_data/request_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,27 +135,30 @@ def fetch_by_metadata_request(

@staticmethod
def update_request(
    id: Optional[str] = None,
    values: Optional[List[float]] = None,
    set_metadata: Optional[VectorMetadataTypedDict] = None,
    namespace: Optional[str] = None,
    sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
    filter: Optional[FilterTypedDict] = None,
    dry_run: Optional[bool] = None,
    **kwargs,
) -> UpdateRequest:
    """Build the ``UpdateRequest`` for an update-by-id or update-by-filter call.

    Args:
        id: Id of the single vector to update, or ``None`` for a filter-based update.
        values: Vector values to set.
        set_metadata: Metadata to set on the vector(s).
        namespace: Namespace in which to perform the update.
        sparse_values: Sparse values, given as a ``SparseValues`` object or an
            equivalent dict; normalized with ``SparseValuesFactory.build``.
        filter: Metadata filter expression for a bulk update.
        dry_run: Dry-run flag forwarded to the request.
        **kwargs: Extra options. ``_check_type`` (default ``False``) is popped and
            passed to ``UpdateRequest``; the rest goes through ``non_openapi_kwargs``.

    Returns:
        The populated ``UpdateRequest``.
    """
    _check_type = kwargs.pop("_check_type", False)
    sparse_values_normalized = SparseValuesFactory.build(sparse_values)
    # NOTE(review): parse_non_empty_args presumably drops entries whose value is
    # None, so unset fields are left out of the request - confirm against the helper.
    args_dict = parse_non_empty_args(
        [
            ("id", id),
            ("values", values),
            ("set_metadata", set_metadata),
            ("namespace", namespace),
            ("sparse_values", sparse_values_normalized),
            ("filter", filter),
            ("dry_run", dry_run),
        ]
    )

    # `id` is carried only inside args_dict; passing `id=id` as well would
    # supply the same keyword twice.
    return UpdateRequest(**args_dict, _check_type=_check_type, **non_openapi_kwargs(kwargs))

@staticmethod
def describe_index_stats_request(
Expand Down
Loading