Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions vectordb_bench/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ class config:

OPTIMIZE_TIMEOUT_1536D_500K = 15 * 60 # 15min
OPTIMIZE_TIMEOUT_1536D_5M = 2.5 * 3600 # 2.5h

CHURN_CYCLES_DEFAULT = 0 # Keeping this default to 0 as most clients do not support churn
CHURN_P_CHURN_DEFAULT = 10
def display(self) -> str:
tmp = [
i for i in inspect.getmembers(self)
Expand Down
6 changes: 5 additions & 1 deletion vectordb_bench/backend/cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def case_description(self, custom_configs: dict | None = None) -> str:
class CaseLabel(Enum):
    """Coarse category a benchmark case belongs to.

    Member values are assigned by ``auto()`` in declaration order, so the
    order of the members below must not be changed.
    """

    # Load-type cases.
    Load = auto()
    # Performance-type cases.
    Performance = auto()
    # Churn-type cases; presumably the delete-and-reinsert cycles driven by
    # the ``cycles`` / ``p_churn`` case fields -- confirm against the runner.
    Churn = auto()

class Case(BaseModel):
"""Undefined case
Expand All @@ -83,6 +83,8 @@ class Case(BaseModel):
dataset(DataSet): dataset for this case runner.
filter_rate(float | None): one of 99% | 1% | None
filters(dict | None): filters for search
cycles(int | None): number of churn cycles to run
p_churn(float | int | None): percentage of the data to delete and reinsert
"""

case_id: CaseType
Expand All @@ -95,6 +97,8 @@ class Case(BaseModel):
optimize_timeout: float | int | None = None

filter_rate: float | None = None
cycles: int | None = None
p_churn: float | int | None = None

@property
def filters(self) -> dict | None:
Expand Down
16 changes: 16 additions & 0 deletions vectordb_bench/backend/clients/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,22 @@ def search_embedding(
"""
raise NotImplementedError

@abstractmethod
def delete_embeddings(
self,
metadata: list[int],
**kwargs,
) -> (int, Exception):
"""Delete embeddings from the vector database based on metadata.
Args:
metadata (list[int]): List of metadata associated with the embeddings to delete.
**kwargs (Any): Vector database specific parameters.
Returns:
int: Number of deleted embeddings.
Exception: An exception if any error occurred during deletion.
"""
raise NotImplementedError

# TODO: remove
@abstractmethod
def optimize(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
from contextlib import contextmanager
import time
from typing import Iterable, Type
from typing import Any, Iterable, Optional, Tuple, Type
from ..api import VectorDB, DBCaseConfig, DBConfig, IndexType
from .config import AWSOpenSearchConfig, AWSOpenSearchIndexConfig, AWSOS_Engine
from opensearchpy import OpenSearch
Expand Down Expand Up @@ -151,6 +151,13 @@ def search_embedding(
except Exception as e:
log.warning(f"Failed to search: {self.index_name} error: {str(e)}")
raise e from None

def delete_embeddings(
    self,
    metadata: list[int],
    **kwargs: Any,
) -> Tuple[int, Optional[Exception]]:
    """Report that deleting embeddings is not implemented for this client.

    The method used to fall through with ``pass`` and return ``None``, which
    breaks any caller that unpacks the declared ``(count, error)`` pair.

    Args:
        metadata (list[int]): IDs of the embeddings that were requested to be deleted (unused).
        **kwargs (Any): Vector database specific parameters (unused).

    Returns:
        Tuple[int, Optional[Exception]]: Always ``(0, NotImplementedError)``;
        nothing is deleted.
    """
    # Return the error instead of raising it: the (count, error) contract
    # reports failures through the second tuple element.
    return 0, NotImplementedError(
        f"{type(self).__name__} does not implement delete_embeddings"
    )

def optimize(self):
"""optimize will be called between insertion and search in performance cases."""
Expand Down
9 changes: 8 additions & 1 deletion vectordb_bench/backend/clients/chroma/chroma.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import chromadb
import logging
from contextlib import contextmanager
from typing import Any
from typing import Any, Optional, Tuple
from ..api import VectorDB, DBCaseConfig

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -63,6 +63,13 @@ def ready_to_load(self) -> bool:
def optimize(self) -> None:
    """Do nothing; this client has no optimization step to run."""
    return None

def delete_embeddings(
    self,
    metadata: list[int],
    **kwargs: Any,
) -> Tuple[int, Optional[Exception]]:
    """Report that deleting embeddings is not implemented for this client.

    The method used to fall through with ``pass`` and return ``None``, which
    breaks any caller that unpacks the declared ``(count, error)`` pair.

    Args:
        metadata (list[int]): IDs of the embeddings that were requested to be deleted (unused).
        **kwargs (Any): Vector database specific parameters (unused).

    Returns:
        Tuple[int, Optional[Exception]]: Always ``(0, NotImplementedError)``;
        nothing is deleted.
    """
    # Return the error instead of raising it: the (count, error) contract
    # reports failures through the second tuple element.
    return 0, NotImplementedError(
        f"{type(self).__name__} does not implement delete_embeddings"
    )

def insert_embeddings(
self,
embeddings: list[list[float]],
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import time
from contextlib import contextmanager
from typing import Iterable
from typing import Any, Iterable, Optional, Tuple
from ..api import VectorDB
from .config import ElasticCloudIndexConfig
from elasticsearch.helpers import bulk
Expand Down Expand Up @@ -97,6 +97,13 @@ def insert_embeddings(
log.warning(f"Failed to insert data: {self.indice} error: {str(e)}")
return (0, e)

def delete_embeddings(
    self,
    metadata: list[int],
    **kwargs: Any,
) -> Tuple[int, Optional[Exception]]:
    """Report that deleting embeddings is not implemented for this client.

    The method used to fall through with ``pass`` and return ``None``, which
    breaks any caller that unpacks the declared ``(count, error)`` pair.

    Args:
        metadata (list[int]): IDs of the embeddings that were requested to be deleted (unused).
        **kwargs (Any): Vector database specific parameters (unused).

    Returns:
        Tuple[int, Optional[Exception]]: Always ``(0, NotImplementedError)``;
        nothing is deleted.
    """
    # Return the error instead of raising it: the (count, error) contract
    # reports failures through the second tuple element.
    return 0, NotImplementedError(
        f"{type(self).__name__} does not implement delete_embeddings"
    )

def search_embedding(
self,
query: list[float],
Expand Down
7 changes: 7 additions & 0 deletions vectordb_bench/backend/clients/memorydb/memorydb.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,13 @@ def insert_embeddings(
return 0, e

return result_len, None

def delete_embeddings(
    self,
    metadata: list[int],
    **kwargs: Any,
) -> Tuple[int, Optional[Exception]]:
    """Report that deleting embeddings is not implemented for this client.

    The method used to fall through with ``pass`` and return ``None``, which
    breaks any caller that unpacks the declared ``(count, error)`` pair.

    Args:
        metadata (list[int]): IDs of the embeddings that were requested to be deleted (unused).
        **kwargs (Any): Vector database specific parameters (unused).

    Returns:
        Tuple[int, Optional[Exception]]: Always ``(0, NotImplementedError)``;
        nothing is deleted.
    """
    # Return the error instead of raising it: the (count, error) contract
    # reports failures through the second tuple element.
    return 0, NotImplementedError(
        f"{type(self).__name__} does not implement delete_embeddings"
    )

def _post_insert(self):
"""Wait for indexing to finish"""
Expand Down
10 changes: 9 additions & 1 deletion vectordb_bench/backend/clients/milvus/milvus.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
import time
from contextlib import contextmanager
from typing import Iterable
from typing import Any, Iterable, Optional, Tuple

from pymilvus import Collection, utility
from pymilvus import CollectionSchema, DataType, FieldSchema, MilvusException
Expand Down Expand Up @@ -196,6 +196,14 @@ def insert_embeddings(
return (insert_count, e)
return (insert_count, None)


def delete_embeddings(
    self,
    metadata: list[int],
    **kwargs: Any,
) -> Tuple[int, Optional[Exception]]:
    """Report that deleting embeddings is not implemented for this client.

    The method used to fall through with ``pass`` and return ``None``, which
    breaks any caller that unpacks the declared ``(count, error)`` pair.

    Args:
        metadata (list[int]): IDs of the embeddings that were requested to be deleted (unused).
        **kwargs (Any): Vector database specific parameters (unused).

    Returns:
        Tuple[int, Optional[Exception]]: Always ``(0, NotImplementedError)``;
        nothing is deleted.
    """
    # Return the error instead of raising it: the (count, error) contract
    # reports failures through the second tuple element.
    return 0, NotImplementedError(
        f"{type(self).__name__} does not implement delete_embeddings"
    )

def search_embedding(
self,
query: list[float],
Expand Down
7 changes: 7 additions & 0 deletions vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,13 @@ def insert_embeddings(
)
return 0, e

def delete_embeddings(
    self,
    metadata: list[int],
    **kwargs: Any,
) -> Tuple[int, Optional[Exception]]:
    """Report that deleting embeddings is not implemented for this client.

    The method used to fall through with ``pass`` and return ``None``, which
    breaks any caller that unpacks the declared ``(count, error)`` pair.

    Args:
        metadata (list[int]): IDs of the embeddings that were requested to be deleted (unused).
        **kwargs (Any): Vector database specific parameters (unused).

    Returns:
        Tuple[int, Optional[Exception]]: Always ``(0, NotImplementedError)``;
        nothing is deleted.
    """
    # Return the error instead of raising it: the (count, error) contract
    # reports failures through the second tuple element.
    return 0, NotImplementedError(
        f"{type(self).__name__} does not implement delete_embeddings"
    )

def search_embedding(
self,
query: list[float],
Expand Down
32 changes: 32 additions & 0 deletions vectordb_bench/backend/clients/pgvector/pgvector.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,38 @@ def insert_embeddings(
)
return 0, e

def delete_embeddings(
    self,
    metadata: list[int],
    **kwargs: Any,
) -> Tuple[int, Optional[Exception]]:
    """Delete embeddings from the pgvector table based on metadata (IDs).

    Args:
        metadata (list[int]): List of metadata (IDs) for the embeddings to delete.
        **kwargs (Any): Additional vector database-specific parameters (unused).

    Returns:
        Tuple[int, Optional[Exception]]: ``(deleted_count, None)`` when the
        delete was committed, or ``(0, error)`` when the statement or the
        commit failed.

    Raises:
        AssertionError: If the connection or cursor has not been initialized.
    """
    assert self.conn is not None, "Connection is not initialized"
    assert self.cursor is not None, "Cursor is not initialized"

    # Nothing to delete: avoid a pointless round-trip to the server.
    if len(metadata) == 0:
        return 0, None

    # The table name is injected as a quoted identifier; the IDs are bound
    # as a single array parameter matched with ANY().
    delete_sql = sql.SQL(
        "DELETE FROM public.{table_name} WHERE id = ANY(%s)"
    ).format(table_name=sql.Identifier(self.table_name))

    try:
        self.cursor.execute(delete_sql, (metadata,))
        deleted_count = self.cursor.rowcount  # rows removed by the DELETE
        self.conn.commit()
    except Exception as e:
        log.warning(f"Failed to delete data from pgvector table ({self.table_name}), error: {e}")
        # A failed statement leaves the transaction aborted; roll back so the
        # shared connection stays usable for later inserts and searches.
        try:
            self.conn.rollback()
        except Exception as rollback_error:
            log.warning(
                f"Failed to roll back after delete error on pgvector table ({self.table_name}), error: {rollback_error}"
            )
        return 0, e

    return deleted_count, None

def search_embedding(
self,
query: list[float],
Expand Down
7 changes: 7 additions & 0 deletions vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,13 @@ def insert_embeddings(
)
return 0, e

def delete_embeddings(
    self,
    metadata: list[int],
    **kwargs: Any,
) -> Tuple[int, Optional[Exception]]:
    """Report that deleting embeddings is not implemented for this client.

    The method used to fall through with ``pass`` and return ``None``, which
    breaks any caller that unpacks the declared ``(count, error)`` pair.

    Args:
        metadata (list[int]): IDs of the embeddings that were requested to be deleted (unused).
        **kwargs (Any): Vector database specific parameters (unused).

    Returns:
        Tuple[int, Optional[Exception]]: Always ``(0, NotImplementedError)``;
        nothing is deleted.
    """
    # Return the error instead of raising it: the (count, error) contract
    # reports failures through the second tuple element.
    return 0, NotImplementedError(
        f"{type(self).__name__} does not implement delete_embeddings"
    )

def search_embedding(
self,
query: list[float],
Expand Down
9 changes: 8 additions & 1 deletion vectordb_bench/backend/clients/pinecone/pinecone.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import logging
from contextlib import contextmanager
from typing import Type
from typing import Any, Optional, Tuple, Type

from ..api import VectorDB, DBConfig, DBCaseConfig, EmptyDBCaseConfig, IndexType
from .config import PineconeConfig
Expand Down Expand Up @@ -95,6 +95,13 @@ def insert_embeddings(
return (insert_count, e)
return (len(embeddings), None)

def delete_embeddings(
    self,
    metadata: list[int],
    **kwargs: Any,
) -> Tuple[int, Optional[Exception]]:
    """Report that deleting embeddings is not implemented for this client.

    The method used to fall through with ``pass`` and return ``None``, which
    breaks any caller that unpacks the declared ``(count, error)`` pair.

    Args:
        metadata (list[int]): IDs of the embeddings that were requested to be deleted (unused).
        **kwargs (Any): Vector database specific parameters (unused).

    Returns:
        Tuple[int, Optional[Exception]]: Always ``(0, NotImplementedError)``;
        nothing is deleted.
    """
    # Return the error instead of raising it: the (count, error) contract
    # reports failures through the second tuple element.
    return 0, NotImplementedError(
        f"{type(self).__name__} does not implement delete_embeddings"
    )

def search_embedding(
self,
query: list[float],
Expand Down
8 changes: 8 additions & 0 deletions vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import logging
import time
from contextlib import contextmanager
from typing import Any, Optional, Tuple

from ..api import VectorDB, DBCaseConfig
from qdrant_client.http.models import (
Expand Down Expand Up @@ -127,6 +128,13 @@ def insert_embeddings(
else:
return len(metadata), None

def delete_embeddings(
    self,
    metadata: list[int],
    **kwargs: Any,
) -> Tuple[int, Optional[Exception]]:
    """Report that deleting embeddings is not implemented for this client.

    The method used to fall through with ``pass`` and return ``None``, which
    breaks any caller that unpacks the declared ``(count, error)`` pair.

    Args:
        metadata (list[int]): IDs of the embeddings that were requested to be deleted (unused).
        **kwargs (Any): Vector database specific parameters (unused).

    Returns:
        Tuple[int, Optional[Exception]]: Always ``(0, NotImplementedError)``;
        nothing is deleted.
    """
    # Return the error instead of raising it: the (count, error) contract
    # reports failures through the second tuple element.
    return 0, NotImplementedError(
        f"{type(self).__name__} does not implement delete_embeddings"
    )

def search_embedding(
self,
query: list[float],
Expand Down
9 changes: 8 additions & 1 deletion vectordb_bench/backend/clients/redis/redis.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
from contextlib import contextmanager
from typing import Any, Type
from typing import Any, Optional, Tuple, Type
from ..api import VectorDB, DBConfig, DBCaseConfig, EmptyDBCaseConfig, IndexType
from .config import RedisConfig
import redis
Expand Down Expand Up @@ -123,6 +123,13 @@ def insert_embeddings(
return 0, e

return result_len, None

def delete_embeddings(
    self,
    metadata: list[int],
    **kwargs: Any,
) -> Tuple[int, Optional[Exception]]:
    """Report that deleting embeddings is not implemented for this client.

    The method used to fall through with ``pass`` and return ``None``, which
    breaks any caller that unpacks the declared ``(count, error)`` pair.

    Args:
        metadata (list[int]): IDs of the embeddings that were requested to be deleted (unused).
        **kwargs (Any): Vector database specific parameters (unused).

    Returns:
        Tuple[int, Optional[Exception]]: Always ``(0, NotImplementedError)``;
        nothing is deleted.
    """
    # Return the error instead of raising it: the (count, error) contract
    # reports failures through the second tuple element.
    return 0, NotImplementedError(
        f"{type(self).__name__} does not implement delete_embeddings"
    )

def search_embedding(
self,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Wrapper around the Weaviate vector database over VectorDB"""

import logging
from typing import Iterable
from typing import Any, Iterable, Optional, Tuple
from contextlib import contextmanager

import weaviate
Expand Down Expand Up @@ -113,6 +113,13 @@ def insert_embeddings(
log.warning(f"Failed to insert data, error: {str(e)}")
return (insert_count, e)

def delete_embeddings(
    self,
    metadata: list[int],
    **kwargs: Any,
) -> Tuple[int, Optional[Exception]]:
    """Report that deleting embeddings is not implemented for this client.

    The method used to fall through with ``pass`` and return ``None``, which
    breaks any caller that unpacks the declared ``(count, error)`` pair.

    Args:
        metadata (list[int]): IDs of the embeddings that were requested to be deleted (unused).
        **kwargs (Any): Vector database specific parameters (unused).

    Returns:
        Tuple[int, Optional[Exception]]: Always ``(0, NotImplementedError)``;
        nothing is deleted.
    """
    # Return the error instead of raising it: the (count, error) contract
    # reports failures through the second tuple element.
    return 0, NotImplementedError(
        f"{type(self).__name__} does not implement delete_embeddings"
    )

def search_embedding(
self,
query: list[float],
Expand Down
7 changes: 6 additions & 1 deletion vectordb_bench/backend/runner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,16 @@
MultiProcessingSearchRunner,
)

from .serial_runner import SerialSearchRunner, SerialInsertRunner
from .serial_runner import (
SerialSearchRunner,
SerialInsertRunner,
SerialChurnRunner,
)


__all__ = [
'MultiProcessingSearchRunner',
'SerialSearchRunner',
'SerialInsertRunner',
'SerialChurnRunner',
]
Loading