Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ all = [
"memorydb",
"alibabacloud_ha3engine_vector",
"alibabacloud_searchengine20211025",
"mariadb",
]

qdrant = [ "qdrant-client" ]
Expand All @@ -86,6 +87,7 @@ chromadb = [ "chromadb" ]
opensearch = [ "opensearch-py" ]
aliyun_opensearch = [ "alibabacloud_ha3engine_vector", "alibabacloud_searchengine20211025"]
mongodb = [ "pymongo" ]
mariadb = [ "mariadb" ]

[project.urls]
"repository" = "https://github.com/zilliztech/VectorDBBench"
Expand Down
15 changes: 15 additions & 0 deletions vectordb_bench/backend/clients/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class DB(Enum):
Chroma = "Chroma"
AWSOpenSearch = "OpenSearch"
AliyunElasticsearch = "AliyunElasticsearch"
MariaDB = "MariaDB"
Test = "test"
AliyunOpenSearch = "AliyunOpenSearch"
MongoDB = "MongoDB"
Expand Down Expand Up @@ -135,6 +136,11 @@ def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901

return MongoDB

if self == DB.MariaDB:
from .mariadb.mariadb import MariaDB

return MariaDB

if self == DB.Test:
from .test.test import Test

Expand Down Expand Up @@ -236,6 +242,10 @@ def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912, C901

return MongoDBConfig

if self == DB.MariaDB:
from .mariadb.config import MariaDBConfig
return MariaDBConfig

if self == DB.Test:
from .test.config import TestConfig

Expand Down Expand Up @@ -318,6 +328,11 @@ def case_config_cls( # noqa: PLR0911

return MongoDBIndexConfig

if self == DB.MariaDB:
from .mariadb.config import _mariadb_case_config

return _mariadb_case_config.get(index_type)

# DB.Pinecone, DB.Chroma, DB.Redis
return EmptyDBCaseConfig

Expand Down
107 changes: 107 additions & 0 deletions vectordb_bench/backend/clients/mariadb/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
from typing import Annotated, Optional, Unpack

import click
import os
from pydantic import SecretStr

from ....cli.cli import (
CommonTypedDict,
HNSWFlavor1,
cli,
click_parameter_decorators_from_typed_dict,
run,
)
from vectordb_bench.backend.clients import DB


class MariaDBTypedDict(CommonTypedDict):
    """Connection and storage options shared by the MariaDB CLI commands.

    Each field pairs the value's type with the ``click.option`` that
    supplies it on the command line.
    """

    # NOTE(review): the flag is ``--username``, so click delivers the value
    # under the key ``username`` (the key the MariaDBHNSW command reads),
    # not under this field's name -- confirm before renaming either side.
    user_name: Annotated[
        str,
        click.option(
            "--username",
            type=str,
            help="Username",
            required=True,
        ),
    ]
    password: Annotated[
        str,
        click.option(
            "--password",
            type=str,
            help="Password",
            required=True,
        ),
    ]

    host: Annotated[
        str,
        click.option(
            "--host",
            type=str,
            help="Db host",
            default="127.0.0.1",
        ),
    ]

    port: Annotated[
        int,
        click.option(
            "--port",
            type=int,
            default=3306,
            help="Db Port",
        ),
    ]

    # Annotated as ``str`` (was ``int``): the option is a ``click.Choice``
    # over engine names, so the parsed value is one of those strings.
    storage_engine: Annotated[
        str,
        click.option(
            "--storage-engine",
            type=click.Choice(["InnoDB", "MyISAM"]),
            help="DB storage engine",
            required=True,
        ),
    ]

class MariaDBHNSWTypedDict(MariaDBTypedDict):
    """CLI options for the HNSW index, on top of the shared MariaDB options.

    All three options are optional; an omitted flag yields ``None``.
    """

    m: Annotated[
        Optional[int],
        click.option(
            "--m",
            type=int,
            help="M parameter in MHNSW vector indexing",
            required=False,
        ),
    ]

    ef_search: Annotated[
        Optional[int],
        click.option(
            "--ef-search",
            type=int,
            help="MariaDB system variable mhnsw_min_limit",
            required=False,
        ),
    ]

    max_cache_size: Annotated[
        Optional[int],
        click.option(
            "--max-cache-size",
            type=int,
            help="MariaDB system variable mhnsw_max_cache_size",
            required=False,
        ),
    ]


@cli.command()
@click_parameter_decorators_from_typed_dict(MariaDBHNSWTypedDict)
def MariaDBHNSW(
    **parameters: Unpack[MariaDBHNSWTypedDict],
):
    # CLI command that benchmarks MariaDB with an HNSW index. Documented with
    # a comment rather than a docstring so the command's help output is
    # left exactly as it was.

    # Imported here rather than at module level, as in the original.
    from .config import MariaDBConfig, MariaDBHNSWConfig

    # Connection settings; the password is wrapped so it is not exposed as
    # a plain string on the config object.
    connection_config = MariaDBConfig(
        db_label=parameters["db_label"],
        user_name=parameters["username"],
        password=SecretStr(parameters["password"]),
        host=parameters["host"],
        port=parameters["port"],
    )

    # Index/search tuning taken from the HNSW-specific options.
    hnsw_config = MariaDBHNSWConfig(
        M=parameters["m"],
        ef_search=parameters["ef_search"],
        storage_engine=parameters["storage_engine"],
        max_cache_size=parameters["max_cache_size"],
    )

    # The full option set is forwarded as well, alongside the built configs.
    run(
        db=DB.MariaDB,
        db_config=connection_config,
        db_case_config=hnsw_config,
        **parameters,
    )
71 changes: 71 additions & 0 deletions vectordb_bench/backend/clients/mariadb/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from pydantic import SecretStr, BaseModel
from typing import TypedDict
from ..api import DBConfig, DBCaseConfig, MetricType, IndexType

class MariaDBConfigDict(TypedDict):
    """Keyword arguments for opening a MariaDB connection.

    These keys are meant to be passed directly as kwargs when connecting,
    so their names must match the MariaDB client API exactly.
    """

    # Account name used to log in.
    user: str
    # Plain-text password for that account.
    password: str
    # Server host name or address.
    host: str
    # Server TCP port.
    port: int


class MariaDBConfig(DBConfig):
    # Connection settings for a MariaDB server. The defaults point at
    # 127.0.0.1:3306 as user "root"; the password has no default.
    user_name: str = "root"
    password: SecretStr
    host: str = "127.0.0.1"
    port: int = 3306

    def to_dict(self) -> MariaDBConfigDict:
        """Return the settings as connection kwargs, with the password unwrapped."""
        return MariaDBConfigDict(
            host=self.host,
            port=self.port,
            # The connection key is "user", while the model field is user_name.
            user=self.user_name,
            password=self.password.get_secret_value(),
        )


class MariaDBIndexConfig(BaseModel):
    """Base config for MariaDB"""

    # Distance metric requested for the benchmark case; unset by default.
    metric_type: MetricType | None = None

    def parse_metric(self) -> str:
        """Translate ``metric_type`` into the distance name used for MariaDB.

        Returns:
            ``"euclidean"`` for L2 and ``"cosine"`` for COSINE.

        Raises:
            ValueError: for any other metric, including an unset one.
        """
        requested = self.metric_type
        if requested == MetricType.L2:
            return "euclidean"
        if requested == MetricType.COSINE:
            return "cosine"
        raise ValueError(f"Metric type {self.metric_type} is not supported!")

class MariaDBHNSWConfig(MariaDBIndexConfig, DBCaseConfig):
    # Case configuration for a MariaDB HNSW index. M and max_cache_size feed
    # the index parameters, ef_search feeds the search parameters, and the
    # storage engine defaults to InnoDB.
    M: int | None
    ef_search: int | None
    index: IndexType = IndexType.HNSW
    storage_engine: str = "InnoDB"
    max_cache_size: int | None

    def index_param(self) -> dict:
        """Return the parameters describing how the index is built.

        Raises:
            ValueError: if the configured metric is not supported.
        """
        metric_name = self.parse_metric()
        index_kind = self.index.value
        return {
            "storage_engine": self.storage_engine,
            "metric_type": metric_name,
            "index_type": index_kind,
            "M": self.M,
            "max_cache_size": self.max_cache_size,
        }

    def search_param(self) -> dict:
        """Return the parameters applied at query time.

        Raises:
            ValueError: if the configured metric is not supported.
        """
        metric_name = self.parse_metric()
        return {"metric_type": metric_name, "ef_search": self.ef_search}


# Maps an index type to the case-config class that handles it for MariaDB.
# Only HNSW is registered; a ``.get()`` on any other index type yields None.
_mariadb_case_config = {
    IndexType.HNSW: MariaDBHNSWConfig,
}


Loading