From a24899073e38f688de5a069311282c4059e22106 Mon Sep 17 00:00:00 2001 From: "wangzihao.wzh" Date: Mon, 1 Dec 2025 16:52:06 +0800 Subject: [PATCH 1/2] [Bugfix] Several fixes to AliSQL Summary ======= 1. Online users cannot run `SET GLOBAL` directly 2. Online instances use strict SQL mode by default, which can affect vector insertion. --- vectordb_bench/backend/clients/alisql/alisql.py | 7 ++----- vectordb_bench/backend/clients/alisql/cli.py | 13 +------------ vectordb_bench/backend/clients/alisql/config.py | 2 -- vectordb_bench/frontend/config/dbCaseConfigs.py | 14 -------------- vectordb_bench/models.py | 1 - 5 files changed, 3 insertions(+), 34 deletions(-) diff --git a/vectordb_bench/backend/clients/alisql/alisql.py b/vectordb_bench/backend/clients/alisql/alisql.py index c3c2fd953..12f4f35e0 100644 --- a/vectordb_bench/backend/clients/alisql/alisql.py +++ b/vectordb_bench/backend/clients/alisql/alisql.py @@ -103,14 +103,11 @@ def init(self): index_param = self.case_config.index_param() search_param = self.case_config.search_param() - # maximize allowed package size - self.cursor.execute("SET GLOBAL max_allowed_packet = 1073741824") + self.cursor.execute("SET sql_mode = ''") if index_param["index_type"] == "HNSW": - if index_param["cache_size"] is not None: - self.cursor.execute(f"SET GLOBAL vidx_hnsw_cache_size = {index_param['cache_size']}") if search_param["ef_search"] is not None: - self.cursor.execute(f"SET GLOBAL vidx_hnsw_ef_search = {search_param['ef_search']}") + self.cursor.execute(f"SET SESSION vidx_hnsw_ef_search = {search_param['ef_search']}") self.cursor.execute("COMMIT") self.insert_sql = f"INSERT INTO {self.db_name}.{self.table_name} (id, v) VALUES (%s, %s)" # noqa: S608 diff --git a/vectordb_bench/backend/clients/alisql/cli.py b/vectordb_bench/backend/clients/alisql/cli.py index dbbc8513d..70e7fdfae 100644 --- a/vectordb_bench/backend/clients/alisql/cli.py +++ b/vectordb_bench/backend/clients/alisql/cli.py @@ -49,7 +49,7 @@ class
AliSQLTypedDict(CommonTypedDict): "--port", type=int, default=3306, - help="DB Port", + help="Db Port", ), ] @@ -75,16 +75,6 @@ class AliSQLHNSWTypedDict(AliSQLTypedDict): ), ] - cache_size: Annotated[ - int | None, - click.option( - "--cache-size", - type=int, - help="AliSQL system variable vidx_hnsw_cache_size", - required=False, - ), - ] - @cli.command() @click_parameter_decorators_from_typed_dict(AliSQLHNSWTypedDict) @@ -105,7 +95,6 @@ def AliSQLHNSW( db_case_config=AliSQLHNSWConfig( M=parameters["m"], ef_search=parameters["ef_search"], - cache_size=parameters["cache_size"], ), **parameters, ) diff --git a/vectordb_bench/backend/clients/alisql/config.py b/vectordb_bench/backend/clients/alisql/config.py index eddf19e98..66df2d69a 100644 --- a/vectordb_bench/backend/clients/alisql/config.py +++ b/vectordb_bench/backend/clients/alisql/config.py @@ -49,14 +49,12 @@ class AliSQLHNSWConfig(AliSQLIndexConfig, DBCaseConfig): M: int | None ef_search: int | None index: IndexType = IndexType.HNSW - cache_size: int | None def index_param(self) -> dict: return { "metric_type": self.parse_metric(), "index_type": self.index.value, "M": self.M, - "cache_size": self.cache_size, } def search_param(self) -> dict: diff --git a/vectordb_bench/frontend/config/dbCaseConfigs.py b/vectordb_bench/frontend/config/dbCaseConfigs.py index 6dd8a6e19..efbfeca95 100644 --- a/vectordb_bench/frontend/config/dbCaseConfigs.py +++ b/vectordb_bench/frontend/config/dbCaseConfigs.py @@ -1575,18 +1575,6 @@ class CaseConfigInput(BaseModel): isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value, ) -CaseConfigParamInput_CacheSize_AliSQL = CaseConfigInput( - label=CaseConfigParamType.cache_size, - inputHelp="vidx_hnsw_cache_size", - inputType=InputType.Number, - inputConfig={ - "min": 1048576, - "max": (1 << 53) - 1, - "value": 16 * 1024**3, - }, - isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value, -) - 
CaseConfigParamInput_IndexType_AliSQL = CaseConfigInput( label=CaseConfigParamType.IndexType, inputHelp="Select Index Type", @@ -2077,12 +2065,10 @@ class CaseConfigInput(BaseModel): AliSQLLoadingConfig = [ CaseConfigParamInput_IndexType_AliSQL, CaseConfigParamInput_M_AliSQL, - CaseConfigParamInput_CacheSize_AliSQL, ] AliSQLPerformanceConfig = [ CaseConfigParamInput_IndexType_AliSQL, CaseConfigParamInput_M_AliSQL, - CaseConfigParamInput_CacheSize_AliSQL, CaseConfigParamInput_EFSearch_AliSQL, ] diff --git a/vectordb_bench/models.py b/vectordb_bench/models.py index 5ffd0e70f..8a6d64267 100644 --- a/vectordb_bench/models.py +++ b/vectordb_bench/models.py @@ -127,7 +127,6 @@ class CaseConfigParamType(Enum): oversample_ratio = "oversample_ratio" use_routing = "use_routing" replication_type = "replication_type" - cache_size = "cache_size" dataset_with_size_type = "dataset_with_size_type" filter_rate = "filter_rate" From 2e716b061a8a84fa82e19480b9c7dd898fa62082 Mon Sep 17 00:00:00 2001 From: "wangzihao.wzh" Date: Mon, 1 Dec 2025 17:19:18 +0800 Subject: [PATCH 2/2] [Feature] AliSQL supports custom db name --- .../backend/clients/alisql/alisql.py | 21 ++++++++++--------- vectordb_bench/backend/clients/alisql/cli.py | 11 ++++++++++ .../backend/clients/alisql/config.py | 3 +++ 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/vectordb_bench/backend/clients/alisql/alisql.py b/vectordb_bench/backend/clients/alisql/alisql.py index 12f4f35e0..76179c2ed 100644 --- a/vectordb_bench/backend/clients/alisql/alisql.py +++ b/vectordb_bench/backend/clients/alisql/alisql.py @@ -23,7 +23,6 @@ def __init__( self.name = "AliSQL" self.db_config = db_config self.case_config = db_case_config - self.db_name = "vectordbbench" self.table_name = collection_name self.dim = dim @@ -57,11 +56,11 @@ def _create_connection(self): def _drop_db(self): assert self.conn is not None, "Connection is not initialized" assert self.cursor is not None, "Cursor is not initialized" - 
log.info(f"{self.name} client drop db : {self.db_name}") + log.info(f'{self.name} client drop db : {self.db_config["database"]}') # flush tables before dropping database to avoid some locking issue self.cursor.execute("FLUSH TABLES") - self.cursor.execute(f"DROP DATABASE IF EXISTS {self.db_name}") + self.cursor.execute(f'DROP DATABASE IF EXISTS {self.db_config["database"]}') self.cursor.execute("COMMIT") self.cursor.execute("FLUSH TABLES") @@ -70,11 +69,11 @@ def _create_db_table(self, dim: int): assert self.cursor is not None, "Cursor is not initialized" try: - log.info(f"{self.name} client create database : {self.db_name}") - self.cursor.execute(f"CREATE DATABASE {self.db_name}") + log.info(f'{self.name} client create database : {self.db_config["database"]}') + self.cursor.execute(f'CREATE DATABASE {self.db_config["database"]}') log.info(f"{self.name} client create table : {self.table_name}") - self.cursor.execute(f"USE {self.db_name}") + self.cursor.execute(f'USE {self.db_config["database"]}') self.cursor.execute( f""" @@ -110,13 +109,15 @@ def init(self): self.cursor.execute(f"SET SESSION vidx_hnsw_ef_search = {search_param['ef_search']}") self.cursor.execute("COMMIT") - self.insert_sql = f"INSERT INTO {self.db_name}.{self.table_name} (id, v) VALUES (%s, %s)" # noqa: S608 + self.insert_sql = ( + f'INSERT INTO {self.db_config["database"]}.{self.table_name} (id, v) VALUES (%s, %s)' # noqa: S608 + ) self.select_sql = ( - f"SELECT id FROM {self.db_name}.{self.table_name} " # noqa: S608 + f'SELECT id FROM {self.db_config["database"]}.{self.table_name} ' # noqa: S608 f"ORDER by vec_distance_{search_param['metric_type']}(v, %s) LIMIT %s" ) self.select_sql_with_filter = ( - f"SELECT id FROM {self.db_name}.{self.table_name} WHERE id >= %s " # noqa: S608 + f'SELECT id FROM {self.db_config["database"]}.{self.table_name} WHERE id >= %s ' # noqa: S608 f"ORDER by vec_distance_{search_param['metric_type']}(v, %s) LIMIT %s" ) @@ -144,7 +145,7 @@ def optimize(self, data_size: 
int) -> None: self.cursor.execute( f""" - ALTER TABLE {self.db_name}.{self.table_name} + ALTER TABLE {self.db_config["database"]}.{self.table_name} ADD VECTOR KEY v(v) {index_options} """ ) diff --git a/vectordb_bench/backend/clients/alisql/cli.py b/vectordb_bench/backend/clients/alisql/cli.py index 70e7fdfae..aee3ec2b2 100644 --- a/vectordb_bench/backend/clients/alisql/cli.py +++ b/vectordb_bench/backend/clients/alisql/cli.py @@ -53,6 +53,16 @@ class AliSQLTypedDict(CommonTypedDict): ), ] + database: Annotated[ + str, + click.option( + "--database", + type=str, + help="Database name", + default="vectordbbench", + ), + ] + class AliSQLHNSWTypedDict(AliSQLTypedDict): m: Annotated[ @@ -91,6 +101,7 @@ def AliSQLHNSW( password=SecretStr(parameters["password"]), host=parameters["host"], port=parameters["port"], + database=parameters["database"], ), db_case_config=AliSQLHNSWConfig( M=parameters["m"], diff --git a/vectordb_bench/backend/clients/alisql/config.py b/vectordb_bench/backend/clients/alisql/config.py index 66df2d69a..f942c30f1 100644 --- a/vectordb_bench/backend/clients/alisql/config.py +++ b/vectordb_bench/backend/clients/alisql/config.py @@ -13,6 +13,7 @@ class AliSQLConfigDict(TypedDict): password: str host: str port: int + database: str class AliSQLConfig(DBConfig): @@ -20,6 +21,7 @@ class AliSQLConfig(DBConfig): password: SecretStr host: str = "127.0.0.1" port: int = 3306 + database: str = "vectordbbench" def to_dict(self) -> AliSQLConfigDict: pwd_str = self.password.get_secret_value() @@ -28,6 +30,7 @@ def to_dict(self) -> AliSQLConfigDict: "port": self.port, "user": self.user_name, "password": pwd_str, + "database": self.database, }