class SCANNConfig(MilvusIndexConfig, DBCaseConfig):
    """Case configuration for the Milvus SCANN index.

    ``index_param`` supplies the build-time settings (``nlist`` and
    ``with_raw_data``); ``search_param`` supplies the query-time setting
    (``reorder_k``).
    """

    nlist: int = 1024
    with_raw_data: bool = False
    # Optional: when None, search_param() leaves the key out of the request.
    reorder_k: int | None = 100
    # SCANN_MILVUS is the benchmark-side enum member; the index_type string
    # sent in index_param() is "SCANN".
    index: IndexType = IndexType.SCANN_MILVUS

    def index_param(self) -> dict:
        """Return the index-creation parameters for a SCANN index."""
        # parse_metric() is not defined in this class; presumably inherited
        # from MilvusIndexConfig -- confirm against the base class.
        return {
            "metric_type": self.parse_metric(),
            "index_type": "SCANN",
            "params": {
                "nlist": self.nlist,
                "with_raw_data": self.with_raw_data,
            },
        }

    def search_param(self) -> dict:
        """Return the search parameters for a SCANN index.

        ``reorder_k`` is included only when it is set, so an unset value is
        not forwarded as an explicit null.
        """
        # NOTE(review): the Milvus SCANN docs also list ``nprobe`` as a search
        # parameter; it is not forwarded here -- confirm that is intended.
        params: dict[str, int] = {}
        if self.reorder_k is not None:
            params["reorder_k"] = self.reorder_k
        return {
            "metric_type": self.parse_metric(),
            "params": params,
        }
# SCANN "with_raw_data" toggle: a False/True option that is displayed only
# when the selected index type is IndexType.SCANN_MILVUS.
CaseConfigParamInput_with_raw_data = CaseConfigInput(
    label=CaseConfigParamType.with_raw_data,
    inputType=InputType.Option,
    inputConfig=dict(options=[False, True]),
    inputHelp="Whether to include raw data in the index. Setting to True enables reordering with original vectors.",
    isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.SCANN_MILVUS.value,
)

# SCANN "reorder_k" numeric input (min 1, max 65536, value 100), displayed
# under the same SCANN_MILVUS condition as the toggle above.
CaseConfigParamInput_reorder_k = CaseConfigInput(
    label=CaseConfigParamType.reorder_k,
    inputType=InputType.Number,
    inputConfig=dict(min=1, max=65536, value=100),
    inputHelp="Number of candidate vectors to reorder. Must be greater than or equal to k.",
    isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.SCANN_MILVUS.value,
)
CaseConfigParamType(Enum): refine_k = "refine_k" rbq_bits_query = "rbq_bits_query" sq_type = "sq_type" + with_raw_data = "with_raw_data" + reorder_k = "reorder_k" level = "level" maintenance_work_mem = "maintenance_work_mem" max_parallel_workers = "max_parallel_workers"