From 8fd4072d150483bfb0613e0c250e00eac482d2d5 Mon Sep 17 00:00:00 2001 From: Wahaj Ali Date: Thu, 17 Apr 2025 16:57:36 +0500 Subject: [PATCH 1/8] Updates config for bq param sweep --- config.json | 731 +--------------------------------------------------- run.py | 7 +- 2 files changed, 12 insertions(+), 726 deletions(-) diff --git a/config.json b/config.json index 83027bc9d..6bc265b6b 100644 --- a/config.json +++ b/config.json @@ -6,668 +6,13 @@ "db-name": "postgres" }, "benchmark-info": { - "name": "ann-benchmark-param-sweep", + "name": "hnsw-bq-param-sweep", "instance-size": "Standard_D8ds_v5", - "instance-service": "azure-vm", + "instance-service": "azure-flex", "provider": "azure", - "description": "Running param sweep for HNSW full vector and HNSW binary quantization with reranking index alogrithms" + "description": "Running param sweep for HNSW binary quantization with reranking index alogrithms" }, "cases": [ - { - "db-label": "hnsw-fv-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-fv", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 8, - "ef-construction": 32, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7 - }, - "search-params": { - "ef-search": [10, 20, 40, 80, 120, 200, 400] - }, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-fv-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-fv", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 8, - "ef-construction": 64, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7 - }, - "search-params": { - "ef-search": [10, 20, 40, 80, 120, 200, 400] - }, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-fv-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-fv", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 8, - "ef-construction": 128, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7 - }, - "search-params": { - "ef-search": [10, 20, 40, 80, 120, 200, 400] - }, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-fv-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-fv", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 16, - "ef-construction": 32, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7 - }, - "search-params": { - "ef-search": [10, 20, 40, 80, 120, 200, 400] - }, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-fv-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-fv", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 16, - "ef-construction": 64, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7 - }, - "search-params": { - "ef-search": [10, 20, 40, 80, 120, 200, 400] - }, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-fv-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-fv", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 16, - "ef-construction": 128, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7 - }, - "search-params": { - "ef-search": [10, 20, 40, 80, 120, 200, 400] - }, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-fv-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-fv", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 32, - "ef-construction": 64, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7 - }, - "search-params": { - "ef-search": [10, 20, 40, 80, 120, 200, 400] - }, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-fv-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-fv", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 32, - "ef-construction": 128, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7 - }, - "search-params": { - "ef-search": [10, 20, 40, 80, 120, 200, 400] - }, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-fv-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-fv", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 64, - "ef-construction": 128, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7 - }, - "search-params": { - "ef-search": [10, 20, 40, 80, 120, 200, 400] - }, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "diskann-fv-param-sweep", - "vdb-command": "pgdiskann", - "vector-ext": "pg_diskann", - "index-type": "diskann-fv", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "case-type": "Performance1536D500K", - "index-params": { - "max-neighbors": 32, - "l-value-ib": 64, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7 - }, - "search-params": { - "l-value-is": [32, 64, 128, 256, 512] - }, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "diskann-fv-param-sweep", - "vdb-command": "pgdiskann", - "vector-ext": "pg_diskann", - "index-type": "diskann-fv", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "case-type": "Performance1536D500K", - "index-params": { - "max-neighbors": 32, - "l-value-ib": 128, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7 - }, - "search-params": { - "l-value-is": [32, 64, 128, 256, 512] - }, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "diskann-fv-param-sweep", - "vdb-command": "pgdiskann", - "vector-ext": "pg_diskann", - "index-type": "diskann-fv", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "case-type": "Performance1536D500K", - "index-params": { - "max-neighbors": 64, - "l-value-ib": 64, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7 - }, - "search-params": { - "l-value-is": [32, 64, 128, 256, 512] - }, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "diskann-fv-param-sweep", - "vdb-command": "pgdiskann", - "vector-ext": "pg_diskann", - "index-type": "diskann-fv", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "case-type": "Performance1536D500K", - "index-params": { - "max-neighbors": 64, - "l-value-ib": 128, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7 - }, - "search-params": { - "l-value-is": [32, 64, 128, 256, 512] - }, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-bq-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-bq", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 8, - "ef-construction": 32, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7, - "quantization-type": "bit" - }, - "search-params": { - "ef-search": [40, 80, 120, 200, 400, 800] - }, - "reranking": true, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-bq-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-bq", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 8, - "ef-construction": 64, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7, - "quantization-type": "bit" - }, - "search-params": { - "ef-search": [40, 80, 120, 200, 400, 800] - }, - "reranking": true, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-bq-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-bq", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 8, - "ef-construction": 128, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7, - "quantization-type": "bit" - }, - "search-params": { - "ef-search": [40, 80, 120, 200, 400, 800] - }, - "reranking": true, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-bq-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-bq", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 16, - "ef-construction": 32, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7, - "quantization-type": "bit" - }, - "search-params": { - "ef-search": [40, 80, 120, 200, 400, 800] - }, - "reranking": true, - "half-quantized-fetch-limit": true, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-bq-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-bq", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 16, - "ef-construction": 64, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7, - "quantization-type": "bit" - }, - "search-params": { - "ef-search": [40, 80, 120, 200, 400, 800] - }, - "reranking": true, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-bq-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-bq", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 16, - "ef-construction": 128, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7, - "quantization-type": "bit" - }, - "search-params": { - "ef-search": [40, 80, 120, 200, 400, 800] - }, - "reranking": true, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-bq-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-bq", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 32, - "ef-construction": 64, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7, - "quantization-type": "bit" - }, - "search-params": { - "ef-search": [40, 80, 120, 200, 400, 800] - }, - "reranking": true, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-bq-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-bq", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 32, - "ef-construction": 128, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7, - "quantization-type": "bit" - }, - "search-params": { - "ef-search": [40, 80, 120, 200, 400, 800] - }, - "reranking": true, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-bq-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-bq", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 64, - "ef-construction": 128, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7, - "quantization-type": "bit" - }, - "search-params": { - "ef-search": [40, 80, 120, 200, 400, 800] - }, - "reranking": true, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-bq-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-bq", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 8, - "ef-construction": 32, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7, - "quantization-type": "bit" - }, - "search-params": { - "ef-search": [40, 80, 120, 200, 400, 800] - }, - "reranking": true, - "half-quantized-fetch-limit": true, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-bq-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-bq", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 8, - "ef-construction": 64, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7, - "quantization-type": "bit" - }, - "search-params": { - "ef-search": [40, 80, 120, 200, 400, 800] - }, - "reranking": true, - "half-quantized-fetch-limit": true, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-bq-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-bq", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 8, - "ef-construction": 128, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7, - "quantization-type": "bit" - }, - "search-params": { - "ef-search": [40, 80, 120, 200, 400, 800] - }, - "reranking": true, - "half-quantized-fetch-limit": true, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-bq-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-bq", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 16, - "ef-construction": 32, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7, - "quantization-type": "bit" - }, - "search-params": { - "ef-search": [40, 80, 120, 200, 400, 800] - }, - "reranking": true, - "half-quantized-fetch-limit": true, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, { "db-label": "hnsw-bq-param-sweep", "vdb-command": "pgvectorhnsw", @@ -686,10 +31,10 @@ "quantization-type": "bit" }, "search-params": { - "ef-search": [40, 80, 120, 200, 400, 800] + "ef-search": [64, 128, 256], + "quantized-fetch-limit": [30, 40, 50] }, "reranking": true, - "half-quantized-fetch-limit": true, "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", "concurrency-duration": 30, "k": 10, @@ -713,64 +58,10 @@ "quantization-type": "bit" }, "search-params": { - "ef-search": [40, 80, 120, 200, 400, 800] - }, - "reranking": true, - "half-quantized-fetch-limit": true, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-bq-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-bq", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 32, - "ef-construction": 64, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7, - "quantization-type": "bit" - }, - "search-params": { - "ef-search": [40, 80, 120, 200, 400, 800] - }, - "reranking": true, - "half-quantized-fetch-limit": true, - "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", - "concurrency-duration": 30, - "k": 10, - "run-count": 3 - }, - { - "db-label": "hnsw-bq-param-sweep", - "vdb-command": "pgvectorhnsw", - "vector-ext": "vector", - "index-type": "hnsw-bq", - "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, - "search-serial": true, - "search-concurrent": true, - "index-params": { - "m": 32, - "ef-construction": 128, - "maintenance-work-mem": "8GB", - "max-parallel-workers": 7, - "quantization-type": "bit" - }, - "search-params": { - "ef-search": [40, 80, 120, 200, 400, 800] + "ef-search": [64, 128, 256], + "quantized-fetch-limit": [30, 40, 50] }, "reranking": true, - "half-quantized-fetch-limit": true, "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", "concurrency-duration": 30, "k": 10, @@ -787,17 +78,17 @@ "search-serial": true, "search-concurrent": true, "index-params": { - "m": 64, - "ef-construction": 128, + "m": 16, + "ef-construction": 256, "maintenance-work-mem": "8GB", "max-parallel-workers": 7, "quantization-type": "bit" }, "search-params": { - "ef-search": [40, 80, 120, 200, 400, 800] + "ef-search": [64, 128, 256], + "quantized-fetch-limit": [30, 40, 50] }, "reranking": true, - "half-quantized-fetch-limit": true, "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", "concurrency-duration": 30, "k": 10, diff --git a/run.py b/run.py index 3115d8caa..1a2f28e34 100644 --- a/run.py +++ b/run.py @@ -26,7 +26,7 @@ def main(): start_time = time.time() start_timeh = time.strftime('%Y-%m-%d %H:%M:%S') logger.info(f"Benchmark run start time: {time.strftime('%Y-%m-%d %H:%M:%S')}") - + for case in config['cases']: print(f"Running case: {case['db-label']}") setup_database(config) @@ -50,11 +50,6 @@ def run_benchmark(case, db_config, benchmark_info, dry_run=False): print(f"Starting run {run + 1} of {run_count} for case: {case['db-label']}") for i, search_params in enumerate(generate_combinations(case["search-params"])): command = base_command + search_params - if case["index-type"] == "hnsw-bq" and "reranking" in case: - if case.get("half-quantized-fetch-limit", False): - command += ["--quantized-fetch-limit", str(int(int(search_params[1]) / 2))] - else: - command += ["--quantized-fetch-limit", search_params[1]] if i > 0 or run > 0: command = handle_drop_old_load_flags(command) From 7bf2b51dd3be19cfa48a2240562c43847845a7f5 Mon Sep 17 00:00:00 2001 From: Wahaj Ali Date: Thu, 17 Apr 2025 18:48:45 +0500 Subject: [PATCH 2/8] updated config --- config.json | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 84 insertions(+), 3 deletions(-) diff --git a/config.json b/config.json index 6bc265b6b..4cd8054f7 100644 --- a/config.json +++ b/config.json @@ -14,13 +14,40 @@ }, "cases": [ { - "db-label": "hnsw-bq-param-sweep", + "db-label": "build-index-hnsw-bq-param-sweep", "vdb-command": "pgvectorhnsw", "vector-ext": "vector", "index-type": "hnsw-bq", "case-type": "Performance1536D500K", "drop-old": true, "load": true, + "search-serial": false, + "search-concurrent": false, + "index-params": { + "m": 16, + "ef-construction": 64, + "maintenance-work-mem": "8GB", + "max-parallel-workers": 7, + "quantization-type": "bit" + }, + "search-params": { + "ef-search": [64], + "quantized-fetch-limit": [30] + }, + "reranking": true, + "num-concurrency": "1", + "concurrency-duration": 30, + "k": 10, + "run-count": 1 + }, + { + "db-label": "run-hnsw-bq-param-sweep", + "vdb-command": "pgvectorhnsw", + "vector-ext": "vector", + "index-type": "hnsw-bq", + "case-type": "Performance1536D500K", + "drop-old": false, + "load": false, "search-serial": true, "search-concurrent": true, "index-params": { @@ -41,13 +68,40 @@ "run-count": 3 }, { - "db-label": "hnsw-bq-param-sweep", + "db-label": "index-hnsw-bq-param-sweep", "vdb-command": "pgvectorhnsw", "vector-ext": "vector", "index-type": "hnsw-bq", "case-type": "Performance1536D500K", "drop-old": true, "load": true, + "search-serial": false, + "search-concurrent": false, + "index-params": { + "m": 16, + "ef-construction": 128, + "maintenance-work-mem": "8GB", + "max-parallel-workers": 7, + "quantization-type": "bit" + }, + "search-params": { + "ef-search": [64], + "quantized-fetch-limit": [30] + }, + "reranking": true, + "num-concurrency": "1", + "concurrency-duration": 30, + "k": 10, + "run-count": 1 + }, + { + "db-label": "run-hnsw-bq-param-sweep", + "vdb-command": "pgvectorhnsw", + "vector-ext": "vector", + "index-type": "hnsw-bq", + "case-type": "Performance1536D500K", + "drop-old": false, + "load": false, "search-serial": true, "search-concurrent": true, "index-params": { @@ -68,7 +122,34 @@ "run-count": 3 }, { - "db-label": "hnsw-bq-param-sweep", + "db-label": "index-hnsw-bq-param-sweep", + "vdb-command": "pgvectorhnsw", + "vector-ext": "vector", + "index-type": "hnsw-bq", + "case-type": "Performance1536D500K", + "drop-old": true, + "load": true, + "search-serial": false, + "search-concurrent": false, + "index-params": { + "m": 16, + "ef-construction": 256, + "maintenance-work-mem": "8GB", + "max-parallel-workers": 7, + "quantization-type": "bit" + }, + "search-params": { + "ef-search": [64], + "quantized-fetch-limit": [30] + }, + "reranking": true, + "num-concurrency": "1", + "concurrency-duration": 30, + "k": 10, + "run-count": 1 + }, + { + "db-label": "run-hnsw-bq-param-sweep", "vdb-command": "pgvectorhnsw", "vector-ext": "vector", "index-type": "hnsw-bq", From c288a59c348ef4b765a97813614fc5e5c3a19845 Mon Sep 17 00:00:00 2001 From: Wahaj Ali Date: Fri, 18 Apr 2025 00:46:34 +0500 Subject: [PATCH 3/8] fix config --- config.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config.json b/config.json index 4cd8054f7..2e345fed7 100644 --- a/config.json +++ b/config.json @@ -154,8 +154,8 @@ "vector-ext": "vector", "index-type": "hnsw-bq", "case-type": "Performance1536D500K", - "drop-old": true, - "load": true, + "drop-old": false, + "load": false, "search-serial": true, "search-concurrent": true, "index-params": { From 522fc70c011fd661dbe7986bee47a476bd48901a Mon Sep 17 00:00:00 2001 From: Wahaj Ali Date: Tue, 22 Apr 2025 10:30:22 +0500 Subject: [PATCH 4/8] Add support for PQ in pgdiskann --- config-bq.json | 341 ++++++++++++++++++ vectordb_bench/__init__.py | 4 +- .../backend/clients/pgdiskann/cli.py | 47 ++- .../backend/clients/pgdiskann/config.py | 16 +- .../backend/clients/pgdiskann/pgdiskann.py | 70 +++- .../backend/clients/pgvector/pgvector.py | 3 + 6 files changed, 460 insertions(+), 21 deletions(-) create mode 100644 config-bq.json diff --git a/config-bq.json b/config-bq.json new file mode 100644 index 000000000..b63e4ac6e --- /dev/null +++ b/config-bq.json @@ -0,0 +1,341 @@ +{ + "database": { + "host": "localhost", + "username": "postgres", + "password": "admin123", + "db-name": "postgres" + }, + "benchmark-info": { + "name": "hnsw-bq-param-sweep", + "instance-size": "Standard_D8ds_v5", + "instance-service": "azure-flex", + "provider": "azure", + "description": "Running param sweep for HNSW binary quantization with reranking index alogrithms" + }, + "cases": [ + { + "db-label": "index-hnsw-bq-param-sweep", + "vdb-command": "pgvectorhnsw", + "vector-ext": "vector", + "index-type": "hnsw-bq", + "case-type": "Performance1536D500K", + "drop-old": true, + "load": true, + "search-serial": false, + "search-concurrent": false, + "index-params": { + "m": 16, + "ef-construction": 64, + "maintenance-work-mem": "8GB", + "max-parallel-workers": 7, + "quantization-type": "bit" + }, + "search-params": { + "ef-search": [64], + "quantized-fetch-limit": [30] + }, + "reranking": true, + "num-concurrency": "1", + "concurrency-duration": 30, + "k": 10, + "run-count": 1 + }, + { + "db-label": "run-hnsw-bq-param-sweep", + "vdb-command": "pgvectorhnsw", + "vector-ext": "vector", + "index-type": "hnsw-bq", + "case-type": "Performance1536D500K", + "drop-old": false, + "load": false, + "search-serial": true, + "search-concurrent": true, + "index-params": { + "m": 16, + "ef-construction": 64, + "maintenance-work-mem": "8GB", + "max-parallel-workers": 7, + "quantization-type": "bit" + }, + "search-params": { + "ef-search": [64, 128, 256], + "quantized-fetch-limit": [30, 40, 50] + }, + "reranking": true, + "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", + "concurrency-duration": 30, + "k": 10, + "run-count": 3 + }, + { + "db-label": "index-hnsw-bq-param-sweep", + "vdb-command": "pgvectorhnsw", + "vector-ext": "vector", + "index-type": "hnsw-bq", + "case-type": "Performance1536D500K", + "drop-old": true, + "load": true, + "search-serial": false, + "search-concurrent": false, + "index-params": { + "m": 16, + "ef-construction": 128, + "maintenance-work-mem": "8GB", + "max-parallel-workers": 7, + "quantization-type": "bit" + }, + "search-params": { + "ef-search": [64], + "quantized-fetch-limit": [30] + }, + "reranking": true, + "num-concurrency": "1", + "concurrency-duration": 30, + "k": 10, + "run-count": 1 + }, + { + "db-label": "run-hnsw-bq-param-sweep", + "vdb-command": "pgvectorhnsw", + "vector-ext": "vector", + "index-type": "hnsw-bq", + "case-type": "Performance1536D500K", + "drop-old": false, + "load": false, + "search-serial": true, + "search-concurrent": true, + "index-params": { + "m": 16, + "ef-construction": 128, + "maintenance-work-mem": "8GB", + "max-parallel-workers": 7, + "quantization-type": "bit" + }, + "search-params": { + "ef-search": [64, 128, 256], + "quantized-fetch-limit": [30, 40, 50] + }, + "reranking": true, + "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", + "concurrency-duration": 30, + "k": 10, + "run-count": 3 + }, + { + "db-label": "index-hnsw-bq-param-sweep", + "vdb-command": "pgvectorhnsw", + "vector-ext": "vector", + "index-type": "hnsw-bq", + "case-type": "Performance1536D500K", + "drop-old": true, + "load": true, + "search-serial": false, + "search-concurrent": false, + "index-params": { + "m": 16, + "ef-construction": 256, + "maintenance-work-mem": "8GB", + "max-parallel-workers": 7, + "quantization-type": "bit" + }, + "search-params": { + "ef-search": [64], + "quantized-fetch-limit": [30] + }, + "reranking": true, + "num-concurrency": "1", + "concurrency-duration": 30, + "k": 10, + "run-count": 1 + }, + { + "db-label": "run-hnsw-bq-param-sweep", + "vdb-command": "pgvectorhnsw", + "vector-ext": "vector", + "index-type": "hnsw-bq", + "case-type": "Performance1536D500K", + "drop-old": false, + "load": false, + "search-serial": true, + "search-concurrent": true, + "index-params": { + "m": 16, + "ef-construction": 256, + "maintenance-work-mem": "8GB", + "max-parallel-workers": 7, + "quantization-type": "bit" + }, + "search-params": { + "ef-search": [64, 128, 256], + "quantized-fetch-limit": [30, 40, 50] + }, + "reranking": true, + "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", + "concurrency-duration": 30, + "k": 10, + "run-count": 3 + }, + { + "db-label": "index-hnsw-bq-param-sweep", + "vdb-command": "pgvectorhnsw", + "vector-ext": "vector", + "index-type": "hnsw-bq", + "case-type": "Performance1536D500K", + "drop-old": true, + "load": true, + "search-serial": false, + "search-concurrent": false, + "index-params": { + "m": 32, + "ef-construction": 64, + "maintenance-work-mem": "8GB", + "max-parallel-workers": 7, + "quantization-type": "bit" + }, + "search-params": { + "ef-search": [64], + "quantized-fetch-limit": [30] + }, + "reranking": true, + "num-concurrency": "1", + "concurrency-duration": 30, + "k": 10, + "run-count": 1 + }, + { + "db-label": "run-hnsw-bq-param-sweep", + "vdb-command": "pgvectorhnsw", + "vector-ext": "vector", + "index-type": "hnsw-bq", + "case-type": "Performance1536D500K", + "drop-old": false, + "load": false, + "search-serial": true, + "search-concurrent": true, + "index-params": { + "m": 32, + "ef-construction": 64, + "maintenance-work-mem": "8GB", + "max-parallel-workers": 7, + "quantization-type": "bit" + }, + "search-params": { + "ef-search": [64, 128, 256], + "quantized-fetch-limit": [30, 40, 50] + }, + "reranking": true, + "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", + "concurrency-duration": 30, + "k": 10, + "run-count": 3 + }, + { + "db-label": "index-hnsw-bq-param-sweep", + "vdb-command": "pgvectorhnsw", + "vector-ext": "vector", + "index-type": "hnsw-bq", + "case-type": "Performance1536D500K", + "drop-old": true, + "load": true, + "search-serial": false, + "search-concurrent": false, + "index-params": { + "m": 32, + "ef-construction": 128, + "maintenance-work-mem": "8GB", + "max-parallel-workers": 7, + "quantization-type": "bit" + }, + "search-params": { + "ef-search": [64], + "quantized-fetch-limit": [30] + }, + "reranking": true, + "num-concurrency": "1", + "concurrency-duration": 30, + "k": 10, + "run-count": 1 + }, + { + "db-label": "run-hnsw-bq-param-sweep", + "vdb-command": "pgvectorhnsw", + "vector-ext": "vector", + "index-type": "hnsw-bq", + "case-type": "Performance1536D500K", + "drop-old": false, + "load": false, + "search-serial": true, + "search-concurrent": true, + "index-params": { + "m": 32, + "ef-construction": 128, + "maintenance-work-mem": "8GB", + "max-parallel-workers": 7, + "quantization-type": "bit" + }, + "search-params": { + "ef-search": [64, 128, 256], + "quantized-fetch-limit": [30, 40, 50] + }, + "reranking": true, + "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", + "concurrency-duration": 30, + "k": 10, + "run-count": 3 + }, + { + "db-label": "index-hnsw-bq-param-sweep", + "vdb-command": "pgvectorhnsw", + "vector-ext": "vector", + "index-type": "hnsw-bq", + "case-type": "Performance1536D500K", + "drop-old": true, + "load": true, + "search-serial": false, + "search-concurrent": false, + "index-params": { + "m": 32, + "ef-construction": 256, + "maintenance-work-mem": "8GB", + "max-parallel-workers": 7, + "quantization-type": "bit" + }, + "search-params": { + "ef-search": [64], + "quantized-fetch-limit": [30] + }, + "reranking": true, + "num-concurrency": "1", + "concurrency-duration": 30, + "k": 10, + "run-count": 1 + }, + { + "db-label": "run-hnsw-bq-param-sweep", + "vdb-command": "pgvectorhnsw", + "vector-ext": "vector", + "index-type": "hnsw-bq", + "case-type": "Performance1536D500K", + "drop-old": false, + "load": false, + "search-serial": true, + "search-concurrent": true, + "index-params": { + "m": 32, + "ef-construction": 256, + "maintenance-work-mem": "8GB", + "max-parallel-workers": 7, + "quantization-type": "bit" + }, + "search-params": { + "ef-search": [64, 128, 256], + "quantized-fetch-limit": [30, 40, 50] + }, + "reranking": true, + "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", + "concurrency-duration": 30, + "k": 10, + "run-count": 3 + } + ] +} diff --git a/vectordb_bench/__init__.py b/vectordb_bench/__init__.py index 95dece42d..2c70e25b2 100644 --- a/vectordb_bench/__init__.py +++ b/vectordb_bench/__init__.py @@ -14,11 +14,11 @@ class config: ALIYUN_OSS_URL = "assets.zilliz.com.cn/benchmark/" AWS_S3_URL = "assets.zilliz.com/benchmark/" - LOG_LEVEL = env.str("LOG_LEVEL", "INFO") + LOG_LEVEL = env.str("LOG_LEVEL", "DEBUG") DEFAULT_DATASET_URL = env.str("DEFAULT_DATASET_URL", AWS_S3_URL) DATASET_LOCAL_DIR = env.path("DATASET_LOCAL_DIR", f"/home/{os.getenv('USER')}/vectordb_bench/dataset") - NUM_PER_BATCH = env.int("NUM_PER_BATCH", 100) + NUM_PER_BATCH = env.int("NUM_PER_BATCH", 1000) DROP_OLD = env.bool("DROP_OLD", True) USE_SHUFFLED_DATA = env.bool("USE_SHUFFLED_DATA", True) diff --git a/vectordb_bench/backend/clients/pgdiskann/cli.py b/vectordb_bench/backend/clients/pgdiskann/cli.py index 19f47988f..27ac2d78c 100644 --- a/vectordb_bench/backend/clients/pgdiskann/cli.py +++ b/vectordb_bench/backend/clients/pgdiskann/cli.py @@ -5,6 +5,7 @@ from pydantic import SecretStr from vectordb_bench.backend.clients import DB +from vectordb_bench.backend.clients.api import MetricType from ....cli.cli import ( CommonTypedDict, @@ -48,6 +49,15 @@ class PgDiskAnnTypedDict(CommonTypedDict): help="PgDiskAnn l_value_ib", ), ] + pq_param_num_chunks: Annotated[ + int, + click.option( + "--pq-param-num-chunks", + type=int, + help="PgDiskAnn pq_param_num_chunks", + required=False, + ), + ] l_value_is: Annotated[ float, click.option( @@ -77,6 +87,37 @@ class PgDiskAnnTypedDict(CommonTypedDict): required=False, ), ] + reranking: Annotated[ + bool | None, + click.option( + "--reranking/--skip-reranking", + type=bool, + help="Enable reranking for PQ search", + default=False, + ), + ] + reranking_metric: Annotated[ + str | None, + click.option( + "--reranking-metric", + type=click.Choice( + [metric.value for metric in MetricType if metric.value not in ["HAMMING", "JACCARD"]], + ), + help="Distance metric for reranking", + default="COSINE", + show_default=True, + required=False, + ), + ] + quantized_fetch_limit: Annotated[ + int | None, + click.option( + "--quantized-fetch-limit", + type=int, + help="Limit of inner query in case of reranking", + required=False, + ), + ] @cli.command() @@ -98,9 +139,13 @@ def PgDiskAnn( db_case_config=PgDiskANNImplConfig( max_neighbors=parameters["max_neighbors"], l_value_ib=parameters["l_value_ib"], + pq_param_num_chunks=["pq_param_num_chunks"], l_value_is=parameters["l_value_is"], + reranking=parameters["reranking"], + reranking_metric=parameters["reranking_metric"], + quantized_fetch_limit=parameters["quantized_fetch_limit"], max_parallel_workers=parameters["max_parallel_workers"], maintenance_work_mem=parameters["maintenance_work_mem"], ), **parameters, - ) + ) \ No newline at end of file diff --git a/vectordb_bench/backend/clients/pgdiskann/config.py b/vectordb_bench/backend/clients/pgdiskann/config.py index ed478acc2..f4fa23958 100644 --- a/vectordb_bench/backend/clients/pgdiskann/config.py +++ b/vectordb_bench/backend/clients/pgdiskann/config.py @@ -60,6 +60,13 @@ def parse_metric_fun_op(self) -> LiteralString: return "<#>" return "<=>" + def parse_reranking_metric_fun_op(self) -> LiteralString: + if self.reranking_metric == MetricType.L2: + return "<->" + if self.reranking_metric == MetricType.IP: + return "<#>" + return "<=>" + def parse_metric_fun_str(self) -> str: if self.metric_type == MetricType.L2: return "l2_distance" @@ -110,12 +117,15 @@ def _optionally_build_set_options( ) return session_options - class PgDiskANNImplConfig(PgDiskANNIndexConfig): index: IndexType = IndexType.DISKANN max_neighbors: int | None l_value_ib: int | None + pq_param_num_chunks: int | None l_value_is: float | None + reranking: bool | None = None + reranking_metric: str | None = None + quantized_fetch_limit: int | None = None maintenance_work_mem: str | None = None max_parallel_workers: int | None = None @@ -126,6 +136,7 @@ def index_param(self) -> dict: "options": { "max_neighbors": self.max_neighbors, "l_value_ib": self.l_value_ib, + "pq_param_num_chunks": self.pq_param_num_chunks, }, "maintenance_work_mem": self.maintenance_work_mem, "max_parallel_workers": self.max_parallel_workers, @@ -135,6 +146,9 @@ def search_param(self) -> dict: return { "metric": self.parse_metric(), "metric_fun_op": self.parse_metric_fun_op(), + "reranking": self.reranking, + "reranking_metric_fun_op": self.parse_reranking_metric_fun_op(), + "quantized_fetch_limit": self.quantized_fetch_limit, } def session_param(self) -> dict: diff --git a/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py b/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py index a8c5956ff..a20c1d2d6 100644 --- a/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +++ b/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py @@ -120,38 +120,74 @@ def _create_connection(**kwargs) -> tuple[Connection, Cursor]: def init(self) -> Generator[None, None, None]: self.conn, self.cursor = self._create_connection(**self.db_config) - # index configuration may have commands defined that we should set during each client session session_options: dict[str, Any] = self.case_config.session_param() - if len(session_options) > 0: for setting_name, setting_val in session_options.items(): - command = sql.SQL("SET {setting_name} " + "= {setting_val};").format( + command = sql.SQL("SET {setting_name} = {setting_val};").format( setting_name=sql.Identifier(setting_name), - setting_val=sql.Identifier(str(setting_val)), + setting_val=sql.Literal(setting_val), # Use Literal instead of Identifier for values ) log.debug(command.as_string(self.cursor)) self.cursor.execute(command) self.conn.commit() - self._filtered_search = sql.Composed( - [ + search_params = self.case_config.search_param() + + if search_params.get("reranking"): + # Reranking-enabled queries + self._filtered_search = sql.SQL(""" + SELECT i.id + FROM ( + SELECT id, embedding + FROM public.{table_name} + WHERE id >= %s + ORDER BY embedding {metric_fun_op} %s::vector + LIMIT %s + ) i + ORDER BY i.embedding {reranking_metric_fun_op} %s::vector + LIMIT %s::int + """).format( + table_name=sql.Identifier(self.table_name), + metric_fun_op=sql.SQL(search_params["metric_fun_op"]), + reranking_metric_fun_op=sql.SQL(search_params["reranking_metric_fun_op"]), + ) + + self._unfiltered_search = sql.SQL(""" + SELECT i.id + FROM ( + SELECT id, embedding + FROM public.{table_name} + ORDER BY embedding {metric_fun_op} %s::vector + LIMIT %s + ) i + ORDER BY i.embedding {reranking_metric_fun_op} %s::vector + LIMIT %s::int + """).format( + table_name=sql.Identifier(self.table_name), + metric_fun_op=sql.SQL(search_params["metric_fun_op"]), + reranking_metric_fun_op=sql.SQL(search_params["reranking_metric_fun_op"]), + ) + + else: + # Regular queries without reranking + self._filtered_search = sql.Composed([ sql.SQL( "SELECT id FROM public.{table_name} WHERE id >= %s ORDER BY embedding ", ).format(table_name=sql.Identifier(self.table_name)), - sql.SQL(self.case_config.search_param()["metric_fun_op"]), + sql.SQL(search_params["metric_fun_op"]), sql.SQL(" %s::vector LIMIT %s::int"), - ], - ) + ]) - self._unfiltered_search = sql.Composed( - [ - sql.SQL("SELECT id FROM public.{} ORDER BY embedding ").format( - sql.Identifier(self.table_name), + self._unfiltered_search = sql.Composed([ + sql.SQL("SELECT id FROM public.{table_name} ORDER BY embedding ").format( + table_name=sql.Identifier(self.table_name) ), - sql.SQL(self.case_config.search_param()["metric_fun_op"]), + sql.SQL(search_params["metric_fun_op"]), sql.SQL(" %s::vector LIMIT %s::int"), - ], - ) + ]) + + log.debug(f"Unfiltered search query={self._unfiltered_search.as_string(self.conn)}") + log.debug(f"Filtered search query={self._filtered_search.as_string(self.conn)}") try: yield @@ -264,7 +300,7 @@ def _create_index(self): options.append( sql.SQL("{option_name} = {val}").format( option_name=sql.Identifier(option_name), - val=sql.Identifier(str(option_val)), + val=sql.Literal(option_val), ), ) diff --git a/vectordb_bench/backend/clients/pgvector/pgvector.py b/vectordb_bench/backend/clients/pgvector/pgvector.py index c29fb1cdc..557827c68 100644 --- a/vectordb_bench/backend/clients/pgvector/pgvector.py +++ b/vectordb_bench/backend/clients/pgvector/pgvector.py @@ -249,6 +249,9 @@ def init(self) -> Generator[None, None, None]: self._filtered_search = self._generate_search_query(filtered=True) self._unfiltered_search = self._generate_search_query() + log.debug(f"Unfiltered search query={self._unfiltered_search.as_string(self.conn)}") + log.debug(f"Filtered search query={self._filtered_search.as_string(self.conn)}") + try: yield finally: From af29b444a5e9aa14e5b1bea2133dd01ee70977f6 Mon Sep 17 00:00:00 2001 From: Wahaj Ali Date: Tue, 22 Apr 2025 10:48:11 +0500 Subject: [PATCH 5/8] Fix bug --- vectordb_bench/backend/clients/pgdiskann/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vectordb_bench/backend/clients/pgdiskann/cli.py b/vectordb_bench/backend/clients/pgdiskann/cli.py index 27ac2d78c..1662f74a6 100644 --- a/vectordb_bench/backend/clients/pgdiskann/cli.py +++ b/vectordb_bench/backend/clients/pgdiskann/cli.py @@ -139,7 +139,7 @@ def PgDiskAnn( db_case_config=PgDiskANNImplConfig( max_neighbors=parameters["max_neighbors"], l_value_ib=parameters["l_value_ib"], - pq_param_num_chunks=["pq_param_num_chunks"], + pq_param_num_chunks=parameters["pq_param_num_chunks"], l_value_is=parameters["l_value_is"], reranking=parameters["reranking"], reranking_metric=parameters["reranking_metric"], From d7fb09cac6534ed7d5b374370c7d0abcd54dadba Mon Sep 17 00:00:00 2001 From: Wahaj Ali Date: Tue, 22 Apr 2025 11:24:52 +0500 Subject: [PATCH 6/8] Update query --- .../backend/clients/pgdiskann/pgdiskann.py | 44 +++++++++++++++---- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py b/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py index a20c1d2d6..426061908 100644 --- a/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +++ b/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py @@ -142,7 +142,7 @@ def init(self) -> Generator[None, None, None]: FROM public.{table_name} WHERE id >= %s ORDER BY embedding {metric_fun_op} %s::vector - LIMIT %s + LIMIT {quantized_fetch_limit}::int ) i ORDER BY i.embedding {reranking_metric_fun_op} %s::vector LIMIT %s::int @@ -150,6 +150,7 @@ def init(self) -> Generator[None, None, None]: table_name=sql.Identifier(self.table_name), metric_fun_op=sql.SQL(search_params["metric_fun_op"]), reranking_metric_fun_op=sql.SQL(search_params["reranking_metric_fun_op"]), + quantized_fetch_limit=sql.Identifier(search_params["quantized_fetch_limit"]), ) self._unfiltered_search = sql.SQL(""" @@ -158,7 +159,7 @@ def init(self) -> Generator[None, None, None]: SELECT id, embedding FROM public.{table_name} ORDER BY embedding {metric_fun_op} %s::vector - LIMIT %s + LIMIT {quantized_fetch_limit}::int ) i ORDER BY i.embedding {reranking_metric_fun_op} %s::vector LIMIT %s::int @@ -166,6 +167,7 @@ def init(self) -> Generator[None, None, None]: table_name=sql.Identifier(self.table_name), metric_fun_op=sql.SQL(search_params["metric_fun_op"]), reranking_metric_fun_op=sql.SQL(search_params["reranking_metric_fun_op"]), + quantized_fetch_limit=sql.Identifier(search_params["quantized_fetch_limit"]), ) else: @@ -380,16 +382,40 @@ def search_embedding( assert self.conn is not None, "Connection is not initialized" assert self.cursor is not None, "Cursor is not initialized" + search_params = self.case_config.search_param() + is_reranking = search_params.get("reranking", False) + q = np.asarray(query) if filters: gt = filters.get("id") - result = self.cursor.execute( - self._filtered_search, - (gt, q, k), - prepare=True, - binary=True, - ) + if is_reranking: + result = self.cursor.execute( + self._filtered_search, + (gt, q, q, k), + prepare=True, + binary=True, + ) + else: + result = self.cursor.execute( + self._filtered_search, + (gt, q, k), + prepare=True, + binary=True, + ) else: - result = self.cursor.execute(self._unfiltered_search, (q, k), prepare=True, binary=True) + if is_reranking: + result = self.cursor.execute( + self._unfiltered_search, + (q, q, k), + prepare=True, + binary=True, + ) + else: + result = self.cursor.execute( + self._unfiltered_search, + (q, k), + prepare=True, + binary=True, + ) return [int(i[0]) for i in result.fetchall()] From c6a6681d961bf87dd98dfbec8153911738369bd8 Mon Sep 17 00:00:00 2001 From: Wahaj Ali Date: Tue, 22 Apr 2025 11:28:23 +0500 Subject: [PATCH 7/8] Fix quantized_fetch_limit type --- vectordb_bench/backend/clients/pgdiskann/pgdiskann.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py b/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py index 426061908..6c0d90c6d 100644 --- a/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +++ b/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py @@ -150,7 +150,7 @@ def init(self) -> Generator[None, None, None]: table_name=sql.Identifier(self.table_name), metric_fun_op=sql.SQL(search_params["metric_fun_op"]), reranking_metric_fun_op=sql.SQL(search_params["reranking_metric_fun_op"]), - quantized_fetch_limit=sql.Identifier(search_params["quantized_fetch_limit"]), + quantized_fetch_limit=sql.Literal(search_params["quantized_fetch_limit"]), ) self._unfiltered_search = sql.SQL(""" @@ -167,7 +167,7 @@ def init(self) -> Generator[None, None, None]: table_name=sql.Identifier(self.table_name), metric_fun_op=sql.SQL(search_params["metric_fun_op"]), reranking_metric_fun_op=sql.SQL(search_params["reranking_metric_fun_op"]), - quantized_fetch_limit=sql.Identifier(search_params["quantized_fetch_limit"]), + quantized_fetch_limit=sql.Literal(search_params["quantized_fetch_limit"]), ) else: From 917b65e5fba7fe02639bf8fae9f7c0c0ae326c0c Mon Sep 17 00:00:00 2001 From: Wahaj Ali Date: Wed, 7 May 2025 14:14:59 +0500 Subject: [PATCH 8/8] Add product_quantized --- vectordb_bench/backend/clients/pgdiskann/config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vectordb_bench/backend/clients/pgdiskann/config.py b/vectordb_bench/backend/clients/pgdiskann/config.py index f4fa23958..e157fb127 100644 --- a/vectordb_bench/backend/clients/pgdiskann/config.py +++ b/vectordb_bench/backend/clients/pgdiskann/config.py @@ -137,6 +137,7 @@ def index_param(self) -> dict: "max_neighbors": self.max_neighbors, "l_value_ib": self.l_value_ib, "pq_param_num_chunks": self.pq_param_num_chunks, + "product_quantized": str(self.reranking), }, "maintenance_work_mem": self.maintenance_work_mem, "max_parallel_workers": self.max_parallel_workers,