Skip to content

Commit a86274d

Browse files
committed
test: cover distributed vector segment build
1 parent 43c7f0b commit a86274d

3 files changed

Lines changed: 931 additions & 17 deletions

File tree

python/python/tests/test_vector_index.py

Lines changed: 52 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2750,6 +2750,58 @@ def test_merge_two_shards_parameterized(tmp_path, index_type, num_sub_vectors):
27502750
assert 0 < len(results) <= 5
27512751

27522752

2753+
def test_index_segment_builder_builds_vector_segments(tmp_path):
    """Build partial vector indices per fragment, assemble segments, then query.

    Flow exercised by this test:

    1. Create the sample dataset and require at least two fragments.
    2. Run ``IndicesBuilder.prepare_global_ivf_pq`` once to obtain the
       ``ivf_centroids`` / ``pq_codebook`` entries shared by every partial build.
    3. Call the low-level ``create_index`` once for each of the first two
       fragments, all under the same index UUID.
    4. Feed the partial indices to the segment builder, check that it plans
       and builds exactly two segments (one partial index per plan), and
       commit them under the name ``vector_idx``.
    5. Run a nearest-neighbour query with ``k=5`` and check that it returns
       between one and five rows.
    """
    dataset = _make_sample_dataset_base(tmp_path, "segment_builder_ds", 2000, 128)
    fragments = dataset.get_fragments()
    assert len(fragments) >= 2
    index_uuid = str(uuid.uuid4())

    trained = IndicesBuilder(dataset, "vector").prepare_global_ivf_pq(
        num_partitions=4,
        num_subvectors=4,
        distance_type="l2",
        sample_rate=7,
        max_iters=20,
    )

    # Parameters that are identical for every per-fragment build; only
    # "fragment_ids" differs between the calls below.
    common_params = {
        "index_uuid": index_uuid,
        "num_partitions": 4,
        "num_sub_vectors": 128,
        "ivf_centroids": trained["ivf_centroids"],
        "pq_codebook": trained["pq_codebook"],
    }

    # A fresh kwargs dict is built for each call, with "fragment_ids" first so
    # the key order matches a literal dict written out in full.
    partials = [
        dataset._ds.create_index(
            ["vector"],
            "IVF_FLAT",
            "vector_idx",
            False,
            True,
            None,
            {"fragment_ids": [frag.fragment_id], **common_params},
        )
        for frag in fragments[:2]
    ]

    segment_builder = dataset.create_index_segment_builder(index_uuid)
    segment_builder = segment_builder.with_partial_indices(partials)

    plans = segment_builder.plan()
    assert len(plans) == 2
    for plan in plans:
        assert len(plan.partial_indices) == 1

    segments = segment_builder.build_all()
    assert len(segments) == 2
    committed = dataset.commit_existing_index_segments(
        "vector_idx", "vector", segments
    )

    query = np.random.rand(128).astype(np.float32)
    hits = committed.to_table(nearest={"column": "vector", "q": query, "k": 5})
    hit_count = len(hits)
    assert 0 < hit_count <= 5
2803+
2804+
27532805
def test_distributed_ivf_pq_order_invariance(tmp_path: Path):
27542806
"""Ensure distributed IVF_PQ build is invariant to shard build order."""
27552807
ds = _make_sample_dataset_base(tmp_path, "dist_ds", 2000, 128)

0 commit comments

Comments
 (0)