From a7d1a5e416c93548352fd82eb3bbf49442e5ae94 Mon Sep 17 00:00:00 2001 From: fenfeng9 Date: Mon, 19 Jan 2026 01:15:45 +0800 Subject: [PATCH] test: add f16/f64 coverage to PQ distance table benchmarks --- rust/lance-index/benches/4bitpq_dist_table.rs | 62 +++++++++++++------ rust/lance-index/benches/pq_dist_table.rs | 62 +++++++++++++------ 2 files changed, 88 insertions(+), 36 deletions(-) diff --git a/rust/lance-index/benches/4bitpq_dist_table.rs b/rust/lance-index/benches/4bitpq_dist_table.rs index 53ac80ab95d..c6c69a9536b 100644 --- a/rust/lance-index/benches/4bitpq_dist_table.rs +++ b/rust/lance-index/benches/4bitpq_dist_table.rs @@ -5,13 +5,13 @@ use std::iter::repeat_n; -use arrow_array::types::Float32Type; +use arrow_array::types::{Float16Type, Float32Type, Float64Type}; use arrow_array::{FixedSizeListArray, UInt8Array}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use lance_arrow::FixedSizeListArrayExt; +use lance_arrow::{ArrowFloatType, FixedSizeListArrayExt, FloatArray}; use lance_index::vector::pq::distance::{build_distance_table_dot, build_distance_table_l2}; use lance_index::vector::pq::ProductQuantizer; -use lance_linalg::distance::DistanceType; +use lance_linalg::distance::{DistanceType, Dot, L2}; use lance_testing::datagen::generate_random_array_with_seed; use rand::{prelude::StdRng, Rng, SeedableRng}; @@ -23,25 +23,36 @@ const DIM: usize = 1536; const TOTAL: usize = 16 * 1000; fn construct_dist_table(c: &mut Criterion) { - let codebook = generate_random_array_with_seed::(256 * DIM, [88; 32]); - let query = generate_random_array_with_seed::(DIM, [32; 32]); + construct_dist_table_for_type::(c, "f16"); + construct_dist_table_for_type::(c, "f32"); + construct_dist_table_for_type::(c, "f64"); +} + +fn construct_dist_table_for_type(c: &mut Criterion, type_name: &str) +where + T::Native: L2 + Dot, + T::ArrayType: FloatArray, +{ + let codebook = generate_random_array_with_seed::(256 * DIM, [88; 32]); + let query = generate_random_array_with_seed::(DIM, [32; 32]); c.bench_function( format!( - "construct_dist_table: {},PQ={}x{},DIM={}", + "construct_dist_table: {},PQ={}x{},DIM={},type={}", DistanceType::L2, PQ, 4, - DIM + DIM, + type_name ) .as_str(), |b| { b.iter(|| { black_box(build_distance_table_l2( - codebook.values(), + codebook.as_slice(), 4, PQ, - query.values(), + query.as_slice(), )); }) }, @@ -49,20 +60,21 @@ fn construct_dist_table(c: &mut Criterion) { c.bench_function( format!( - "construct_dist_table: {},PQ={}x{},DIM={}", + "construct_dist_table: {},PQ={}x{},DIM={},type={}", DistanceType::Dot, PQ, 4, - DIM + DIM, + type_name ) .as_str(), |b| { b.iter(|| { black_box(build_distance_table_dot( - codebook.values(), + codebook.as_slice(), 4, PQ, - query.values(), + query.as_slice(), )); }) }, @@ -70,23 +82,37 @@ fn construct_dist_table(c: &mut Criterion) { } fn compute_distances(c: &mut Criterion) { - let codebook = generate_random_array_with_seed::(256 * DIM, [88; 32]); - let query = generate_random_array_with_seed::(DIM, [32; 32]); + compute_distances_for_type::(c, "f16"); + compute_distances_for_type::(c, "f32"); + compute_distances_for_type::(c, "f64"); +} + +fn compute_distances_for_type(c: &mut Criterion, type_name: &str) +where + T::Native: L2 + Dot, + T::ArrayType: FloatArray, +{ + let codebook = generate_random_array_with_seed::(256 * DIM, [88; 32]); + let query = generate_random_array_with_seed::(DIM, [32; 32]); let mut rnd = StdRng::from_seed([32; 32]); let code = UInt8Array::from_iter_values(repeat_n(rnd.random::(), TOTAL * PQ)); - for dt in [DistanceType::L2, DistanceType::Cosine, DistanceType::Dot].iter() { + for dt in [DistanceType::L2, DistanceType::Cosine, DistanceType::Dot] { let pq = ProductQuantizer::new( PQ, 4, DIM, FixedSizeListArray::try_new_from_values(codebook.clone(), DIM as i32).unwrap(), - *dt, + dt, ); c.bench_function( - format!("{},{},PQ={}x{},DIM={}", TOTAL, dt, PQ, 4, DIM).as_str(), + format!( + "compute_distances: {},{},PQ={}x{},DIM={},type={}", + TOTAL, dt, PQ, 4, DIM, type_name + ) + .as_str(), |b| { b.iter(|| { black_box(pq.compute_distances(&query, &code).unwrap()); diff --git a/rust/lance-index/benches/pq_dist_table.rs b/rust/lance-index/benches/pq_dist_table.rs index 05876a445be..8c3b135f8f4 100644 --- a/rust/lance-index/benches/pq_dist_table.rs +++ b/rust/lance-index/benches/pq_dist_table.rs @@ -5,13 +5,13 @@ use std::iter::repeat_n; -use arrow_array::types::Float32Type; +use arrow_array::types::{Float16Type, Float32Type, Float64Type}; use arrow_array::{FixedSizeListArray, UInt8Array}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use lance_arrow::FixedSizeListArrayExt; +use lance_arrow::{ArrowFloatType, FixedSizeListArrayExt, FloatArray}; use lance_index::vector::pq::distance::*; use lance_index::vector::pq::ProductQuantizer; -use lance_linalg::distance::DistanceType; +use lance_linalg::distance::{DistanceType, Dot, L2}; use lance_testing::datagen::generate_random_array_with_seed; use rand::{prelude::StdRng, Rng, SeedableRng}; @@ -23,24 +23,35 @@ const PQ: usize = DIM / 8; const TOTAL: usize = 16 * 1000; fn construct_dist_table(c: &mut Criterion) { - let codebook = generate_random_array_with_seed::(256 * DIM, [88; 32]); - let query = generate_random_array_with_seed::(DIM, [32; 32]); + construct_dist_table_for_type::(c, "f16"); + construct_dist_table_for_type::(c, "f32"); + construct_dist_table_for_type::(c, "f64"); +} + +fn construct_dist_table_for_type(c: &mut Criterion, type_name: &str) +where + T::Native: L2 + Dot, + T::ArrayType: FloatArray, +{ + let codebook = generate_random_array_with_seed::(256 * DIM, [88; 32]); + let query = generate_random_array_with_seed::(DIM, [32; 32]); c.bench_function( format!( - "construct_dist_table: {},PQ={},DIM={}", + "construct_dist_table: {},PQ={},DIM={},type={}", DistanceType::L2, PQ, - DIM + DIM, + type_name ) .as_str(), |b| { b.iter(|| { black_box(build_distance_table_l2( - codebook.values(), + codebook.as_slice(), 8, PQ, - query.values(), + query.as_slice(), )); }) }, @@ -48,19 +59,20 @@ fn construct_dist_table(c: &mut Criterion) { c.bench_function( format!( - "construct_dist_table: {},PQ={},DIM={}", + "construct_dist_table: {},PQ={},DIM={},type={}", DistanceType::Dot, PQ, - DIM + DIM, + type_name ) .as_str(), |b| { b.iter(|| { black_box(build_distance_table_dot( - codebook.values(), + codebook.as_slice(), 8, PQ, - query.values(), + query.as_slice(), )); }) }, @@ -68,23 +80,37 @@ fn construct_dist_table(c: &mut Criterion) { } fn compute_distances(c: &mut Criterion) { - let codebook = generate_random_array_with_seed::(256 * DIM, [88; 32]); - let query = generate_random_array_with_seed::(DIM, [32; 32]); + compute_distances_for_type::(c, "f16"); + compute_distances_for_type::(c, "f32"); + compute_distances_for_type::(c, "f64"); +} + +fn compute_distances_for_type(c: &mut Criterion, type_name: &str) +where + T::Native: L2 + Dot, + T::ArrayType: FloatArray, +{ + let codebook = generate_random_array_with_seed::(256 * DIM, [88; 32]); + let query = generate_random_array_with_seed::(DIM, [32; 32]); let mut rnd = StdRng::from_seed([32; 32]); let code = UInt8Array::from_iter_values(repeat_n(rnd.random::(), TOTAL * PQ)); - for dt in [DistanceType::L2, DistanceType::Cosine, DistanceType::Dot].iter() { + for dt in [DistanceType::L2, DistanceType::Cosine, DistanceType::Dot] { let pq = ProductQuantizer::new( PQ, 8, DIM, FixedSizeListArray::try_new_from_values(codebook.clone(), DIM as i32).unwrap(), - *dt, + dt, ); c.bench_function( - format!("compute_distances: {},{},PQ={},DIM={}", TOTAL, dt, PQ, DIM).as_str(), + format!( + "compute_distances: {},{},PQ={},DIM={},type={}", + TOTAL, dt, PQ, DIM, type_name + ) + .as_str(), |b| { b.iter(|| { black_box(pq.compute_distances(&query, &code).unwrap());