From 4116d5e8a74e303bd7ed9e7e9a461f7c4edb2bb5 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Tue, 6 Jan 2026 06:27:22 -0800 Subject: [PATCH 1/4] Add vector throughput benchmark --- .../benchmarks/test_ivf_pq_search.py | 142 +++++-- python/python/ci_benchmarks/datagen/basic.py | 2 - rust/lance/Cargo.toml | 5 + rust/lance/benches/vector_throughput.rs | 352 ++++++++++++++++++ 4 files changed, 461 insertions(+), 40 deletions(-) create mode 100644 rust/lance/benches/vector_throughput.rs diff --git a/python/python/ci_benchmarks/benchmarks/test_ivf_pq_search.py b/python/python/ci_benchmarks/benchmarks/test_ivf_pq_search.py index 6b81d7e9887..c271c574679 100644 --- a/python/python/ci_benchmarks/benchmarks/test_ivf_pq_search.py +++ b/python/python/ci_benchmarks/benchmarks/test_ivf_pq_search.py @@ -4,7 +4,9 @@ """Benchmarks for IVF_PQ vector search performance.""" import math +import multiprocessing as mp import tempfile +from concurrent.futures import ThreadPoolExecutor from pathlib import Path import lance @@ -35,8 +37,8 @@ K_LABELS = ["k10", "k100"] -# Global cache for datasets, keyed by (num_rows, dim) -_DATASET_CACHE = {} +# Datasets are stored in fixed temporary directories and reused between runs +# to avoid retraining indexes def _generate_vector_dataset(num_rows: int, dim: int = 1024): @@ -73,46 +75,56 @@ def _generate_vector_dataset(num_rows: int, dim: int = 1024): def _get_or_create_dataset(num_rows: int, dim: int = 1024) -> str: """Get or create a dataset with the specified parameters. - Datasets are cached globally per process to avoid expensive recreation. + Uses a fixed temporary directory so datasets persist between benchmark runs. + If the dataset exists and has the correct number of rows, it will be reused. Returns the URI to the dataset. 
""" - cache_key = (num_rows, dim) - - if cache_key not in _DATASET_CACHE: - # Create a persistent temporary directory for this dataset - tmpdir = tempfile.mkdtemp(prefix=f"lance_bench_{num_rows}_{dim}_") - dataset_uri = str(Path(tmpdir) / "vector_dataset.lance") - - # Create schema - schema = pa.schema( - [ - pa.field("vector", pa.list_(pa.float32(), dim)), - pa.field("id", pa.int64()), - ] - ) - - # Generate and write dataset - data = _generate_vector_dataset(num_rows, dim) - ds = lance.write_dataset( - data, - dataset_uri, - schema=schema, - mode="create", - ) + # Use a fixed directory path based on parameters + tmpdir = Path(tempfile.gettempdir()) / f"lance_bench_{num_rows}_{dim}" + tmpdir.mkdir(exist_ok=True) + dataset_uri = "file+uring://" + str(tmpdir / "vector_dataset.lance") + + # Check if dataset already exists and has correct row count + try: + ds = lance.dataset(dataset_uri) + if ds.count_rows() == num_rows: + print(f"Reusing existing dataset at {dataset_uri}") + return dataset_uri + else: + print( + f"Dataset exists but has wrong row count ({ds.count_rows()} vs {num_rows}), recreating..." 
+ ) + except Exception: + print(f"Creating new dataset at {dataset_uri}") + + # Create schema + schema = pa.schema( + [ + pa.field("vector", pa.list_(pa.float32(), dim)), + pa.field("id", pa.int64()), + ] + ) - num_partitions = min(num_rows // 4000, int(math.sqrt(num_rows))) + # Generate and write dataset + data = _generate_vector_dataset(num_rows, dim) + ds = lance.write_dataset( + data, + dataset_uri, + schema=schema, + mode="overwrite", # Use overwrite to handle recreation + ) - # Create IVF_PQ index - ds.create_index( - "vector", - index_type="IVF_PQ", - num_partitions=num_partitions, - num_sub_vectors=dim // 16, - ) + num_partitions = min(num_rows // 4000, int(math.sqrt(num_rows))) - _DATASET_CACHE[cache_key] = dataset_uri + # Create IVF_PQ index + ds.create_index( + "vector", + index_type="IVF_PQ", + num_partitions=num_partitions, + num_sub_vectors=dim // 16, + ) - return _DATASET_CACHE[cache_key] + return dataset_uri @pytest.mark.parametrize("num_rows", DATASET_SIZES, ids=DATASET_SIZE_LABELS) @@ -139,7 +151,7 @@ def test_ivf_pq_search( Uses 1024-dimensional float32 vectors with IVF_PQ index. """ - # Get or create the dataset (cached globally per process) + # Get or create the dataset (reused from fixed temp directory between runs) dataset_uri = _get_or_create_dataset(num_rows, dim=VECTOR_DIM) ds = lance.dataset(dataset_uri) @@ -204,7 +216,7 @@ def test_ivf_pq_search_with_payload( Similar to test_ivf_pq_search but includes retrieving vector data along with results, which tests data loading performance. 
""" - # Get or create the dataset (cached globally per process) + # Get or create the dataset (reused from fixed temp directory between runs) dataset_uri = _get_or_create_dataset(num_rows, dim=VECTOR_DIM) ds = lance.dataset(dataset_uri) @@ -248,3 +260,57 @@ def bench(): iterations=1, setup=setup, ) + + +@pytest.mark.parametrize("use_cache", [True, False], ids=["cache", "no_cache"]) +def test_ivf_pq_throughput( + benchmark, + use_cache: bool, +): + """Benchmark IVF_PQ vector search throughput (with payload)""" + # Get or create the dataset (reused from fixed temp directory between runs) + dataset_uri = _get_or_create_dataset(1_000_000, dim=768) + ds = lance.dataset(dataset_uri) + + NUM_QUERIES = 1000 + + # Generate query vectors + query_vectors = [ + np.random.randn(768).astype(np.float32) for _ in range(NUM_QUERIES) + ] + + def clear_cache(): + if not use_cache: + wipe_os_cache(dataset_uri) + + def bench(): + with ThreadPoolExecutor(max_workers=2 * (mp.cpu_count() - 2)) as executor: + futures = [ + executor.submit( + ds.to_table, + nearest={ + "column": "vector", + "q": query_vector, + "k": 50, + "nprobes": 20, + "refine_factor": 10, + }, + columns=["vector", "_distance"], + ) + for query_vector in query_vectors + ] + for future in futures: + future.result() + + if use_cache: + setup = None + else: + setup = clear_cache + + benchmark.pedantic( + bench, + warmup_rounds=1, + rounds=1, + iterations=1, + setup=setup, + ) diff --git a/python/python/ci_benchmarks/datagen/basic.py b/python/python/ci_benchmarks/datagen/basic.py index c14d7dcb47a..9629ac09509 100644 --- a/python/python/ci_benchmarks/datagen/basic.py +++ b/python/python/ci_benchmarks/datagen/basic.py @@ -58,7 +58,6 @@ def _create(dataset_uri: str): dataset_uri, schema=SCHEMA, mode="append", - use_legacy_format=False, ) else: raise Exception( @@ -72,7 +71,6 @@ def _create(dataset_uri: str): dataset_uri, schema=SCHEMA, mode="create", - use_legacy_format=False, ) if ds.list_indices() == []: 
ds.create_scalar_index("row_number", "BTREE") diff --git a/rust/lance/Cargo.toml b/rust/lance/Cargo.toml index 25c30230b35..a36cb6c25e0 100644 --- a/rust/lance/Cargo.toml +++ b/rust/lance/Cargo.toml @@ -91,6 +91,7 @@ lzma-sys = { version = "0.1" } lance-test-macros = { workspace = true } lance-datagen = { workspace = true } pretty_assertions = { workspace = true } +libc = { workspace = true } clap = { workspace = true, features = ["derive"] } criterion = { workspace = true } approx.workspace = true @@ -165,5 +166,9 @@ harness = false name = "random_access" harness = false +[[bench]] +name = "vector_throughput" +harness = false + [lints] workspace = true diff --git a/rust/lance/benches/vector_throughput.rs b/rust/lance/benches/vector_throughput.rs new file mode 100644 index 00000000000..89f82372988 --- /dev/null +++ b/rust/lance/benches/vector_throughput.rs @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +//! Benchmark for IVF_PQ vector search throughput +//! +//! This benchmark measures concurrent vector search performance with IVF_PQ indexes, +//! similar to the Python test_ivf_pq_throughput benchmark. 
+ +#![allow(clippy::print_stdout)] + +use std::sync::Arc; + +use arrow_array::{FixedSizeListArray, Float32Array, RecordBatch, RecordBatchIterator}; +use arrow_schema::{DataType, Field, FieldRef, Schema as ArrowSchema}; +use criterion::{criterion_group, criterion_main, BatchSize, Criterion, Throughput}; +use futures::{StreamExt, TryStreamExt}; +use lance_file::version::LanceFileVersion; +#[cfg(target_os = "linux")] +use pprof::criterion::{Output, PProfProfiler}; +use rand::Rng; + +use lance::dataset::{Dataset, WriteMode, WriteParams}; +use lance::index::vector::VectorIndexParams; +use lance_arrow::FixedSizeListArrayExt; +use lance_index::{ + vector::{ivf::IvfBuildParams, pq::PQBuildParams}, + DatasetIndexExt, IndexType, +}; +use lance_linalg::distance::MetricType; +use lance_testing::datagen::generate_random_array; +use tokio::runtime::Runtime; + +// Benchmark parameters matching Python test_ivf_pq_throughput +const NUM_ROWS: usize = 1_000_000; +const DIM: usize = 768; +const NUM_QUERIES: usize = 100; +const K: usize = 50; +const NPROBES: usize = 20; +const REFINE_FACTOR: u32 = 10; + +// IVF_PQ index parameters +const IVF_PARTITIONS: usize = 256; +const PQ_BITS: usize = 8; +const PQ_SUB_VECTORS: usize = DIM / 16; +const MAX_ITERATIONS: usize = 50; + +/// Cached dataset with pre-generated query vectors +struct CachedDataset { + uri: String, + dataset: Arc<Dataset>, + query_vectors: Vec<Arc<Float32Array>>, +} + +/// Get or create a cached dataset with IVF_PQ index and query vectors +fn get_or_create_dataset(rt: &Runtime, version: LanceFileVersion) -> Arc<CachedDataset> { + // Create dataset in fixed temp directory + let uri = format!( + "file+uring:///tmp/lance_bench_throughput_{}_{}_{}", + NUM_ROWS, DIM, version + ); + + rt.block_on(async { + // Check if dataset exists on disk with correct row count + let mut needs_creation = true; + let mut needs_indexing = true; + + if let Ok(dataset) = Dataset::open(&uri).await { + let row_count = dataset.count_rows(None).await.unwrap(); + if row_count == NUM_ROWS { + 
println!("Reusing existing dataset at {} ({} rows)", uri, row_count); + needs_creation = false; + + // Check if index exists + let indices = dataset.load_indices().await.unwrap(); + if !indices.is_empty() { + println!( + "Dataset already has {} index(es), skipping index creation", + indices.len() + ); + needs_indexing = false; + } else { + println!("Dataset exists but has no index, will create index"); + } + } else { + println!( + "Dataset exists but has wrong row count ({} vs {}), recreating", + row_count, NUM_ROWS + ); + std::fs::remove_dir_all(&uri).ok(); + } + } else { + println!( + "Creating new dataset with {} rows, {} dimensions", + NUM_ROWS, DIM + ); + } + + // Create dataset if needed + if needs_creation { + create_dataset(&uri).await; + } + + // Open dataset + let mut dataset = Dataset::open(&uri).await.unwrap(); + + // Create index if needed + if needs_indexing { + create_ivf_pq_index(&mut dataset).await; + } + + // Generate query vectors + let query_vectors = generate_query_vectors(); + + let cached = Arc::new(CachedDataset { + uri: uri.clone(), + dataset: Arc::new(dataset), + query_vectors, + }); + + cached + }) +} + +/// Create a dataset with random vectors +async fn create_dataset(uri: &str) { + let schema = Arc::new(ArrowSchema::new(vec![Field::new( + "vector", + DataType::FixedSizeList( + FieldRef::new(Field::new("item", DataType::Float32, true)), + DIM as i32, + ), + false, + )])); + + let batch_size = 10_000; + let batches: Vec<RecordBatch> = (0..(NUM_ROWS / batch_size)) + .map(|_| { + RecordBatch::try_new( + schema.clone(), + vec![Arc::new( + FixedSizeListArray::try_new_from_values( + generate_random_array(batch_size * DIM), + DIM as i32, + ) + .unwrap(), + )], + ) + .unwrap() + }) + .collect(); + + let write_params = WriteParams { + max_rows_per_file: NUM_ROWS, + max_rows_per_group: batch_size, + mode: WriteMode::Create, + ..Default::default() + }; + + let reader = RecordBatchIterator::new(batches.into_iter().map(Ok), schema.clone()); + 
Dataset::write(reader, uri, Some(write_params)) + .await + .unwrap(); + + println!("Dataset created at {}", uri); +} + +/// Create IVF_PQ index on the dataset +async fn create_ivf_pq_index(dataset: &mut Dataset) { + println!("Creating IVF_PQ index..."); + + let ivf_params = IvfBuildParams { + num_partitions: Some(IVF_PARTITIONS), + max_iters: MAX_ITERATIONS, + ..Default::default() + }; + let pq_params = PQBuildParams { + num_bits: PQ_BITS, + num_sub_vectors: PQ_SUB_VECTORS, + ..Default::default() + }; + let params = VectorIndexParams::with_ivf_pq_params(MetricType::L2, ivf_params, pq_params); + + dataset + .create_index( + vec!["vector"].as_slice(), + IndexType::Vector, + Some("ivf_pq_index".to_string()), + &params, + true, + ) + .await + .unwrap(); + + println!("IVF_PQ index created"); +} + +/// Generate random query vectors +fn generate_query_vectors() -> Vec<Arc<Float32Array>> { + let mut rng = rand::rng(); + (0..NUM_QUERIES) + .map(|_| { + let values: Vec<f32> = (0..DIM).map(|_| rng.random_range(0.0..1.0)).collect(); + Arc::new(Float32Array::from(values)) + }) + .collect() +} + +/// Drop dataset files from OS page cache (Linux only) +#[cfg(target_os = "linux")] +fn drop_dataset_from_cache(uri: &str) -> std::io::Result<()> { + use std::fs; + use std::os::unix::io::AsRawFd; + + // Walk the dataset directory and drop each file from cache + if let Ok(entries) = fs::read_dir(uri) { + for entry in entries.flatten() { + let path = entry.path(); + if path.is_file() { + if let Ok(file) = fs::File::open(&path) { + let fd = file.as_raw_fd(); + // POSIX_FADV_DONTNEED = 4 + let result = + unsafe { libc::posix_fadvise(fd, 0, 0, libc::POSIX_FADV_DONTNEED) }; + if result != 0 { + eprintln!( + "Warning: Failed to drop {:?} from cache: {}", + path, + std::io::Error::from_raw_os_error(result) + ); + } + } + } + } + } + + Ok(()) +} + +#[cfg(not(target_os = "linux"))] +fn drop_dataset_from_cache(_uri: &str) -> std::io::Result<()> { + Ok(()) +} + +/// Run vector search queries +async fn run_queries( + 
dataset: Arc<Dataset>, + query_vectors: &[Arc<Float32Array>], + concurrent_queries: usize, +) { + // Run queries concurrently using tokio tasks + futures::stream::iter(query_vectors) + .map(|q| { + let dataset = dataset.clone(); + let q = q.clone(); + tokio::spawn(async move { + dataset + .scan() + .nearest("vector", q.as_ref(), K) + .unwrap() + .minimum_nprobes(NPROBES) + .maximum_nprobes(NPROBES) + .refine(REFINE_FACTOR) + .project(&["vector", "_distance"]) + .unwrap() + .try_into_stream() + .await + .unwrap() + .try_collect::<Vec<_>>() + .await + .unwrap() + }) + }) + .buffered(concurrent_queries) + .try_collect::<Vec<_>>() + .await + .unwrap(); +} + +fn bench_ivf_pq_throughput(c: &mut Criterion) { + env_logger::init(); + + let rt = tokio::runtime::Builder::new_multi_thread().build().unwrap(); + + let mut group = c.benchmark_group("ivf_pq_throughput"); + group.throughput(Throughput::Elements(NUM_QUERIES as u64)); + + for &version in &[LanceFileVersion::V2_0, LanceFileVersion::V2_1] { + // Get or create cached dataset + let cached_dataset = get_or_create_dataset(&rt, version); + + for &concurrent_queries in &[1, 16] { + for &cached in &[true, false] { + // Skip uncached tests on non-Linux platforms + #[cfg(not(target_os = "linux"))] + if !cached { + continue; + } + + let cache_label = if cached { "cached" } else { "nocache" }; + + // One pass to warm up the index cache + rt.block_on(run_queries( + cached_dataset.dataset.clone(), + &cached_dataset.query_vectors, + concurrent_queries, + )); + + group.bench_function( + format!("{}_{}threads_{}", version, concurrent_queries, cache_label), + |b| { + b.iter_batched( + || { + // Setup: drop cache if uncached + if !cached { + drop_dataset_from_cache(&cached_dataset.uri).ok(); + } + }, + |_| { + // Run the queries + rt.block_on(run_queries( + cached_dataset.dataset.clone(), + &cached_dataset.query_vectors, + concurrent_queries, + )); + }, + BatchSize::PerIteration, + ); + }, + ); + } + } + } + group.finish(); +} + +#[cfg(target_os = "linux")] 
+criterion_group!( + name=benches; + config = Criterion::default().significance_level(0.1).sample_size(10) + .with_profiler(PProfProfiler::new(100, Output::Flamegraph(None))); + targets = bench_ivf_pq_throughput +); + +// Non-linux version does not support pprof. +#[cfg(not(target_os = "linux"))] +criterion_group!( + name=benches; + config = Criterion::default().significance_level(0.1).sample_size(10); + targets = bench_ivf_pq_throughput +); + +criterion_main!(benches); From 1a981457952002ba45060971b22267aabbfe58df Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Tue, 6 Jan 2026 06:59:12 -0800 Subject: [PATCH 2/4] Remove uring references --- Cargo.lock | 1 + python/python/ci_benchmarks/benchmarks/test_ivf_pq_search.py | 5 +++-- rust/lance/benches/vector_throughput.rs | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9cc4d912115..583283b75b2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4739,6 +4739,7 @@ dependencies = [ "lance-test-macros", "lance-testing", "lapack", + "libc", "log", "lzma-sys", "mock_instant", diff --git a/python/python/ci_benchmarks/benchmarks/test_ivf_pq_search.py b/python/python/ci_benchmarks/benchmarks/test_ivf_pq_search.py index c271c574679..5bef0492964 100644 --- a/python/python/ci_benchmarks/benchmarks/test_ivf_pq_search.py +++ b/python/python/ci_benchmarks/benchmarks/test_ivf_pq_search.py @@ -82,7 +82,7 @@ def _get_or_create_dataset(num_rows: int, dim: int = 1024) -> str: # Use a fixed directory path based on parameters tmpdir = Path(tempfile.gettempdir()) / f"lance_bench_{num_rows}_{dim}" tmpdir.mkdir(exist_ok=True) - dataset_uri = "file+uring://" + str(tmpdir / "vector_dataset.lance") + dataset_uri = "file://" + str(tmpdir / "vector_dataset.lance") # Check if dataset already exists and has correct row count try: @@ -92,7 +92,8 @@ def _get_or_create_dataset(num_rows: int, dim: int = 1024) -> str: return dataset_uri else: print( - f"Dataset exists but has wrong row count ({ds.count_rows()} vs 
{num_rows}), recreating..." + "Dataset exists but has wrong row count " + f"({ds.count_rows()} vs {num_rows}), recreating..." ) except Exception: print(f"Creating new dataset at {dataset_uri}") diff --git a/rust/lance/benches/vector_throughput.rs b/rust/lance/benches/vector_throughput.rs index 89f82372988..b03d725324b 100644 --- a/rust/lance/benches/vector_throughput.rs +++ b/rust/lance/benches/vector_throughput.rs @@ -55,7 +55,7 @@ struct CachedDataset { fn get_or_create_dataset(rt: &Runtime, version: LanceFileVersion) -> Arc { // Create dataset in fixed temp directory let uri = format!( - "file+uring:///tmp/lance_bench_throughput_{}_{}_{}", + "file:///tmp/lance_bench_throughput_{}_{}_{}", NUM_ROWS, DIM, version ); From 5c4102b07716f2aa9dafaee96248afd26cf05970 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Tue, 6 Jan 2026 13:59:28 -0800 Subject: [PATCH 3/4] Replace println with logging. Fix cache removal code --- rust/lance/benches/vector_throughput.rs | 75 +++++++++++++------------ 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/rust/lance/benches/vector_throughput.rs b/rust/lance/benches/vector_throughput.rs index b03d725324b..ac03932f849 100644 --- a/rust/lance/benches/vector_throughput.rs +++ b/rust/lance/benches/vector_throughput.rs @@ -6,8 +6,6 @@ //! This benchmark measures concurrent vector search performance with IVF_PQ indexes, //! similar to the Python test_ivf_pq_throughput benchmark. 
-#![allow(clippy::print_stdout)] - use std::sync::Arc; use arrow_array::{FixedSizeListArray, Float32Array, RecordBatch, RecordBatchIterator}; @@ -15,6 +13,7 @@ use arrow_schema::{DataType, Field, FieldRef, Schema as ArrowSchema}; use criterion::{criterion_group, criterion_main, BatchSize, Criterion, Throughput}; use futures::{StreamExt, TryStreamExt}; use lance_file::version::LanceFileVersion; +use log::info; #[cfg(target_os = "linux")] use pprof::criterion::{Output, PProfProfiler}; use rand::Rng; @@ -46,18 +45,21 @@ const MAX_ITERATIONS: usize = 50; /// Cached dataset with pre-generated query vectors struct CachedDataset { - uri: String, dataset: Arc, query_vectors: Vec>, } +fn dataset_path(version: LanceFileVersion) -> String { + format!( + "/tmp/lance_bench_throughput_{}_{}_{}", + NUM_ROWS, DIM, version + ) +} + /// Get or create a cached dataset with IVF_PQ index and query vectors fn get_or_create_dataset(rt: &Runtime, version: LanceFileVersion) -> Arc { // Create dataset in fixed temp directory - let uri = format!( - "file:///tmp/lance_bench_throughput_{}_{}_{}", - NUM_ROWS, DIM, version - ); + let uri = format!("file://{}", dataset_path(version)); rt.block_on(async { // Check if dataset exists on disk with correct row count @@ -67,29 +69,29 @@ fn get_or_create_dataset(rt: &Runtime, version: LanceFileVersion) -> Arc Arc Vec> { /// Drop dataset files from OS page cache (Linux only) #[cfg(target_os = "linux")] -fn drop_dataset_from_cache(uri: &str) -> std::io::Result<()> { +fn drop_dataset_from_cache(dataset_dir: &str) -> std::io::Result<()> { use std::fs; use std::os::unix::io::AsRawFd; // Walk the dataset directory and drop each file from cache - if let Ok(entries) = fs::read_dir(uri) { - for entry in entries.flatten() { - let path = entry.path(); - if path.is_file() { - if let Ok(file) = fs::File::open(&path) { - let fd = file.as_raw_fd(); - // POSIX_FADV_DONTNEED = 4 - let result = - unsafe { libc::posix_fadvise(fd, 0, 0, libc::POSIX_FADV_DONTNEED) }; - if 
result != 0 { - eprintln!( - "Warning: Failed to drop {:?} from cache: {}", - path, - std::io::Error::from_raw_os_error(result) - ); - } + let mut num_dropped = 0; + let entries = fs::read_dir(format!("{}/data", dataset_dir)).unwrap(); + for entry in entries.flatten() { + let path = entry.path(); + if path.is_file() { + if let Ok(file) = fs::File::open(&path) { + let fd = file.as_raw_fd(); + // POSIX_FADV_DONTNEED = 4 + let result = unsafe { libc::posix_fadvise(fd, 0, 0, libc::POSIX_FADV_DONTNEED) }; + if result != 0 { + panic!( + "Warning: Failed to drop {:?} from cache: {}", + path, + std::io::Error::from_raw_os_error(result) + ); } + num_dropped += 1; } } } + if num_dropped == 0 { + // Sanity check to ensure that we actually dropped some files from cache. + panic!("No files dropped from cache"); + } Ok(()) } #[cfg(not(target_os = "linux"))] -fn drop_dataset_from_cache(_uri: &str) -> std::io::Result<()> { +fn drop_dataset_from_cache(_path: &str) -> std::io::Result<()> { Ok(()) } @@ -312,7 +317,7 @@ fn bench_ivf_pq_throughput(c: &mut Criterion) { || { // Setup: drop cache if uncached if !cached { - drop_dataset_from_cache(&cached_dataset.uri).ok(); + drop_dataset_from_cache(&dataset_path(version)).ok(); } }, |_| { From 856b255d2306417dbcc275e9e82150404c532725 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Fri, 16 Jan 2026 06:56:19 -0800 Subject: [PATCH 4/4] Address clippy suggestions --- rust/lance/benches/vector_throughput.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/rust/lance/benches/vector_throughput.rs b/rust/lance/benches/vector_throughput.rs index ac03932f849..aa557863d2b 100644 --- a/rust/lance/benches/vector_throughput.rs +++ b/rust/lance/benches/vector_throughput.rs @@ -113,12 +113,10 @@ fn get_or_create_dataset(rt: &Runtime, version: LanceFileVersion) -> Arc