From a7cabfaf39d500db10434087bb22ba66f0a95c8c Mon Sep 17 00:00:00 2001 From: Wyatt Alt Date: Fri, 27 Feb 2026 15:26:31 +0000 Subject: [PATCH 1/3] fix: bitmap iterator exhaustion in mask_to_offset_ranges with multi-segment sequences In mask_to_offset_ranges, the RangeWithBitmap case advanced the bitmap iterator using a global offset (addr - range.start + offset_start) instead of a range-local position (addr - range.start). When a RangeWithBitmap segment appeared after other segments (offset_start > 0), the iterator was advanced past its end, causing a panic. The fix separates range-local iteration from the final offset calculation: iterate the bitmap using position_in_range, then add offset_start at the end. Includes an integration test that reproduces the panic through the user-facing API: write 2 fragments with stable row IDs, delete some rows, compact, create a BTree index, then run a filtered scan. Co-Authored-By: Claude Opus 4.6 --- rust/lance-table/src/rowids.rs | 6 +-- rust/lance/tests/query/primitives.rs | 75 +++++++++++++++++++++++++++- 2 files changed, 77 insertions(+), 4 deletions(-) diff --git a/rust/lance-table/src/rowids.rs b/rust/lance-table/src/rowids.rs index 74b0e224bb9..6de926f8dff 100644 --- a/rust/lance-table/src/rowids.rs +++ b/rust/lance-table/src/rowids.rs @@ -422,14 +422,14 @@ impl RowIdSequence { let mut holes_passed = 0; ranges.extend(GroupingIterator::new(unsafe { ids.into_addr_iter() }.map( |addr| { - let offset_no_holes = addr - range.start + offset_start; - while bitmap_iter_pos < offset_no_holes { + let position_in_range = addr - range.start; + while bitmap_iter_pos < position_in_range { if !bitmap_iter.next().unwrap() { holes_passed += 1; } bitmap_iter_pos += 1; } - offset_no_holes - holes_passed + offset_start + position_in_range - holes_passed }, ))); } diff --git a/rust/lance/tests/query/primitives.rs b/rust/lance/tests/query/primitives.rs index c1c70e6a3fe..c1501c2c239 100644 --- a/rust/lance/tests/query/primitives.rs +++ b/rust/lance/tests/query/primitives.rs @@ -9,10 +9,12 @@ use arrow_array::{ LargeBinaryArray, LargeStringArray, RecordBatch, StringArray, StringViewArray, }; use arrow_schema::DataType; +use lance::dataset::optimize::{compact_files, CompactionOptions}; +use lance::dataset::WriteParams; use lance::Dataset; use lance_datagen::{array, gen_batch, ArrayGeneratorExt, RowCount}; -use lance_index::IndexType; +use lance_index::{DatasetIndexExt, IndexType}; use super::{test_filter, test_scan, test_take}; use crate::utils::DatasetTestCases; @@ -403,3 +405,74 @@ async fn test_query_decimal(#[case] data_type: DataType) { }) .await } + +/// Regression test: filtered scan panics after compaction with SRID when a +/// RangeWithBitmap segment appears after a Range segment in a fragment's +/// RowIdSequence. The bitmap iterator was advanced using a global offset +/// instead of a range-local position, exhausting the iterator. +/// +/// Sequence: Write(2 frags) → Delete(from frag1) → Compact → CreateIndex → FilteredScan +#[tokio::test] +async fn test_filtered_scan_after_compact_with_srid() { + use arrow::record_batch::RecordBatchIterator; + + // Write 100 rows across 2 fragments (50 each) with stable row IDs. + let batch = RecordBatch::try_from_iter(vec![( + "int_col", + Arc::new(Int32Array::from_iter_values(0..100)) as ArrayRef, + )]) + .unwrap(); + let schema = batch.schema(); + let reader = RecordBatchIterator::new(vec![Ok(batch)], schema); + let write_params = WriteParams { + enable_stable_row_ids: true, + max_rows_per_file: 50, + ..Default::default() + }; + let mut ds = Dataset::write(reader, "memory://compact_srid_test", Some(write_params)) + .await + .unwrap(); + assert_eq!(ds.get_fragments().len(), 2); + assert_eq!(ds.count_rows(None).await.unwrap(), 100); + + // Delete some rows from the second fragment to create holes. + // After compaction, this fragment's row_ids become a RangeWithBitmap segment. + ds.delete("int_col >= 60 AND int_col < 70").await.unwrap(); + assert_eq!(ds.count_rows(None).await.unwrap(), 90); + + // Compact: merges both fragments into one. The output RowIdSequence has + // multiple segments: Range(0..50) followed by RangeWithBitmap(50..100). + // The RangeWithBitmap segment has offset_start=50 from the preceding Range. + compact_files(&mut ds, CompactionOptions::default(), None) + .await + .unwrap(); + + // Create a BTree index so filtered scans use mask_to_offset_ranges. + ds.create_index( + &["int_col"], + IndexType::BTree, + None, + &lance_index::scalar::ScalarIndexParams::default(), + true, + ) + .await + .unwrap(); + + // Filtered scan: the index produces a RowAddrMask, which is passed to + // mask_to_offset_ranges on the multi-segment RowIdSequence. Before the + // fix, this panicked with "called Option::unwrap() on a None value". + let results = ds + .scan() + .filter("int_col < 200") + .unwrap() + .try_into_batch() + .await + .unwrap(); + + assert_eq!( + results.num_rows(), + 90, + "Expected 90 rows (100 written - 10 deleted) but got {}", + results.num_rows() + ); +} From e23d15873a51d1ec4c1aa459ad92632fabd9d5d0 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 6 Mar 2026 14:34:32 +0800 Subject: [PATCH 2/3] fix: format rust/lance/tests/query/primitives.rs Amp-Thread-ID: https://ampcode.com/threads/T-019cc1ba-6e5b-74ca-979a-34ab5be8fd84 Co-authored-by: Amp --- rust/lance/tests/query/primitives.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/lance/tests/query/primitives.rs b/rust/lance/tests/query/primitives.rs index 2f2a4ac5360..4650ba4620e 100644 --- a/rust/lance/tests/query/primitives.rs +++ b/rust/lance/tests/query/primitives.rs @@ -9,9 +9,9 @@ use arrow_array::{ LargeBinaryArray, LargeStringArray, RecordBatch, StringArray, StringViewArray, }; use arrow_schema::DataType; -use lance::dataset::optimize::{compact_files, CompactionOptions}; -use lance::dataset::WriteParams; use lance::Dataset; +use lance::dataset::WriteParams; +use lance::dataset::optimize::{CompactionOptions, compact_files}; use lance_datagen::{ArrayGeneratorExt, RowCount, array, gen_batch}; use lance_index::IndexType; From f3a6754f1e8aeb3b5d48da09087f9d58d298f6ce Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 6 Mar 2026 14:53:52 +0800 Subject: [PATCH 3/3] fix: restore DatasetIndexExt import needed for create_index test Amp-Thread-ID: https://ampcode.com/threads/T-019cc1ba-6e5b-74ca-979a-34ab5be8fd84 Co-authored-by: Amp --- rust/lance/tests/query/primitives.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/lance/tests/query/primitives.rs b/rust/lance/tests/query/primitives.rs index 4650ba4620e..6c72f66bd23 100644 --- a/rust/lance/tests/query/primitives.rs +++ b/rust/lance/tests/query/primitives.rs @@ -14,7 +14,7 @@ use lance::dataset::WriteParams; use lance::dataset::optimize::{CompactionOptions, compact_files}; use lance_datagen::{ArrayGeneratorExt, RowCount, array, gen_batch}; -use lance_index::IndexType; +use lance_index::{DatasetIndexExt, IndexType}; use super::{test_filter, test_scan, test_take}; use crate::utils::DatasetTestCases;