diff --git a/rust/lance-table/src/rowids.rs b/rust/lance-table/src/rowids.rs
index a8b3f63c2e3..5818e9666fd 100644
--- a/rust/lance-table/src/rowids.rs
+++ b/rust/lance-table/src/rowids.rs
@@ -419,14 +419,14 @@ impl RowIdSequence {
                 let mut holes_passed = 0;
                 ranges.extend(GroupingIterator::new(unsafe { ids.into_addr_iter() }.map(
                     |addr| {
-                        let offset_no_holes = addr - range.start + offset_start;
-                        while bitmap_iter_pos < offset_no_holes {
+                        let position_in_range = addr - range.start;
+                        while bitmap_iter_pos < position_in_range {
                             if !bitmap_iter.next().unwrap() {
                                 holes_passed += 1;
                             }
                             bitmap_iter_pos += 1;
                         }
-                        offset_no_holes - holes_passed
+                        offset_start + position_in_range - holes_passed
                     },
                 )));
             }
diff --git a/rust/lance/tests/query/primitives.rs b/rust/lance/tests/query/primitives.rs
index 8b257f812ed..6c72f66bd23 100644
--- a/rust/lance/tests/query/primitives.rs
+++ b/rust/lance/tests/query/primitives.rs
@@ -10,9 +10,11 @@ use arrow_array::{
 };
 use arrow_schema::DataType;
 use lance::Dataset;
+use lance::dataset::WriteParams;
+use lance::dataset::optimize::{CompactionOptions, compact_files};
 use lance_datagen::{ArrayGeneratorExt, RowCount, array, gen_batch};
-use lance_index::IndexType;
+use lance_index::{DatasetIndexExt, IndexType};
 
 use super::{test_filter, test_scan, test_take};
 use crate::utils::DatasetTestCases;
 
@@ -439,3 +441,74 @@ async fn test_query_decimal(#[case] data_type: DataType) {
     })
     .await
 }
+
+/// Regression test: filtered scan panics after compaction with SRID when a
+/// RangeWithBitmap segment appears after a Range segment in a fragment's
+/// RowIdSequence. The bitmap iterator was advanced using a global offset
+/// instead of a range-local position, exhausting the iterator.
+///
+/// Sequence: Write(2 frags) → Delete(from frag1) → Compact → CreateIndex → FilteredScan
+#[tokio::test]
+async fn test_filtered_scan_after_compact_with_srid() {
+    use arrow::record_batch::RecordBatchIterator;
+
+    // Write 100 rows across 2 fragments (50 each) with stable row IDs.
+    let batch = RecordBatch::try_from_iter(vec![(
+        "int_col",
+        Arc::new(Int32Array::from_iter_values(0..100)) as ArrayRef,
+    )])
+    .unwrap();
+    let schema = batch.schema();
+    let reader = RecordBatchIterator::new(vec![Ok(batch)], schema);
+    let write_params = WriteParams {
+        enable_stable_row_ids: true,
+        max_rows_per_file: 50,
+        ..Default::default()
+    };
+    let mut ds = Dataset::write(reader, "memory://compact_srid_test", Some(write_params))
+        .await
+        .unwrap();
+    assert_eq!(ds.get_fragments().len(), 2);
+    assert_eq!(ds.count_rows(None).await.unwrap(), 100);
+
+    // Delete some rows from the second fragment to create holes.
+    // After compaction, this fragment's row_ids become a RangeWithBitmap segment.
+    ds.delete("int_col >= 60 AND int_col < 70").await.unwrap();
+    assert_eq!(ds.count_rows(None).await.unwrap(), 90);
+
+    // Compact: merges both fragments into one. The output RowIdSequence has
+    // multiple segments: Range(0..50) followed by RangeWithBitmap(50..100).
+    // The RangeWithBitmap segment has offset_start=50 from the preceding Range.
+    compact_files(&mut ds, CompactionOptions::default(), None)
+        .await
+        .unwrap();
+
+    // Create a BTree index so filtered scans use mask_to_offset_ranges.
+    ds.create_index(
+        &["int_col"],
+        IndexType::BTree,
+        None,
+        &lance_index::scalar::ScalarIndexParams::default(),
+        true,
+    )
+    .await
+    .unwrap();
+
+    // Filtered scan: the index produces a RowAddrMask, which is passed to
+    // mask_to_offset_ranges on the multi-segment RowIdSequence. Before the
+    // fix, this panicked with "called Option::unwrap() on a None value".
+    let results = ds
+        .scan()
+        .filter("int_col < 200")
+        .unwrap()
+        .try_into_batch()
+        .await
+        .unwrap();
+
+    assert_eq!(
+        results.num_rows(),
+        90,
+        "Expected 90 rows (100 written - 10 deleted) but got {}",
+        results.num_rows()
+    );
+}