diff --git a/python/python/tests/test_scalar_index.py b/python/python/tests/test_scalar_index.py
index 8ca3e01d95c..75ec01d9a82 100644
--- a/python/python/tests/test_scalar_index.py
+++ b/python/python/tests/test_scalar_index.py
@@ -1554,6 +1554,22 @@ def test_bitmap_index(tmp_path: Path):
     assert indices[0]["type"] == "Bitmap"
 
 
+def test_bitmap_empty_range(tmp_path: Path):
+    """Empty/inverted range filters on a BITMAP index must match no rows."""
+    data = pa.table({"c0": pa.array([1, 2, 3], type=pa.int64())})
+    dataset = lance.write_dataset(data, tmp_path / "dataset")
+    dataset.create_scalar_index("c0", index_type="BITMAP")
+    filters = [
+        "c0 BETWEEN 2 AND 1",
+        "c0 > 2 AND c0 < 2",
+        "c0 >= 2 AND c0 < 2",
+        "c0 > 2 AND c0 <= 2",
+    ]
+    for filter_expr in filters:
+        result = dataset.to_table(filter=filter_expr, use_scalar_index=True)
+        assert result.num_rows == 0, f"expected no rows for {filter_expr!r}"
+
+
 def test_btree_remap_big_deletions(tmp_path: Path):
     # Write 15K rows in 3 fragments
     ds = lance.write_dataset(pa.table({"a": range(5000)}), tmp_path)
diff --git a/rust/lance-index/src/scalar/bitmap.rs b/rust/lance-index/src/scalar/bitmap.rs
index 12d0b6232a1..4fb9fc3334c 100644
--- a/rust/lance-index/src/scalar/bitmap.rs
+++ b/rust/lance-index/src/scalar/bitmap.rs
@@ -438,11 +438,25 @@ impl ScalarIndex for BitmapIndex {
                     Bound::Unbounded => Bound::Unbounded,
                 };
 
-                let keys: Vec<_> = self
-                    .index_map
-                    .range((range_start, range_end))
-                    .map(|(k, _v)| k.clone())
-                    .collect();
+                // Guard the `range` call below. NOTE(review): presumably `index_map` is a
+                // `BTreeMap`, whose `range` panics on start > end or equal excluded bounds.
+                // Empty range if lower > upper, or if any bound is excluded and lower >= upper.
+                let empty_range = match (&range_start, &range_end) {
+                    (Bound::Included(lower), Bound::Included(upper)) => lower > upper,
+                    (Bound::Included(lower), Bound::Excluded(upper))
+                    | (Bound::Excluded(lower), Bound::Included(upper))
+                    | (Bound::Excluded(lower), Bound::Excluded(upper)) => lower >= upper,
+                    _ => false,
+                };
+
+                let keys: Vec<_> = if empty_range {
+                    Vec::new()
+                } else {
+                    self.index_map
+                        .range((range_start, range_end))
+                        .map(|(k, _v)| k.clone())
+                        .collect()
+                };
 
                 metrics.record_comparisons(keys.len());
 
@@ -952,6 +966,18 @@ pub mod tests {
             assert_eq!(actual, expected_range_rows);
         }
 
+        // Test 3b: Inverted range query should return empty result
+        let query = SargableQuery::Range(
+            std::ops::Bound::Included(ScalarValue::Utf8(Some("green".to_string()))),
+            std::ops::Bound::Included(ScalarValue::Utf8(Some("blue".to_string()))),
+        );
+        let result = index.search(&query, &NoOpMetricsCollector).await.unwrap();
+        if let SearchResult::Exact(row_ids) = result {
+            assert!(row_ids.true_rows().is_empty());
+        } else {
+            panic!("Expected exact search result");
+        }
+
         // Test 4: IsIn query
         let query = SargableQuery::IsIn(vec![
             ScalarValue::Utf8(Some("red".to_string())),