Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ rand_distr = { version = "0.5.1" }
rand_xoshiro = "0.7.0"
rangemap = { version = "1.0" }
rayon = "1.10"
roaring = "0.10.1"
roaring = "0.11"
rstest = "0.23.0"
rustc_version = "0.4"
serde = { version = "^1" }
Expand Down
4 changes: 2 additions & 2 deletions java/lance-jni/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion java/lance-jni/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ log = "0.4"
env_logger = "0.11.7"
uuid = { version = "1.17.0", features = ["v4"] }
prost = "0.14.1"
roaring = "0.10.1"
roaring = "0.11"
prost-types = "0.14.1"
chrono = "0.4.41"

Expand Down
4 changes: 2 additions & 2 deletions python/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion python/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ pythonize = "0.26"
tokio = { version = "1.48", features = ["rt-multi-thread"] }
uuid = "1.3.0"
regex = "1"
roaring = "0.10.1"
roaring = "0.11"
serde_json = "1"
serde = "1.0.197"
serde_yaml = "0.9.34"
Expand Down
129 changes: 109 additions & 20 deletions rust/lance-core/src/utils/mask.rs
Original file line number Diff line number Diff line change
Expand Up @@ -934,32 +934,42 @@ impl Extend<Self> for RowAddrTreeMap {
}
}

/// Convert a RoaringBitmap to a vector of contiguous ranges.
///
/// This is more efficient than iterating over individual bits and coalescing,
/// as it builds ranges directly in a single pass.
///
/// Each returned range is half-open: the bounds are widened to `u64` and the
/// exclusive end is one past the last set bit of the run, so a bitmap holding
/// only `5` yields `5..6`.
pub fn bitmap_to_ranges(bitmap: &RoaringBitmap) -> Vec<Range<u64>> {
// NOTE(review): this listing appears to interleave the removed and added sides
// of a diff (the `is_empty` guard and `first`/`start`/`end` bookkeeping next to
// the `next_range` loop) — confirm against the actual file before editing.
if bitmap.is_empty() {
return vec![];
}

let mut ranges = Vec::new();
let mut iter = bitmap.iter();
let first = iter.next().unwrap();
let mut start = first;
let mut end = first;
// `r` is read through `start()` / `end()` and the end is bumped by one, which
// turns what looks like an inclusive run into the half-open range we return.
while let Some(r) = iter.next_range() {
ranges.push(*r.start() as u64..(*r.end() as u64 + 1));
}
ranges
Comment on lines +940 to +943
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is so much nicer 😆

}

for val in iter {
if val == end + 1 {
end = val;
} else {
ranges.push(start as u64..(end + 1) as u64);
start = val;
end = val;
/// Build a `RoaringBitmap` containing every value covered by `ranges`.
///
/// Each range is treated as half-open (`start..end`). Its bounds are cast to
/// `u32` with `as`, so bounds above `u32::MAX` are silently truncated rather
/// than rejected.
///
/// When `sorted` is true, the average length of the first (up to) 10 ranges
/// picks the construction strategy: if that average is at most 6 the ranges are
/// flattened into `RoaringBitmap::from_sorted_iter`; otherwise, and whenever
/// `sorted` is false, each range is added with its own `insert_range` call.
///
/// # Panics
///
/// The result of `from_sorted_iter` is unwrapped, so the short-range path
/// panics if that call returns an error — presumably when the flattened values
/// are not strictly increasing (e.g. overlapping or out-of-order ranges passed
/// with `sorted = true`); confirm against the roaring documentation.
pub fn ranges_to_bitmap(ranges: &[Range<u64>], sorted: bool) -> RoaringBitmap {
if ranges.is_empty() {
return RoaringBitmap::new();
}
if sorted {
// Only a small prefix is sampled, so estimating the average length stays
// cheap no matter how many ranges are passed in.
let sample_size = ranges.len().min(10);
let avg_len: u64 = ranges
.iter()
.take(sample_size)
.map(|r| r.end - r.start)
.sum::<u64>()
/ sample_size as u64;
// from_sorted_iter appends each value in O(1) but must visit every u32.
// insert_range bulk-fills containers but does a binary search per call.
// Crossover is ~6: below that, iterating all values is cheaper.
if avg_len <= 6 {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice

return RoaringBitmap::from_sorted_iter(
ranges.iter().flat_map(|r| r.start as u32..r.end as u32),
)
.unwrap();
}
}
// NOTE(review): the next two lines look like removed-side diff residue from the
// previous `bitmap_to_ranges` body rather than part of this function — confirm.
ranges.push(start as u64..(end + 1) as u64);
ranges
// General path: works for any input order because each range is inserted on
// its own.
let mut bm = RoaringBitmap::new();
for r in ranges {
bm.insert_range(r.start as u32..r.end as u32);
}
bm
}

/// A set of stable row ids backed by a 64-bit Roaring bitmap.
Expand Down Expand Up @@ -2036,6 +2046,85 @@ mod tests {
}
}

// ============================================================================
// Tests for bitmap_to_ranges / ranges_to_bitmap
// ============================================================================

#[test]
fn test_bitmap_to_ranges_empty() {
    // An empty bitmap has no runs, so no ranges come back.
    let ranges = bitmap_to_ranges(&RoaringBitmap::new());
    assert!(ranges.is_empty());
}

#[test]
fn test_bitmap_to_ranges_single() {
    // A lone set bit becomes a one-element half-open range.
    let mut single = RoaringBitmap::new();
    single.insert(5);

    let ranges = bitmap_to_ranges(&single);
    assert_eq!(ranges, vec![5..6]);
}

#[test]
fn test_bitmap_to_ranges_contiguous() {
    // One unbroken run of bits must collapse into a single range.
    let run: RoaringBitmap = (10..20u32).collect();

    let ranges = bitmap_to_ranges(&run);
    assert_eq!(ranges, vec![10..20]);
}

#[test]
fn test_bitmap_to_ranges_multiple() {
    // Two runs plus an isolated bit: each gap must start a new range.
    let bits: RoaringBitmap = (0..3u32)
        .chain(10..15)
        .chain(std::iter::once(100))
        .collect();

    let expected = vec![0..3, 10..15, 100..101];
    assert_eq!(bitmap_to_ranges(&bits), expected);
}

#[test]
fn test_ranges_to_bitmap_empty() {
    // No ranges in, no bits out.
    assert!(ranges_to_bitmap(&[], true).is_empty());
}

#[test]
fn test_ranges_to_bitmap_sorted_short_ranges() {
    // Average length is 1, so the from_sorted_iter path is taken.
    let bitmap = ranges_to_bitmap(&[0..1, 5..6, 10..11], true);

    assert!([0u32, 5, 10].iter().all(|&value| bitmap.contains(value)));
    assert_eq!(bitmap.len(), 3);
}

#[test]
fn test_ranges_to_bitmap_sorted_long_ranges() {
    // Average length is 100, so the insert_range path is taken.
    let bitmap = ranges_to_bitmap(&[0..100, 200..300], true);

    assert_eq!(bitmap.len(), 200);
    // Both edges of the first range are present; its exclusive end is not.
    assert!([0u32, 99].iter().all(|&value| bitmap.contains(value)));
    assert!(!bitmap.contains(100));
    // Both edges of the second range are present.
    assert!([200u32, 299].iter().all(|&value| bitmap.contains(value)));
}

#[test]
fn test_ranges_to_bitmap_unsorted() {
    // Ranges given out of order with `sorted = false` must still all be inserted.
    let out_of_order = [200..300, 0..100];
    let bitmap = ranges_to_bitmap(&out_of_order, false);

    assert_eq!(bitmap.len(), 200);
    assert!([0u32, 250].iter().all(|&value| bitmap.contains(value)));
}

#[test]
fn test_bitmap_ranges_roundtrip() {
    // bitmap -> ranges -> bitmap must reproduce the starting bitmap exactly.
    let original: RoaringBitmap = (0..50u32)
        .chain(100..200)
        .chain(std::iter::once(500))
        .chain(1000..1010)
        .collect();

    let reconstructed = ranges_to_bitmap(&bitmap_to_ranges(&original), true);
    assert_eq!(original, reconstructed);
}

// ============================================================================
// Tests for RowIdSet
// ============================================================================
Expand Down
10 changes: 4 additions & 6 deletions rust/lance/src/io/exec/filtered_read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ use lance_arrow::RecordBatchExt;
use lance_core::datatypes::OnMissing;
use lance_core::utils::deletion::DeletionVector;
use lance_core::utils::futures::FinallyStreamExt;
use lance_core::utils::mask::{bitmap_to_ranges, RowAddrMask, RowAddrSelection, RowAddrTreeMap};
use lance_core::utils::mask::{
bitmap_to_ranges, ranges_to_bitmap, RowAddrMask, RowAddrSelection, RowAddrTreeMap,
};
use lance_core::utils::tokio::get_num_compute_intensive_cpus;
use lance_core::{datatypes::Projection, Error, Result};
use lance_datafusion::planner::Planner;
Expand Down Expand Up @@ -1501,11 +1503,7 @@ impl FilteredReadInternalPlan {
let mut rows = RowAddrTreeMap::new();
for (fragment_id, ranges) in &self.rows {
if !ranges.is_empty() {
let mut bitmap = RoaringBitmap::new();
for range in ranges {
bitmap.insert_range(range.start as u32..range.end as u32);
}
rows.insert_bitmap(*fragment_id, bitmap);
rows.insert_bitmap(*fragment_id, ranges_to_bitmap(ranges, true));
}
}
FilteredReadPlan {
Expand Down
Loading