Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ rand_distr = { version = "0.5.1" }
rand_xoshiro = "0.7.0"
rangemap = { version = "1.0" }
rayon = "1.10"
roaring = "0.10.1"
roaring = "0.11"
rstest = "0.23.0"
rustc_version = "0.4"
serde = { version = "^1" }
Expand Down
129 changes: 109 additions & 20 deletions rust/lance-core/src/utils/mask.rs
Original file line number Diff line number Diff line change
Expand Up @@ -934,32 +934,42 @@ impl Extend<Self> for RowAddrTreeMap {
}
}

/// Convert a RoaringBitmap to a vector of contiguous ranges.
///
/// This is more efficient than iterating over individual bits and coalescing,
/// as it builds ranges directly in a single pass.
pub fn bitmap_to_ranges(bitmap: &RoaringBitmap) -> Vec<Range<u64>> {
if bitmap.is_empty() {
return vec![];
}

let mut ranges = Vec::new();
let mut iter = bitmap.iter();
let first = iter.next().unwrap();
let mut start = first;
let mut end = first;
while let Some(r) = iter.next_range() {
ranges.push(*r.start() as u64..(*r.end() as u64 + 1));
}
ranges
Comment on lines +940 to +943

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is so much nicer 😆

}

for val in iter {
if val == end + 1 {
end = val;
} else {
ranges.push(start as u64..(end + 1) as u64);
start = val;
end = val;
pub fn ranges_to_bitmap(ranges: &[Range<u64>], sorted: bool) -> RoaringBitmap {
if ranges.is_empty() {
return RoaringBitmap::new();
}
if sorted {
let sample_size = ranges.len().min(10);
let avg_len: u64 = ranges
.iter()
.take(sample_size)
.map(|r| r.end - r.start)
.sum::<u64>()
/ sample_size as u64;
// from_sorted_iter appends each value in O(1) but must visit every u32.
// insert_range bulk-fills containers but does a binary search per call.
// Crossover is ~6: below that, iterating all values is cheaper.
if avg_len <= 6 {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice

return RoaringBitmap::from_sorted_iter(
ranges.iter().flat_map(|r| r.start as u32..r.end as u32),
)
.unwrap();
}
}
ranges.push(start as u64..(end + 1) as u64);
ranges
let mut bm = RoaringBitmap::new();
for r in ranges {
bm.insert_range(r.start as u32..r.end as u32);
}
bm
}

/// A set of stable row ids backed by a 64-bit Roaring bitmap.
Expand Down Expand Up @@ -2036,6 +2046,85 @@ mod tests {
}
}

// ============================================================================
// Tests for bitmap_to_ranges / ranges_to_bitmap
// ============================================================================

#[test]
fn test_bitmap_to_ranges_empty() {
    // A bitmap with no bits set must yield no ranges at all.
    let empty = RoaringBitmap::new();
    let ranges = bitmap_to_ranges(&empty);
    assert!(ranges.is_empty());
}

#[test]
fn test_bitmap_to_ranges_single() {
    // A lone set bit is reported as one half-open range of length 1.
    let mut bitmap = RoaringBitmap::new();
    bitmap.insert(5);

    let ranges = bitmap_to_ranges(&bitmap);
    assert_eq!(ranges, vec![5..6]);
}

#[test]
fn test_bitmap_to_ranges_contiguous() {
    // Ten consecutive bits (10 through 19) collapse into a single range.
    let bitmap: RoaringBitmap = (10u32..20).collect();

    let ranges = bitmap_to_ranges(&bitmap);
    assert_eq!(ranges, vec![10..20]);
}

#[test]
fn test_bitmap_to_ranges_multiple() {
    // Two separated runs plus one isolated bit: each gap starts a new range.
    let bitmap: RoaringBitmap = (0u32..3)
        .chain(10..15)
        .chain(std::iter::once(100))
        .collect();

    let ranges = bitmap_to_ranges(&bitmap);
    assert_eq!(ranges, vec![0..3, 10..15, 100..101]);
}

#[test]
fn test_ranges_to_bitmap_empty() {
    // No input ranges -> the early-return path hands back an empty bitmap.
    let bitmap = ranges_to_bitmap(&[], true);
    assert!(bitmap.is_empty());
}

#[test]
fn test_ranges_to_bitmap_sorted_short_ranges() {
    // Every range has length 1, so the sampled average (1) is below the
    // crossover and the `from_sorted_iter` branch is taken.
    let singles = [0u64..1, 5..6, 10..11];
    let bitmap = ranges_to_bitmap(&singles, true);

    for row in [0u32, 5, 10] {
        assert!(bitmap.contains(row));
    }
    assert_eq!(bitmap.len(), 3);
}

#[test]
fn test_ranges_to_bitmap_sorted_long_ranges() {
    // Average length is 100, well above the crossover, so the bitmap is
    // built with `insert_range`.
    let spans = [0u64..100, 200..300];
    let bitmap = ranges_to_bitmap(&spans, true);

    assert_eq!(bitmap.len(), 200);

    // Both ends of the first span are present ...
    assert!(bitmap.contains(0));
    assert!(bitmap.contains(99));
    // ... the exclusive end is not ...
    assert!(!bitmap.contains(100));
    // ... and both ends of the second span are present.
    assert!(bitmap.contains(200));
    assert!(bitmap.contains(299));
}

#[test]
fn test_ranges_to_bitmap_unsorted() {
    // Ranges are given in descending order and flagged as unsorted, which
    // skips the sorted fast path entirely.
    let spans = [200u64..300, 0..100];
    let bitmap = ranges_to_bitmap(&spans, false);

    assert_eq!(bitmap.len(), 200);
    assert!(bitmap.contains(0));
    assert!(bitmap.contains(250));
}

#[test]
fn test_bitmap_ranges_roundtrip() {
    // Mix of long runs, a short run and a single isolated bit (500).
    let mut original = RoaringBitmap::new();
    for span in [0u32..50, 100..200, 500..501, 1000..1010] {
        original.insert_range(span);
    }

    // bitmap -> ranges -> bitmap must reproduce the starting bitmap exactly.
    let reconstructed = ranges_to_bitmap(&bitmap_to_ranges(&original), true);
    assert_eq!(original, reconstructed);
}

// ============================================================================
// Tests for RowIdSet
// ============================================================================
Expand Down
10 changes: 4 additions & 6 deletions rust/lance/src/io/exec/filtered_read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ use lance_arrow::RecordBatchExt;
use lance_core::datatypes::OnMissing;
use lance_core::utils::deletion::DeletionVector;
use lance_core::utils::futures::FinallyStreamExt;
use lance_core::utils::mask::{bitmap_to_ranges, RowAddrMask, RowAddrSelection, RowAddrTreeMap};
use lance_core::utils::mask::{
bitmap_to_ranges, ranges_to_bitmap, RowAddrMask, RowAddrSelection, RowAddrTreeMap,
};
use lance_core::utils::tokio::get_num_compute_intensive_cpus;
use lance_core::{datatypes::Projection, Error, Result};
use lance_datafusion::planner::Planner;
Expand Down Expand Up @@ -1501,11 +1503,7 @@ impl FilteredReadInternalPlan {
let mut rows = RowAddrTreeMap::new();
for (fragment_id, ranges) in &self.rows {
if !ranges.is_empty() {
let mut bitmap = RoaringBitmap::new();
for range in ranges {
bitmap.insert_range(range.start as u32..range.end as u32);
}
rows.insert_bitmap(*fragment_id, bitmap);
rows.insert_bitmap(*fragment_id, ranges_to_bitmap(ranges, true));
}
}
FilteredReadPlan {
Expand Down
Loading