Skip to content

Commit

Permalink
Merge pull request #125 from Dr-Emann/roaring64
Browse files Browse the repository at this point in the history
Bindings for CRoaring 3.0, including 64-bit bitmaps
  • Loading branch information
saulius authored May 13, 2024
2 parents a61067e + 1db901c commit 457e7c9
Show file tree
Hide file tree
Showing 31 changed files with 16,706 additions and 6,867 deletions.
519 changes: 479 additions & 40 deletions croaring-sys/CRoaring/bindgen_bundled_version.rs

Large diffs are not rendered by default.

16,975 changes: 10,843 additions & 6,132 deletions croaring-sys/CRoaring/roaring.c

Large diffs are not rendered by default.

1,556 changes: 1,222 additions & 334 deletions croaring-sys/CRoaring/roaring.h

Large diffs are not rendered by default.

613 changes: 333 additions & 280 deletions croaring-sys/CRoaring/roaring.hh

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion croaring-sys/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "croaring-sys"
version = "1.1.0"
version = "2.0.0"
edition = "2021"
authors = ["croaring-rs developers"]
license = "Apache-2.0"
Expand Down
1 change: 0 additions & 1 deletion croaring-sys/build.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use std::env;
use std::path::PathBuf;

fn main() {
println!("cargo:rerun-if-changed=CRoaring");
Expand Down
4 changes: 2 additions & 2 deletions croaring/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "croaring"
version = "1.0.1"
version = "1.1.0"
edition = "2021"
authors = ["croaring-rs developers"]
license = "Apache-2.0"
Expand All @@ -22,7 +22,7 @@ roaring = "0.10"
criterion = { version = "0.5", features = ["html_reports"] }

[dependencies]
ffi = { package = "croaring-sys", path = "../croaring-sys", version = "1.1.0" }
ffi = { package = "croaring-sys", path = "../croaring-sys", version = "2.0.0" }
byteorder = "1.4.3"

[[bench]]
Expand Down
149 changes: 134 additions & 15 deletions croaring/benches/benches.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use criterion::{
black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput,
};
use std::ops::ControlFlow;

use croaring::{Bitmap, Portable};
use croaring::{Bitmap, Bitmap64, Portable};

fn new(c: &mut Criterion) {
c.bench_function("new", |b| b.iter(Bitmap::new));
Expand Down Expand Up @@ -130,10 +131,25 @@ fn flip(c: &mut Criterion) {
}

fn to_vec(c: &mut Criterion) {
c.bench_function("to_vec", |b| {
let bitmap = Bitmap::of(&[1, 2, 3]);
const N: usize = 100_000;
let bitmap: Bitmap = random_iter().take(N).collect();
let mut g = c.benchmark_group("collect");
g.bench_function("to_vec", |b| {
b.iter(|| bitmap.to_vec());
});
g.bench_function("via_iter", |b| {
b.iter(|| bitmap.iter().collect::<Vec<_>>());
});
g.bench_function("foreach", |b| {
b.iter(|| {
let mut vec = Vec::with_capacity(bitmap.cardinality() as usize);
bitmap.for_each(|item| -> ControlFlow<()> {
vec.push(item);
ControlFlow::Continue(())
});
vec
});
});
}

fn get_serialized_size_in_bytes(c: &mut Criterion) {
Expand Down Expand Up @@ -213,24 +229,35 @@ fn bulk_new(c: &mut Criterion) {
group.finish();
}

fn random_iter(c: &mut Criterion) {
/// Deterministic, endless source of pseudo-random `u32` values for the benchmarks.
///
/// Every step multiplies the current state by a fixed constant (wrapping on
/// overflow) and reduces the product modulo `2^31 - 1`; the reduced value
/// becomes both the new state and the item that is yielded.
#[derive(Clone)]
struct RandomIter {
    /// Current generator state; also the most recently yielded value.
    x: u32,
}

impl Iterator for RandomIter {
    type Item = u32;

    /// Advances the generator by one step. Never returns `None`.
    fn next(&mut self) -> Option<Self::Item> {
        const MULTIPLIER: u32 = 742938285;
        const MODULUS: u32 = (1 << 31) - 1;

        // Wrapping multiplication keeps debug builds from panicking on overflow.
        let product = self.x.wrapping_mul(MULTIPLIER);
        self.x = product % MODULUS;
        Some(self.x)
    }
}

/// Creates a [`RandomIter`] starting from a fixed seed, so every caller
/// observes the same sequence.
fn random_iter() -> RandomIter {
    const SEED: u32 = 20170705;
    RandomIter { x: SEED }
}

fn create_random(c: &mut Criterion) {
const N: u32 = 5_000;
// Clamp values so we get some re-use of containers
const MAX: u32 = 8 * (u16::MAX as u32 + 1);

let mut group = c.benchmark_group("random_iter");
group.throughput(Throughput::Elements(N.into()));

let rand_iter = {
const MULTIPLIER: u32 = 742938285;
const MODULUS: u32 = (1 << 31) - 1;
// Super simple LCG iterator
let mut z = 20170705; // seed
std::iter::from_fn(move || {
z = (MULTIPLIER * z) % MODULUS;
Some(z % MAX)
})
};
let rand_iter = random_iter();

group.bench_function("random_adds", |b| {
b.iter(|| {
Expand All @@ -252,6 +279,96 @@ fn random_iter(c: &mut Criterion) {
});
}

/// Benchmarks several ways of copying every value of a `Bitmap64` into a `Vec`.
///
/// The bitmap holds the range `0..N`. Each strategy is registered under the
/// `collect_bitmap64_to_vec` group, with throughput reported in elements.
fn collect_bitmap64_to_vec(c: &mut Criterion) {
    const N: u64 = 1_000_000;

    let bitmap = Bitmap64::from_range(0..N);

    let mut group = c.benchmark_group("collect_bitmap64_to_vec");
    group.throughput(Throughput::Elements(N));

    // Built-in bulk conversion.
    group.bench_function("to_vec", |bencher| {
        bencher.iter_batched(|| (), |()| bitmap.to_vec(), BatchSize::LargeInput)
    });

    // Push each value from the `for_each` callback into a pre-sized vector.
    group.bench_function("foreach", |bencher| {
        let collect = |()| {
            let mut values = Vec::with_capacity(bitmap.cardinality() as usize);
            bitmap.for_each(|value| -> ControlFlow<()> {
                values.push(value);
                ControlFlow::Continue(())
            });
            values
        };
        bencher.iter_batched(|| (), collect, BatchSize::LargeInput)
    });

    // Extend a pre-sized vector from the bitmap's iterator.
    group.bench_function("iter", |bencher| {
        let collect = |()| {
            let mut values = Vec::with_capacity(bitmap.cardinality() as usize);
            values.extend(bitmap.iter());
            values
        };
        bencher.iter_batched(|| (), collect, BatchSize::LargeInput)
    });

    // Fill a zero-initialised buffer with a single `read_many` call on a cursor.
    group.bench_function("iter_many", |bencher| {
        let collect = |()| {
            let mut values = vec![0; bitmap.cardinality() as usize];
            let mut cursor = bitmap.cursor();
            let filled = cursor.read_many(&mut values);
            assert_eq!(filled, values.len());
            values
        };
        bencher.iter_batched(|| (), collect, BatchSize::LargeInput)
    });

    group.finish();
}

/// Benchmarks walking a `Bitmap64` holding `0..N` until `END_ITER` is seen,
/// using the iterator, the cursor, and the `for_each` callback in turn.
fn iterate_bitmap64(c: &mut Criterion) {
    const N: u64 = 1_000_000;
    const END_ITER: u64 = N - 100;

    let bitmap = Bitmap64::from_range(0..N);

    let mut group = c.benchmark_group("bitmap64_iterate");
    group.throughput(Throughput::Elements(N));

    // Plain `for` loop over the iterator, leaving the loop at the sentinel.
    group.bench_function("iter", |bencher| {
        bencher.iter(|| {
            for value in bitmap.iter() {
                if value == END_ITER {
                    break;
                }
            }
        })
    });

    // Manual cursor stepping; stops at the sentinel or when the cursor runs out.
    group.bench_function("cursor", |bencher| {
        bencher.iter(|| {
            let mut cursor = bitmap.cursor();
            loop {
                match cursor.next() {
                    Some(value) if value != END_ITER => continue,
                    _ => break,
                }
            }
        })
    });

    // Callback-driven iteration, breaking out at the sentinel.
    group.bench_function("for_each", |bencher| {
        bencher.iter(|| {
            bitmap.for_each(|value| -> ControlFlow<()> {
                if value == END_ITER {
                    ControlFlow::Break(())
                } else {
                    ControlFlow::Continue(())
                }
            })
        })
    });

    group.finish();
}

criterion_group!(
benches,
new,
Expand All @@ -269,6 +386,8 @@ criterion_group!(
serialize,
deserialize,
bulk_new,
random_iter,
create_random,
collect_bitmap64_to_vec,
iterate_bitmap64,
);
criterion_main!(benches);
81 changes: 77 additions & 4 deletions croaring/src/bitmap/imp.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use crate::callback::CallbackWrapper;
use crate::Bitset;
use ffi::roaring_bitmap_t;
use std::convert::TryInto;
use std::ffi::{c_void, CStr};
use std::ops::{Bound, RangeBounds};
use std::{mem, ptr};
use std::ops::{Bound, ControlFlow, RangeBounds};
use std::{mem, panic, ptr};

use super::serialization::{Deserializer, Serializer};
use super::{Bitmap, Statistics};
Expand All @@ -21,7 +21,7 @@ impl Bitmap {
// (it can be moved safely), and can be freed with `free`, without freeing the underlying
// containers and auxiliary data. Ensure this is still valid every time we update
// the version of croaring.
const _: () = assert!(ffi::ROARING_VERSION_MAJOR == 2 && ffi::ROARING_VERSION_MINOR == 0);
const _: () = assert!(ffi::ROARING_VERSION_MAJOR == 3 && ffi::ROARING_VERSION_MINOR == 0);
ffi::roaring_free(p.cast::<c_void>());
result
}
Expand Down Expand Up @@ -279,6 +279,29 @@ impl Bitmap {
unsafe { ffi::roaring_bitmap_remove_checked(&mut self.bitmap, element) }
}

/// Remove many values from the bitmap
///
/// This should be faster than calling `remove` multiple times.
///
/// In order to exploit this optimization, the caller should attempt to keep values with the same high 16 bits of
/// the value as consecutive elements in `elements`
///
/// # Examples
///
/// ```
/// use croaring::Bitmap;
/// let mut bitmap = Bitmap::of(&[1, 2, 3, 4, 5, 6, 7, 8, 9]);
/// bitmap.remove_many(&[1, 2, 3, 4, 5, 6, 7, 8]);
/// assert_eq!(bitmap.to_vec(), vec![9]);
/// ```
#[inline]
#[doc(alias = "roaring_bitmap_remove_many")]
pub fn remove_many(&mut self, elements: &[u32]) {
    // SAFETY: the pointer/length pair describes the `elements` slice, which
    // stays borrowed for the whole call, and `self.bitmap` is exclusively
    // borrowed through `&mut self`.
    unsafe {
        ffi::roaring_bitmap_remove_many(&mut self.bitmap, elements.len(), elements.as_ptr())
    }
}

/// Contains returns true if the integer element is contained in the bitmap
///
/// # Examples
Expand Down Expand Up @@ -721,6 +744,49 @@ impl Bitmap {
unsafe { ffi::roaring_bitmap_flip_inplace(&mut self.bitmap, start, end) }
}

/// Iterate over the values in the bitmap in sorted order
///
/// If `f` returns `Break`, iteration will stop and that `Break` value will be returned.
/// Otherwise, iteration continues. If `f` never returns `Break`,
/// `ControlFlow::Continue(())` is returned after all values are visited.
///
/// # Panics
///
/// If the callback wrapper reports a captured panic (presumably one raised
/// by `f`), that panic is resumed after the underlying iteration returns.
///
/// # Examples
///
/// ```
/// use croaring::Bitmap;
/// use std::ops::ControlFlow;
///
/// let bitmap = Bitmap::of(&[1, 2, 3, 14, 20, 21, 100]);
/// let mut even_nums_under_50 = vec![];
///
/// let first_over_50 = bitmap.for_each(|value| {
///     if value > 50 {
///         return ControlFlow::Break(value);
///     }
///     if value % 2 == 0 {
///         even_nums_under_50.push(value);
///     }
///     ControlFlow::Continue(())
/// });
///
/// assert_eq!(even_nums_under_50, vec![2, 14, 20]);
/// assert_eq!(first_over_50, ControlFlow::Break(100));
/// ```
#[inline]
#[doc(alias = "roaring_iterate")]
pub fn for_each<F, O>(&self, f: F) -> ControlFlow<O>
where
    F: FnMut(u32) -> ControlFlow<O>,
{
    let mut callback_wrapper = CallbackWrapper::new(f);
    let (callback, context) = callback_wrapper.callback_and_ctx();
    // SAFETY: `callback` and `context` were produced together by
    // `callback_wrapper`, which is a local that outlives this call, and
    // `self.bitmap` is borrowed for the duration of the call.
    unsafe {
        ffi::roaring_iterate(&self.bitmap, Some(callback), context);
    }
    match callback_wrapper.result() {
        Ok(cf) => cf,
        // Re-raise the stored panic payload now that the FFI call has returned.
        Err(e) => panic::resume_unwind(e),
    }
}

/// Returns a vector containing all of the integers stored in the Bitmap
/// in sorted order.
///
Expand Down Expand Up @@ -922,6 +988,13 @@ impl Bitmap {
/// // Exclusive ranges still step from the start, but do not include it
/// let bitmap = Bitmap::from_range_with_step((Bound::Excluded(10), Bound::Included(30)), 10);
/// assert_eq!(bitmap.to_vec(), [20, 30]);
///
/// // Ranges including max value
/// let bitmap = Bitmap::from_range_with_step((u32::MAX - 1)..=u32::MAX, 1);
/// assert_eq!(bitmap.to_vec(), vec![u32::MAX - 1, u32::MAX]);
///
/// let bitmap = Bitmap::from_range_with_step((u32::MAX - 1)..=u32::MAX, 3);
/// assert_eq!(bitmap.to_vec(), vec![u32::MAX - 1]);
/// ```
#[inline]
#[doc(alias = "roaring_bitmap_from_range")]
Expand Down
Loading

0 comments on commit 457e7c9

Please sign in to comment.