Skip to content

Commit

Permalink
Add for_each to Bitmap too
Browse files Browse the repository at this point in the history
  • Loading branch information
Dr-Emann committed Apr 7, 2024
1 parent 79dcb57 commit ea093d4
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 53 deletions.
54 changes: 40 additions & 14 deletions croaring/benches/benches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,25 @@ fn flip(c: &mut Criterion) {
}

/// Benchmarks different ways of collecting the values of a `Bitmap` into a `Vec`.
fn to_vec(c: &mut Criterion) {
c.bench_function("to_vec", |b| {
let bitmap = Bitmap::of(&[1, 2, 3]);
// The input is collected from the first `N` values produced by `random_iter()`.
const N: usize = 100_000;
let bitmap: Bitmap = random_iter().take(N).collect();
// The three variants below are registered under the "collect" benchmark group.
let mut g = c.benchmark_group("collect");
// Variant 1: the dedicated `to_vec` method.
g.bench_function("to_vec", |b| {
b.iter(|| bitmap.to_vec());
});
// Variant 2: collecting from the bitmap's iterator.
g.bench_function("via_iter", |b| {
b.iter(|| bitmap.iter().collect::<Vec<_>>());
});
// Variant 3: pushing each value from the `for_each` callback.
g.bench_function("foreach", |b| {
b.iter(|| {
// Reserve room for every value up front so the pushes below do not reallocate.
let mut vec = Vec::with_capacity(bitmap.cardinality() as usize);
bitmap.for_each(|item| -> ControlFlow<()> {
vec.push(item);
// Never break: every value must be visited.
ControlFlow::Continue(())
});
vec
});
});
}

fn get_serialized_size_in_bytes(c: &mut Criterion) {
Expand Down Expand Up @@ -214,24 +229,35 @@ fn bulk_new(c: &mut Criterion) {
group.finish();
}

fn random_iter(c: &mut Criterion) {
/// Deterministic pseudo-random `u32` source used to build benchmark inputs.
///
/// Every generator created by [`random_iter`] starts from the same seed, so all
/// of them yield the same sequence.
#[derive(Clone)]
struct RandomIter {
    /// Last value produced (the seed before the first call to `next`).
    x: u32,
}

impl RandomIter {
    /// Initial state used by [`random_iter`].
    const SEED: u32 = 20170705;
    /// Factor applied to the state on every step.
    const MULTIPLIER: u32 = 742938285;
    /// The wrapped product is reduced modulo this value (2^31 - 1).
    const MODULUS: u32 = (1 << 31) - 1;
}

impl Iterator for RandomIter {
    type Item = u32;

    /// Advances the state and returns it. This iterator never yields `None`.
    fn next(&mut self) -> Option<Self::Item> {
        // The multiplication wraps at 32 bits before the modulus is applied.
        let product = self.x.wrapping_mul(Self::MULTIPLIER);
        let reduced = product % Self::MODULUS;
        self.x = reduced;
        Some(reduced)
    }
}

/// Creates a generator positioned at the fixed seed.
fn random_iter() -> RandomIter {
    RandomIter {
        x: RandomIter::SEED,
    }
}

fn create_random(c: &mut Criterion) {
const N: u32 = 5_000;
// Clamp values so we get some re-use of containers
const MAX: u32 = 8 * (u16::MAX as u32 + 1);

let mut group = c.benchmark_group("random_iter");
group.throughput(Throughput::Elements(N.into()));

let rand_iter = {
const MULTIPLIER: u32 = 742938285;
const MODULUS: u32 = (1 << 31) - 1;
// Super simple LCG iterator
let mut z = 20170705; // seed
std::iter::from_fn(move || {
z = (MULTIPLIER.wrapping_mul(z)) % MODULUS;
Some(z % MAX)
})
};
let rand_iter = random_iter();

group.bench_function("random_adds", |b| {
b.iter(|| {
Expand Down Expand Up @@ -360,7 +386,7 @@ criterion_group!(
serialize,
deserialize,
bulk_new,
random_iter,
create_random,
collect_bitmap64_to_vec,
iterate_bitmap64,
);
Expand Down
48 changes: 46 additions & 2 deletions croaring/src/bitmap/imp.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use crate::callback::CallbackWrapper;
use crate::Bitset;
use ffi::roaring_bitmap_t;
use std::ffi::{c_void, CStr};
use std::ops::{Bound, RangeBounds};
use std::{mem, ptr};
use std::ops::{Bound, ControlFlow, RangeBounds};
use std::{mem, panic, ptr};

use super::serialization::{Deserializer, Serializer};
use super::{Bitmap, Statistics};
Expand Down Expand Up @@ -743,6 +744,49 @@ impl Bitmap {
unsafe { ffi::roaring_bitmap_flip_inplace(&mut self.bitmap, start, end) }
}

/// Visit the values in the bitmap in sorted order, stopping early if requested
///
/// `f` is called once per value. If `f` returns `ControlFlow::Break(x)`, iteration stops
/// and `ControlFlow::Break(x)` is returned. If `f` returns `ControlFlow::Continue(())` for
/// every value, `ControlFlow::Continue(())` is returned after all values are visited.
///
/// # Panics
///
/// If `f` panics, the panic is caught inside the callback, iteration stops, and the
/// panic is resumed from this function.
///
/// # Examples
///
/// ```
/// use croaring::Bitmap;
/// use std::ops::ControlFlow;
///
/// let bitmap = Bitmap::of(&[1, 2, 3, 14, 20, 21, 100]);
/// let mut even_nums_under_50 = vec![];
///
/// let first_over_50 = bitmap.for_each(|value| {
///     if value > 50 {
///         return ControlFlow::Break(value);
///     }
///     if value % 2 == 0 {
///         even_nums_under_50.push(value);
///     }
///     ControlFlow::Continue(())
/// });
///
/// assert_eq!(even_nums_under_50, vec![2, 14, 20]);
/// assert_eq!(first_over_50, ControlFlow::Break(100));
/// ```
#[inline]
pub fn for_each<F, O>(&self, f: F) -> ControlFlow<O>
where
    F: FnMut(u32) -> ControlFlow<O>,
{
    let mut wrapper = CallbackWrapper::new(f);
    let (on_value, state) = wrapper.callback_and_ctx();
    // SAFETY: `state` points at `wrapper`, which stays alive on this stack frame until
    // after the call returns, and `on_value` comes from the same `callback_and_ctx` call,
    // so it casts the pointer back to the matching `CallbackWrapper` type.
    // NOTE(review): relies on `roaring_iterate` not keeping the pointer after it
    // returns - confirm against the CRoaring documentation.
    unsafe {
        ffi::roaring_iterate(&self.bitmap, Some(on_value), state);
    }
    // A panic captured inside the callback is re-raised here, outside the C frames.
    wrapper
        .result()
        .unwrap_or_else(|payload| panic::resume_unwind(payload))
}

/// Returns a vector containing all of the integers stored in the Bitmap
/// in sorted order.
///
Expand Down
43 changes: 6 additions & 37 deletions croaring/src/bitmap64/imp.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use crate::bitmap64::Bitmap64;
use crate::bitmap64::{Deserializer, Serializer};
use std::any::Any;
use crate::callback::CallbackWrapper;
use std::collections::Bound;
use std::ffi::CStr;
use std::mem::MaybeUninit;
use std::ops::{ControlFlow, RangeBounds};
use std::panic::{self, AssertUnwindSafe};
use std::panic;
use std::ptr;
use std::ptr::NonNull;

Expand Down Expand Up @@ -910,43 +910,12 @@ impl Bitmap64 {
where
F: FnMut(u64) -> ControlFlow<O>,
{
struct State<F, O> {
f: F,
result: Result<ControlFlow<O>, Box<dyn Any + Send + 'static>>,
}

unsafe extern "C" fn callback<F, O>(value: u64, arg: *mut std::ffi::c_void) -> bool
where
F: FnMut(u64) -> ControlFlow<O>,
{
let state: &mut State<F, O> = unsafe { &mut *arg.cast::<State<F, O>>() };
let mut f = AssertUnwindSafe(&mut state.f);
let result = panic::catch_unwind(move || f(value));
match result {
Ok(ControlFlow::Continue(())) => true,
Ok(ControlFlow::Break(val)) => {
state.result = Ok(ControlFlow::Break(val));
false
}
Err(e) => {
state.result = Err(e);
false
}
}
}

let mut state = State {
f,
result: Ok(ControlFlow::Continue(())),
};
let mut callback_wrapper = CallbackWrapper::new(f);
let (callback, context) = callback_wrapper.callback_and_ctx();
unsafe {
ffi::roaring64_bitmap_iterate(
self.raw.as_ptr(),
Some(callback::<F, O>),
ptr::addr_of_mut!(state).cast(),
);
ffi::roaring64_bitmap_iterate(self.raw.as_ptr(), Some(callback), context);
}
match state.result {
match callback_wrapper.result() {
Ok(cf) => cf,
Err(e) => panic::resume_unwind(e),
}
Expand Down
56 changes: 56 additions & 0 deletions croaring/src/callback.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
use std::any::Any;
use std::ops::ControlFlow;
use std::panic::AssertUnwindSafe;
use std::{panic, ptr};

/// Adapter that lets a Rust closure be driven through a C-style
/// `(value, void *context) -> bool` iteration callback.
///
/// Usage: build it with [`CallbackWrapper::new`], hand the pair returned by
/// [`CallbackWrapper::callback_and_ctx`] to the C iteration function, then call
/// [`CallbackWrapper::result`] to learn how the iteration ended.
pub struct CallbackWrapper<F, O> {
    /// The user closure invoked for every value.
    f: F,
    /// How the iteration ended so far: `Ok(Continue(()))` while the closure has neither
    /// broken nor panicked, `Ok(Break(_))` after a break, `Err(payload)` after a panic.
    result: Result<ControlFlow<O>, Box<dyn Any + Send + 'static>>,
}

impl<F, O> CallbackWrapper<F, O> {
    /// Wraps `f`; the recorded outcome starts as `Ok(ControlFlow::Continue(()))`.
    pub fn new(f: F) -> Self {
        let result = Ok(ControlFlow::Continue(()));
        Self { f, result }
    }

    /// The `extern "C"` trampoline handed to the C side.
    ///
    /// Returns `true` when the closure asked to continue. Returns `false` after
    /// recording a break value or a panic payload in `result`. The panic is caught
    /// here so it does not unwind out of the `extern "C"` function.
    ///
    /// # Safety
    ///
    /// `arg` must be the context pointer returned by `callback_and_ctx` on a
    /// `CallbackWrapper<F, O>` that is still alive and not otherwise accessed during
    /// the call.
    unsafe extern "C" fn raw_callback<I>(value: I, arg: *mut std::ffi::c_void) -> bool
    where
        I: panic::UnwindSafe,
        F: FnMut(I) -> ControlFlow<O>,
    {
        // SAFETY: guaranteed by this function's contract (see `# Safety`).
        let this: &mut Self = unsafe { &mut *arg.cast::<Self>() };
        let outcome = {
            // `&mut F` is not `UnwindSafe` on its own; assert it so the call can be
            // run under `catch_unwind`.
            let mut call = AssertUnwindSafe(&mut this.f);
            panic::catch_unwind(move || call(value))
        };
        let keep_going = matches!(outcome, Ok(ControlFlow::Continue(())));
        if !keep_going {
            // Either `Ok(Break(_))` or `Err(panic payload)`: remember it for `result()`.
            this.result = outcome;
        }
        keep_going
    }

    /// Returns the C-compatible callback together with the context pointer that must
    /// be passed alongside it.
    ///
    /// The context pointer refers to `self`, so `self` must stay in place and alive
    /// for as long as the callback may be invoked with it.
    pub fn callback_and_ctx<I>(
        &mut self,
    ) -> (
        unsafe extern "C" fn(I, *mut std::ffi::c_void) -> bool,
        *mut std::ffi::c_void,
    )
    where
        I: panic::UnwindSafe,
        F: FnMut(I) -> ControlFlow<O>,
    {
        let callback: unsafe extern "C" fn(I, *mut std::ffi::c_void) -> bool =
            Self::raw_callback::<I>;
        let context = ptr::addr_of_mut!(*self).cast::<std::ffi::c_void>();
        (callback, context)
    }

    /// Consumes the wrapper and returns the recorded outcome: the closure's final
    /// `ControlFlow`, or the payload of a panic raised by the closure.
    pub fn result(self) -> Result<ControlFlow<O>, Box<dyn Any + Send + 'static>> {
        let Self { result, .. } = self;
        result
    }
}
1 change: 1 addition & 0 deletions croaring/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ pub mod bitmap64;
pub mod bitset;
pub mod treemap;

mod callback;
mod serialization;

pub use serialization::*;
Expand Down

0 comments on commit ea093d4

Please sign in to comment.