From 9d9b8074de7892ab4e1418e6d4db14eeea3b0819 Mon Sep 17 00:00:00 2001 From: saik0 Date: Tue, 4 Jan 2022 01:54:40 -0800 Subject: [PATCH 01/11] fix formatting --- src/bitmap/store.rs | 1022 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1022 insertions(+) create mode 100644 src/bitmap/store.rs diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs new file mode 100644 index 000000000..11f8cd5cd --- /dev/null +++ b/src/bitmap/store.rs @@ -0,0 +1,1022 @@ +use std::borrow::Borrow; +use std::cmp::Ordering::{Equal, Greater, Less}; +use std::mem; +use std::ops::{ + BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, RangeInclusive, Sub, SubAssign, +}; +use std::{slice, vec}; + +use self::Store::{Array, Bitmap}; + +const BITMAP_LENGTH: usize = 1024; + +pub enum Store { + Array(Vec), + Bitmap(Box<[u64; BITMAP_LENGTH]>), +} + +pub enum Iter<'a> { + Array(slice::Iter<'a, u16>), + Vec(vec::IntoIter), + BitmapBorrowed(BitmapIter<&'a [u64; BITMAP_LENGTH]>), + BitmapOwned(BitmapIter>), +} + +pub struct BitmapIter> { + key: usize, + value: u64, + bits: B, +} + +impl Store { + pub fn insert(&mut self, index: u16) -> bool { + match *self { + Array(ref mut vec) => { + vec.binary_search(&index).map_err(|loc| vec.insert(loc, index)).is_err() + } + Bitmap(ref mut bits) => { + let (key, bit) = (key(index), bit(index)); + if bits[key] & (1 << bit) == 0 { + bits[key] |= 1 << bit; + true + } else { + false + } + } + } + } + + pub fn insert_range(&mut self, range: RangeInclusive) -> u64 { + // A Range is defined as being of size 0 if start >= end. + if range.is_empty() { + return 0; + } + + let start = *range.start(); + let end = *range.end(); + + match *self { + Array(ref mut vec) => { + // Figure out the starting/ending position in the vec. + let pos_start = vec.binary_search(&start).unwrap_or_else(|x| x); + let pos_end = vec + .binary_search_by(|p| { + // binary search the right most position when equals + match p.cmp(&end) { + Greater => Greater, + _ => Less, + } + }) + .unwrap_or_else(|x| x); + + // Overwrite the range in the middle - there's no need to take + // into account any existing elements between start and end, as + // they're all being added to the set. + let dropped = vec.splice(pos_start..pos_end, start..=end); + + end as u64 - start as u64 + 1 - dropped.len() as u64 + } + Bitmap(ref mut bits) => { + let (start_key, start_bit) = (key(start), bit(start)); + let (end_key, end_bit) = (key(end), bit(end)); + + // MSB > start_bit > end_bit > LSB + if start_key == end_key { + // Set the end_bit -> LSB to 1 + let mut mask = if end_bit == 63 { u64::MAX } else { (1 << (end_bit + 1)) - 1 }; + // Set MSB -> start_bit to 1 + mask &= !((1 << start_bit) - 1); + + let existed = (bits[start_key] & mask).count_ones(); + bits[start_key] |= mask; + + return u64::from(end - start + 1) - u64::from(existed); + } + + // Mask off the left-most bits (MSB -> start_bit) + let mask = !((1 << start_bit) - 1); + + // Keep track of the number of bits that were already set to + // return how many new bits were set later + let mut existed = (bits[start_key] & mask).count_ones(); + + bits[start_key] |= mask; + + // Set the full blocks, tracking the number of set bits + for i in (start_key + 1)..end_key { + existed += bits[i].count_ones(); + bits[i] = u64::MAX; + } + + // Set the end bits in the last chunk (MSB -> end_bit) + let mask = if end_bit == 63 { u64::MAX } else { (1 << (end_bit + 1)) - 1 }; + existed += (bits[end_key] & mask).count_ones(); + bits[end_key] |= mask; + + end as u64 - start as u64 + 1 - existed as u64 + } + } + } + + /// Push `index` at the end of the store only if `index` is the new max. + /// + /// Returns whether `index` was effectively pushed. + pub fn push(&mut self, index: u16) -> bool { + if self.max().map_or(true, |max| max < index) { + match self { + Array(vec) => vec.push(index), + Bitmap(bits) => { + let (key, bit) = (key(index), bit(index)); + bits[key] |= 1 << bit; + } + } + true + } else { + false + } + } + + pub fn remove(&mut self, index: u16) -> bool { + match *self { + Array(ref mut vec) => vec.binary_search(&index).map(|loc| vec.remove(loc)).is_ok(), + Bitmap(ref mut bits) => { + let (key, bit) = (key(index), bit(index)); + if bits[key] & (1 << bit) != 0 { + bits[key] &= !(1 << bit); + true + } else { + false + } + } + } + } + + pub fn remove_range(&mut self, range: RangeInclusive) -> u64 { + if range.is_empty() { + return 0; + } + + let start = *range.start(); + let end = *range.end(); + + match *self { + Array(ref mut vec) => { + // Figure out the starting/ending position in the vec. + let pos_start = vec.binary_search(&start).unwrap_or_else(|x| x); + let pos_end = vec + .binary_search_by(|p| { + // binary search the right most position when equals + match p.cmp(&end) { + Greater => Greater, + _ => Less, + } + }) + .unwrap_or_else(|x| x); + vec.drain(pos_start..pos_end); + (pos_end - pos_start) as u64 + } + Bitmap(ref mut bits) => { + let (start_key, start_bit) = (key(start), bit(start)); + let (end_key, end_bit) = (key(end), bit(end)); + + if start_key == end_key { + let mask = (u64::MAX << start_bit) & (u64::MAX >> (63 - end_bit)); + let removed = (bits[start_key] & mask).count_ones(); + bits[start_key] &= !mask; + return u64::from(removed); + } + + let mut removed = 0; + // start key bits + removed += (bits[start_key] & (u64::MAX << start_bit)).count_ones(); + bits[start_key] &= !(u64::MAX << start_bit); + // counts bits in between + for word in &bits[start_key + 1..end_key] { + removed += word.count_ones(); + // When popcnt is available zeroing in this loop is faster, + // but we opt to perform reasonably on most cpus by zeroing after. + // By doing that the compiler uses simd to count ones. + } + // do zeroing outside the loop + for word in &mut bits[start_key + 1..end_key] { + *word = 0; + } + // end key bits + removed += (bits[end_key] & (u64::MAX >> (63 - end_bit))).count_ones(); + bits[end_key] &= !(u64::MAX >> (63 - end_bit)); + u64::from(removed) + } + } + } + + pub fn contains(&self, index: u16) -> bool { + match *self { + Array(ref vec) => vec.binary_search(&index).is_ok(), + Bitmap(ref bits) => bits[key(index)] & (1 << bit(index)) != 0, + } + } + + pub fn is_disjoint<'a>(&'a self, other: &'a Self) -> bool { + match (self, other) { + (&Array(ref vec1), &Array(ref vec2)) => { + let (mut i1, mut i2) = (vec1.iter(), vec2.iter()); + let (mut value1, mut value2) = (i1.next(), i2.next()); + loop { + match value1.and_then(|v1| value2.map(|v2| v1.cmp(v2))) { + None => return true, + Some(Equal) => return false, + Some(Less) => value1 = i1.next(), + Some(Greater) => value2 = i2.next(), + } + } + } + (&Bitmap(ref bits1), &Bitmap(ref bits2)) => { + bits1.iter().zip(bits2.iter()).all(|(&i1, &i2)| (i1 & i2) == 0) + } + (&Array(ref vec), store @ &Bitmap(..)) | (store @ &Bitmap(..), &Array(ref vec)) => { + vec.iter().all(|&i| !store.contains(i)) + } + } + } + + pub fn is_subset(&self, other: &Self) -> bool { + match (self, other) { + (&Array(ref vec1), &Array(ref vec2)) => { + let (mut i1, mut i2) = (vec1.iter(), vec2.iter()); + let (mut value1, mut value2) = (i1.next(), i2.next()); + loop { + match (value1, value2) { + (None, _) => return true, + (Some(..), None) => return false, + (Some(v1), Some(v2)) => match v1.cmp(v2) { + Equal => { + value1 = i1.next(); + value2 = i2.next(); + } + Less => return false, + Greater => value2 = i2.next(), + }, + } + } + } + (&Bitmap(ref bits1), &Bitmap(ref bits2)) => { + bits1.iter().zip(bits2.iter()).all(|(&i1, &i2)| (i1 & i2) == i1) + } + (&Array(ref vec), store @ &Bitmap(..)) => vec.iter().all(|&i| store.contains(i)), + (&Bitmap(..), &Array(..)) => false, + } + } + + pub fn to_array(&self) -> Self { + match *self { + Array(..) => panic!("Cannot convert array to array"), + Bitmap(ref bits) => { + let mut vec = Vec::new(); + for (index, mut bit) in bits.iter().cloned().enumerate() { + while bit != 0 { + vec.push((u64::trailing_zeros(bit) + (64 * index as u32)) as u16); + bit &= bit - 1; + } + } + Array(vec) + } + } + } + + pub fn to_bitmap(&self) -> Self { + match *self { + Array(ref vec) => { + let mut bits = Box::new([0; BITMAP_LENGTH]); + for &index in vec { + bits[key(index)] |= 1 << bit(index); + } + Bitmap(bits) + } + Bitmap(..) => panic!("Cannot convert bitmap to bitmap"), + } + } + + pub fn len(&self) -> u64 { + match *self { + Array(ref vec) => vec.len() as u64, + Bitmap(ref bits) => bits.iter().map(|bit| u64::from(bit.count_ones())).sum(), + } + } + + pub fn min(&self) -> Option { + match *self { + Array(ref vec) => vec.first().copied(), + Bitmap(ref bits) => bits + .iter() + .enumerate() + .find(|&(_, &bit)| bit != 0) + .map(|(index, bit)| (index * 64 + (bit.trailing_zeros() as usize)) as u16), + } + } + + pub fn max(&self) -> Option { + match *self { + Array(ref vec) => vec.last().copied(), + Bitmap(ref bits) => bits + .iter() + .enumerate() + .rev() + .find(|&(_, &bit)| bit != 0) + .map(|(index, bit)| (index * 64 + (63 - bit.leading_zeros() as usize)) as u16), + } + } +} + +impl BitOr<&Store> for &Store { + type Output = Store; + + fn bitor(self, rhs: &Store) -> Store { + match (self, rhs) { + (&Array(ref vec1), &Array(ref vec2)) => Array(union_arrays(vec1, vec2)), + (&Bitmap(_), &Array(_)) => { + let mut lhs = self.clone(); + BitOrAssign::bitor_assign(&mut lhs, rhs); + lhs + } + (&Bitmap(_), &Bitmap(_)) => { + let mut lhs = self.clone(); + BitOrAssign::bitor_assign(&mut lhs, rhs); + lhs + } + (&Array(_), &Bitmap(_)) => { + let mut rhs = rhs.clone(); + BitOrAssign::bitor_assign(&mut rhs, self); + rhs + } + } + } +} + +impl BitOrAssign for Store { + fn bitor_assign(&mut self, mut rhs: Store) { + match (self, &mut rhs) { + (&mut Array(ref mut vec1), &mut Array(ref vec2)) => { + *vec1 = union_arrays(vec1, vec2); + } + (this @ &mut Bitmap(..), &mut Array(ref vec)) => { + vec.iter().for_each(|index| { + this.insert(*index); + }); + } + (&mut Bitmap(ref mut bits1), &mut Bitmap(ref bits2)) => { + for (index1, index2) in bits1.iter_mut().zip(bits2.iter()) { + BitOrAssign::bitor_assign(index1, index2); + } + } + (this @ &mut Array(..), &mut Bitmap(..)) => { + mem::swap(this, &mut rhs); + BitOrAssign::bitor_assign(this, rhs); + } + } + } +} + +impl BitOrAssign<&Store> for Store { + fn bitor_assign(&mut self, rhs: &Store) { + match (self, rhs) { + (&mut Array(ref mut vec1), &Array(ref vec2)) => { + let this = mem::take(vec1); + *vec1 = union_arrays(&this, vec2); + } + (this @ &mut Bitmap(..), &Array(ref vec)) => { + vec.iter().for_each(|index| { + this.insert(*index); + }); + } + (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { + for (index1, index2) in bits1.iter_mut().zip(bits2.iter()) { + BitOrAssign::bitor_assign(index1, index2); + } + } + (this @ &mut Array(..), &Bitmap(..)) => { + *this = this.to_bitmap(); + BitOrAssign::bitor_assign(this, rhs); + } + } + } +} + +impl BitAnd<&Store> for &Store { + type Output = Store; + + fn bitand(self, rhs: &Store) -> Store { + match (self, rhs) { + (&Array(ref vec1), &Array(ref vec2)) => Array(intersect_arrays(vec1, vec2)), + (&Bitmap(_), &Array(_)) => { + let mut rhs = rhs.clone(); + BitAndAssign::bitand_assign(&mut rhs, self); + rhs + } + _ => { + let mut lhs = self.clone(); + BitAndAssign::bitand_assign(&mut lhs, rhs); + lhs + } + } + } +} + +impl BitAndAssign for Store { + #[allow(clippy::suspicious_op_assign_impl)] + fn bitand_assign(&mut self, mut rhs: Store) { + match (self, &mut rhs) { + (&mut Array(ref mut lhs), &mut Array(ref mut rhs)) => { + if rhs.len() < lhs.len() { + mem::swap(lhs, rhs); + } + + let mut i = 0; + lhs.retain(|x| { + i += rhs.iter().skip(i).position(|y| y >= x).unwrap_or(rhs.len()); + rhs.get(i).map_or(false, |y| x == y) + }); + } + (&mut Bitmap(ref mut bits1), &mut Bitmap(ref bits2)) => { + for (index1, index2) in bits1.iter_mut().zip(bits2.iter()) { + BitAndAssign::bitand_assign(index1, index2); + } + } + (&mut Array(ref mut vec), store @ &mut Bitmap(..)) => { + vec.retain(|x| store.contains(*x)); + } + (this @ &mut Bitmap(..), &mut Array(..)) => { + mem::swap(this, &mut rhs); + BitAndAssign::bitand_assign(this, rhs); + } + } + } +} + +impl BitAndAssign<&Store> for Store { + #[allow(clippy::suspicious_op_assign_impl)] + fn bitand_assign(&mut self, rhs: &Store) { + match (self, rhs) { + (&mut Array(ref mut vec1), &Array(ref vec2)) => { + let (mut lhs, rhs) = if vec1.len() <= vec2.len() { + (mem::take(vec1), vec2.as_slice()) + } else { + (vec2.clone(), vec1.as_slice()) + }; + + let mut i = 0; + lhs.retain(|x| { + i += rhs.iter().skip(i).position(|y| y >= x).unwrap_or(rhs.len()); + rhs.get(i).map_or(false, |y| x == y) + }); + + *vec1 = lhs; + } + (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { + for (index1, index2) in bits1.iter_mut().zip(bits2.iter()) { + BitAndAssign::bitand_assign(index1, index2); + } + } + (&mut Array(ref mut vec), store @ &Bitmap(..)) => { + vec.retain(|x| store.contains(*x)); + } + (this @ &mut Bitmap(..), &Array(..)) => { + let mut new = rhs.clone(); + BitAndAssign::bitand_assign(&mut new, &*this); + *this = new; + } + } + } +} + +impl Sub<&Store> for &Store { + type Output = Store; + + fn sub(self, rhs: &Store) -> Store { + match (self, rhs) { + (&Array(ref vec1), &Array(ref vec2)) => Array(difference_arrays(vec1, vec2)), + _ => { + let mut lhs = self.clone(); + SubAssign::sub_assign(&mut lhs, rhs); + lhs + } + } + } +} + +impl SubAssign<&Store> for Store { + fn sub_assign(&mut self, rhs: &Store) { + match (self, rhs) { + (&mut Array(ref mut lhs), &Array(ref rhs)) => { + let mut i = 0; + lhs.retain(|x| { + i += rhs.iter().skip(i).position(|y| y >= x).unwrap_or(rhs.len()); + rhs.get(i).map_or(true, |y| x != y) + }); + } + (ref mut this @ &mut Bitmap(..), &Array(ref vec2)) => { + vec2.iter().for_each(|index| { + this.remove(*index); + }); + } + (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { + for (index1, index2) in bits1.iter_mut().zip(bits2.iter()) { + *index1 &= !*index2; + } + } + (&mut Array(ref mut vec), store @ &Bitmap(..)) => { + vec.retain(|x| !store.contains(*x)); + } + } + } +} + +impl BitXor<&Store> for &Store { + type Output = Store; + + fn bitxor(self, rhs: &Store) -> Store { + match (self, rhs) { + (&Array(ref vec1), &Array(ref vec2)) => Array(symmetric_difference_arrays(vec1, vec2)), + (&Array(_), &Bitmap(_)) => { + let mut lhs = rhs.clone(); + BitXorAssign::bitxor_assign(&mut lhs, self); + lhs + } + _ => { + let mut lhs = self.clone(); + BitXorAssign::bitxor_assign(&mut lhs, rhs); + lhs + } + } + } +} + +impl BitXorAssign for Store { + fn bitxor_assign(&mut self, mut rhs: Store) { + // TODO improve this function + match (self, &mut rhs) { + (&mut Array(ref mut vec1), &mut Array(ref mut vec2)) => { + let mut i1 = 0usize; + let mut iter2 = vec2.iter(); + let mut current2 = iter2.next(); + while i1 < vec1.len() { + match current2.map(|c2| vec1[i1].cmp(c2)) { + None => break, + Some(Less) => { + i1 += 1; + } + Some(Greater) => { + vec1.insert(i1, *current2.unwrap()); + i1 += 1; + current2 = iter2.next(); + } + Some(Equal) => { + vec1.remove(i1); + current2 = iter2.next(); + } + } + } + if let Some(current) = current2 { + vec1.push(*current); + vec1.extend(iter2.cloned()); + } + } + (ref mut this @ &mut Bitmap(..), &mut Array(ref mut vec2)) => { + for index in vec2 { + if this.contains(*index) { + this.remove(*index); + } else { + this.insert(*index); + } + } + } + (&mut Bitmap(ref mut bits1), &mut Bitmap(ref mut bits2)) => { + for (index1, index2) in bits1.iter_mut().zip(bits2.iter()) { + BitXorAssign::bitxor_assign(index1, index2); + } + } + (this @ &mut Array(..), &mut Bitmap(..)) => { + mem::swap(this, &mut rhs); + BitXorAssign::bitxor_assign(this, rhs); + } + } + } +} + +impl BitXorAssign<&Store> for Store { + fn bitxor_assign(&mut self, rhs: &Store) { + match (self, rhs) { + (&mut Array(ref mut vec1), &Array(ref vec2)) => { + let mut i1 = 0usize; + let mut iter2 = vec2.iter(); + let mut current2 = iter2.next(); + while i1 < vec1.len() { + match current2.map(|c2| vec1[i1].cmp(c2)) { + None => break, + Some(Less) => { + i1 += 1; + } + Some(Greater) => { + vec1.insert(i1, *current2.unwrap()); + i1 += 1; + current2 = iter2.next(); + } + Some(Equal) => { + vec1.remove(i1); + current2 = iter2.next(); + } + } + } + if let Some(current) = current2 { + vec1.push(*current); + vec1.extend(iter2.cloned()); + } + } + (ref mut this @ &mut Bitmap(..), &Array(ref vec2)) => { + for index in vec2.iter() { + if this.contains(*index) { + this.remove(*index); + } else { + this.insert(*index); + } + } + } + (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { + for (index1, index2) in bits1.iter_mut().zip(bits2.iter()) { + BitXorAssign::bitxor_assign(index1, index2); + } + } + (this @ &mut Array(..), &Bitmap(..)) => { + let mut new = rhs.clone(); + BitXorAssign::bitxor_assign(&mut new, &*this); + *this = new; + } + } + } +} + +impl<'a> IntoIterator for &'a Store { + type Item = u16; + type IntoIter = Iter<'a>; + fn into_iter(self) -> Iter<'a> { + match *self { + Array(ref vec) => Iter::Array(vec.iter()), + Bitmap(ref bits) => Iter::BitmapBorrowed(BitmapIter::new(&**bits)), + } + } +} + +impl IntoIterator for Store { + type Item = u16; + type IntoIter = Iter<'static>; + fn into_iter(self) -> Iter<'static> { + match self { + Array(vec) => Iter::Vec(vec.into_iter()), + Bitmap(bits) => Iter::BitmapOwned(BitmapIter::new(bits)), + } + } +} + +impl PartialEq for Store { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (&Array(ref vec1), &Array(ref vec2)) => vec1 == vec2, + (&Bitmap(ref bits1), &Bitmap(ref bits2)) => { + bits1.iter().zip(bits2.iter()).all(|(i1, i2)| i1 == i2) + } + _ => false, + } + } +} + +impl Clone for Store { + fn clone(&self) -> Self { + match *self { + Array(ref vec) => Array(vec.clone()), + Bitmap(ref bits) => Bitmap(Box::new(**bits)), + } + } +} + +impl> BitmapIter { + fn new(bits: B) -> BitmapIter { + BitmapIter { key: 0, value: bits.borrow()[0], bits } + } +} + +impl> Iterator for BitmapIter { + type Item = u16; + + fn next(&mut self) -> Option { + loop { + if self.value == 0 { + self.key += 1; + if self.key >= BITMAP_LENGTH { + return None; + } + self.value = unsafe { *self.bits.borrow().get_unchecked(self.key) }; + continue; + } + let lsb = self.value.trailing_zeros() as usize; + self.value &= u64::MAX << 1 << lsb; + return Some((64 * self.key + lsb) as u16); + } + } + + fn size_hint(&self) -> (usize, Option) { + panic!("Should never be called (roaring::Iter caches the size_hint itself)") + } +} + +impl<'a> Iterator for Iter<'a> { + type Item = u16; + + fn next(&mut self) -> Option { + match *self { + Iter::Array(ref mut inner) => inner.next().cloned(), + Iter::Vec(ref mut inner) => inner.next(), + Iter::BitmapBorrowed(ref mut inner) => inner.next(), + Iter::BitmapOwned(ref mut inner) => inner.next(), + } + } + + fn size_hint(&self) -> (usize, Option) { + panic!("Should never be called (roaring::Iter caches the size_hint itself)") + } +} + +#[inline] +fn union_arrays(arr1: &[u16], arr2: &[u16]) -> Vec { + let len = (arr1.len() + arr2.len()).min(4096); + let mut out = Vec::with_capacity(len); + + // Traverse both arrays + let mut i = 0; + let mut j = 0; + while i < arr1.len() && j < arr2.len() { + let a = unsafe { arr1.get_unchecked(i) }; + let b = unsafe { arr2.get_unchecked(j) }; + match a.cmp(b) { + Less => { + out.push(*a); + i += 1; + } + Greater => { + out.push(*b); + j += 1; + } + Equal => { + out.push(*a); + i += 1; + j += 1; + } + } + } + + // Store remaining elements of the arrays + out.extend_from_slice(&arr1[i..]); + out.extend_from_slice(&arr2[j..]); + + out +} + +#[inline] +fn intersect_arrays(arr1: &[u16], arr2: &[u16]) -> Vec { + let mut out = Vec::new(); + + // Traverse both arrays + let mut i = 0; + let mut j = 0; + while i < arr1.len() && j < arr2.len() { + let a = unsafe { arr1.get_unchecked(i) }; + let b = unsafe { arr2.get_unchecked(j) }; + match a.cmp(b) { + Less => i += 1, + Greater => j += 1, + Equal => { + out.push(*a); + i += 1; + j += 1; + } + } + } + + out +} + +#[inline] +fn difference_arrays(arr1: &[u16], arr2: &[u16]) -> Vec { + let mut out = Vec::new(); + + // Traverse both arrays + let mut i = 0; + let mut j = 0; + while i < arr1.len() && j < arr2.len() { + let a = unsafe { arr1.get_unchecked(i) }; + let b = unsafe { arr2.get_unchecked(j) }; + match a.cmp(b) { + Less => { + out.push(*a); + i += 1; + } + Greater => j += 1, + Equal => { + i += 1; + j += 1; + } + } + } + + // Store remaining elements of the left array + out.extend_from_slice(&arr1[i..]); + + out +} + +#[inline] +fn symmetric_difference_arrays(arr1: &[u16], arr2: &[u16]) -> Vec { + let mut out = Vec::new(); + + // Traverse both arrays + let mut i = 0; + let mut j = 0; + while i < arr1.len() && j < arr2.len() { + let a = unsafe { arr1.get_unchecked(i) }; + let b = unsafe { arr2.get_unchecked(j) }; + match a.cmp(b) { + Less => { + out.push(*a); + i += 1; + } + Greater => { + out.push(*b); + j += 1; + } + Equal => { + i += 1; + j += 1; + } + } + } + + // Store remaining elements of the arrays + out.extend_from_slice(&arr1[i..]); + out.extend_from_slice(&arr2[j..]); + + out +} + +#[inline] +fn key(index: u16) -> usize { + index as usize / 64 +} + +#[inline] +fn bit(index: u16) -> usize { + index as usize % 64 +} + +#[cfg(test)] +mod tests { + use super::*; + + fn as_vec(s: Store) -> Vec { + if let Store::Array(v) = s { + return v; + } + as_vec(s.to_array()) + } + + #[test] + #[allow(clippy::reversed_empty_ranges)] + fn test_array_insert_invalid_range() { + let mut store = Store::Array(vec![1, 2, 8, 9]); + + // Insert a range with start > end. + let new = store.insert_range(6..=1); + assert_eq!(new, 0); + + assert_eq!(as_vec(store), vec![1, 2, 8, 9]); + } + + #[test] + fn test_array_insert_range() { + let mut store = Store::Array(vec![1, 2, 8, 9]); + + let new = store.insert_range(4..=5); + assert_eq!(new, 2); + + assert_eq!(as_vec(store), vec![1, 2, 4, 5, 8, 9]); + } + + #[test] + fn test_array_insert_range_left_overlap() { + let mut store = Store::Array(vec![1, 2, 8, 9]); + + let new = store.insert_range(2..=5); + assert_eq!(new, 3); + + assert_eq!(as_vec(store), vec![1, 2, 3, 4, 5, 8, 9]); + } + + #[test] + fn test_array_insert_range_right_overlap() { + let mut store = Store::Array(vec![1, 2, 8, 9]); + + let new = store.insert_range(4..=8); + assert_eq!(new, 4); + + assert_eq!(as_vec(store), vec![1, 2, 4, 5, 6, 7, 8, 9]); + } + + #[test] + fn test_array_insert_range_full_overlap() { + let mut store = Store::Array(vec![1, 2, 8, 9]); + + let new = store.insert_range(1..=9); + assert_eq!(new, 5); + + assert_eq!(as_vec(store), vec![1, 2, 3, 4, 5, 6, 7, 8, 9]); + } + + #[test] + #[allow(clippy::reversed_empty_ranges)] + fn test_bitmap_insert_invalid_range() { + let store = Store::Array(vec![1, 2, 8, 9]); + let mut store = store.to_bitmap(); + + // Insert a range with start > end. + let new = store.insert_range(6..=1); + assert_eq!(new, 0); + + assert_eq!(as_vec(store), vec![1, 2, 8, 9]); + } + + #[test] + fn test_bitmap_insert_same_key_overlap() { + let store = Store::Array(vec![1, 2, 3, 62, 63]); + let mut store = store.to_bitmap(); + + let new = store.insert_range(1..=62); + assert_eq!(new, 58); + + assert_eq!(as_vec(store), (1..64).collect::>()); + } + + #[test] + fn test_bitmap_insert_range() { + let store = Store::Array(vec![1, 2, 130]); + let mut store = store.to_bitmap(); + + let new = store.insert_range(4..=128); + assert_eq!(new, 125); + + let mut want = vec![1, 2]; + want.extend(4..129); + want.extend(&[130]); + + assert_eq!(as_vec(store), want); + } + + #[test] + fn test_bitmap_insert_range_left_overlap() { + let store = Store::Array(vec![1, 2, 130]); + let mut store = store.to_bitmap(); + + let new = store.insert_range(1..=128); + assert_eq!(new, 126); + + let mut want = Vec::new(); + want.extend(1..129); + want.extend(&[130]); + + assert_eq!(as_vec(store), want); + } + + #[test] + fn test_bitmap_insert_range_right_overlap() { + let store = Store::Array(vec![1, 2, 130]); + let mut store = store.to_bitmap(); + + let new = store.insert_range(4..=132); + assert_eq!(new, 128); + + let mut want = vec![1, 2]; + want.extend(4..133); + + assert_eq!(as_vec(store), want); + } + + #[test] + fn test_bitmap_insert_range_full_overlap() { + let store = Store::Array(vec![1, 2, 130]); + let mut store = store.to_bitmap(); + + let new = store.insert_range(1..=134); + assert_eq!(new, 131); + + let mut want = Vec::new(); + want.extend(1..135); + + assert_eq!(as_vec(store), want); + } +} From 7820c8e8c6e8341b774ec1bc03c425e8b561c772 Mon Sep 17 00:00:00 2001 From: not-jan <61017633+not-jan@users.noreply.github.com> Date: Sun, 26 Jun 2022 01:34:34 +0200 Subject: [PATCH 02/11] Remove retain_mut as it was included in Rust in stable 1.61 --- src/bitmap/ops.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/bitmap/ops.rs b/src/bitmap/ops.rs index 1c1a65445..09ab04ff6 100644 --- a/src/bitmap/ops.rs +++ b/src/bitmap/ops.rs @@ -1,8 +1,6 @@ use std::mem; use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Sub, SubAssign}; -use retain_mut::RetainMut; - use crate::bitmap::container::Container; use crate::bitmap::Pairs; use crate::RoaringBitmap; @@ -240,7 +238,7 @@ impl BitAndAssign for RoaringBitmap { mem::swap(self, &mut rhs); } - RetainMut::retain_mut(&mut self.containers, |cont| { + (&mut self.containers).retain_mut(|cont| { let key = cont.key; match rhs.containers.binary_search_by_key(&key, |c| c.key) { Ok(loc) => { @@ -258,7 +256,7 @@ impl BitAndAssign for RoaringBitmap { impl BitAndAssign<&RoaringBitmap> for RoaringBitmap { /// An `intersection` between two sets. fn bitand_assign(&mut self, rhs: &RoaringBitmap) { - RetainMut::retain_mut(&mut self.containers, |cont| { + (&mut self.containers).retain_mut(|cont| { let key = cont.key; match rhs.containers.binary_search_by_key(&key, |c| c.key) { Ok(loc) => { @@ -335,7 +333,7 @@ impl SubAssign for RoaringBitmap { impl SubAssign<&RoaringBitmap> for RoaringBitmap { /// A `difference` between two sets. fn sub_assign(&mut self, rhs: &RoaringBitmap) { - RetainMut::retain_mut(&mut self.containers, |cont| { + (&mut self.containers).retain_mut(|cont| { match rhs.containers.binary_search_by_key(&cont.key, |c| c.key) { Ok(loc) => { SubAssign::sub_assign(cont, &rhs.containers[loc]); From a3b00ae72db62290ceb98b63cd839086508afdc9 Mon Sep 17 00:00:00 2001 From: not-jan <61017633+not-jan@users.noreply.github.com> Date: Sun, 26 Jun 2022 01:35:20 +0200 Subject: [PATCH 03/11] Add a naive implementation of `insert_range` for `RoaringTreemap` --- src/treemap/inherent.rs | 46 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/src/treemap/inherent.rs b/src/treemap/inherent.rs index 5ffb9db9c..3f3320800 100644 --- a/src/treemap/inherent.rs +++ b/src/treemap/inherent.rs @@ -36,6 +36,52 @@ impl RoaringTreemap { self.map.entry(hi).or_insert_with(RoaringBitmap::new).insert(lo) } + /// Inserts a range of values. + /// Returns the number of inserted values. + /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringTreemap; + /// + /// let mut rb = RoaringTreemap::new(); + /// rb.insert_range(2..4); + /// assert!(rb.contains(2)); + /// assert!(rb.contains(3)); + /// assert!(!rb.contains(4)); + /// ``` + pub fn insert_range>(&mut self, range: R) -> u64 { + let (start, end) = match util::convert_range_to_inclusive(range) { + Some(range) => (*range.start(), *range.end()), + None => return 0, + }; + + let (start_hi, start_lo) = util::split(start); + let (end_hi, end_lo) = util::split(end); + + let mut counter = 0u64; + + // Split the input range by the leading 32 bits + for hi in start_hi..=end_hi { + // Calculate the sub-range from the lower 32 bits + let range = if hi == end_hi && hi == start_hi { + start_lo..=end_lo + } else if hi == start_hi { + start_lo..=u32::MAX + } else if hi == end_hi { + 0..=end_hi + } else { + // This is pretty expensive, we can definitely pre-calculate what a full + // `RoaringBitmap` looks like so we might as well use it here. + 0..=u32::MAX + }; + + counter += self.map.entry(hi).or_insert_with(RoaringBitmap::new).insert_range(range) + } + + counter + } + /// Pushes `value` in the treemap only if it is greater than the current maximum value. /// /// Returns whether the value was inserted. From 0724c268ba73ff7b163f3930f807b963a5ec82d5 Mon Sep 17 00:00:00 2001 From: not-jan <61017633+not-jan@users.noreply.github.com> Date: Sun, 26 Jun 2022 01:35:39 +0200 Subject: [PATCH 04/11] Add tests for the naive implementation of `insert_range` for `RoaringTreemap` --- tests/treemap_lib.rs | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/tests/treemap_lib.rs b/tests/treemap_lib.rs index 894196de0..43f7306e1 100644 --- a/tests/treemap_lib.rs +++ b/tests/treemap_lib.rs @@ -13,11 +13,11 @@ fn smoke() { assert!(bitmap.contains(1)); assert_eq!(bitmap.len(), 1); assert!(!bitmap.is_empty()); - bitmap.insert(u64::max_value() - 2); - assert!(bitmap.contains(u64::max_value() - 2)); + bitmap.insert(u64::MAX - 2); + assert!(bitmap.contains(u64::MAX - 2)); assert_eq!(bitmap.len(), 2); - bitmap.insert(u64::max_value()); - assert!(bitmap.contains(u64::max_value())); + bitmap.insert(u64::MAX); + assert!(bitmap.contains(u64::MAX)); assert_eq!(bitmap.len(), 3); bitmap.insert(2); assert!(bitmap.contains(2)); @@ -28,9 +28,24 @@ fn smoke() { assert!(!bitmap.contains(0)); assert!(bitmap.contains(1)); assert!(!bitmap.contains(100)); - assert!(bitmap.contains(u64::max_value() - 2)); - assert!(!bitmap.contains(u64::max_value() - 1)); - assert!(bitmap.contains(u64::max_value())); + assert!(bitmap.contains(u64::MAX - 2)); + assert!(!bitmap.contains(u64::MAX - 1)); + assert!(bitmap.contains(u64::MAX)); +} + +#[test] +fn insert_range() { + let ranges = 0..0x1000; + + let mut bitmap = RoaringTreemap::new(); + assert_eq!(bitmap.insert_range(ranges), 0x1000); + assert_eq!(bitmap.len(), 0x1000); + assert_eq!(bitmap.max(), Some(0xFFF)); + + assert_eq!(bitmap.insert_range(u32::MAX as u64 - 1..u32::MAX as u64 + 1), 2); + assert!(bitmap.contains(2)); + assert!(bitmap.contains(0xFFF)); + assert!(!bitmap.contains(0x1000)); } #[test] @@ -53,16 +68,16 @@ fn test_max() { assert_eq!(bitmap.max(), Some(0)); bitmap.insert(1); assert_eq!(bitmap.max(), Some(1)); - bitmap.insert(u64::max_value()); - assert_eq!(bitmap.max(), Some(u64::max_value())); + bitmap.insert(u64::MAX); + assert_eq!(bitmap.max(), Some(u64::MAX)); } #[test] fn test_min() { let mut bitmap = RoaringTreemap::new(); assert_eq!(bitmap.min(), None); - bitmap.insert(u64::max_value()); - assert_eq!(bitmap.min(), Some(u64::max_value())); + bitmap.insert(u64::MAX); + assert_eq!(bitmap.min(), Some(u64::MAX)); bitmap.insert(1); assert_eq!(bitmap.min(), Some(1)); bitmap.insert(0); From 278d710088cf8887b3e64f6dac2e4cd6b132aaea Mon Sep 17 00:00:00 2001 From: not-jan <61017633+not-jan@users.noreply.github.com> Date: Sun, 26 Jun 2022 20:36:11 +0200 Subject: [PATCH 05/11] Fix a typo in the ranges --- src/treemap/inherent.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treemap/inherent.rs b/src/treemap/inherent.rs index 3f3320800..5f9fd9bda 100644 --- a/src/treemap/inherent.rs +++ b/src/treemap/inherent.rs @@ -69,7 +69,7 @@ impl RoaringTreemap { } else if hi == start_hi { start_lo..=u32::MAX } else if hi == end_hi { - 0..=end_hi + 0..=end_lo } else { // This is pretty expensive, we can definitely pre-calculate what a full // `RoaringBitmap` looks like so we might as well use it here. From 6c1ee3f15dd728936ba0e5fdcc213199214be3cc Mon Sep 17 00:00:00 2001 From: not-jan <61017633+not-jan@users.noreply.github.com> Date: Sun, 26 Jun 2022 20:36:35 +0200 Subject: [PATCH 06/11] Expand the test suite to cover more edge cases --- tests/treemap_lib.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/treemap_lib.rs b/tests/treemap_lib.rs index 43f7306e1..0c302af9b 100644 --- a/tests/treemap_lib.rs +++ b/tests/treemap_lib.rs @@ -36,6 +36,7 @@ fn smoke() { #[test] fn insert_range() { let ranges = 0..0x1000; + const SIGMA: u64 = u32::MAX as u64; let mut bitmap = RoaringTreemap::new(); assert_eq!(bitmap.insert_range(ranges), 0x1000); @@ -46,6 +47,14 @@ fn insert_range() { assert!(bitmap.contains(2)); assert!(bitmap.contains(0xFFF)); assert!(!bitmap.contains(0x1000)); + + bitmap.clear(); + bitmap.insert_range(2 * SIGMA..=4 * SIGMA); + + assert_eq!(bitmap.min(), Some(2 * SIGMA)); + assert_eq!(bitmap.max(), Some(4 * SIGMA)); + + assert!(bitmap.contains(3 * SIGMA)); } #[test] From f28bdce21eb790b4bae468e3463404182ab1c851 Mon Sep 17 00:00:00 2001 From: not-jan <61017633+not-jan@users.noreply.github.com> Date: Mon, 27 Jun 2022 14:18:06 +0200 Subject: [PATCH 07/11] Update CI to a later version of Rust --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8302716d0..ecb94d2d7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -18,7 +18,7 @@ jobs: - stable - beta - nightly - - 1.56.1 + - 1.61 env: RUSTFLAGS: "-C target-cpu=native -C opt-level=3" ROARINGRS_BENCH_OFFLINE: "true" From 0f59d819f1215ade3d156f354b7c4be89f79f9e0 Mon Sep 17 00:00:00 2001 From: not-jan <61017633+not-jan@users.noreply.github.com> Date: Tue, 28 Jun 2022 11:42:15 +0200 Subject: [PATCH 08/11] Revert "Remove retain_mut as it was included in Rust in stable 1.61" This reverts commit d1bc8c5369ac3f949c95afb22ba8bf0098c5a49c. --- src/bitmap/ops.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/bitmap/ops.rs b/src/bitmap/ops.rs index 09ab04ff6..1c1a65445 100644 --- a/src/bitmap/ops.rs +++ b/src/bitmap/ops.rs @@ -1,6 +1,8 @@ use std::mem; use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Sub, SubAssign}; +use retain_mut::RetainMut; + use crate::bitmap::container::Container; use crate::bitmap::Pairs; use crate::RoaringBitmap; @@ -238,7 +240,7 @@ impl BitAndAssign for RoaringBitmap { mem::swap(self, &mut rhs); } - (&mut self.containers).retain_mut(|cont| { + RetainMut::retain_mut(&mut self.containers, |cont| { let key = cont.key; match rhs.containers.binary_search_by_key(&key, |c| c.key) { Ok(loc) => { @@ -256,7 +258,7 @@ impl BitAndAssign for RoaringBitmap { impl BitAndAssign<&RoaringBitmap> for RoaringBitmap { /// An `intersection` between two sets. fn bitand_assign(&mut self, rhs: &RoaringBitmap) { - (&mut self.containers).retain_mut(|cont| { + RetainMut::retain_mut(&mut self.containers, |cont| { let key = cont.key; match rhs.containers.binary_search_by_key(&key, |c| c.key) { Ok(loc) => { @@ -333,7 +335,7 @@ impl SubAssign for RoaringBitmap { impl SubAssign<&RoaringBitmap> for RoaringBitmap { /// A `difference` between two sets. fn sub_assign(&mut self, rhs: &RoaringBitmap) { - (&mut self.containers).retain_mut(|cont| { + RetainMut::retain_mut(&mut self.containers, |cont| { match rhs.containers.binary_search_by_key(&cont.key, |c| c.key) { Ok(loc) => { SubAssign::sub_assign(cont, &rhs.containers[loc]); From 6a71b2070828b3ed16ea9a3ce933fd257094e0d3 Mon Sep 17 00:00:00 2001 From: not-jan <61017633+not-jan@users.noreply.github.com> Date: Tue, 28 Jun 2022 11:47:14 +0200 Subject: [PATCH 09/11] Add a benchmark for RoaringTreemap::insert_range --- benchmarks/benches/lib.rs | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/benchmarks/benches/lib.rs b/benchmarks/benches/lib.rs index d12d67b3e..4c4df36c6 100644 --- a/benchmarks/benches/lib.rs +++ b/benchmarks/benches/lib.rs @@ -8,7 +8,7 @@ use criterion::{ Throughput, }; -use roaring::{MultiOps, RoaringBitmap}; +use roaring::{MultiOps, RoaringBitmap, RoaringTreemap}; use crate::datasets::Datasets; @@ -674,6 +674,30 @@ fn insert_range_bitmap(c: &mut Criterion) { } } +fn insert_range_treemap(c: &mut Criterion) { + for &size in &[10, 100, 1_000, 5_000, 10_000, 20_000] { + let mut group = c.benchmark_group("insert_range_treemap"); + group.throughput(criterion::Throughput::Elements(size as u64)); + group.bench_function(format!("from_empty_{}", size), |b| { + let bm = RoaringTreemap::new(); + b.iter_batched( + || bm.clone(), + |mut bm| black_box(bm.insert_range(0..size)), + criterion::BatchSize::SmallInput, + ) + }); + group.bench_function(format!("pre_populated_{}", size), |b| { + let mut bm = RoaringTreemap::new(); + bm.insert_range(0..size); + b.iter_batched( + || bm.clone(), + |mut bm| black_box(bm.insert_range(0..size)), + criterion::BatchSize::SmallInput, + ) + }); + } +} + criterion_group!( benches, creation, @@ -691,6 +715,7 @@ criterion_group!( remove, remove_range_bitmap, insert_range_bitmap, + insert_range_treemap, iteration, is_empty, serialization, From 900b2b59e923ac59392fed81033ee8abd237f252 Mon Sep 17 00:00:00 2001 From: not-jan <61017633+not-jan@users.noreply.github.com> Date: Tue, 28 Jun 2022 11:55:50 +0200 Subject: [PATCH 10/11] Revert "Update CI to a later version of Rust" This reverts commit 4048674109efed6086e3ee8cc1b7c2ddb813227a. --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ecb94d2d7..8302716d0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -18,7 +18,7 @@ jobs: - stable - beta - nightly - - 1.61 + - 1.56.1 env: RUSTFLAGS: "-C target-cpu=native -C opt-level=3" ROARINGRS_BENCH_OFFLINE: "true" From 01c7d4888a1f34d17f74d2da001a3879489621a7 Mon Sep 17 00:00:00 2001 From: not-jan <61017633+not-jan@users.noreply.github.com> Date: Wed, 29 Jun 2022 18:59:58 +0200 Subject: [PATCH 11/11] Increase the numbers! --- benchmarks/benches/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/benches/lib.rs b/benchmarks/benches/lib.rs index 4c4df36c6..8bdf014a8 100644 --- a/benchmarks/benches/lib.rs +++ b/benchmarks/benches/lib.rs @@ -675,7 +675,7 @@ fn insert_range_bitmap(c: &mut Criterion) { } fn insert_range_treemap(c: &mut Criterion) { - for &size in &[10, 100, 1_000, 5_000, 10_000, 20_000] { + for &size in &[1_000_u64, 10_000u64, 2 * (u32::MAX as u64)] { let mut group = c.benchmark_group("insert_range_treemap"); group.throughput(criterion::Throughput::Elements(size as u64)); group.bench_function(format!("from_empty_{}", size), |b| {