Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add low-level HashTable API #466

Merged
merged 9 commits into from
Oct 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/external_trait_impls/rayon/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ mod helpers;
pub(crate) mod map;
pub(crate) mod raw;
pub(crate) mod set;
pub(crate) mod table;
252 changes: 252 additions & 0 deletions src/external_trait_impls/rayon/table.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,252 @@
//! Rayon extensions for `HashTable`.

use super::raw::{RawIntoParIter, RawParDrain, RawParIter};
use crate::hash_table::HashTable;
use crate::raw::{Allocator, Global};
use core::fmt;
use core::marker::PhantomData;
use rayon::iter::plumbing::UnindexedConsumer;
use rayon::iter::{IntoParallelIterator, ParallelIterator};

/// Parallel iterator over shared references to entries in a map.
///
/// This iterator is created by the [`par_iter`] method on [`HashTable`]
/// (provided by the [`IntoParallelRefIterator`] trait).
/// See its documentation for more.
///
/// [`par_iter`]: /hashbrown/struct.HashTable.html#method.par_iter
/// [`HashTable`]: /hashbrown/struct.HashTable.html
/// [`IntoParallelRefIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelRefIterator.html
pub struct ParIter<'a, T> {
inner: RawParIter<T>,
marker: PhantomData<&'a T>,
}

impl<'a, T: Sync> ParallelIterator for ParIter<'a, T> {
type Item = &'a T;

#[cfg_attr(feature = "inline-more", inline)]
fn drive_unindexed<C>(self, consumer: C) -> C::Result
where
C: UnindexedConsumer<Self::Item>,
{
self.inner
.map(|x| unsafe { x.as_ref() })
.drive_unindexed(consumer)
}
}

impl<T> Clone for ParIter<'_, T> {
#[cfg_attr(feature = "inline-more", inline)]
fn clone(&self) -> Self {
Self {
inner: self.inner.clone(),
marker: PhantomData,
}
}
}

impl<T: fmt::Debug> fmt::Debug for ParIter<'_, T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let iter = unsafe { self.inner.iter() }.map(|x| unsafe { x.as_ref() });
f.debug_list().entries(iter).finish()
}
}

/// Parallel iterator over mutable references to entries in a map.
///
/// This iterator is created by the [`par_iter_mut`] method on [`HashTable`]
/// (provided by the [`IntoParallelRefMutIterator`] trait).
/// See its documentation for more.
///
/// [`par_iter_mut`]: /hashbrown/struct.HashTable.html#method.par_iter_mut
/// [`HashTable`]: /hashbrown/struct.HashTable.html
/// [`IntoParallelRefMutIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelRefMutIterator.html
pub struct ParIterMut<'a, T> {
inner: RawParIter<T>,
marker: PhantomData<&'a mut T>,
}

impl<'a, T: Send> ParallelIterator for ParIterMut<'a, T> {
type Item = &'a mut T;

#[cfg_attr(feature = "inline-more", inline)]
fn drive_unindexed<C>(self, consumer: C) -> C::Result
where
C: UnindexedConsumer<Self::Item>,
{
self.inner
.map(|x| unsafe { x.as_mut() })
.drive_unindexed(consumer)
}
}

impl<T: fmt::Debug> fmt::Debug for ParIterMut<'_, T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
ParIter {
inner: self.inner.clone(),
marker: PhantomData,
}
.fmt(f)
}
}

/// Parallel iterator over entries of a consumed map.
///
/// This iterator is created by the [`into_par_iter`] method on [`HashTable`]
/// (provided by the [`IntoParallelIterator`] trait).
/// See its documentation for more.
///
/// [`into_par_iter`]: /hashbrown/struct.HashTable.html#method.into_par_iter
/// [`HashTable`]: /hashbrown/struct.HashTable.html
/// [`IntoParallelIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelIterator.html
pub struct IntoParIter<T, A: Allocator = Global> {
inner: RawIntoParIter<T, A>,
}

impl<T: Send, A: Allocator + Send> ParallelIterator for IntoParIter<T, A> {
type Item = T;

#[cfg_attr(feature = "inline-more", inline)]
fn drive_unindexed<C>(self, consumer: C) -> C::Result
where
C: UnindexedConsumer<Self::Item>,
{
self.inner.drive_unindexed(consumer)
}
}

impl<T: fmt::Debug, A: Allocator> fmt::Debug for IntoParIter<T, A> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
ParIter {
inner: unsafe { self.inner.par_iter() },
marker: PhantomData,
}
.fmt(f)
}
}

/// Parallel draining iterator over entries of a map.
///
/// This iterator is created by the [`par_drain`] method on [`HashTable`].
/// See its documentation for more.
///
/// [`par_drain`]: /hashbrown/struct.HashTable.html#method.par_drain
/// [`HashTable`]: /hashbrown/struct.HashTable.html
pub struct ParDrain<'a, T, A: Allocator = Global> {
inner: RawParDrain<'a, T, A>,
}

impl<T: Send, A: Allocator + Sync> ParallelIterator for ParDrain<'_, T, A> {
type Item = T;

#[cfg_attr(feature = "inline-more", inline)]
fn drive_unindexed<C>(self, consumer: C) -> C::Result
where
C: UnindexedConsumer<Self::Item>,
{
self.inner.drive_unindexed(consumer)
}
}

impl<T: fmt::Debug, A: Allocator> fmt::Debug for ParDrain<'_, T, A> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
ParIter {
inner: unsafe { self.inner.par_iter() },
marker: PhantomData,
}
.fmt(f)
}
}

impl<T: Send, A: Allocator> HashTable<T, A> {
/// Consumes (potentially in parallel) all values in an arbitrary order,
/// while preserving the map's allocated memory for reuse.
#[cfg_attr(feature = "inline-more", inline)]
pub fn par_drain(&mut self) -> ParDrain<'_, T, A> {
ParDrain {
inner: self.raw.par_drain(),
}
}
}

impl<T: Send, A: Allocator + Send> IntoParallelIterator for HashTable<T, A> {
type Item = T;
type Iter = IntoParIter<T, A>;

#[cfg_attr(feature = "inline-more", inline)]
fn into_par_iter(self) -> Self::Iter {
IntoParIter {
inner: self.raw.into_par_iter(),
}
}
}

impl<'a, T: Sync, A: Allocator> IntoParallelIterator for &'a HashTable<T, A> {
type Item = &'a T;
type Iter = ParIter<'a, T>;

#[cfg_attr(feature = "inline-more", inline)]
fn into_par_iter(self) -> Self::Iter {
ParIter {
inner: unsafe { self.raw.par_iter() },
marker: PhantomData,
}
}
}

impl<'a, T: Send, A: Allocator> IntoParallelIterator for &'a mut HashTable<T, A> {
type Item = &'a mut T;
type Iter = ParIterMut<'a, T>;

#[cfg_attr(feature = "inline-more", inline)]
fn into_par_iter(self) -> Self::Iter {
ParIterMut {
inner: unsafe { self.raw.par_iter() },
marker: PhantomData,
}
}
}

#[cfg(test)]
mod test_par_table {
use alloc::vec::Vec;
use core::sync::atomic::{AtomicUsize, Ordering};

use rayon::prelude::*;

use crate::{
hash_map::{make_hash, DefaultHashBuilder},
hash_table::HashTable,
};

#[test]
fn test_iterate() {
let hasher = DefaultHashBuilder::default();
let mut a = HashTable::new();
for i in 0..32 {
a.insert_unique(make_hash(&hasher, &i), i, |x| make_hash(&hasher, x));
}
let observed = AtomicUsize::new(0);
a.par_iter().for_each(|k| {
observed.fetch_or(1 << *k, Ordering::Relaxed);
});
assert_eq!(observed.into_inner(), 0xFFFF_FFFF);
}

#[test]
fn test_move_iter() {
let hasher = DefaultHashBuilder::default();
let hs = {
let mut hs = HashTable::new();

hs.insert_unique(make_hash(&hasher, &'a'), 'a', |x| make_hash(&hasher, x));
hs.insert_unique(make_hash(&hasher, &'b'), 'b', |x| make_hash(&hasher, x));

hs
};

let v = hs.into_par_iter().collect::<Vec<char>>();
assert!(v == ['a', 'b'] || v == ['b', 'a']);
}
}
16 changes: 16 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ mod map;
mod rustc_entry;
mod scopeguard;
mod set;
mod table;

pub mod hash_map {
//! A hash map implemented with quadratic probing and SIMD lookup.
Expand Down Expand Up @@ -113,9 +114,24 @@ pub mod hash_set {
pub use crate::external_trait_impls::rayon::set::*;
}
}
pub mod hash_table {
//! A hash table implemented with quadratic probing and SIMD lookup.
pub use crate::table::*;

#[cfg(feature = "rayon")]
/// [rayon]-based parallel iterator types for hash tables.
/// You will rarely need to interact with it directly unless you have need
/// to name one of the iterator types.
///
/// [rayon]: https://docs.rs/rayon/1.0/rayon
pub mod rayon {
pub use crate::external_trait_impls::rayon::table::*;
}
}

pub use crate::map::HashMap;
pub use crate::set::HashSet;
pub use crate::table::HashTable;

#[cfg(feature = "equivalent")]
pub use equivalent::Equivalent;
Expand Down
34 changes: 6 additions & 28 deletions src/map.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use crate::raw::{Allocator, Bucket, Global, RawDrain, RawIntoIter, RawIter, RawTable};
use crate::raw::{
Allocator, Bucket, Global, RawDrain, RawExtractIf, RawIntoIter, RawIter, RawTable,
};
use crate::{Equivalent, TryReserveError};
use core::borrow::Borrow;
use core::fmt::{self, Debug};
Expand Down Expand Up @@ -979,7 +981,7 @@ impl<K, V, S, A: Allocator> HashMap<K, V, S, A> {
{
ExtractIf {
f,
inner: ExtractIfInner {
inner: RawExtractIf {
iter: unsafe { self.table.iter() },
table: &mut self.table,
},
Expand Down Expand Up @@ -2724,7 +2726,7 @@ where
F: FnMut(&K, &mut V) -> bool,
{
f: F,
inner: ExtractIfInner<'a, K, V, A>,
inner: RawExtractIf<'a, (K, V), A>,
}

impl<K, V, F, A> Iterator for ExtractIf<'_, K, V, F, A>
Expand All @@ -2736,7 +2738,7 @@ where

#[cfg_attr(feature = "inline-more", inline)]
fn next(&mut self) -> Option<Self::Item> {
self.inner.next(&mut self.f)
self.inner.next(|&mut (ref k, ref mut v)| (self.f)(k, v))
}

#[inline]
Expand All @@ -2747,30 +2749,6 @@ where

impl<K, V, F> FusedIterator for ExtractIf<'_, K, V, F> where F: FnMut(&K, &mut V) -> bool {}

/// Portions of `ExtractIf` shared with `set::ExtractIf`
pub(super) struct ExtractIfInner<'a, K, V, A: Allocator> {
pub iter: RawIter<(K, V)>,
pub table: &'a mut RawTable<(K, V), A>,
}

impl<K, V, A: Allocator> ExtractIfInner<'_, K, V, A> {
#[cfg_attr(feature = "inline-more", inline)]
pub(super) fn next<F>(&mut self, f: &mut F) -> Option<(K, V)>
where
F: FnMut(&K, &mut V) -> bool,
{
unsafe {
for item in &mut self.iter {
let &mut (ref key, ref mut value) = item.as_mut();
if f(key, value) {
return Some(self.table.remove(item).0);
}
}
}
None
}
}

/// A mutable iterator over the values of a `HashMap` in arbitrary order.
/// The iterator element type is `&'a mut V`.
///
Expand Down
22 changes: 22 additions & 0 deletions src/raw/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4457,6 +4457,28 @@ impl Iterator for RawIterHashInner {
}
}

pub(crate) struct RawExtractIf<'a, T, A: Allocator> {
pub iter: RawIter<T>,
pub table: &'a mut RawTable<T, A>,
}

impl<T, A: Allocator> RawExtractIf<'_, T, A> {
#[cfg_attr(feature = "inline-more", inline)]
pub(crate) fn next<F>(&mut self, mut f: F) -> Option<T>
where
F: FnMut(&mut T) -> bool,
{
unsafe {
for item in &mut self.iter {
if f(item.as_mut()) {
return Some(self.table.remove(item).0);
}
}
}
None
}
}

#[cfg(test)]
mod test_map {
use super::*;
Expand Down
Loading