From deaf7eb2f4e1c8d357a5c08dddb136edfd5bf396 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20du=20Garreau?= Date: Mon, 6 Nov 2023 11:12:19 +0100 Subject: [PATCH 1/5] Specialize `fold` implementation of iterators This provides 5-8% iteration speedups --- src/map.rs | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/raw/mod.rs | 75 +++++++++++++++++++++++++++++++++++++++++++++--- src/set.rs | 68 +++++++++++++++++++++++++++++++++++++++++++ src/table.rs | 34 ++++++++++++++++++++-- 4 files changed, 249 insertions(+), 6 deletions(-) diff --git a/src/map.rs b/src/map.rs index b5e657bc6..9cd768f20 100644 --- a/src/map.rs +++ b/src/map.rs @@ -2469,6 +2469,14 @@ impl Iterator for IntoKeys { fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + #[inline] + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, |acc, (k, _)| f(acc, k)) + } } impl ExactSizeIterator for IntoKeys { @@ -2531,6 +2539,14 @@ impl Iterator for IntoValues { fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + #[inline] + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, |acc, (_, v)| f(acc, v)) + } } impl ExactSizeIterator for IntoValues { @@ -4722,6 +4738,17 @@ impl<'a, K, V> Iterator for Iter<'a, K, V> { fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, |acc, x| unsafe { + let (k, v) = x.as_ref(); + f(acc, (k, v)) + }) + } } impl ExactSizeIterator for Iter<'_, K, V> { #[cfg_attr(feature = "inline-more", inline)] @@ -4750,6 +4777,17 @@ impl<'a, K, V> Iterator for IterMut<'a, K, V> { fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, |acc, x| unsafe { + let (k, v) = x.as_mut(); + f(acc, (k, v)) + }) + } } impl ExactSizeIterator for IterMut<'_, K, V> { #[cfg_attr(feature = "inline-more", inline)] @@ -4780,6 +4818,14 @@ impl Iterator for IntoIter { fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, f) + } } impl ExactSizeIterator for IntoIter { #[cfg_attr(feature = "inline-more", inline)] @@ -4810,6 +4856,14 @@ impl<'a, K, V> Iterator for Keys<'a, K, V> { fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, |acc, (k, _)| f(acc, k)) + } } impl ExactSizeIterator for Keys<'_, K, V> { #[cfg_attr(feature = "inline-more", inline)] @@ -4834,6 +4888,14 @@ impl<'a, K, V> Iterator for Values<'a, K, V> { fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, |acc, (_, v)| f(acc, v)) + } } impl ExactSizeIterator for Values<'_, K, V> { #[cfg_attr(feature = "inline-more", inline)] @@ -4858,6 +4920,14 @@ impl<'a, K, V> Iterator for ValuesMut<'a, K, V> { fn 
size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, |acc, (_, v)| f(acc, v)) + } } impl ExactSizeIterator for ValuesMut<'_, K, V> { #[cfg_attr(feature = "inline-more", inline)] @@ -4886,6 +4956,14 @@ impl<'a, K, V, A: Allocator> Iterator for Drain<'a, K, V, A> { fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, f) + } } impl ExactSizeIterator for Drain<'_, K, V, A> { #[cfg_attr(feature = "inline-more", inline)] diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 25c5d1c4d..7d713e1cd 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -50,13 +50,36 @@ use self::imp::Group; // Branch prediction hint. This is currently only available on nightly but it // consistently improves performance by 10-15%. -#[cfg(not(feature = "nightly"))] -use core::convert::identity as likely; -#[cfg(not(feature = "nightly"))] -use core::convert::identity as unlikely; +// #[cfg(not(feature = "nightly"))] +// use core::convert::identity as likely; +// #[cfg(not(feature = "nightly"))] +// use core::convert::identity as unlikely; #[cfg(feature = "nightly")] use core::intrinsics::{likely, unlikely}; +#[cfg(not(feature = "nightly"))] +#[inline] +#[cold] +fn cold() {} + +#[cfg(not(feature = "nightly"))] +#[inline] +fn likely(b: bool) -> bool { + if !b { + cold() + } + b +} +#[cfg(not(feature = "nightly"))] +#[cold] +#[inline] +fn unlikely(b: bool) -> bool { + if b { + cold() + } + b +} + // Use strict provenance functions if available. #[cfg(feature = "nightly")] use core::ptr::invalid_mut; @@ -3846,6 +3869,41 @@ impl RawIterRange { self.next_ctrl = self.next_ctrl.add(Group::WIDTH); } } + + /// # Safety + /// The provided `n` value must match the actual number of items + #[allow(clippy::while_let_on_iterator)] + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn fold_impl(mut self, mut n: usize, mut acc: B, mut f: F) -> B + where + F: FnMut(B, Bucket) -> B, + { + if n == 0 { + return acc; + } + + loop { + while let Some(index) = self.current_group.next() { + debug_assert!(n != 0); + let bucket = self.data.next_n(index); + acc = f(acc, bucket); + n -= 1; + } + + if n == 0 { + return acc; + } + + // We might read past self.end up to the next group boundary, + // but this is fine because it only occurs on tables smaller + // than the group size where the trailing control bytes are all + // EMPTY. On larger tables self.end is guaranteed to be aligned + // to the group size (since tables are power-of-two sized). 
+ self.current_group = Group::load_aligned(self.next_ctrl).match_full().into_iter(); + self.data = self.data.next_n(Group::WIDTH); + self.next_ctrl = self.next_ctrl.add(Group::WIDTH); + } + } } // We make raw iterators unconditionally Send and Sync, and let the PhantomData @@ -4069,6 +4127,15 @@ impl Iterator for RawIter { fn size_hint(&self) -> (usize, Option) { (self.items, Some(self.items)) } + + #[inline] + fn fold(self, init: B, f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + unsafe { self.iter.fold_impl(self.items, init, f) } + } } impl ExactSizeIterator for RawIter {} diff --git a/src/set.rs b/src/set.rs index 09b45fd9f..fdff46d88 100644 --- a/src/set.rs +++ b/src/set.rs @@ -1696,6 +1696,14 @@ impl<'a, K> Iterator for Iter<'a, K> { fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.iter.fold(init, f) + } } impl<'a, K> ExactSizeIterator for Iter<'a, K> { #[cfg_attr(feature = "inline-more", inline)] @@ -1726,6 +1734,14 @@ impl Iterator for IntoIter { fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.iter.fold(init, |acc, (k, ())| f(acc, k)) + } } impl ExactSizeIterator for IntoIter { #[cfg_attr(feature = "inline-more", inline)] @@ -1757,6 +1773,14 @@ impl Iterator for Drain<'_, K, A> { fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.iter.fold(init, |acc, (k, ())| f(acc, k)) + } } impl ExactSizeIterator for Drain<'_, K, A> { #[cfg_attr(feature = "inline-more", inline)] @@ -1827,6 +1851,20 @@ where let (_, upper) = self.iter.size_hint(); (0, upper) } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.iter.fold(init, |acc, elt| { + if self.other.contains(elt) { + f(acc, elt) + } else { + acc + } + }) + } } impl fmt::Debug for Intersection<'_, T, S, A> @@ -1881,6 +1919,20 @@ where let (_, upper) = self.iter.size_hint(); (0, upper) } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.iter.fold(init, |acc, elt| { + if self.other.contains(elt) { + acc + } else { + f(acc, elt) + } + }) + } } impl FusedIterator for Difference<'_, T, S, A> @@ -1927,6 +1979,14 @@ where fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.iter.fold(init, f) + } } impl FusedIterator for SymmetricDifference<'_, T, S, A> @@ -1992,6 +2052,14 @@ where fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } + #[cfg_attr(feature = "inline-more", inline)] + fn fold(self, init: B, f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.iter.fold(init, f) + } } /// A view into a single entry in a set, which may either be vacant or occupied. 
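To make the effect of these specializations concrete, here is a minimal usage sketch. It is not part of the patch and assumes hashbrown's default features (for the default hasher); the values and identifiers are illustrative. Adapters built on internal iteration, such as an explicit `fold`, `for_each`, or (for primitive integers) `sum`, bottom out in `Iterator::fold` and therefore reach the specialized implementations above, while a plain `for` loop still drives the iterator through `next()`.

use hashbrown::HashMap;

fn main() {
    let map: HashMap<u32, u32> = (0..1000u32).map(|i| (i, i * 2)).collect();

    // Internal iteration goes through `Iterator::fold`, so these calls pick up
    // the specialized `Keys::fold` and `Values::fold` added in this patch.
    let sum_of_values: u32 = map.values().copied().sum();
    let max_key = map.keys().fold(0u32, |acc, &k| acc.max(k));

    // External iteration with a `for` loop still calls `next()` and is unchanged.
    let mut count = 0;
    for _ in map.iter() {
        count += 1;
    }

    assert_eq!(count, 1000);
    assert_eq!(max_key, 999);
    assert_eq!(sum_of_values, (0..1000u32).map(|i| i * 2).sum::<u32>());
}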
diff --git a/src/table.rs b/src/table.rs index bfb5dd989..53ff38b2f 100644 --- a/src/table.rs +++ b/src/table.rs @@ -1861,12 +1861,23 @@ impl<'a, T> Iterator for Iter<'a, T> { type Item = &'a T; fn next(&mut self) -> Option { - self.inner.next().map(|bucket| unsafe { bucket.as_ref() }) + // Avoid `Option::map` because it bloats LLVM IR. + let bucket = self.inner.next()?; + Some(unsafe { bucket.as_ref() }) } fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner + .fold(init, |acc, bucket| unsafe { f(acc, bucket.as_ref()) }) + } } impl ExactSizeIterator for Iter<'_, T> { @@ -1894,12 +1905,23 @@ impl<'a, T> Iterator for IterMut<'a, T> { type Item = &'a mut T; fn next(&mut self) -> Option { - self.inner.next().map(|bucket| unsafe { bucket.as_mut() }) + // Avoid `Option::map` because it bloats LLVM IR. + let bucket = self.inner.next()?; + Some(unsafe { bucket.as_mut() }) } fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner + .fold(init, |acc, bucket| unsafe { f(acc, bucket.as_mut()) }) + } } impl ExactSizeIterator for IterMut<'_, T> { @@ -1940,6 +1962,14 @@ where fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + + fn fold(self, init: B, f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, f) + } } impl ExactSizeIterator for IntoIter From 83273fbd92bacfe2c13b8a0c75d9096b4905b38e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20du=20Garreau?= Date: Wed, 8 Nov 2023 15:20:01 +0100 Subject: [PATCH 2/5] Remove old code --- src/raw/mod.rs | 31 ++++--------------------------- 1 file changed, 4 insertions(+), 27 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 7d713e1cd..e3d28df1e 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -50,35 +50,12 @@ use self::imp::Group; // Branch prediction hint. This is currently only available on nightly but it // consistently improves performance by 10-15%. -// #[cfg(not(feature = "nightly"))] -// use core::convert::identity as likely; -// #[cfg(not(feature = "nightly"))] -// use core::convert::identity as unlikely; -#[cfg(feature = "nightly")] -use core::intrinsics::{likely, unlikely}; - #[cfg(not(feature = "nightly"))] -#[inline] -#[cold] -fn cold() {} - -#[cfg(not(feature = "nightly"))] -#[inline] -fn likely(b: bool) -> bool { - if !b { - cold() - } - b -} +use core::convert::identity as likely; #[cfg(not(feature = "nightly"))] -#[cold] -#[inline] -fn unlikely(b: bool) -> bool { - if b { - cold() - } - b -} +use core::convert::identity as unlikely; +#[cfg(feature = "nightly")] +use core::intrinsics::{likely, unlikely}; // Use strict provenance functions if available. #[cfg(feature = "nightly")] From 5aa1b77c7d85d1e10c655ddb7c171e616b285896 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20du=20Garreau?= Date: Wed, 8 Nov 2023 15:22:27 +0100 Subject: [PATCH 3/5] Apply suggestion: use match --- src/table.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/table.rs b/src/table.rs index 53ff38b2f..a7bb5fcc9 100644 --- a/src/table.rs +++ b/src/table.rs @@ -1862,8 +1862,10 @@ impl<'a, T> Iterator for Iter<'a, T> { fn next(&mut self) -> Option { // Avoid `Option::map` because it bloats LLVM IR. 
- let bucket = self.inner.next()?; - Some(unsafe { bucket.as_ref() }) + match self.inner.next() { + Some(bucket) => Some(unsafe { bucket.as_ref() }), + None => None, + } } fn size_hint(&self) -> (usize, Option) { @@ -1906,8 +1908,10 @@ impl<'a, T> Iterator for IterMut<'a, T> { fn next(&mut self) -> Option { // Avoid `Option::map` because it bloats LLVM IR. - let bucket = self.inner.next()?; - Some(unsafe { bucket.as_mut() }) + match self.inner.next() { + Some(bucket) => Some(unsafe { bucket.as_mut() }), + None => None, + } } fn size_hint(&self) -> (usize, Option) { From dee5c03ae50ef37265e3c6938a234ea83c9acbe0 Mon Sep 17 00:00:00 2001 From: JustForFun88 Date: Wed, 8 Nov 2023 15:52:43 +0100 Subject: [PATCH 4/5] Add comments --- src/raw/mod.rs | 59 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 6 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index e3d28df1e..cfba64daa 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -3847,8 +3847,30 @@ impl RawIterRange { } } + /// Folds every element into an accumulator by applying an operation, + /// returning the final result. + /// + /// `fold_impl()` takes three arguments: a number of items in the table, an initial value, + /// and a closure with two arguments: an 'accumulator', and an element. The closure + /// returns the value that the accumulator should have for the next iteration. + /// + /// The initial value is the value the accumulator will have on the first call. + /// + /// After applying this closure to every element of the iterator, `fold_impl()` + /// returns the accumulator. + /// /// # Safety - /// The provided `n` value must match the actual number of items + /// + /// If any of the following conditions are violated, the result is + /// [`Undefined Behavior`]: + /// + /// * The [`RawTableInner`] / [`RawTable`] must be alive and not moved, + /// i.e. the table outlives the `RawIterRange`; + /// + /// * The provided `n` value must match the actual number of items + /// in the table. + /// + /// [`Undefined Behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[allow(clippy::while_let_on_iterator)] #[cfg_attr(feature = "inline-more", inline)] unsafe fn fold_impl(mut self, mut n: usize, mut acc: B, mut f: F) -> B where F: FnMut(B, Bucket) -> B, { @@ -3861,6 +3883,8 @@ impl RawIterRange { loop { while let Some(index) = self.current_group.next() { + // The returned `index` will always be in the range `0..Group::WIDTH`, + // so that calling `self.data.next_n(index)` is safe (see detailed explanation below). debug_assert!(n != 0); let bucket = self.data.next_n(index); acc = f(acc, bucket); @@ -3871,11 +3895,34 @@ impl RawIterRange { return acc; } - // We might read past self.end up to the next group boundary, - // but this is fine because it only occurs on tables smaller - // than the group size where the trailing control bytes are all - // EMPTY. On larger tables self.end is guaranteed to be aligned - // to the group size (since tables are power-of-two sized). + // SAFETY: The caller of this function ensures that: + // + // 1. The provided `n` value matches the actual number of items in the table; + // 2. The table is alive and has not been moved. + // + // Taking the above into account, we always stay within the bounds, because: + // + // 1. For tables smaller than the group width (self.buckets() <= Group::WIDTH), + // we will never end up in the given branch, since we should have already + // yielded all the elements of the table. + // + // 2. For tables larger than the group width.
The number of buckets is a + power of two (2 ^ n), Group::WIDTH is also a power of two (2 ^ k). Since + `(2 ^ n) > (2 ^ k)`, we have `(2 ^ n) % (2 ^ k) = 0`. As we start from the + start of the array of control bytes, and never try to iterate after + getting all the elements, the last `self.current_group` will read bytes + from the `self.buckets() - Group::WIDTH` index. We also know that + `self.current_group.next()` will always return indices within the range + `0..Group::WIDTH`. + // + // Knowing all of the above and taking into account that we are synchronizing + // the `self.data` index with the index we used to read the `self.current_group`, + // the subsequent `self.data.next_n(index)` will always return a bucket with + // an index number less than `self.buckets()`. + // + // The last `self.next_ctrl`, whose index would be `self.buckets()`, will never + // actually be read, since we should have already yielded all the elements of + // the table. self.current_group = Group::load_aligned(self.next_ctrl).match_full().into_iter(); self.data = self.data.next_n(Group::WIDTH); self.next_ctrl = self.next_ctrl.add(Group::WIDTH); } } From 798ba9f591da5f391292cdfd3c963a77cdb97622 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20du=20Garreau?= Date: Wed, 22 Nov 2023 13:14:49 +0100 Subject: [PATCH 5/5] Address review comments --- src/raw/mod.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index cfba64daa..ac19d5b5b 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -3850,9 +3850,10 @@ impl RawIterRange { /// Folds every element into an accumulator by applying an operation, /// returning the final result. /// - /// `fold_impl()` takes three arguments: a number of items in the table, an initial value, - /// and a closure with two arguments: an 'accumulator', and an element. The closure - /// returns the value that the accumulator should have for the next iteration. + /// `fold_impl()` takes three arguments: the number of items remaining in + /// the iterator, an initial value, and a closure with two arguments: an + /// 'accumulator', and an element. The closure returns the value that the + /// accumulator should have for the next iteration. /// /// The initial value is the value the accumulator will have on the first call. /// /// After applying this closure to every element of the iterator, `fold_impl()` /// returns the accumulator. /// /// # Safety @@ -3877,10 +3878,6 @@ impl RawIterRange { where F: FnMut(B, Bucket) -> B, { - if n == 0 { - return acc; - } - loop { while let Some(index) = self.current_group.next() { // The returned `index` will always be in the range `0..Group::WIDTH`,
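The bounds argument in the comments above is easier to follow alongside a runnable model. The sketch below is not hashbrown's implementation: `GROUP_WIDTH`, `EMPTY`, `fold_model`, and the slice-based layout are stand-ins for the real SIMD group loads and raw-pointer buckets. It follows the same shape, though: build a bitmask of full slots per control group, drain it while decrementing the remaining-item count `n`, and stop as soon as `n` reaches zero, so no control group past the one holding the last full bucket is ever loaded.

// A safe model of the group-at-a-time fold. Preconditions mirror the real
// safety contract: `ctrl.len()` equals `data.len()`, is a nonzero multiple of
// GROUP_WIDTH, and `n` equals the number of full (non-EMPTY) control bytes.
const GROUP_WIDTH: usize = 8;
const EMPTY: u8 = 0x80; // high bit set marks an empty slot, as in hashbrown's control bytes

fn fold_model<T, B>(
    ctrl: &[u8],
    data: &[T],
    mut n: usize,
    mut acc: B,
    mut f: impl FnMut(B, &T) -> B,
) -> B {
    let mut group_start = 0;
    loop {
        // Build a bitmask of full slots in the current group, playing the role
        // of `Group::load_aligned(..).match_full()`.
        let mut mask: u8 = 0;
        for (i, &byte) in ctrl[group_start..group_start + GROUP_WIDTH].iter().enumerate() {
            if byte & EMPTY == 0 {
                mask |= 1 << i;
            }
        }
        // Drain the mask bit by bit, like the bitmask iterator behind `current_group`.
        while mask != 0 {
            let index = mask.trailing_zeros() as usize;
            mask &= mask - 1; // clear the lowest set bit
            acc = f(acc, &data[group_start + index]);
            n -= 1;
        }
        // Because `n` started out equal to the number of full buckets, it reaches
        // zero in the group holding the last full bucket, so we never advance to
        // (and never read) a control group beyond that one.
        if n == 0 {
            return acc;
        }
        group_start += GROUP_WIDTH;
    }
}

fn main() {
    // Two groups of 8 control bytes with three full slots (indices 0, 2 and 9).
    let ctrl = [
        0x01, EMPTY, 0x33, EMPTY, EMPTY, EMPTY, EMPTY, EMPTY,
        EMPTY, 0x7f, EMPTY, EMPTY, EMPTY, EMPTY, EMPTY, EMPTY,
    ];
    let data = [10, 0, 20, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0];
    assert_eq!(fold_model(&ctrl, &data, 3, 0, |acc, &x| acc + x), 60);
}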