Auto merge of #529 - ToMe25:set_ops_assign, r=Amanieu
Implement XxxAssign operations on HashSets

This PR primarily implements the `XxxAssign` operator traits for `HashSet`.

My primary motivation is convenience, but depending on the situation these implementations can also provide a significant performance improvement.\*

In my tests, which may not be ideal because I don't have much benchmarking experience, the assigning operations are at least 25% faster, with the exception of `Sub`.\*
Note that when the large and the small set are swapped, some of these are significantly slower than the non-assigning variant.
Therefore, using them is likely only worthwhile performance-wise if you already know which set is larger, and the set that gets modified in place happens to be the one you don't need to keep.

 \* Results may have changed due to #530 being merged
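For reference, here is a minimal usage sketch of the new assigning operators (toy values of my own, not taken from the benchmarks below):

```rust
use hashbrown::HashSet;

fn main() {
    let mut a: HashSet<i32> = [1, 2, 3].into_iter().collect();
    let b: HashSet<i32> = [3, 4, 5].into_iter().collect();

    // Union in place: `a` is modified instead of a new set being allocated.
    a |= &b;
    assert_eq!(a.len(), 5); // {1, 2, 3, 4, 5}

    // Difference in place: removes every element that is also in `b`.
    a -= &b;
    let expected: HashSet<i32> = [1, 2].into_iter().collect();
    assert_eq!(a, expected);

    // `&=` and `^=` work the same way.
}
```

Since the left-hand side is modified in place, the set you can afford to consume has to be the one on the left, which is what the performance note above is about.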

Here are my exact benchmark results, obtained with the newly added benchmark suite (`VER` is the implementation variant described in the comment column, `subls`/`subsl` are large minus small and small minus large, and the diff is relative to the corresponding non-assigning operation):

VER | LSIZE | SSIZE | OP | NS/ITER | DIFF (%) | COMMENT
-- | -- | -- | -- | -- | -- | --
1 | 1000 | 100 | and | 5,682.88 |   |  
1 | 1000 | 100 | or | 41,427.82 |   |  
1 | 1000 | 100 | xor | 57,404.27 |   |  
1 | 1000 | 100 | subls | 56,262.53 |   |  
1 | 1000 | 100 | subsl | 751.42 |   |  
1 | 1000 | 2 | and | 100.16 |   |  
1 | 1000 | 2 | or | 40,435.09 |   |  
1 | 1000 | 2 | xor | 59,058.05 |   |  
1 | 1000 | 2 | subls | 58,668.34 |   |  
1 | 1000 | 2 | subsl | 18.89 |   |  
1 | 1000 | 100 | or_ass | 32,888.49 | -20.61% | unconditional insert
2 | 1000 | 100 | or_ass | 29,397.04 | -29.04% | !contains insert
3 | 1000 | 100 | or_ass | 32,399.65 | -21.79% | extend iter().cloned()
4 | 1000 | 100 | or_ass | 30,693.33 | -25.91% | get_or_insert_owned
5 | 1000 | 100 | or_ass | 33,722.59 | -18.60% | calc intersection; extend rhs.iter() !intersection contains; Requires S: Clone
1 | 1000 | 100 | add_ass | 30,114.17 | -26.66% | !contains insert
1 | 1000 | 100 | xor_ass | 32,309.85 | -43.72% | contains remove else insert
2 | 1000 | 100 | xor_ass | 40,058.48 | -30.22% | extract_if rhs contains; extend !removed contains
3 | 1000 | 100 | xor_ass | 31,801.04 | -44.60% | raw_entry().from_key() replace_entry_with / insert
4 | 1000 | 100 | xor_ass | 31,935.07 | -44.37% | raw_entry().from_key_hashed_nocheck() replace_entry_with / insert_hashed_nocheck
5 | 1000 | 100 | xor_ass | 31,843.33 | -44.53% | self.map.table.get.is_none self.map.table.insert else self.map.table.remove_entry
1 | 1000 | 100 | subls_ass | 33,366.13 | -40.70% | contains remove
1 | 1000 | 100 | subsl_ass | 10,686.02 | 1322.11% | contains remove
2 | 1000 | 100 | subls_ass | 36,351.69 | -35.39% | retain !contains
2 | 1000 | 100 | subsl_ass | 3,939.67 | 424.30% | retain !contains
3 | 1000 | 100 | subls_ass | 32,012.82 | -43.10% | unconditional remove
3 | 1000 | 100 | subsl_ass | 9,908.76 | 1218.67% | unconditional remove
4 | 1000 | 100 | subls_ass | 36,232.13 | -35.60% | self.map.retain !contains
4 | 1000 | 100 | subsl_ass | 3,939.35 | 424.25% | self.map.retain !contains
5 | 1000 | 100 | subls_ass | 31,879.32 | -43.34% | if rhs smaller self unconditional remove else retain !contains
5 | 1000 | 100 | subsl_ass | 3,946.98 | 425.27% | if rhs smaller self unconditional remove else retain !contains
1 | 1000 | 2 | add_ass | 28,324.95 | -29.27% |  
2 | 1000 | 2 | or_ass | 28,322.62 | -29.96% |  
1 | 1000 | 2 | xor_ass | 29,107.31 | -50.71% |  
3 | 1000 | 2 | xor_ass | 29,026.82 | -50.85% |  
1 | 1000 | 2 | subls_ass | 29,310.04 | -50.04% |  
1 | 1000 | 2 | subsl_ass | 4,212.56 | 22200.48% |  
2 | 1000 | 2 | subls_ass | 34,074.85 | -41.92% |  
2 | 1000 | 2 | subsl_ass | 66.43 | 251.67% |  
3 | 1000 | 2 | subls_ass | 29,340.86 | -49.99% |  
3 | 1000 | 2 | subsl_ass | 5,972.25 | 31515.93% |  
5 | 1000 | 2 | subls_ass | 29,460.49 | -49.78% |  
5 | 1000 | 2 | subsl_ass | 65.32 | 245.79% |  

In addition to the assigning operators, this PR changes a few more things:
 * It changes the allocator bound on the non-assigning set operations to `A: Allocator + Default`, so their output can keep the input sets' allocator type (see the sketch below).
 * It also adds a benchmark suite for the set operations.
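To illustrate the allocator-bound change: the non-assigning operators build a brand-new set, so they need an allocator instance for it, which is what the added `Default` bound provides. A minimal sketch with the default `Global` allocator (my own example, types spelled out for clarity):

```rust
use hashbrown::HashSet;

fn main() {
    let a: HashSet<i32> = [1, 2, 3].into_iter().collect();
    let b: HashSet<i32> = [3, 4].into_iter().collect();

    // The result is now typed `HashSet<T, S, A>` instead of `HashSet<T, S>`,
    // so it keeps the inputs' allocator type. Building it requires
    // `A: Default`, which `Global` satisfies trivially.
    let union: HashSet<i32> = &a | &b;
    assert_eq!(union.len(), 4); // {1, 2, 3, 4}
}
```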
bors committed Jun 18, 2024
2 parents 65c553d + 481ef39 commit f0eece4
Showing 2 changed files with 307 additions and 13 deletions.
148 changes: 148 additions & 0 deletions benches/set_ops.rs
@@ -0,0 +1,148 @@
//! This file contains benchmarks for the ops traits implemented by HashSet.
//! Each test is intended to have a defined larger and smaller set,
//! but using a larger size for the "small" set works just as well.
//!
//! Each assigning test is done in the configuration that is faster. Cheating, I know.
//! The exception to this is Sub, because there the result differs. So I made two benchmarks for Sub.
#![feature(test)]

extern crate test;

use hashbrown::HashSet;
use test::Bencher;

/// The number of items to generate for the larger of the sets.
const LARGE_SET_SIZE: usize = 1000;

/// The number of items to generate for the smaller of the sets.
const SMALL_SET_SIZE: usize = 100;

/// The number of keys present in both sets.
const OVERLAPP: usize =
[LARGE_SET_SIZE, SMALL_SET_SIZE][(LARGE_SET_SIZE < SMALL_SET_SIZE) as usize] / 2;

/// Creates a set containing end - start unique string elements.
fn create_set(start: usize, end: usize) -> HashSet<String> {
(start..end).map(|nr| format!("key{}", nr)).collect()
}

#[bench]
fn set_ops_bit_or(b: &mut Bencher) {
let large_set = create_set(0, LARGE_SET_SIZE);
let small_set = create_set(
LARGE_SET_SIZE - OVERLAPP,
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
);
b.iter(|| &large_set | &small_set)
}

#[bench]
fn set_ops_bit_and(b: &mut Bencher) {
let large_set = create_set(0, LARGE_SET_SIZE);
let small_set = create_set(
LARGE_SET_SIZE - OVERLAPP,
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
);
b.iter(|| &large_set & &small_set)
}

#[bench]
fn set_ops_bit_xor(b: &mut Bencher) {
let large_set = create_set(0, LARGE_SET_SIZE);
let small_set = create_set(
LARGE_SET_SIZE - OVERLAPP,
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
);
b.iter(|| &large_set ^ &small_set)
}

#[bench]
fn set_ops_sub_large_small(b: &mut Bencher) {
let large_set = create_set(0, LARGE_SET_SIZE);
let small_set = create_set(
LARGE_SET_SIZE - OVERLAPP,
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
);
b.iter(|| &large_set - &small_set)
}

#[bench]
fn set_ops_sub_small_large(b: &mut Bencher) {
let large_set = create_set(0, LARGE_SET_SIZE);
let small_set = create_set(
LARGE_SET_SIZE - OVERLAPP,
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
);
b.iter(|| &small_set - &large_set)
}

#[bench]
fn set_ops_bit_or_assign(b: &mut Bencher) {
let large_set = create_set(0, LARGE_SET_SIZE);
let small_set = create_set(
LARGE_SET_SIZE - OVERLAPP,
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
);
b.iter(|| {
let mut set = large_set.clone();
set |= &small_set;
set
});
}

#[bench]
fn set_ops_bit_and_assign(b: &mut Bencher) {
let large_set = create_set(0, LARGE_SET_SIZE);
let small_set = create_set(
LARGE_SET_SIZE - OVERLAPP,
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
);
b.iter(|| {
let mut set = small_set.clone();
set &= &large_set;
set
});
}

#[bench]
fn set_ops_bit_xor_assign(b: &mut Bencher) {
let large_set = create_set(0, LARGE_SET_SIZE);
let small_set = create_set(
LARGE_SET_SIZE - OVERLAPP,
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
);
b.iter(|| {
let mut set = large_set.clone();
set ^= &small_set;
set
});
}

#[bench]
fn set_ops_sub_assign_large_small(b: &mut Bencher) {
let large_set = create_set(0, LARGE_SET_SIZE);
let small_set = create_set(
LARGE_SET_SIZE - OVERLAPP,
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
);
b.iter(|| {
let mut set = large_set.clone();
set -= &small_set;
set
});
}

#[bench]
fn set_ops_sub_assign_small_large(b: &mut Bencher) {
let large_set = create_set(0, LARGE_SET_SIZE);
let small_set = create_set(
LARGE_SET_SIZE - OVERLAPP,
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
);
b.iter(|| {
let mut set = small_set.clone();
set -= &large_set;
set
});
}
172 changes: 159 additions & 13 deletions src/set.rs
@@ -5,7 +5,7 @@ use alloc::borrow::ToOwned;
use core::fmt;
use core::hash::{BuildHasher, Hash};
use core::iter::{Chain, FusedIterator};
use core::ops::{BitAnd, BitOr, BitXor, Sub};
use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Sub, SubAssign};

use super::map::{self, DefaultHashBuilder, HashMap, Keys};
use crate::raw::{Allocator, Global, RawExtractIf};
@@ -1410,9 +1410,9 @@ impl<T, S, A> BitOr<&HashSet<T, S, A>> for &HashSet<T, S, A>
where
T: Eq + Hash + Clone,
S: BuildHasher + Default,
A: Allocator,
A: Allocator + Default,
{
type Output = HashSet<T, S>;
type Output = HashSet<T, S, A>;

/// Returns the union of `self` and `rhs` as a new `HashSet<T, S>`.
///
@@ -1434,7 +1434,7 @@ where
/// }
/// assert_eq!(i, expected.len());
/// ```
fn bitor(self, rhs: &HashSet<T, S, A>) -> HashSet<T, S> {
fn bitor(self, rhs: &HashSet<T, S, A>) -> HashSet<T, S, A> {
self.union(rhs).cloned().collect()
}
}
@@ -1443,9 +1443,9 @@ impl<T, S, A> BitAnd<&HashSet<T, S, A>> for &HashSet<T, S, A>
where
T: Eq + Hash + Clone,
S: BuildHasher + Default,
A: Allocator,
A: Allocator + Default,
{
type Output = HashSet<T, S>;
type Output = HashSet<T, S, A>;

/// Returns the intersection of `self` and `rhs` as a new `HashSet<T, S>`.
///
@@ -1467,17 +1467,18 @@ where
/// }
/// assert_eq!(i, expected.len());
/// ```
fn bitand(self, rhs: &HashSet<T, S, A>) -> HashSet<T, S> {
fn bitand(self, rhs: &HashSet<T, S, A>) -> HashSet<T, S, A> {
self.intersection(rhs).cloned().collect()
}
}

impl<T, S> BitXor<&HashSet<T, S>> for &HashSet<T, S>
impl<T, S, A> BitXor<&HashSet<T, S, A>> for &HashSet<T, S, A>
where
T: Eq + Hash + Clone,
S: BuildHasher + Default,
A: Allocator + Default,
{
type Output = HashSet<T, S>;
type Output = HashSet<T, S, A>;

/// Returns the symmetric difference of `self` and `rhs` as a new `HashSet<T, S>`.
///
@@ -1499,17 +1500,18 @@ where
/// }
/// assert_eq!(i, expected.len());
/// ```
fn bitxor(self, rhs: &HashSet<T, S>) -> HashSet<T, S> {
fn bitxor(self, rhs: &HashSet<T, S, A>) -> HashSet<T, S, A> {
self.symmetric_difference(rhs).cloned().collect()
}
}

impl<T, S> Sub<&HashSet<T, S>> for &HashSet<T, S>
impl<T, S, A> Sub<&HashSet<T, S, A>> for &HashSet<T, S, A>
where
T: Eq + Hash + Clone,
S: BuildHasher + Default,
A: Allocator + Default,
{
type Output = HashSet<T, S>;
type Output = HashSet<T, S, A>;

/// Returns the difference of `self` and `rhs` as a new `HashSet<T, S>`.
///
@@ -1531,11 +1533,155 @@ where
/// }
/// assert_eq!(i, expected.len());
/// ```
fn sub(self, rhs: &HashSet<T, S>) -> HashSet<T, S> {
fn sub(self, rhs: &HashSet<T, S, A>) -> HashSet<T, S, A> {
self.difference(rhs).cloned().collect()
}
}

impl<T, S, A> BitOrAssign<&HashSet<T, S, A>> for HashSet<T, S, A>
where
T: Eq + Hash + Clone,
S: BuildHasher,
A: Allocator,
{
/// Modifies this set to contain the union of `self` and `rhs`.
///
/// # Examples
///
/// ```
/// use hashbrown::HashSet;
///
/// let mut a: HashSet<_> = vec![1, 2, 3].into_iter().collect();
/// let b: HashSet<_> = vec![3, 4, 5].into_iter().collect();
///
/// a |= &b;
///
/// let mut i = 0;
/// let expected = [1, 2, 3, 4, 5];
/// for x in &a {
/// assert!(expected.contains(x));
/// i += 1;
/// }
/// assert_eq!(i, expected.len());
/// ```
fn bitor_assign(&mut self, rhs: &HashSet<T, S, A>) {
for item in rhs {
if !self.contains(item) {
self.insert(item.clone());
}
}
}
}

impl<T, S, A> BitAndAssign<&HashSet<T, S, A>> for HashSet<T, S, A>
where
T: Eq + Hash + Clone,
S: BuildHasher,
A: Allocator,
{
/// Modifies this set to contain the intersection of `self` and `rhs`.
///
/// # Examples
///
/// ```
/// use hashbrown::HashSet;
///
/// let mut a: HashSet<_> = vec![1, 2, 3].into_iter().collect();
/// let b: HashSet<_> = vec![2, 3, 4].into_iter().collect();
///
/// a &= &b;
///
/// let mut i = 0;
/// let expected = [2, 3];
/// for x in &a {
/// assert!(expected.contains(x));
/// i += 1;
/// }
/// assert_eq!(i, expected.len());
/// ```
fn bitand_assign(&mut self, rhs: &HashSet<T, S, A>) {
self.retain(|item| rhs.contains(item));
}
}

impl<T, S, A> BitXorAssign<&HashSet<T, S, A>> for HashSet<T, S, A>
where
T: Eq + Hash + Clone,
S: BuildHasher,
A: Allocator,
{
/// Modifies this set to contain the symmetric difference of `self` and `rhs`.
///
/// # Examples
///
/// ```
/// use hashbrown::HashSet;
///
/// let mut a: HashSet<_> = vec![1, 2, 3].into_iter().collect();
/// let b: HashSet<_> = vec![3, 4, 5].into_iter().collect();
///
/// a ^= &b;
///
/// let mut i = 0;
/// let expected = [1, 2, 4, 5];
/// for x in &a {
/// assert!(expected.contains(x));
/// i += 1;
/// }
/// assert_eq!(i, expected.len());
/// ```
fn bitxor_assign(&mut self, rhs: &HashSet<T, S, A>) {
for item in rhs {
let entry = self.map.raw_entry_mut().from_key(item);
match entry {
map::RawEntryMut::Occupied(e) => {
e.remove();
}
map::RawEntryMut::Vacant(e) => {
e.insert(item.to_owned(), ());
}
};
}
}
}

impl<T, S, A> SubAssign<&HashSet<T, S, A>> for HashSet<T, S, A>
where
T: Eq + Hash + Clone,
S: BuildHasher,
A: Allocator,
{
/// Modifies this set to contain the difference of `self` and `rhs`.
///
/// # Examples
///
/// ```
/// use hashbrown::HashSet;
///
/// let mut a: HashSet<_> = vec![1, 2, 3].into_iter().collect();
/// let b: HashSet<_> = vec![3, 4, 5].into_iter().collect();
///
/// a -= &b;
///
/// let mut i = 0;
/// let expected = [1, 2];
/// for x in &a {
/// assert!(expected.contains(x));
/// i += 1;
/// }
/// assert_eq!(i, expected.len());
/// ```
fn sub_assign(&mut self, rhs: &HashSet<T, S, A>) {
if rhs.len() < self.len() {
for item in rhs {
self.remove(item);
}
} else {
self.retain(|item| !rhs.contains(item));
}
}
}

/// An iterator over the items of a `HashSet`.
///
/// This `struct` is created by the [`iter`] method on [`HashSet`].
