Skip to content

Commit

Permalink
Reorganize hasher and add hasher benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
ogxd committed Nov 8, 2023
1 parent cc2f4ea commit 1238ff7
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 64 deletions.
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,8 @@ harness = false

[[bench]]
name = "ilp"
harness = false

[[bench]]
name = "hashset"
harness = false
47 changes: 47 additions & 0 deletions benches/hashset.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
use criterion::{criterion_group, criterion_main, Criterion};
use fnv::FnvHashSet;
use gxhash::*;
use twox_hash::xxh3;
use std::collections::HashSet;
use std::hash::BuildHasherDefault;

/// Criterion entry point: runs the `HashSet` insertion benchmarks once per sample key,
/// going from the longest key to the shortest.
fn hashmap_insertion(c: &mut Criterion) {
    let keys = [
        // Long key
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.",
        // Medium key
        "https://github.com/ogxd/gxhash",
        // Short key
        "gxhash",
    ];

    for key in keys {
        benchmark_for_string(c, key);
    }
}

/// Benchmarks `insert` of a single string key into a hash set, once per hasher.
///
/// The four measurements are reported under one Criterion group named
/// `HashSet<&str[N]>`, where `N` is the byte length of `string`.
///
/// Every set is built once, outside the timed closure, so the same set is reused
/// across all iterations of its benchmark.
fn benchmark_for_string(c: &mut Criterion, string: &str) {
    let group_name = format!("HashSet<&str[{}]>", string.len());
    let mut group = c.benchmark_group(group_name);

    // Standard library set with its default hasher.
    let mut std_set = HashSet::new();
    group.bench_function("Default Hasher", |b| b.iter(|| std_set.insert(string)));

    // Set backed by GxHash.
    let mut gx_set = GxHashSet::default();
    group.bench_function("GxHash", |b| b.iter(|| gx_set.insert(string)));

    // Set backed by the `xxh3::Hash64` hasher from `twox_hash`.
    let mut xx_set = HashSet::<&str, BuildHasherDefault<xxh3::Hash64>>::default();
    group.bench_function("XxHash", |b| b.iter(|| xx_set.insert(string)));

    // Set backed by the `fnv` crate's hasher.
    let mut fnv_set = FnvHashSet::default();
    group.bench_function("FNV-1a", |b| b.iter(|| fnv_set.insert(string)));

    group.finish();
}

// Register `hashmap_insertion` in the `benches` group and let Criterion generate `main`
// (this bench target is declared with `harness = false` in Cargo.toml).
criterion_group!(benches, hashmap_insertion);
criterion_main!(benches);
65 changes: 2 additions & 63 deletions src/gxhash/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
mod platform;
pub(crate) mod platform;

use platform::*;

Expand Down Expand Up @@ -38,7 +38,7 @@ const RANGE_3_BEGIN: isize = RANGE_2_BEGIN + 1;
const RANGE_3_END: isize = VECTOR_SIZE * 4;

#[inline(always)]
unsafe fn gxhash(input: &[u8], seed: State) -> State {
pub(crate) unsafe fn gxhash(input: &[u8], seed: State) -> State {

let len: isize = input.len() as isize;
let mut ptr = input.as_ptr() as *const State;
Expand Down Expand Up @@ -114,74 +114,13 @@ unsafe fn gxhash_process_1(mut ptr: *const State, hash_vector: State, remaining_
(hash_vector, remaining_bytes, ptr)
}

use std::hash::{Hasher, BuildHasherDefault};
use std::collections::{HashMap, HashSet};

/// A [`Hasher`] implementation whose whole state is a single gxhash `State` value.
pub struct GxHasher(State);

impl Default for GxHasher {
    /// Builds a hasher whose initial state is whatever `create_empty()` returns.
    #[inline]
    fn default() -> Self {
        Self(unsafe { create_empty() })
    }
}

impl GxHasher {
    /// Creates a hasher whose initial state is derived from `seed`.
    ///
    /// The seed is first turned into a `State` by `create_seed`, and that value is then
    /// used as the seed of a `gxhash` call over an empty input; the result becomes the
    /// hasher's starting state.
    #[inline]
    pub fn with_seed(seed: i32) -> GxHasher {
        let initial_state = unsafe {
            let seed_state = create_seed(seed);
            gxhash(&[], seed_state)
        };
        GxHasher(initial_state)
    }
}

impl Hasher for GxHasher {
    /// Returns the `u64` stored at the start of the current state.
    #[inline]
    fn finish(&self) -> u64 {
        let state_ptr: *const State = &self.0;
        // NOTE(review): assumes `State` is at least 8 bytes wide and suitably aligned
        // for a `u64` read — confirm against the platform definitions.
        unsafe { *state_ptr.cast::<u64>() }
    }

    /// Hashes `bytes` using the current state as the seed and stores the result as the
    /// new state.
    #[inline]
    fn write(&mut self, bytes: &[u8]) {
        let seed = self.0;
        self.0 = unsafe { gxhash(bytes, seed) };
    }
}

/// A builder for default GxHash hashers.
pub type GxBuildHasher = BuildHasherDefault<GxHasher>;

/// A `HashMap` using a default GxHash hasher.
//#[cfg(feature = "std")]
pub type GxHashMap<K, V> = HashMap<K, V, GxBuildHasher>;

/// A `HashSet` using a default GxHash hasher.
//#[cfg(feature = "std")]
pub type GxHashSet<T> = HashSet<T, GxBuildHasher>;

#[cfg(test)]
mod tests {

use super::*;
use ahash::HashSetExt;
use rand::Rng;
use rstest::rstest;

#[test]
fn hasher_works() {
let mut hashset = GxHashSet::new();
assert!(hashset.insert(1234));
assert!(!hashset.insert(1234));
assert!(hashset.insert(42));

let mut hashset = GxHashSet::new();
assert!(hashset.insert("hello"));
assert!(hashset.insert("world"));
assert!(!hashset.insert("hello"));
assert!(hashset.insert("bye"));
}

#[rstest]
#[case(4)]
#[case(16)]
Expand Down
67 changes: 67 additions & 0 deletions src/hasher.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
use std::hash::{Hasher, BuildHasherDefault};
use std::collections::{HashMap, HashSet};
use crate::gxhash::*;
use crate::gxhash::platform::*;

/// A [`Hasher`] implementation whose whole state is a single gxhash `State` value.
pub struct GxHasher(State);

impl Default for GxHasher {
    /// Builds a hasher whose initial state is whatever `create_empty()` returns.
    #[inline]
    fn default() -> Self {
        Self(unsafe { create_empty() })
    }
}

impl GxHasher {
    /// Creates a hasher whose initial state is derived from `seed`.
    ///
    /// The seed is first turned into a `State` by `create_seed`, and that value is then
    /// used as the seed of a `gxhash` call over an empty input; the result becomes the
    /// hasher's starting state.
    #[inline]
    pub fn with_seed(seed: i32) -> GxHasher {
        let initial_state = unsafe {
            let seed_state = create_seed(seed);
            gxhash(&[], seed_state)
        };
        GxHasher(initial_state)
    }
}

impl Hasher for GxHasher {
    /// Returns the `u64` stored at the start of the current state.
    #[inline]
    fn finish(&self) -> u64 {
        let state_ptr: *const State = &self.0;
        // NOTE(review): assumes `State` is at least 8 bytes wide and suitably aligned
        // for a `u64` read — confirm against the platform definitions.
        unsafe { *state_ptr.cast::<u64>() }
    }

    /// Hashes `bytes` using the current state as the seed and stores the result as the
    /// new state.
    #[inline]
    fn write(&mut self, bytes: &[u8]) {
        let seed = self.0;
        self.0 = unsafe { gxhash(bytes, seed) };
    }
}

/// A builder for default GxHash hashers.
pub type GxBuildHasher = BuildHasherDefault<GxHasher>;

/// A `HashMap` using a default GxHash hasher.
//#[cfg(feature = "std")]
pub type GxHashMap<K, V> = HashMap<K, V, GxBuildHasher>;

/// A `HashSet` using a default GxHash hasher.
//#[cfg(feature = "std")]
pub type GxHashSet<T> = HashSet<T, GxBuildHasher>;

#[cfg(test)]
mod tests {

    use super::*;

    /// A `GxHashSet` must accept new values and reject duplicates, for both integer
    /// and string keys.
    #[test]
    fn hasher_works() {
        // Integer keys: the second insert of 1234 is a duplicate.
        let mut numbers = GxHashSet::default();
        assert!(numbers.insert(1234));
        assert!(!numbers.insert(1234));
        assert!(numbers.insert(42));

        // String keys: the second insert of "hello" is a duplicate.
        let mut words = GxHashSet::default();
        assert!(words.insert("hello"));
        assert!(words.insert("world"));
        assert!(!words.insert("hello"));
        assert!(words.insert("bye"));
    }
}
4 changes: 3 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,7 @@
#![feature(stdsimd)]

mod gxhash;
mod hasher;

pub use gxhash::*;
pub use gxhash::*;
pub use hasher::*;

0 comments on commit 1238ff7

Please sign in to comment.