From 796558b04a2a60f049a2c57ace0b9aceaf8bd7a1 Mon Sep 17 00:00:00 2001 From: Olivier Giniaux Date: Sat, 18 Nov 2023 21:58:54 +0100 Subject: [PATCH] Only finalize on Hasher::finish --- Cargo.toml | 2 +- benches/hashset.rs | 6 ++++-- src/gxhash/mod.rs | 4 ++-- src/gxhash/platform/arm_128.rs | 3 +-- src/gxhash/platform/x86_128.rs | 5 ++--- src/gxhash/platform/x86_256.rs | 5 ++--- src/hasher.rs | 37 ++++++++++++++++++++++++---------- 7 files changed, 38 insertions(+), 24 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3901ce6..08048b8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "gxhash" authors = ["Olivier Giniaux"] -version = "2.2.1" +version = "2.2.2" edition = "2021" description = "GxHash non-cryptographic algorithm" license = "MIT" diff --git a/benches/hashset.rs b/benches/hashset.rs index b7a840f..5657284 100644 --- a/benches/hashset.rs +++ b/benches/hashset.rs @@ -1,5 +1,5 @@ use ahash::AHashSet; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{criterion_group, criterion_main, Criterion, Throughput}; use fnv::FnvHashSet; use gxhash::*; use twox_hash::xxh3; @@ -19,8 +19,10 @@ fn hashmap_insertion(c: &mut Criterion) { } fn benchmark_for_string(c: &mut Criterion, string: &str) { + let mut group = c.benchmark_group(format!("HashSet<&str[{}]>", string.len())); - + group.throughput(Throughput::Bytes(string.as_bytes().len() as u64)); + let mut set = HashSet::new(); group.bench_function("Default Hasher", |b| { b.iter(|| set.insert(string)) diff --git a/src/gxhash/mod.rs b/src/gxhash/mod.rs index 08a8833..ae0fd9a 100644 --- a/src/gxhash/mod.rs +++ b/src/gxhash/mod.rs @@ -68,11 +68,11 @@ macro_rules! load_unaligned { #[inline(always)] pub(crate) unsafe fn gxhash(input: &[u8], seed: State) -> State { - finalize(compress_all(input), seed) + finalize(compress_fast(compress_all(input), seed)) } #[inline(always)] -unsafe fn compress_all(input: &[u8]) -> State { +pub(crate) unsafe fn compress_all(input: &[u8]) -> State { let len = input.len(); let mut ptr = input.as_ptr() as *const State; diff --git a/src/gxhash/platform/arm_128.rs b/src/gxhash/platform/arm_128.rs index 44b875e..8e5e151 100644 --- a/src/gxhash/platform/arm_128.rs +++ b/src/gxhash/platform/arm_128.rs @@ -95,7 +95,7 @@ unsafe fn aes_encrypt_last(data: uint8x16_t, keys: uint8x16_t) -> uint8x16_t { } #[inline(always)] -pub unsafe fn finalize(hash: State, seed: State) -> State { +pub unsafe fn finalize(hash: State) -> State { // Hardcoded AES keys let keys_1 = vld1q_u32([0x713B01D0, 0x8F2F35DB, 0xAF163956, 0x85459F85].as_ptr()); let keys_2 = vld1q_u32([0x1DE09647, 0x92CFA39C, 0x3DD99ACA, 0xB89C054F].as_ptr()); @@ -103,7 +103,6 @@ pub unsafe fn finalize(hash: State, seed: State) -> State { // 3 rounds of AES let mut hash = ReinterpretUnion{ int8: hash }.uint8; - hash = aes_encrypt(hash, ReinterpretUnion{ int8: seed }.uint8); hash = aes_encrypt(hash, ReinterpretUnion{ uint32: keys_1 }.uint8); hash = aes_encrypt(hash, ReinterpretUnion{ uint32: keys_2 }.uint8); hash = aes_encrypt_last(hash, ReinterpretUnion{ uint32: keys_3 }.uint8); diff --git a/src/gxhash/platform/x86_128.rs b/src/gxhash/platform/x86_128.rs index ed16d74..0ea9e41 100644 --- a/src/gxhash/platform/x86_128.rs +++ b/src/gxhash/platform/x86_128.rs @@ -68,15 +68,14 @@ pub unsafe fn compress_fast(a: State, b: State) -> State { #[inline(always)] #[allow(overflowing_literals)] -pub unsafe fn finalize(hash: State, seed: State) -> State { +pub unsafe fn finalize(hash: State) -> State { // Hardcoded AES keys let keys_1 = _mm_set_epi32(0x85459F85, 0xAF163956, 0x8F2F35DB, 0x713B01D0); let keys_2 = _mm_set_epi32(0xB89C054F, 0x3DD99ACA, 0x92CFA39C, 0x1DE09647); let keys_3 = _mm_set_epi32(0xD0012E32, 0x689D2B7D, 0x5544B1B7, 0xC78B122B); // 4 rounds of AES - let mut hash = _mm_aesenc_si128(hash, seed); - hash = _mm_aesenc_si128(hash, keys_1); + let mut hash = _mm_aesenc_si128(hash, keys_1); hash = _mm_aesenc_si128(hash, keys_2); hash = _mm_aesenclast_si128(hash, keys_3); diff --git a/src/gxhash/platform/x86_256.rs b/src/gxhash/platform/x86_256.rs index 363dc15..4909039 100644 --- a/src/gxhash/platform/x86_256.rs +++ b/src/gxhash/platform/x86_256.rs @@ -68,15 +68,14 @@ pub unsafe fn compress_fast(a: State, b: State) -> State { #[inline(always)] #[allow(overflowing_literals)] -pub unsafe fn finalize(hash: State, seed: State) -> State { +pub unsafe fn finalize(hash: State) -> State { // Hardcoded AES keys let keys_1 = _mm256_set_epi32(0x713B01D0, 0x8F2F35DB, 0xAF163956, 0x85459F85, 0xB49D3E21, 0xF2784542, 0x2155EE07, 0xC197CCE2); let keys_2 = _mm256_set_epi32(0x1DE09647, 0x92CFA39C, 0x3DD99ACA, 0xB89C054F, 0xCB6B2E9B, 0xC361DC58, 0x39136BD9, 0x7A83D76F); let keys_3 = _mm256_set_epi32(0xC78B122B, 0x5544B1B7, 0x689D2B7D, 0xD0012E32, 0xE2784542, 0x4155EE07, 0xC897CCE2, 0x780BF2C2); // 4 rounds of AES - let mut hash = _mm256_aesenc_epi128(hash, seed); - hash = _mm256_aesenc_epi128(hash, keys_1); + let mut hash = _mm256_aesenc_epi128(hash, keys_1); hash = _mm256_aesenc_epi128(hash, keys_2); hash = _mm256_aesenclast_epi128(hash, keys_3); diff --git a/src/hasher.rs b/src/hasher.rs index 01a8bd7..4c29a7a 100644 --- a/src/hasher.rs +++ b/src/hasher.rs @@ -1,7 +1,8 @@ use std::hash::{Hasher, BuildHasher}; use std::collections::{HashMap, HashSet}; +use std::mem::MaybeUninit; -use rand::Rng; +use rand::{Rng, RngCore}; use crate::gxhash::*; use crate::gxhash::platform::*; @@ -13,7 +14,16 @@ use crate::gxhash::platform::*; /// - DOS resistance thanks to seed randomization when using [`GxHasher::default()`] /// /// *1There might me faster alternatives, such as `fxhash` for very small input sizes, but that usually have low quality properties.* -pub struct GxHasher(State); +pub struct GxHasher { + state: State +} + +impl GxHasher { + #[inline] + fn with_state(state: State) -> GxHasher { + GxHasher { state: state } + } +} impl Default for GxHasher { /// Creates a new hasher with a empty seed. @@ -38,7 +48,7 @@ impl Default for GxHasher { /// ``` #[inline] fn default() -> GxHasher { - GxHasher(unsafe { create_empty() }) + GxHasher::with_state(unsafe { create_empty() }) } } @@ -66,7 +76,7 @@ impl GxHasher { #[inline] pub fn with_seed(seed: i64) -> GxHasher { // Use gxhash64 to generate an initial state from a seed - GxHasher(unsafe { gxhash(&[], create_seed(seed)) }) + GxHasher::with_state(unsafe { create_seed(seed) }) } /// Finish this hasher and return the hashed value as a 128 bit @@ -76,7 +86,7 @@ impl GxHasher { debug_assert!(std::mem::size_of::() >= std::mem::size_of::()); unsafe { - let p = &self.0 as *const State as *const u128; + let p = &finalize(self.state) as *const State as *const u128; *p } } @@ -86,7 +96,7 @@ impl Hasher for GxHasher { #[inline] fn finish(&self) -> u64 { unsafe { - let p = &self.0 as *const State as *const u64; + let p = &finalize(self.state) as *const State as *const u64; *p } } @@ -94,19 +104,24 @@ impl Hasher for GxHasher { #[inline] fn write(&mut self, bytes: &[u8]) { // Improvement: only compress at this stage and finalize in finish - self.0 = unsafe { gxhash(bytes, self.0) }; + self.state = unsafe { compress_fast(compress_all(bytes), self.state) }; } } /// A builder for building GxHasher with randomized seeds by default, for improved DOS resistance. -pub struct GxBuildHasher(i64); +pub struct GxBuildHasher(State); impl Default for GxBuildHasher { #[inline] fn default() -> GxBuildHasher { + let mut uninit: MaybeUninit = MaybeUninit::uninit(); let mut rng = rand::thread_rng(); - let seed: i64 = rng.gen::(); - GxBuildHasher(seed) + unsafe { + let ptr = uninit.as_mut_ptr() as *mut u8; + let slice = std::slice::from_raw_parts_mut(ptr, VECTOR_SIZE); + rng.fill_bytes(slice); + GxBuildHasher(uninit.assume_init()) + } } } @@ -114,7 +129,7 @@ impl BuildHasher for GxBuildHasher { type Hasher = GxHasher; #[inline] fn build_hasher(&self) -> GxHasher { - GxHasher::with_seed(self.0) + GxHasher::with_state(self.0) } }