Skip to content

Commit

Permalink
Only finalize on Hasher::finish
Browse files Browse the repository at this point in the history
  • Loading branch information
ogxd committed Nov 18, 2023
1 parent 50f4410 commit 796558b
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 24 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "gxhash"
authors = ["Olivier Giniaux"]
version = "2.2.1"
version = "2.2.2"
edition = "2021"
description = "GxHash non-cryptographic algorithm"
license = "MIT"
Expand Down
6 changes: 4 additions & 2 deletions benches/hashset.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use ahash::AHashSet;
use criterion::{criterion_group, criterion_main, Criterion};
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use fnv::FnvHashSet;
use gxhash::*;
use twox_hash::xxh3;
Expand All @@ -19,8 +19,10 @@ fn hashmap_insertion(c: &mut Criterion) {
}

fn benchmark_for_string(c: &mut Criterion, string: &str) {

let mut group = c.benchmark_group(format!("HashSet<&str[{}]>", string.len()));

group.throughput(Throughput::Bytes(string.as_bytes().len() as u64));

let mut set = HashSet::new();
group.bench_function("Default Hasher", |b| {
b.iter(|| set.insert(string))
Expand Down
4 changes: 2 additions & 2 deletions src/gxhash/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,11 @@ macro_rules! load_unaligned {

#[inline(always)]
pub(crate) unsafe fn gxhash(input: &[u8], seed: State) -> State {
finalize(compress_all(input), seed)
finalize(compress_fast(compress_all(input), seed))
}

#[inline(always)]
unsafe fn compress_all(input: &[u8]) -> State {
pub(crate) unsafe fn compress_all(input: &[u8]) -> State {

let len = input.len();
let mut ptr = input.as_ptr() as *const State;
Expand Down
3 changes: 1 addition & 2 deletions src/gxhash/platform/arm_128.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,15 +95,14 @@ unsafe fn aes_encrypt_last(data: uint8x16_t, keys: uint8x16_t) -> uint8x16_t {
}

#[inline(always)]
pub unsafe fn finalize(hash: State, seed: State) -> State {
pub unsafe fn finalize(hash: State) -> State {
// Hardcoded AES keys
let keys_1 = vld1q_u32([0x713B01D0, 0x8F2F35DB, 0xAF163956, 0x85459F85].as_ptr());
let keys_2 = vld1q_u32([0x1DE09647, 0x92CFA39C, 0x3DD99ACA, 0xB89C054F].as_ptr());
let keys_3 = vld1q_u32([0xC78B122B, 0x5544B1B7, 0x689D2B7D, 0xD0012E32].as_ptr());

// 3 rounds of AES
let mut hash = ReinterpretUnion{ int8: hash }.uint8;
hash = aes_encrypt(hash, ReinterpretUnion{ int8: seed }.uint8);
hash = aes_encrypt(hash, ReinterpretUnion{ uint32: keys_1 }.uint8);
hash = aes_encrypt(hash, ReinterpretUnion{ uint32: keys_2 }.uint8);
hash = aes_encrypt_last(hash, ReinterpretUnion{ uint32: keys_3 }.uint8);
Expand Down
5 changes: 2 additions & 3 deletions src/gxhash/platform/x86_128.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,14 @@ pub unsafe fn compress_fast(a: State, b: State) -> State {

#[inline(always)]
#[allow(overflowing_literals)]
pub unsafe fn finalize(hash: State, seed: State) -> State {
pub unsafe fn finalize(hash: State) -> State {
// Hardcoded AES keys
let keys_1 = _mm_set_epi32(0x85459F85, 0xAF163956, 0x8F2F35DB, 0x713B01D0);
let keys_2 = _mm_set_epi32(0xB89C054F, 0x3DD99ACA, 0x92CFA39C, 0x1DE09647);
let keys_3 = _mm_set_epi32(0xD0012E32, 0x689D2B7D, 0x5544B1B7, 0xC78B122B);

// 3 rounds of AES
let mut hash = _mm_aesenc_si128(hash, seed);
hash = _mm_aesenc_si128(hash, keys_1);
let mut hash = _mm_aesenc_si128(hash, keys_1);
hash = _mm_aesenc_si128(hash, keys_2);
hash = _mm_aesenclast_si128(hash, keys_3);

Expand Down
5 changes: 2 additions & 3 deletions src/gxhash/platform/x86_256.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,14 @@ pub unsafe fn compress_fast(a: State, b: State) -> State {

#[inline(always)]
#[allow(overflowing_literals)]
pub unsafe fn finalize(hash: State, seed: State) -> State {
pub unsafe fn finalize(hash: State) -> State {
// Hardcoded AES keys
let keys_1 = _mm256_set_epi32(0x713B01D0, 0x8F2F35DB, 0xAF163956, 0x85459F85, 0xB49D3E21, 0xF2784542, 0x2155EE07, 0xC197CCE2);
let keys_2 = _mm256_set_epi32(0x1DE09647, 0x92CFA39C, 0x3DD99ACA, 0xB89C054F, 0xCB6B2E9B, 0xC361DC58, 0x39136BD9, 0x7A83D76F);
let keys_3 = _mm256_set_epi32(0xC78B122B, 0x5544B1B7, 0x689D2B7D, 0xD0012E32, 0xE2784542, 0x4155EE07, 0xC897CCE2, 0x780BF2C2);

// 3 rounds of AES
let mut hash = _mm256_aesenc_epi128(hash, seed);
hash = _mm256_aesenc_epi128(hash, keys_1);
let mut hash = _mm256_aesenc_epi128(hash, keys_1);
hash = _mm256_aesenc_epi128(hash, keys_2);
hash = _mm256_aesenclast_epi128(hash, keys_3);

Expand Down
37 changes: 26 additions & 11 deletions src/hasher.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use std::hash::{Hasher, BuildHasher};
use std::collections::{HashMap, HashSet};
use std::mem::MaybeUninit;

use rand::Rng;
use rand::{Rng, RngCore};

use crate::gxhash::*;
use crate::gxhash::platform::*;
Expand All @@ -13,7 +14,16 @@ use crate::gxhash::platform::*;
/// - DOS resistance thanks to seed randomization when using [`GxBuildHasher::default()`]
///
/// *<sup>1</sup>There might be faster alternatives, such as `fxhash` for very small input sizes, but those usually have low-quality properties.*
pub struct GxHasher(State);
pub struct GxHasher {
state: State
}

impl GxHasher {
    /// Creates a hasher whose internal state is exactly `state`.
    ///
    /// No mixing or finalization is applied here; the value is stored as-is.
    #[inline]
    fn with_state(state: State) -> GxHasher {
        // Field-init shorthand instead of the redundant `state: state`.
        GxHasher { state }
    }
}

impl Default for GxHasher {
/// Creates a new hasher with an empty seed.
Expand All @@ -38,7 +48,7 @@ impl Default for GxHasher {
/// ```
#[inline]
fn default() -> GxHasher {
GxHasher(unsafe { create_empty() })
GxHasher::with_state(unsafe { create_empty() })
}
}

Expand Down Expand Up @@ -66,7 +76,7 @@ impl GxHasher {
#[inline]
pub fn with_seed(seed: i64) -> GxHasher {
// Derive the initial state directly from the seed
GxHasher(unsafe { gxhash(&[], create_seed(seed)) })
GxHasher::with_state(unsafe { create_seed(seed) })
}

/// Finish this hasher and return the hashed value as a 128 bit
Expand All @@ -76,7 +86,7 @@ impl GxHasher {
debug_assert!(std::mem::size_of::<State>() >= std::mem::size_of::<u128>());

unsafe {
let p = &self.0 as *const State as *const u128;
let p = &finalize(self.state) as *const State as *const u128;
*p
}
}
Expand All @@ -86,35 +96,40 @@ impl Hasher for GxHasher {
#[inline]
fn finish(&self) -> u64 {
unsafe {
let p = &self.0 as *const State as *const u64;
let p = &finalize(self.state) as *const State as *const u64;
*p
}
}

#[inline]
fn write(&mut self, bytes: &[u8]) {
// Only compress at this stage; finalization happens in `finish`
self.0 = unsafe { gxhash(bytes, self.0) };
self.state = unsafe { compress_fast(compress_all(bytes), self.state) };
}
}

/// A builder for building GxHasher with randomized seeds by default, for improved DOS resistance.
pub struct GxBuildHasher(i64);
pub struct GxBuildHasher(State);

impl Default for GxBuildHasher {
/// Creates a builder seeded from the thread-local random number generator (`rand::thread_rng()`).
#[inline]
fn default() -> GxBuildHasher {
let mut uninit: MaybeUninit<State> = MaybeUninit::uninit();
let mut rng = rand::thread_rng();
let seed: i64 = rng.gen::<i64>();
GxBuildHasher(seed)
// SAFETY (to be confirmed): `ptr` addresses the storage of `uninit`; viewing it as
// `VECTOR_SIZE` bytes is only sound if `VECTOR_SIZE == size_of::<State>()` — TODO confirm
// against the definition of `VECTOR_SIZE`, which is not visible here.
// NOTE(review): the `&mut [u8]` is formed over memory that has not been initialized yet;
// starting from `MaybeUninit::zeroed()` would avoid referencing uninitialized bytes.
unsafe {
let ptr = uninit.as_mut_ptr() as *mut u8;
let slice = std::slice::from_raw_parts_mut(ptr, VECTOR_SIZE);
rng.fill_bytes(slice);
// Given the size assumption above, every byte of the state has been written by `fill_bytes`.
GxBuildHasher(uninit.assume_init())
}
}
}

impl BuildHasher for GxBuildHasher {
type Hasher = GxHasher;
#[inline]
fn build_hasher(&self) -> GxHasher {
GxHasher::with_seed(self.0)
GxHasher::with_state(self.0)
}
}

Expand Down

0 comments on commit 796558b

Please sign in to comment.