From 14b2caa8bcf14aefe85310d9d003db6979f38b0c Mon Sep 17 00:00:00 2001 From: Olivier Giniaux Date: Sat, 6 Jan 2024 21:40:19 +0100 Subject: [PATCH] Also enable sse2 --- build.rs | 2 +- src/gxhash/mod.rs | 6 ++--- src/gxhash/platform/x86.rs | 55 +++++++++++++++++++++++++------------- 3 files changed, 40 insertions(+), 23 deletions(-) diff --git a/build.rs b/build.rs index dd11580..0142216 100644 --- a/build.rs +++ b/build.rs @@ -6,6 +6,6 @@ fn main() { && cfg!(target_arch = "x86_64") && cfg!(target_feature = "avx2") && cfg!(target_feature = "vaes") { - println!("cargo:rustc-cfg=hybrid"); + //println!("cargo:rustc-cfg=hybrid"); } } \ No newline at end of file diff --git a/src/gxhash/mod.rs b/src/gxhash/mod.rs index 1740156..2f6d550 100644 --- a/src/gxhash/mod.rs +++ b/src/gxhash/mod.rs @@ -73,9 +73,9 @@ pub(crate) unsafe fn gxhash(input: &[u8], seed: State) -> State { #[inline(always)] pub(crate) unsafe fn compress_all(input: &[u8]) -> State { - if !check_support() { - panic!("No supported"); - } + // if !check_support() { + // panic!("Not supported"); + // } let len = input.len(); let mut ptr = input.as_ptr() as *const State; diff --git a/src/gxhash/platform/x86.rs b/src/gxhash/platform/x86.rs index 8dca01c..01d1f27 100644 --- a/src/gxhash/platform/x86.rs +++ b/src/gxhash/platform/x86.rs @@ -12,22 +12,26 @@ pub unsafe fn check_support() -> bool { std::arch::is_x86_feature_detected!("aes") && std::arch::is_x86_feature_detected!("sse2") } -#[inline(always)] +#[inline] +#[target_feature(enable = "sse2")] pub unsafe fn create_empty() -> State { _mm_setzero_si128() } -#[inline(always)] +#[inline] +#[target_feature(enable = "sse2")] pub unsafe fn create_seed(seed: i64) -> State { _mm_set1_epi64x(seed) } -#[inline(always)] +#[inline] +#[target_feature(enable = "sse2")] pub unsafe fn load_unaligned(p: *const State) -> State { _mm_loadu_si128(p) } -#[inline(always)] +#[inline] +#[target_feature(enable = "sse2")] pub unsafe fn get_partial_safe(data: *const State, len: usize) -> State { // Temporary buffer filled with zeros let mut buffer = [0i8; VECTOR_SIZE]; @@ -38,7 +42,8 @@ pub unsafe fn get_partial_safe(data: *const State, len: usize) -> State { _mm_add_epi8(partial_vector, _mm_set1_epi8(len as i8)) } -#[inline(always)] +#[inline] +#[target_feature(enable = "sse2")] pub unsafe fn get_partial_unsafe(data: *const State, len: usize) -> State { let indices = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); let mask = _mm_cmpgt_epi8(_mm_set1_epi8(len as i8), indices); @@ -58,14 +63,15 @@ pub unsafe fn aes_encrypt_last(data: State, keys: State) -> State { _mm_aesenclast_si128(data, keys) } -#[inline(always)] -#[allow(dead_code)] +#[inline] +#[target_feature(enable = "sse2")] pub unsafe fn ld(array: *const u32) -> State { _mm_loadu_si128(array as *const State) } #[cfg(not(hybrid))] -#[inline(always)] +#[inline] +#[target_feature(enable = "sse2")] pub unsafe fn compress_8(mut ptr: *const State, end_address: usize, hash_vector: State, len: usize) -> State { // Disambiguation vectors @@ -106,7 +112,8 @@ pub unsafe fn compress_8(mut ptr: *const State, end_address: usize, hash_vector: } #[cfg(hybrid)] -#[inline(always)] +#[inline] +#[target_feature(enable = "sse2")] pub unsafe fn compress_8(ptr: *const State, end_address: usize, hash_vector: State, len: usize) -> State { macro_rules! load_unaligned_x2 { ($ptr:ident, $($var:ident),+) => { @@ -144,53 +151,63 @@ pub unsafe fn compress_8(ptr: *const State, end_address: usize, hash_vector: Sta aes_encrypt(lane1, lane2) } -#[inline(always)] +#[inline] +#[target_feature(enable = "sse2")] pub unsafe fn load_u8(x: u8) -> State { _mm_set1_epi8(x as i8) } -#[inline(always)] +#[inline] +#[target_feature(enable = "sse2")] pub unsafe fn load_u16(x: u16) -> State { _mm_set1_epi16(x as i16) } -#[inline(always)] +#[inline] +#[target_feature(enable = "sse2")] pub unsafe fn load_u32(x: u32) -> State { _mm_set1_epi32(x as i32) } -#[inline(always)] +#[inline] +#[target_feature(enable = "sse2")] pub unsafe fn load_u64(x: u64) -> State { _mm_set1_epi64x(x as i64) } -#[inline(always)] +#[inline] +#[target_feature(enable = "sse2")] pub unsafe fn load_u128(x: u128) -> State { let ptr = &x as *const u128 as *const State; _mm_loadu_si128(ptr) } -#[inline(always)] +#[inline] +#[target_feature(enable = "sse2")] pub unsafe fn load_i8(x: i8) -> State { _mm_set1_epi8(x) } -#[inline(always)] +#[inline] +#[target_feature(enable = "sse2")] pub unsafe fn load_i16(x: i16) -> State { _mm_set1_epi16(x) } -#[inline(always)] +#[inline] +#[target_feature(enable = "sse2")] pub unsafe fn load_i32(x: i32) -> State { _mm_set1_epi32(x) } -#[inline(always)] +#[inline] +#[target_feature(enable = "sse2")] pub unsafe fn load_i64(x: i64) -> State { _mm_set1_epi64x(x) } -#[inline(always)] +#[inline] +#[target_feature(enable = "sse2")] pub unsafe fn load_i128(x: i128) -> State { let ptr = &x as *const i128 as *const State; _mm_loadu_si128(ptr)