diff --git a/README.md b/README.md index 322f2ec..924ba6c 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ hashset.insert("hello world"); ## Compatibility - ARM 64-bit using `NEON` intrinsics. - x86-64 bit using `SSE2` + `AES` intrinsics. -- (optional) with `avx2` feature enabled, gxhash will use `AVX2` intrinsics, for up to twice as much performance for large inputs. Only compatible on `AVX2` enabled x86-64 platforms. +- (optional and only on unstable toolchain) with `avx2` feature enabled, gxhash will use `AVX2` intrinsics, for up to twice as much performance for large inputs. Only compatible on `AVX2` enabled x86-64 platforms. > **Warning** > Other platforms are currently not supported (there is no fallback) diff --git a/src/gxhash/platform/x86_128.rs b/src/gxhash/platform/x86_128.rs index 0ea9e41..875e44e 100644 --- a/src/gxhash/platform/x86_128.rs +++ b/src/gxhash/platform/x86_128.rs @@ -36,7 +36,7 @@ pub unsafe fn get_partial_safe(data: *const State, len: usize) -> State { // Copy data into the buffer std::ptr::copy(data as *const i8, buffer.as_mut_ptr(), len); // Load the buffer into a __m256i vector - let partial_vector = _mm_loadu_epi8(buffer.as_ptr()); + let partial_vector = _mm_loadu_si128(buffer.as_ptr() as *const State); _mm_add_epi8(partial_vector, _mm_set1_epi8(len as i8)) } diff --git a/src/gxhash/platform/x86_256.rs b/src/gxhash/platform/x86_256.rs index 4909039..4cf594b 100644 --- a/src/gxhash/platform/x86_256.rs +++ b/src/gxhash/platform/x86_256.rs @@ -36,7 +36,7 @@ pub unsafe fn get_partial_safe(data: *const State, len: usize) -> State { // Copy data into the buffer std::ptr::copy(data as *const i8, buffer.as_mut_ptr(), len); // Load the buffer into a __m256i vector - let partial_vector = _mm256_loadu_epi8(buffer.as_ptr()); + let partial_vector = _mm256_loadu_si256(buffer.as_ptr() as *const State); _mm256_add_epi8(partial_vector, _mm256_set1_epi8(len as i8)) } diff --git a/src/lib.rs b/src/lib.rs index 4dff963..b6abfaf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,5 @@ -//#![feature(core_intrinsics)] -//#![feature(pointer_byte_offsets)] -#![feature(stdsimd)] -//#![feature(stmt_expr_attributes)] +// Feature 'avx2' currently requires unstable 'stdsimd' +#![cfg_attr(all(feature = "avx2", target_arch = "x86_64"), feature(stdsimd))] mod gxhash; mod hasher;