Skip to content

Commit

Permalink
Bench simd_masked_load
Browse files Browse the repository at this point in the history
  • Loading branch information
ogxd committed Nov 9, 2024
1 parent 27eeaae commit 0ad6fb3
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 6 deletions.
9 changes: 8 additions & 1 deletion src/gxhash/platform/arm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,14 @@ pub unsafe fn get_partial_safe(data: *const State, len: usize) -> State {
/// Reads 16 bytes starting at `data`, zeroes every byte lane whose index is `len` or
/// above, then adds `len` (truncated to `i8`) to every lane.
///
/// The 16-byte read is issued through inline assembly instead of a Rust-level load; the
/// lanes beyond `len` are fetched by the instruction and then masked away.
///
/// # Safety
///
/// A full 16 bytes are always fetched from `data`, regardless of `len`.
/// NOTE(review): presumably the caller must guarantee those 16 bytes are readable by the
/// hardware (e.g. they do not run into an unmapped page) and that `len` is below 16 —
/// confirm against the call sites.
pub unsafe fn get_partial_unsafe_no_ub(data: *const State, len: usize) -> State {
    use std::arch::asm;

    // Lane i holds the value i; lanes with i < len get an all-ones mask, the others zero.
    let indices = vld1q_s8([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15].as_ptr());
    let mask = vcgtq_s8(vdupq_n_s8(len as i8), indices);

    let result: State;
    asm!(
        // `ld1` requires a braced register list with an arrangement specifier;
        // `.16b` only needs byte alignment.
        "ld1 {{{0:v}.16b}}, [{1}]",
        out(vreg) result,
        in(reg) data,
        // The block reads memory, so it must be `readonly` rather than `nomem`:
        // `nomem` would let the compiler assume the output is independent of memory.
        options(pure, readonly, nostack)
    );

    let partial_vector = vandq_s8(result, vreinterpretq_s8_u8(mask));
    vaddq_s8(partial_vector, vdupq_n_s8(len as i8))
}

Expand Down
15 changes: 10 additions & 5 deletions src/gxhash/platform/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,18 @@ pub unsafe fn get_partial_safe(data: *const State, len: usize) -> State {
#[inline(always)]
pub unsafe fn get_partial_unsafe_no_ub(data: *const State, len: usize) -> State {
// Using inline assembly to load out-of-bounds
use std::arch::asm;
// use std::arch::asm;
// let indices = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
// let mask = _mm_cmpgt_epi8(_mm_set1_epi8(len as i8), indices);
// let mut result: State;
// asm!("movdqu [{}], {}", in(reg) data, out(xmm_reg) result, options(pure, nomem, nostack));
// let partial_vector = _mm_and_si128(result, mask);
// _mm_add_epi8(partial_vector, _mm_set1_epi8(len as i8))

// Using simd_masked_load
let indices = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let mask = _mm_cmpgt_epi8(_mm_set1_epi8(len as i8), indices);
let mut result: State;
asm!("movdqu [{}], {}", in(reg) data, out(xmm_reg) result, options(pure, nomem, nostack));
let partial_vector = _mm_and_si128(result, mask);
_mm_add_epi8(partial_vector, _mm_set1_epi8(len as i8))
State::from(std::intrinsics::simd::simd_masked_load(core::simd::i8x16::from(mask), data as *const i8, core::simd::i8x16::from(_mm_set1_epi8(len as i8))))

// Using std::simd
// use std::simd::*;
Expand Down

0 comments on commit 0ad6fb3

Please sign in to comment.