Add _m_pshufw as a macro alias that expands to _mm_shuffle_pi16, and enable
its test: test_m_pshufw now delegates to test_mm_shuffle_pi16 (forward-declared
near the top of tests/impl.cpp) instead of returning TEST_UNIMPL.

diff --git a/sse2neon.h b/sse2neon.h
index 80dc8990..3d7f92b2 100644
--- a/sse2neon.h
+++ b/sse2neon.h
@@ -2186,6 +2186,11 @@ FORCE_INLINE void _mm_prefetch(const void *p, int i)
 // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=m_psadbw
 #define _m_psadbw(a, b) _mm_sad_pu8(a, b)
 
+// Shuffle 16-bit integers in a using the control in imm8, and store the results
+// in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pshufw
+#define _m_pshufw(a, imm) _mm_shuffle_pi16(a, imm)
+
 // Compute the approximate reciprocal of packed single-precision (32-bit)
 // floating-point elements in a, and store the results in dst. The maximum
 // relative error for this approximation is less than 1.5*2^-12.
diff --git a/tests/impl.cpp b/tests/impl.cpp
index 3b440a9c..844ef14c 100644
--- a/tests/impl.cpp
+++ b/tests/impl.cpp
@@ -241,6 +241,7 @@ static float ranf(float low, float high)
 // Enable the tests which are using the macro of another tests
 result_t test_mm_slli_si128(const SSE2NEONTestImpl &impl, uint32_t i);
 result_t test_mm_srli_si128(const SSE2NEONTestImpl &impl, uint32_t i);
+result_t test_mm_shuffle_pi16(const SSE2NEONTestImpl &impl, uint32_t i);
 
 // This function is not called from `runSingleTest`, but for other intrinsic
 // tests that might need to call `_mm_set_epi32`.
@@ -2317,7 +2318,7 @@ result_t test_m_psadbw(const SSE2NEONTestImpl &impl, uint32_t i)
 
 result_t test_m_pshufw(const SSE2NEONTestImpl &impl, uint32_t i)
 {
-    return TEST_UNIMPL;
+    return test_mm_shuffle_pi16(impl, i);
 }
 
 result_t test_mm_rcp_ps(const SSE2NEONTestImpl &impl, uint32_t i)