Skip to content

Commit

Permalink
fix: Introduce OPTNONE to disable optimization
Browse files Browse the repository at this point in the history
Some tests/intrinsics don't behave correctly under optimization.
`OPTNONE` and `FORCE_INLINE_OPTNONE` are used to disable optimization
for these tests and intrinsics.
  • Loading branch information
howjmay committed Jul 18, 2024
1 parent cfed53d commit 05d1472
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 9 deletions.
6 changes: 3 additions & 3 deletions sse2neon.h
Original file line number Diff line number Diff line change
Expand Up @@ -2444,7 +2444,7 @@ FORCE_INLINE __m128 _mm_set_ps1(float _w)
// the following flags: _MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP,
// _MM_ROUND_TOWARD_ZERO
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_SET_ROUNDING_MODE
FORCE_INLINE void _MM_SET_ROUNDING_MODE(int rounding)
FORCE_INLINE_OPTNONE void _MM_SET_ROUNDING_MODE(int rounding)
{
union {
fpcr_bitfield field;
Expand Down Expand Up @@ -4158,7 +4158,7 @@ FORCE_INLINE __m128i _mm_cvttpd_epi32(__m128d a)
// Convert packed double-precision (64-bit) floating-point elements in a to
// packed 32-bit integers with truncation, and store the results in dst.
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_pi32
FORCE_INLINE __m64 _mm_cvttpd_pi32(__m128d a)
FORCE_INLINE_OPTNONE __m64 _mm_cvttpd_pi32(__m128d a)
{
double a0 = ((double *) &a)[0];
double a1 = ((double *) &a)[1];
Expand Down Expand Up @@ -9219,7 +9219,7 @@ FORCE_INLINE int64_t _mm_popcnt_u64(uint64_t a)
#endif
}

FORCE_INLINE void _sse2neon_mm_set_denormals_zero_mode(unsigned int flag)
FORCE_INLINE_OPTNONE void _sse2neon_mm_set_denormals_zero_mode(unsigned int flag)
{
// AArch32 Advanced SIMD arithmetic always uses the Flush-to-zero setting,
// regardless of the value of the FZ bit.
Expand Down
10 changes: 10 additions & 0 deletions tests/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,16 @@ extern int64_t NaN64;
#define ALL_BIT_1_32 (*(float *) &NaN)
#define ALL_BIT_1_64 (*(double *) &NaN64)

// OPTNONE: function attribute that asks the compiler not to optimize the
// function it is attached to. Clang is tested first so that the GCC branch
// does not need an explicit !defined(__clang__) guard. On any other compiler
// the macro expands to nothing.
#if defined(__clang__)
#pragma push_macro("OPTNONE")
#define OPTNONE __attribute__((optnone))
#elif defined(__GNUC__)
#pragma push_macro("OPTNONE")
#define OPTNONE __attribute__((optimize("O0")))
#else
#define OPTNONE
#endif

template <typename T>
result_t validate128(T a, T b)
{
Expand Down
12 changes: 6 additions & 6 deletions tests/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2751,7 +2751,7 @@ result_t test_mm_set_ps1(const SSE2NEONTestImpl &impl, uint32_t iter)
return validateFloat(ret, a, a, a, a);
}

result_t test_mm_set_rounding_mode(const SSE2NEONTestImpl &impl, uint32_t iter)
OPTNONE result_t test_mm_set_rounding_mode(const SSE2NEONTestImpl &impl, uint32_t iter)
{
const float *_a = impl.mTestFloatPointer1;
result_t res_toward_zero, res_to_neg_inf, res_to_pos_inf, res_nearest;
Expand Down Expand Up @@ -4444,7 +4444,7 @@ result_t test_mm_cvtepi32_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
return validateFloat(ret, trun[0], trun[1], trun[2], trun[3]);
}

result_t test_mm_cvtpd_epi32(const SSE2NEONTestImpl &impl, uint32_t iter)
OPTNONE result_t test_mm_cvtpd_epi32(const SSE2NEONTestImpl &impl, uint32_t iter)
{
const double *_a = (const double *) impl.mTestFloatPointer1;
int32_t d[2] = {};
Expand Down Expand Up @@ -8425,7 +8425,7 @@ result_t test_mm_cvtepu8_epi64(const SSE2NEONTestImpl &impl, uint32_t iter)
MM_DP_PD_TEST_CASE_WITH(0x22); \
MM_DP_PD_TEST_CASE_WITH(0x23);

result_t test_mm_dp_pd(const SSE2NEONTestImpl &impl, uint32_t iter)
OPTNONE result_t test_mm_dp_pd(const SSE2NEONTestImpl &impl, uint32_t iter)
{
GENERATE_MM_DP_PD_TEST_CASES
return TEST_SUCCESS;
Expand Down Expand Up @@ -8460,7 +8460,7 @@ result_t test_mm_dp_pd(const SSE2NEONTestImpl &impl, uint32_t iter)
MM_DP_PS_TEST_CASE_WITH(0x23); \
MM_DP_PS_TEST_CASE_WITH(0xB5);

result_t test_mm_dp_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
OPTNONE result_t test_mm_dp_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
{
GENERATE_MM_DP_PS_TEST_CASES
return TEST_SUCCESS;
Expand Down Expand Up @@ -11819,8 +11819,8 @@ result_t test_mm_popcnt_u64(const SSE2NEONTestImpl &impl, uint32_t iter)
return TEST_SUCCESS;
}

result_t test_mm_set_denormals_zero_mode(const SSE2NEONTestImpl &impl,
uint32_t iter)
OPTNONE result_t test_mm_set_denormals_zero_mode(const SSE2NEONTestImpl &impl,
uint32_t iter)
{
result_t res_set_denormals_zero_on, res_set_denormals_zero_off;
float factor = 2;
Expand Down

0 comments on commit 05d1472

Please sign in to comment.