diff --git a/src/arch/helperavx.h b/src/arch/helperavx.h
index e33a0e78..effdfa4c 100644
--- a/src/arch/helperavx.h
+++ b/src/arch/helperavx.h
@@ -128,20 +128,20 @@ static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return _mm256_castsi256_pd(
 
 //
 
-static vint2 vloadu_vi2_p(int32_t *p) {
+static INLINE vint2 vloadu_vi2_p(int32_t *p) {
   vint2 r;
   r.x = _mm_loadu_si128((__m128i *) p     );
   r.y = _mm_loadu_si128((__m128i *)(p + 4));
   return r;
 }
 
-static void vstoreu_v_p_vi2(int32_t *p, vint2 v) {
+static INLINE void vstoreu_v_p_vi2(int32_t *p, vint2 v) {
   _mm_storeu_si128((__m128i *) p     , v.x);
   _mm_storeu_si128((__m128i *)(p + 4), v.y);
 }
 
-static vint vloadu_vi_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); }
-static void vstoreu_v_p_vi(int32_t *p, vint v) { _mm_storeu_si128((__m128i *)p, v); }
+static INLINE vint vloadu_vi_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); }
+static INLINE void vstoreu_v_p_vi(int32_t *p, vint v) { _mm_storeu_si128((__m128i *)p, v); }
 
 //
 
@@ -570,7 +570,7 @@ static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloa
 
 //
 
-static vquad loadu_vq_p(void *p) {
+static INLINE vquad loadu_vq_p(void *p) {
   vquad vq;
   memcpy(&vq, p, VECTLENDP * 16);
   return vq;
diff --git a/src/arch/helperavx2.h b/src/arch/helperavx2.h
index 47fcdc60..81e1f027 100644
--- a/src/arch/helperavx2.h
+++ b/src/arch/helperavx2.h
@@ -114,10 +114,10 @@ static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return _mm256_castsi256_pd(
 
 //
 
-static vint2 vloadu_vi2_p(int32_t *p) { return _mm256_loadu_si256((__m256i const *)p); }
-static void vstoreu_v_p_vi2(int32_t *p, vint2 v) { _mm256_storeu_si256((__m256i *)p, v); }
-static vint vloadu_vi_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); }
-static void vstoreu_v_p_vi(int32_t *p, vint v) { _mm_storeu_si128((__m128i *)p, v); }
+static INLINE vint2 vloadu_vi2_p(int32_t *p) { return _mm256_loadu_si256((__m256i const *)p); }
+static INLINE void vstoreu_v_p_vi2(int32_t *p, vint2 v) { _mm256_storeu_si256((__m256i *)p, v); }
+static INLINE vint vloadu_vi_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); }
+static INLINE void vstoreu_v_p_vi(int32_t *p, vint v) { _mm_storeu_si128((__m128i *)p, v); }
 
 //
 
@@ -440,7 +440,7 @@ static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloa
 
 //
 
-static vquad loadu_vq_p(void *p) {
+static INLINE vquad loadu_vq_p(void *p) {
   vquad vq;
   memcpy(&vq, p, VECTLENDP * 16);
   return vq;
diff --git a/src/arch/helperavx2_128.h b/src/arch/helperavx2_128.h
index 5233db1b..be7c56a2 100644
--- a/src/arch/helperavx2_128.h
+++ b/src/arch/helperavx2_128.h
@@ -109,10 +109,10 @@ static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return _mm_castsi128_pd(vm)
 
 //
 
-static vint2 vloadu_vi2_p(int32_t *p) { return _mm_loadu_si128((__m128i const *)p); }
-static void vstoreu_v_p_vi2(int32_t *p, vint2 v) { _mm_storeu_si128((__m128i *)p, v); }
-static vint vloadu_vi_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); }
-static void vstoreu_v_p_vi(int32_t *p, vint v) { _mm_storeu_si128((__m128i *)p, v); }
+static INLINE vint2 vloadu_vi2_p(int32_t *p) { return _mm_loadu_si128((__m128i const *)p); }
+static INLINE void vstoreu_v_p_vi2(int32_t *p, vint2 v) { _mm_storeu_si128((__m128i *)p, v); }
+static INLINE vint vloadu_vi_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); }
+static INLINE void vstoreu_v_p_vi(int32_t *p, vint v) { _mm_storeu_si128((__m128i *)p, v); }
 
 //
 
@@ -413,7 +413,7 @@ static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloa
 
 //
 
-static vquad loadu_vq_p(void *p) {
+static INLINE vquad loadu_vq_p(void *p) {
   vquad vq = {
     vloadu_vi2_p((int32_t *)p),
     vloadu_vi2_p((int32_t *)((uint8_t *)p + sizeof(vmask)))
@@ -433,7 +433,7 @@ static INLINE vargquad cast_aq_vq(vquad vq) {
   return aq;
 }
 
-static void vstoreu_v_p_vq(void *p, vquad vq) {
+static INLINE void vstoreu_v_p_vq(void *p, vquad vq) {
   vstoreu_v_p_vi2((int32_t *)p, vcast_vi2_vm(vq.x));
   vstoreu_v_p_vi2((int32_t *)((uint8_t *)p + sizeof(vmask)), vcast_vi2_vm(vq.y));
 }
diff --git a/src/arch/helpersse2.h b/src/arch/helpersse2.h
index 833f5f9b..141603a5 100644
--- a/src/arch/helpersse2.h
+++ b/src/arch/helpersse2.h
@@ -131,11 +131,11 @@ static INLINE int vtestallones_i_vo64(vopmask g) { return _mm_movemask_epi8(g) =
 
 //
 
-static vint2 vloadu_vi2_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); }
-static void vstoreu_v_p_vi2(int32_t *p, vint2 v) { _mm_storeu_si128((__m128i *)p, v); }
+static INLINE vint2 vloadu_vi2_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); }
+static INLINE void vstoreu_v_p_vi2(int32_t *p, vint2 v) { _mm_storeu_si128((__m128i *)p, v); }
 
-static vint vloadu_vi_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); }
-static void vstoreu_v_p_vi(int32_t *p, vint v) { _mm_storeu_si128((__m128i *)p, v); }
+static INLINE vint vloadu_vi_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); }
+static INLINE void vstoreu_v_p_vi(int32_t *p, vint v) { _mm_storeu_si128((__m128i *)p, v); }
 
 //
 
@@ -466,7 +466,7 @@ static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloa
 
 //
 
-static vquad loadu_vq_p(void *p) {
+static INLINE vquad loadu_vq_p(void *p) {
   vquad vq;
   memcpy(&vq, p, VECTLENDP * 16);
   return vq;
diff --git a/src/libm/sleefinline_header.h.org b/src/libm/sleefinline_header.h.org
index df0c8215..d0cb3617 100644
--- a/src/libm/sleefinline_header.h.org
+++ b/src/libm/sleefinline_header.h.org
@@ -20,7 +20,9 @@
 #pragma fp_contract (off)
 #endif
 
+#if !(defined(__GNUC__) && !defined(__clang__))
 #pragma STDC FP_CONTRACT OFF
+#endif
 
 #ifndef SLEEF_FP_ILOGB0
 #define SLEEF_FP_ILOGB0 ((int)0x80000000)
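For context, the two changes above match known compiler behavior: GCC does not implement #pragma STDC FP_CONTRACT and warns that the pragma is ignored, so the new guard emits it only for compilers other than plain GCC (Clang also defines __GNUC__, hence the extra !defined(__clang__) test); and a plain `static` function in a header triggers -Wunused-function in every translation unit that includes the header without calling it, which `static INLINE` avoids. A minimal standalone sketch of both patterns follows; the INLINE definition here is an assumption modeled on SLEEF's helper headers, not the library's actual macro.

/* fp_contract_inline_demo.c -- hypothetical standalone sketch; this INLINE
   definition is an assumption modeled on SLEEF's helpers. */
#include <stdio.h>

#if defined(__GNUC__) || defined(__clang__)
#define INLINE __attribute__((always_inline)) inline
#else
#define INLINE inline
#endif

/* GCC does not implement this standard pragma and warns that it is
   ignored; skipping it for plain GCC keeps -Werror builds clean.
   Clang defines __GNUC__ too, hence the !defined(__clang__). */
#if !(defined(__GNUC__) && !defined(__clang__))
#pragma STDC FP_CONTRACT OFF
#endif

/* A plain `static` function that an includer never calls triggers
   -Wunused-function; `static INLINE` does not, and unused copies are
   simply discarded. */
static INLINE double muladd(double x, double y, double z) {
  return x * y + z;  /* where the pragma is honored, this must not fuse
                        into a single FMA instruction */
}

int main(void) {
  printf("%.17g\n", muladd(1.0 / 3.0, 3.0, -1.0));
  return 0;
}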