From 0d21b7d8194e084ecab60ac37fa67d9a99a33310 Mon Sep 17 00:00:00 2001 From: Thomas Debesse Date: Wed, 15 May 2024 08:31:19 +0200 Subject: [PATCH] cmake: add USE_ARCH_INTRINSICS and USE_COMPILER_BUILTINS CMake options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This introduces the USE_ARCH_INTRINSICS CMake option. It is enabled by default. Disabling it is meant to disable custom asm code and usage of intrinsic functions for the target platform in the Dæmon code base; it may also be used by games built with the Dæmon common code base. It is not meant to disable asm or intrinsics usage in third-party libraries. It is not meant to prevent the compiler from using such intrinsics in its optimization passes. It is not meant to disable the compiler flags we set to tell the compiler to try to use such intrinsics in its optimization passes. For this, one should disable USE_CPU_RECOMMENDED_FEATURES instead. For obvious reasons, the asm code in the BREAKPOINT() implementation is not meant to be disabled by USE_ARCH_INTRINSICS. The macro syntax is: DAEMON_USE_ARCH_INTRINSICS_(architecture)[_extension] Examples: - DAEMON_USE_ARCH_INTRINSICS_i686: i686 specific code, including asm code. - DAEMON_USE_ARCH_INTRINSICS_i686_sse: i686 SSE specific code. - DAEMON_USE_ARCH_INTRINSICS_i686_sse2: i686 SSE2 specific code. If a platform inherits a feature from a parent platform, the parent platform name is used. For example, on amd64, the definition enabling SSE code is DAEMON_USE_ARCH_INTRINSICS_i686_sse, enabling SSE code on both i686 with SSE and amd64 platforms. On amd64, both DAEMON_USE_ARCH_INTRINSICS_amd64 and DAEMON_USE_ARCH_INTRINSICS_i686 are defined. This also introduces the USE_COMPILER_BUILTINS CMake option. It is enabled by default. Disabling it is meant to test the code path used for unknown compilers. 
--- cmake/DaemonArchitecture.cmake | 24 +++++++++++++++++++ cmake/DaemonFlags.cmake | 9 ++++++++ src/common/Compiler.h | 9 ++++---- src/common/Platform.h | 42 ++++++++++++++++++++++++++-------- src/engine/qcommon/q_math.cpp | 7 +++--- src/engine/qcommon/q_shared.h | 7 +++--- 6 files changed, 79 insertions(+), 19 deletions(-) diff --git a/cmake/DaemonArchitecture.cmake b/cmake/DaemonArchitecture.cmake index 662343ee7a..f59f714c71 100644 --- a/cmake/DaemonArchitecture.cmake +++ b/cmake/DaemonArchitecture.cmake @@ -90,3 +90,27 @@ endif() # Quotes cannot be part of the define as support for them is not reliable. add_definitions(-DNACL_ARCH_STRING=${NACL_ARCH}) + +option(USE_ARCH_INTRINSICS "Enable custom code using intrinsics functions or asm declarations" ON) + +macro(set_arch_intrinsics name) + if (USE_ARCH_INTRINSICS) + message(STATUS "Enabling ${name} architecture intrinsics") + add_definitions(-DDAEMON_USE_ARCH_INTRINSICS_${name}=1) + else() + message(STATUS "Disabling ${name} architecture intrinsics") + endif() +endmacro() + +if (USE_ARCH_INTRINSICS) + add_definitions(-DDAEMON_USE_ARCH_INTRINSICS=1) +endif() + +set_arch_intrinsics(${ARCH}) + +set(amd64_PARENT "i686") +set(arm64_PARENT "armhf") + +if (${ARCH}_PARENT) + set_arch_intrinsics(${${ARCH}_PARENT}) +endif() diff --git a/cmake/DaemonFlags.cmake b/cmake/DaemonFlags.cmake index ad483d3865..6807323a65 100644 --- a/cmake/DaemonFlags.cmake +++ b/cmake/DaemonFlags.cmake @@ -29,6 +29,15 @@ include(CheckCXXCompilerFlag) add_definitions(-DDAEMON_BUILD_${CMAKE_BUILD_TYPE}) +option(USE_COMPILER_BUILTINS "Enable usage of compiler builtins" ON) + +if (USE_COMPILER_BUILTINS) + add_definitions(-DDAEMON_USE_COMPILER_BUILTINS=1) + message(STATUS "Enabling compiler builtins") +else() + message(STATUS "Disabling compiler builtins") +endif() + # Set flag without checking, optional argument specifies build type macro(set_c_flag FLAG) if (${ARGC} GREATER 1) diff --git a/src/common/Compiler.h b/src/common/Compiler.h index 
4dad3260c8..90d56e246a 100644 --- a/src/common/Compiler.h +++ b/src/common/Compiler.h @@ -41,14 +41,14 @@ int CountTrailingZeroes(unsigned int x); int CountTrailingZeroes(unsigned long x); int CountTrailingZeroes(unsigned long long x); -#if defined(__GNUC__) +#if defined(DAEMON_USE_COMPILER_BUILTINS) && defined(__GNUC__) inline int CountTrailingZeroes(unsigned int x) { return __builtin_ctz(x); } inline int CountTrailingZeroes(unsigned long x) { return __builtin_ctzl(x); } inline int CountTrailingZeroes(unsigned long long x) { return __builtin_ctzll(x); } -#elif defined(_MSC_VER) +#elif defined(DAEMON_USE_COMPILER_BUILTINS) && defined(_MSC_VER) inline int CountTrailingZeroes(unsigned int x) { unsigned long ans; _BitScanForward(&ans, x); return ans; } inline int CountTrailingZeroes(unsigned long x) @@ -75,7 +75,7 @@ int CountTrailingZeroes(unsigned long long x); #endif // GCC and Clang -#if defined(__GNUC__) +#if defined(DAEMON_USE_COMPILER_BUILTINS) && defined(__GNUC__) // Emit a nice warning when a function is used #define DEPRECATED __attribute__((__deprecated__)) @@ -119,6 +119,7 @@ int CountTrailingZeroes(unsigned long long x); // Raise an exception and break in the debugger #if defined(DAEMON_ARCH_i686) || defined(DAEMON_ARCH_amd64) + // Always use this asm code, even when DAEMON_USE_ARCH_INTRINSICS is not defined. 
#define BREAKPOINT() __asm__ __volatile__("int $3\n\t") #elif defined(DAEMON_ARCH_nacl) // TODO: find how to implement breakpoint on NaCl @@ -177,7 +178,7 @@ See http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0627r0.pdf */ #endif // Microsoft Visual C++ -#elif defined(_MSC_VER) +#elif defined(DAEMON_USE_COMPILER_BUILTINS) && defined(_MSC_VER) // Disable some warnings #pragma warning(disable : 4100) // unreferenced formal parameter diff --git a/src/common/Platform.h b/src/common/Platform.h index 9ccef8df7e..c96e02fec8 100644 --- a/src/common/Platform.h +++ b/src/common/Platform.h @@ -63,15 +63,39 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define __x86_64__ 1 #endif -// SSE support -#if defined(__x86_64__) || defined(__SSE__) || _M_IX86_FP >= 1 -#include <xmmintrin.h> -#if defined(__x86_64__) || defined(__SSE2__) || _M_IX86_FP >= 2 -#include <emmintrin.h> -#define idx86_sse 2 -#else -#define idx86_sse 1 -#endif +/* The definition name syntax is: DAEMON_USE_ARCH_INTRINSICS_<architecture>[_extension] + +Examples: + +- DAEMON_USE_ARCH_INTRINSICS_i686: i686 specific code, including asm code. +- DAEMON_USE_ARCH_INTRINSICS_i686_sse: i686 SSE specific code. +- DAEMON_USE_ARCH_INTRINSICS_i686_sse2: i686 SSE2 specific code. + +If an architecture inherits a feature from a parent architecture, the parent +architecture name is used. For example on amd64, the definition enabling +SSE code is DAEMON_USE_ARCH_INTRINSICS_i686_sse, enabling SSE code on both +i686 with SSE and amd64. + +The definitions for the architecture itself are automatically set by CMake. */ + +#if defined(DAEMON_USE_ARCH_INTRINSICS) + // Set architecture extensions definitions. amd64 always provides SSE and SSE2, but MSVC defines neither __SSE*__ nor _M_IX86_FP when targeting it. + #if defined(__x86_64__) || defined(__SSE2__) || _M_IX86_FP >= 2 + #define DAEMON_USE_ARCH_INTRINSICS_i686_sse2 + #endif + + #if defined(__x86_64__) || defined(__SSE__) || _M_IX86_FP >= 1 + #define DAEMON_USE_ARCH_INTRINSICS_i686_sse + #endif + + // Include intrinsics-specific headers. 
+ #if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse) + #include <xmmintrin.h> + #endif + + #if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse2) + #include <emmintrin.h> + #endif #endif // VM Prefixes diff --git a/src/engine/qcommon/q_math.cpp b/src/engine/qcommon/q_math.cpp index 4d166b199b..80bc850f86 100644 --- a/src/engine/qcommon/q_math.cpp +++ b/src/engine/qcommon/q_math.cpp @@ -740,7 +740,7 @@ void SetPlaneSignbits( cplane_t *out ) int BoxOnPlaneSide( const vec3_t emins, const vec3_t emaxs, const cplane_t *p ) { -#if idx86_sse +#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse) auto mins = sseLoadVec3Unsafe( emins ); auto maxs = sseLoadVec3Unsafe( emaxs ); auto normal = sseLoadVec3Unsafe( p->normal ); @@ -1799,7 +1799,7 @@ void MatrixSetupShear( matrix_t m, vec_t x, vec_t y ) void MatrixMultiply( const matrix_t a, const matrix_t b, matrix_t out ) { -#if idx86_sse +#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse) //#error MatrixMultiply int i; __m128 _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; @@ -3288,7 +3288,8 @@ void QuatTransformVectorInverse( const quat_t q, const vec3_t in, vec3_t out ) VectorAdd( out, tmp2, out ); } -#if !idx86_sse +// The SSE variants are inline functions defined in the q_shared.h file. +#if !defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse) // create an identity transform void TransInit( transform_t *t ) { diff --git a/src/engine/qcommon/q_shared.h b/src/engine/qcommon/q_shared.h index 095af5442a..456033aaab 100644 --- a/src/engine/qcommon/q_shared.h +++ b/src/engine/qcommon/q_shared.h @@ -248,7 +248,7 @@ void Com_Free_Aligned( void *ptr ); // floats (quat: 4, scale: 1, translation: 3), which is very // convenient for SSE and GLSL, which operate on 4-dimensional // float vectors. -#if idx86_sse +#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse) // Here we have a union of scalar struct and sse struct, transform_u and the // scalar struct must match transform_t so we have to use anonymous structs. // We disable compiler warnings when using -Wpedantic for this specific case. 
@@ -375,7 +375,7 @@ extern const quat_t quatIdentity; float y; // compute approximate inverse square root -#if defined( idx86_sse ) +#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse) // SSE rsqrt relative error bound: 3.7 * 10^-4 _mm_store_ss( &y, _mm_rsqrt_ss( _mm_load_ss( &number ) ) ); #elif idppc @@ -774,7 +774,7 @@ inline float DotProduct( const vec3_t x, const vec3_t y ) //============================================= // combining Transformations -#if idx86_sse +#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse) /* swizzles for _mm_shuffle_ps instruction */ #define SWZ_XXXX 0x00 #define SWZ_YXXX 0x01 @@ -1293,6 +1293,7 @@ inline float DotProduct( const vec3_t x, const vec3_t y ) t->sseRot = sseQuatNormalize( t->sseRot ); } #else + // The non-SSE variants are defined in the q_math.cpp file. void TransInit( transform_t *t ); void TransCopy( const transform_t *in, transform_t *out );