Skip to content

Commit

Permalink
cmake: add USE_ARCH_INTRINSICS and USE_COMPILER_BUILTINS CMake options
Browse files Browse the repository at this point in the history
This introduces the USE_ARCH_INTRINSICS CMake option. It is enabled by
default.

Disabling it is meant to disable custom asm code and usage of intrinsic
functions for the target platform in the Dæmon code base; it may also be
used by games built with the Dæmon common code base.

It is not meant to disable asm or intrinsics usage in third-party libraries.

It is not meant to prevent the compiler from using such intrinsics in its
optimization passes.

It is not meant to disable the compiler flags we set to tell the compiler
to try to use such intrinsics in its optimization passes. For this, one
should disable USE_CPU_RECOMMENDED_FEATURES instead.

For obvious reasons, the asm code in the BREAKPOINT() implementation is not
meant to be disabled by USE_ARCH_INTRINSICS.

The macro syntax is: DAEMON_USE_ARCH_INTRINSICS_<architecture>[_extension]

Examples:

- DAEMON_USE_ARCH_INTRINSICS_i686: i686 specific code, including asm code.
- DAEMON_USE_ARCH_INTRINSICS_i686_sse: i686 SSE specific code.
- DAEMON_USE_ARCH_INTRINSICS_i686_sse2: i686 SSE2 specific code.

If a platform inherits a feature from a parent platform, the parent
platform name is used. For example on amd64, the definition enabling
SSE code is DAEMON_USE_ARCH_INTRINSICS_i686_sse, enabling SSE code on both
i686 with SSE and amd64 platforms, and both DAEMON_USE_ARCH_INTRINSICS_amd64
and DAEMON_USE_ARCH_INTRINSICS_i686 are defined.

This also introduces USE_COMPILER_BUILTINS CMake option. It is enabled
by default.

Disabling it is meant to test the fallback code written for unknown compilers.
  • Loading branch information
illwieckz committed May 16, 2024
1 parent 6bcac3e commit aaca763
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 19 deletions.
24 changes: 24 additions & 0 deletions cmake/DaemonArchitecture.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,27 @@ endif()

# Quotes cannot be part of the define as support for them is not reliable.
add_definitions(-DNACL_ARCH_STRING=${NACL_ARCH})

# Build option gating the custom asm and intrinsics code paths.
option(USE_ARCH_INTRINSICS "Enable custom code using intrinsics functions or asm declarations" ON)

# set_arch_intrinsics(<name>)
#
# Reports whether the architecture intrinsics for <name> are enabled.
# When USE_ARCH_INTRINSICS is on, also adds the
# DAEMON_USE_ARCH_INTRINSICS_<name>=1 compile definition.
macro(set_arch_intrinsics name)
	if (NOT USE_ARCH_INTRINSICS)
		message(STATUS "Disabling ${name} architecture intrinsics")
	else()
		message(STATUS "Enabling ${name} architecture intrinsics")
		add_definitions(-DDAEMON_USE_ARCH_INTRINSICS_${name}=1)
	endif()
endmacro()

# Architectures that inherit the intrinsics of another architecture:
# <arch>_PARENT names the architecture whose intrinsics also apply.
set(amd64_PARENT "i686")
set(arm64_PARENT "armhf")

# Global switch, defined once whenever architecture-specific code is allowed.
if (USE_ARCH_INTRINSICS)
	add_definitions(-DDAEMON_USE_ARCH_INTRINSICS=1)
endif()

# Intrinsics of the target architecture itself.
set_arch_intrinsics(${ARCH})

# Intrinsics of the parent architecture, when the target has one.
if (${ARCH}_PARENT)
	set_arch_intrinsics(${${ARCH}_PARENT})
endif()
9 changes: 9 additions & 0 deletions cmake/DaemonFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,15 @@ include(CheckCXXCompilerFlag)

# Define DAEMON_BUILD_<build type>, taking the suffix from CMAKE_BUILD_TYPE.
# NOTE(review): CMAKE_BUILD_TYPE is empty with multi-config generators, which
# would yield a bare DAEMON_BUILD_ definition — confirm this is handled elsewhere.
add_definitions(-DDAEMON_BUILD_${CMAKE_BUILD_TYPE})

# Build option controlling whether the code may rely on compiler builtins.
# When enabled, DAEMON_USE_COMPILER_BUILTINS=1 is added to the compile
# definitions; either way the choice is reported at configure time.
option(USE_COMPILER_BUILTINS "Enable usage of compiler builtins" ON)

if (NOT USE_COMPILER_BUILTINS)
	message(STATUS "Disabling compiler builtins")
else()
	add_definitions(-DDAEMON_USE_COMPILER_BUILTINS=1)
	message(STATUS "Enabling compiler builtins")
endif()

# Set flag without checking, optional argument specifies build type
macro(set_c_flag FLAG)
if (${ARGC} GREATER 1)
Expand Down
9 changes: 5 additions & 4 deletions src/common/Compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@ int CountTrailingZeroes(unsigned int x);
int CountTrailingZeroes(unsigned long x);
int CountTrailingZeroes(unsigned long long x);

#if defined( __GNUC__ )
// The build system defines DAEMON_USE_COMPILER_BUILTINS (not DAEMON_USE_ARCH_BUILTINS)
// when compiler builtins are allowed; testing any other name would silently
// disable the builtin implementations below.
#if defined(DAEMON_USE_COMPILER_BUILTINS) && defined( __GNUC__ )
// GCC and Clang: forward to the count-trailing-zeroes builtin matching the
// operand width. The builtins leave the result undefined when x is 0.
inline int CountTrailingZeroes(unsigned int x)
{ return __builtin_ctz(x); }
inline int CountTrailingZeroes(unsigned long x)
{ return __builtin_ctzl(x); }
inline int CountTrailingZeroes(unsigned long long x)
{ return __builtin_ctzll(x); }
#elif defined(DAEMON_USE_COMPILER_BUILTINS) && defined( _MSC_VER )
// MSVC: _BitScanForward stores the index of the lowest set bit into ans.
inline int CountTrailingZeroes(unsigned int x)
{ unsigned long ans; _BitScanForward(&ans, x); return ans; }
inline int CountTrailingZeroes(unsigned long x)
Expand All @@ -75,7 +75,7 @@ int CountTrailingZeroes(unsigned long long x);
#endif

// GCC and Clang
#if defined( __GNUC__ )
#if defined(DAEMON_USE_COMPILER_BUILTINS) && defined( __GNUC__ )

// Emit a nice warning when a function is used
#define DEPRECATED __attribute__((__deprecated__))
Expand Down Expand Up @@ -119,6 +119,7 @@ int CountTrailingZeroes(unsigned long long x);

// Raise an exception and break in the debugger
#if defined(DAEMON_ARCH_i686) || defined(DAEMON_ARCH_amd64)
// Always run this asm code even if DAEMON_USE_ARCH_INTRINSICS is not defined.
#define BREAKPOINT() __asm__ __volatile__("int $3\n\t")
#elif defined(DAEMON_ARCH_nacl)
// TODO: find how to implement breakpoint on NaCl
Expand Down Expand Up @@ -177,7 +178,7 @@ See http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0627r0.pdf */
#endif

// Microsoft Visual C++
#elif defined( _MSC_VER )
#elif defined(DAEMON_USE_COMPILER_BUILTINS) && defined( _MSC_VER )

// Disable some warnings
#pragma warning(disable : 4100) // unreferenced formal parameter
Expand Down
42 changes: 33 additions & 9 deletions src/common/Platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,39 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define __x86_64__ 1
#endif

// SSE support
#if defined(__x86_64__) || defined(__SSE__) || _M_IX86_FP >= 1
#include <xmmintrin.h>
#if defined(__x86_64__) || defined(__SSE2__) || _M_IX86_FP >= 2
#include <emmintrin.h>
#define idx86_sse 2
#else
#define idx86_sse 1
#endif
/* The definition name syntax is: DAEMON_USE_ARCH_INTRINSICS_<architecture>[_extension]
Examples:
- DAEMON_USE_ARCH_INTRINSICS_i686: i686 specific code, including asm code.
- DAEMON_USE_ARCH_INTRINSICS_i686_sse: i686 SSE specific code.
- DAEMON_USE_ARCH_INTRINSICS_i686_sse2: i686 SSE2 specific code.
If an architecture inherits a feature from a parent architecture, the parent
architecture name is used. For example on amd64, the definition enabling
SSE code is DAEMON_USE_ARCH_INTRINSICS_i686_sse, enabling SSE code on both
i686 with SSE and amd64.
The definitions for the architecture itself are automatically set by CMake. */

#if defined(DAEMON_USE_ARCH_INTRINSICS)
	// Set architecture extensions definitions.
	// amd64 always provides SSE and SSE2, but a compiler targeting it does not
	// necessarily advertise them through __SSE__/__SSE2__ or _M_IX86_FP
	// (MSVC documents _M_IX86_FP for 32-bit x86 targets only), so __x86_64__
	// is tested explicitly to keep the SSE code paths enabled there.
	#if defined(__x86_64__) || defined(__SSE2__) || _M_IX86_FP >= 2
		#define DAEMON_USE_ARCH_INTRINSICS_i686_sse2
	#endif

	#if defined(__x86_64__) || defined(__SSE__) || _M_IX86_FP >= 1
		#define DAEMON_USE_ARCH_INTRINSICS_i686_sse
	#endif

	// Include intrinsics-specific headers.
	// xmmintrin.h declares the SSE intrinsics, emmintrin.h the SSE2 ones.
	#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
		#include <xmmintrin.h>
	#endif

	#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse2)
		#include <emmintrin.h>
	#endif
#endif

// VM Prefixes
Expand Down
7 changes: 4 additions & 3 deletions src/engine/qcommon/q_math.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -740,7 +740,7 @@ void SetPlaneSignbits( cplane_t *out )

int BoxOnPlaneSide( const vec3_t emins, const vec3_t emaxs, const cplane_t *p )
{
#if idx86_sse
#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
auto mins = sseLoadVec3Unsafe( emins );
auto maxs = sseLoadVec3Unsafe( emaxs );
auto normal = sseLoadVec3Unsafe( p->normal );
Expand Down Expand Up @@ -1799,7 +1799,7 @@ void MatrixSetupShear( matrix_t m, vec_t x, vec_t y )

void MatrixMultiply( const matrix_t a, const matrix_t b, matrix_t out )
{
#if idx86_sse
#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
//#error MatrixMultiply
int i;
__m128 _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
Expand Down Expand Up @@ -3288,7 +3288,8 @@ void QuatTransformVectorInverse( const quat_t q, const vec3_t in, vec3_t out )
VectorAdd( out, tmp2, out );
}

#if !idx86_sse
// The SSE variants are inline functions in q_shared.h file.
#if !defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
// create an identity transform
void TransInit( transform_t *t )
{
Expand Down
7 changes: 4 additions & 3 deletions src/engine/qcommon/q_shared.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ void Com_Free_Aligned( void *ptr );
// floats (quat: 4, scale: 1, translation: 3), which is very
// convenient for SSE and GLSL, which operate on 4-dimensional
// float vectors.
#if idx86_sse
#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
// Here we have a union of scalar struct and sse struct, transform_u and the
// scalar struct must match transform_t so we have to use anonymous structs.
// We disable compiler warnings when using -Wpedantic for this specific case.
Expand Down Expand Up @@ -375,7 +375,7 @@ extern const quat_t quatIdentity;
float y;

// compute approximate inverse square root
#if defined( idx86_sse )
#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
// SSE rsqrt relative error bound: 3.7 * 10^-4
_mm_store_ss( &y, _mm_rsqrt_ss( _mm_load_ss( &number ) ) );
#elif idppc
Expand Down Expand Up @@ -774,7 +774,7 @@ inline float DotProduct( const vec3_t x, const vec3_t y )
//=============================================
// combining Transformations

#if idx86_sse
#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
/* swizzles for _mm_shuffle_ps instruction */
#define SWZ_XXXX 0x00
#define SWZ_YXXX 0x01
Expand Down Expand Up @@ -1293,6 +1293,7 @@ inline float DotProduct( const vec3_t x, const vec3_t y )
t->sseRot = sseQuatNormalize( t->sseRot );
}
#else
// The non-SSE variants are in q_math.cpp file.
void TransInit( transform_t *t );
void TransCopy( const transform_t *in, transform_t *out );

Expand Down

0 comments on commit aaca763

Please sign in to comment.