From 450ed8aba32437e971b9e329029703f3d4664669 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Fri, 6 Dec 2024 17:22:33 +0000 Subject: [PATCH] [compiler-rt][AArch64] Rewrite SME routines to all use FMV feature bits. When #92921 added the `__arm_get_current_vg` functionality, it used the FMV feature bits mechanism rather than the existing mechanism that was previously added for SME that called `getauxval` (on Linux platforms) or `__aarch64_sme_accessible` (required for baremetal libraries). It seems simpler to always use the FMV feature bits mechanism, but for baremetal targets we still need to rely on `__aarch64_sme_accessible`. --- compiler-rt/lib/builtins/CMakeLists.txt | 9 +++- .../lib/builtins/aarch64/sme-abi-assert.c | 10 ++++ .../lib/builtins/aarch64/sme-abi-init.c | 50 ------------------- compiler-rt/lib/builtins/aarch64/sme-abi.S | 29 +++++------ compiler-rt/lib/builtins/cpu_model/aarch64.c | 2 + .../cpu_model/aarch64/fmv/baremetal_sme.inc | 31 ++++++++++++ 6 files changed, 62 insertions(+), 69 deletions(-) create mode 100644 compiler-rt/lib/builtins/aarch64/sme-abi-assert.c delete mode 100644 compiler-rt/lib/builtins/aarch64/sme-abi-init.c create mode 100644 compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal_sme.inc diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index 70dc7d860d8f6a..b0266c00d4efbc 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -573,10 +573,13 @@ set(aarch64_SOURCES ) if (COMPILER_RT_HAS_AARCH64_SME) - if (NOT COMPILER_RT_DISABLE_AARCH64_FMV AND COMPILER_RT_HAS_FNO_BUILTIN_FLAG AND (COMPILER_RT_HAS_AUXV OR COMPILER_RT_BAREMETAL_BUILD)) - list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-libc-mem-routines.S aarch64/sme-abi-init.c aarch64/sme-libc-routines.c) + if (NOT COMPILER_RT_DISABLE_AARCH64_FMV AND COMPILER_RT_HAS_FNO_BUILTIN_FLAG) + list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-libc-mem-routines.S 
aarch64/sme-abi-assert.c aarch64/sme-libc-routines.c) message(STATUS "AArch64 SME ABI routines enabled") set_source_files_properties(aarch64/sme-libc-routines.c PROPERTIES COMPILE_FLAGS "-fno-builtin") + if(COMPILER_RT_BAREMETAL_BUILD) + set(COMPILER_RT_BAREMETAL_AARCH64_SME TRUE) + endif() else() if(COMPILER_RT_DISABLE_AARCH64_FMV) message(WARNING "AArch64 SME ABI routines require function multiversioning support.") @@ -844,6 +847,8 @@ else () list(APPEND BUILTIN_DEFS DISABLE_AARCH64_FMV) endif() + append_list_if(COMPILER_RT_BAREMETAL_AARCH64_SME -DENABLE_BAREMETAL_AARCH64_SME_FMV_FEATURES BUILTIN_CFLAGS) + append_list_if(COMPILER_RT_HAS_ASM_LSE HAS_ASM_LSE BUILTIN_DEFS) foreach (arch ${BUILTIN_SUPPORTED_ARCH}) diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi-assert.c b/compiler-rt/lib/builtins/aarch64/sme-abi-assert.c new file mode 100644 index 00000000000000..4333353f8d2d1b --- /dev/null +++ b/compiler-rt/lib/builtins/aarch64/sme-abi-assert.c @@ -0,0 +1,10 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// We rely on the FMV __aarch64_cpu_features mechanism to determine +// which features are set at runtime. + +#include "../cpu_model/AArch64CPUFeatures.inc" +_Static_assert(FEAT_SVE == 30, "sme-abi.S assumes FEAT_SVE = 30"); +_Static_assert(FEAT_SME == 42, "sme-abi.S assumes FEAT_SME = 42"); diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi-init.c b/compiler-rt/lib/builtins/aarch64/sme-abi-init.c deleted file mode 100644 index d3cd8278a5d214..00000000000000 --- a/compiler-rt/lib/builtins/aarch64/sme-abi-init.c +++ /dev/null @@ -1,50 +0,0 @@ -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -__attribute__((visibility("hidden"), nocommon)) -_Bool __aarch64_has_sme_and_tpidr2_el0; - -// We have multiple ways to check that the function has SME, depending on our -// target. -// * For Linux/Glibc we can use getauxval(). -// * For Android we can use getauxval(). -// * For newlib we can use __aarch64_sme_accessible(). - -#if defined(__linux__) - -#if defined(__ANDROID__) -#include <sys/auxv.h> -#elif __has_include(<sys/auxv.h>) -#include <sys/auxv.h> -#else -#define getauxval(x) 0 -#endif -#include "../cpu_model/aarch64/hwcap.inc" - -static _Bool has_sme(void) { return getauxval(AT_HWCAP2) & HWCAP2_SME; } - -#else // defined(__linux__) - -#if defined(COMPILER_RT_SHARED_LIB) -__attribute__((weak)) -#endif -extern _Bool __aarch64_sme_accessible(void); - -static _Bool has_sme(void) { -#if defined(COMPILER_RT_SHARED_LIB) - if (!__aarch64_sme_accessible) - return 0; -#endif - return __aarch64_sme_accessible(); -} - -#endif // defined(__linux__) - -#if __GNUC__ >= 9 -#pragma GCC diagnostic ignored "-Wprio-ctor-dtor" -#endif -__attribute__((constructor(90))) -static void init_aarch64_has_sme(void) { - __aarch64_has_sme_and_tpidr2_el0 = has_sme(); -} diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S index 623a95dd4dae5f..a6bb921bd9e6b9 100644 --- a/compiler-rt/lib/builtins/aarch64/sme-abi.S +++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S @@ -9,18 +9,15 @@ #include "../assembly.h" .set FEAT_SVE_BIT, 30 +.set FEAT_SME_BIT, 42 .set SVCR_PSTATE_SM_BIT, 0 #if !defined(__APPLE__) -#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0) -#define TPIDR2_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0) #define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features) #define CPU_FEATS_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_cpu_features) #else // MachO requires @page/@pageoff directives because the global is defined // in a different file. 
Otherwise this file may fail to build. -#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@page -#define TPIDR2_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff #define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)@page #define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff #endif @@ -63,9 +60,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state) mov x0, xzr mov x1, xzr - adrp x16, TPIDR2_SYMBOL - ldrb w16, [x16, TPIDR2_SYMBOL_OFFSET] - cbz w16, 1f + adrp x16, CPU_FEATS_SYMBOL + ldr x16, [x16, CPU_FEATS_SYMBOL_OFFSET] + tbz x16, #FEAT_SME_BIT, 1f 0: orr x0, x0, #0xC000000000000000 mrs x16, SVCR @@ -116,9 +113,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save) BTI_C // If the current thread does not have access to TPIDR2_EL0, the subroutine // does nothing. - adrp x14, TPIDR2_SYMBOL - ldrb w14, [x14, TPIDR2_SYMBOL_OFFSET] - cbz w14, 1f + adrp x14, CPU_FEATS_SYMBOL + ldr x14, [x14, CPU_FEATS_SYMBOL_OFFSET] + tbz x14, #FEAT_SME_BIT, 1f // If TPIDR2_EL0 is null, the subroutine does nothing. mrs x16, TPIDR2_EL0 @@ -157,9 +154,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable) BTI_C // If the current thread does not have access to SME, the subroutine does // nothing. - adrp x14, TPIDR2_SYMBOL - ldrb w14, [x14, TPIDR2_SYMBOL_OFFSET] - cbz w14, 0f + adrp x14, CPU_FEATS_SYMBOL + ldr x14, [x14, CPU_FEATS_SYMBOL_OFFSET] + tbz x14, #FEAT_SME_BIT, 0f // Otherwise, the subroutine behaves as if it did the following: // * Call __arm_tpidr2_save. 
@@ -191,11 +188,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg) BTI_C adrp x17, CPU_FEATS_SYMBOL - ldr w17, [x17, CPU_FEATS_SYMBOL_OFFSET] + ldr x17, [x17, CPU_FEATS_SYMBOL_OFFSET] tbnz w17, #FEAT_SVE_BIT, 1f - adrp x17, TPIDR2_SYMBOL - ldrb w17, [x17, TPIDR2_SYMBOL_OFFSET] - cbz x17, 2f + tbz x17, #FEAT_SME_BIT, 2f 0: mrs x17, SVCR tbz x17, #SVCR_PSTATE_SM_BIT, 2f diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64.c b/compiler-rt/lib/builtins/cpu_model/aarch64.c index 74e5e01b66c547..ef15518ad5f754 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64.c +++ b/compiler-rt/lib/builtins/cpu_model/aarch64.c @@ -80,6 +80,8 @@ struct { #include "aarch64/fmv/getauxval.inc" #elif defined(_WIN32) #include "aarch64/fmv/windows.inc" +#elif defined(ENABLE_BAREMETAL_AARCH64_SME_FMV_FEATURES) +#include "aarch64/fmv/baremetal_sme.inc" #else #include "aarch64/fmv/unimplemented.inc" #endif diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal_sme.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal_sme.inc new file mode 100644 index 00000000000000..f188e84808e019 --- /dev/null +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal_sme.inc @@ -0,0 +1,31 @@ +// For baremetal platforms, we don't really initialise '__aarch64_cpu_features', +// with exception of FEAT_SME that we can get from '__aarch64_sme_accessible'. + +#if defined(COMPILER_RT_SHARED_LIB) +__attribute__((weak)) +#endif +extern _Bool +__aarch64_sme_accessible(void); + +static _Bool has_sme(void) { +#if defined(COMPILER_RT_SHARED_LIB) + if (!__aarch64_sme_accessible) + return 0; +#endif + return __aarch64_sme_accessible(); +} + +void __init_cpu_features_resolver(unsigned long hwcap, + const __ifunc_arg_t *arg) {} + +void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) { + // CPU features already initialized. 
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) + return; + + unsigned long long feat = 0; + if (has_sme()) + feat |= 1ULL << FEAT_SME; + + __atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED); +}