Skip to content

Commit

Permalink
[Compiler-rt] Add AArch64 routines for __arm_agnostic("sme_za_state")
Browse files Browse the repository at this point in the history
The specification of these routines can be found here:

  ARM-software/abi-aa#264
  • Loading branch information
sdesmalen-arm committed Sep 9, 2024
1 parent 485d191 commit 9cae4ef
Show file tree
Hide file tree
Showing 3 changed files with 155 additions and 3 deletions.
3 changes: 2 additions & 1 deletion compiler-rt/cmake/builtin-config-ix.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ asm(\"cas w0, w1, [x2]\");
# Toolchain probe for the SME builtins: the test source below declares a
# streaming-compatible function, selects an SME-capable `.arch`, and emits
# `smstart` plus a ZT0 load (`ldr zt0`). The check is keyed on
# COMPILER_RT_HAS_AARCH64_SME (presumably set only when this source builds --
# confirm against builtin_check_c_compiler_source).
builtin_check_c_compiler_source(COMPILER_RT_HAS_AARCH64_SME
"
void foo(void) __arm_streaming_compatible {
asm(\".arch armv9-a+sme\");
asm(\".arch armv9-a+sme2\");
asm(\"smstart\");
asm(\"ldr zt0, [sp]\");
}
")

Expand Down
5 changes: 5 additions & 0 deletions compiler-rt/lib/builtins/aarch64/sme-abi-init.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,8 @@ __attribute__((constructor(90)))
static void init_aarch64_has_sme(void) {
__aarch64_has_sme_and_tpidr2_el0 = has_sme();
}

// sme-abi.S hard-codes the bit positions of the SVE, SME and SME2 feature
// flags (see the assertion messages). Fail the build if the FEAT_* values,
// presumably provided by AArch64CPUFeatures.inc, ever drift from those
// hard-coded numbers.
#include "../cpu_model/AArch64CPUFeatures.inc"
_Static_assert(FEAT_SVE== 30, "sme-abi.S assumes FEAT_SVE = 30");
_Static_assert(FEAT_SME== 42, "sme-abi.S assumes FEAT_SME = 42");
_Static_assert(FEAT_SME2 == 57, "sme-abi.S assumes FEAT_SME2 = 57");
150 changes: 148 additions & 2 deletions compiler-rt/lib/builtins/aarch64/sme-abi.S
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@

#include "../assembly.h"

// Bit positions of the SVE, SME and SME2 flags inside the CPU features word
// loaded from CPU_FEATS_SYMBOL; used as the bit operand of tbz/tbnz.
// These values are statically asserted against FEAT_SVE/FEAT_SME/FEAT_SME2 in
// sme-abi-init.c.
#define FEAT_SVE_BIT 30
#define FEAT_SME_BIT 42
#define FEAT_SME2_BIT 57
// (1 << FEAT_SME2_BIT) spelled out as a literal, for use as the immediate
// operand of `tst`.
#define FEAT_SME2_MASK 0x200000000000000

#if !defined(__APPLE__)
#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
Expand All @@ -23,7 +27,7 @@
#define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff
#endif

.arch armv9-a+sme
.arch armv9-a+sme2

// Utility function which calls a system's abort() routine. Because the function
// is streaming-compatible it should disable streaming-SVE mode before calling
Expand Down Expand Up @@ -196,7 +200,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg)
.cfi_offset w29, -16
adrp x17, CPU_FEATS_SYMBOL
ldr w17, [x17, CPU_FEATS_SYMBOL_OFFSET]
tbnz w17, #30, 0f
tbnz w17, #FEAT_SVE_BIT, 0f
adrp x16, TPIDR2_SYMBOL
ldrb w16, [x16, TPIDR2_SYMBOL_OFFSET]
cbz w16, 1f
Expand Down Expand Up @@ -224,6 +228,148 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg)
ret
END_COMPILERRT_OUTLINE_FUNCTION(__arm_get_current_vg)

DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state_size)
  .variant_pcs __arm_sme_state_size
  BTI_C

  // uint64_t __arm_sme_state_size(void)
  //
  // Out:   x0 = number of bytes needed for the SME state buffer:
  //             16                      if SME is unavailable or PSTATE.ZA = 0
  //             32 + SVLB * SVLB        if SME (without SME2) and PSTATE.ZA = 1
  //             96 + SVLB * SVLB        if SME2 and PSTATE.ZA = 1
  // Clobb: x16, x17, NZCV

  // Test if SME is available and PSTATE.ZA = 1.
  // x17 keeps the CPU features word so it is only loaded once.
  adrp  x17, CPU_FEATS_SYMBOL
  ldr   x17, [x17, CPU_FEATS_SYMBOL_OFFSET]
  tbz   x17, #FEAT_SME_BIT, 0f
  mrs   x16, SVCR
  tbz   x16, #1, 0f

  // Size = HAS_FEAT_SME2 ? 96 : 32
  // (tst sets EQ when the SME2 bit is clear, which selects 32.)
  tst   x17, #FEAT_SME2_MASK
  mov   w17, #32
  mov   w16, #96
  csel  x16, x17, x16, eq

  // Size = Size + (SVLB * SVLB)
  rdsvl x17, #1
  madd  x0, x17, x17, x16
  ret

0:
  // Default case, 16 bytes is minimum (to encode VALID bit, multiple of 16 bytes)
  mov   w0, #16
  ret
END_COMPILERRT_OUTLINE_FUNCTION(__arm_sme_state_size)

DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_save)
  .variant_pcs __arm_sme_save
  BTI_C

  // void __arm_sme_save(void *ptr)
  //
  // In:    x0 = ptr, buffer to save into. Layout written by this routine:
  //          ptr +  0 : 16 bytes of internal state, bit 0 = VALID
  //          ptr + 16 : 16-byte TPIDR2 block
  //                       +0  : pointer to the ZA save area
  //                       +8  : halfword, value of `rdsvl #1`
  //                       +10 : 6 bytes written as zero
  //          ptr + 32 : ZT0 (only when SME2 is present), ZA save area at +96;
  //                     without SME2 the ZA save area starts at +32.
  // Out:   TPIDR2_EL0 = ptr + 16 when a lazy save was set up.
  // Clobb: x16, x17; x0 is advanced by 16 on the save path.
  //        x18 (platform register) is deliberately not used as scratch.

  // Clear internal state bits
  stp   xzr, xzr, [x0]

  // If SME is not available, PSTATE.ZA = 0 or TPIDR2_EL0 != 0, return.
  // x17 keeps the CPU features word for the SME2 test further down.
  adrp  x17, CPU_FEATS_SYMBOL
  ldr   x17, [x17, CPU_FEATS_SYMBOL_OFFSET]
  tbz   x17, #FEAT_SME_BIT, 0f
  mrs   x16, SVCR
  tbz   x16, #1, 0f
  mrs   x16, TPIDR2_EL0
  cbnz  x16, 0f

  // ZA or ZT0 need saving, we can now set internal VALID bit to 1
  mov   w16, #1
  str   x16, [x0]

  // x16 = first byte after the two 16-byte headers. With SME only, this is
  // where the ZA save area starts.
  add   x16, x0, #32
  tbz   x17, #FEAT_SME2_BIT, 1f

  // Has SME2: store ZT0 at ptr + 32, the ZA save area then starts at ptr + 96.
  str   zt0, [x16]
  add   x16, x0, #96

1:
  // Set up lazy-save (x16 = pointer to buffer)
  rdsvl x17, #1
  str   x16, [x0, #16]!         // x0 = ptr + 16 = TPIDR2 block; store buffer ptr
  strh  w17, [x0, #8]
  stur  wzr, [x0, #10]          // zero bytes 10..13 (unaligned offset => stur)
  strh  wzr, [x0, #14]          // zero bytes 14..15
  msr   TPIDR2_EL0, x0
  ret

0:
  // Do nothing
  ret
END_COMPILERRT_OUTLINE_FUNCTION(__arm_sme_save)

DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_restore)
  // Mark *this* symbol as variant-PCS (previously named __arm_sme_save here).
  .variant_pcs __arm_sme_restore
  BTI_C

  // void __arm_sme_restore(void *ptr)
  //
  // In:    x0 = ptr, buffer previously filled by __arm_sme_save:
  //          ptr +  0 : internal state, bit 0 = VALID
  //          ptr + 16 : TPIDR2 block passed to __arm_tpidr2_restore
  //          ptr + 32 : ZT0 (read only when SME2 is present)
  // Out:   If VALID was set: ZA is enabled, TPIDR2_EL0 = 0 and, with SME2,
  //        ZT0 is reloaded. If VALID was clear: nothing is changed.
  //        Branches to do_abort when VALID is set but SME is unavailable, or
  //        when TPIDR2_EL0 == 0 while PSTATE.ZA = 1.
  // Clobb: x16, plus whatever __arm_tpidr2_restore clobbers.

  stp   x29, x30, [sp, #-16]!
  .cfi_def_cfa_offset 16
  mov   x29, sp
  .cfi_def_cfa w29, 16
  .cfi_offset w30, -8
  .cfi_offset w29, -16

  // If the VALID bit is 0, return early.
  ldr   x16, [x0]
  tbz   x16, #0, 2f

  // If SME is not available, abort.
  adrp  x16, CPU_FEATS_SYMBOL
  ldr   x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
  tbz   x16, #FEAT_SME_BIT, 3f

  // If TPIDR2_EL0 != nullptr, no lazy-save was committed, try to reload zt0.
  mrs   x16, TPIDR2_EL0
  cbnz  x16, 0f

  // If TPIDR2_EL0 == nullptr and PSTATE.ZA = 1 (<=> ZA state is 'active'),
  // abort.
  mrs   x16, SVCR
  tbnz  x16, #1, 3f

  // Restore za.
  // x0 is not stashed in x16/x17 across the call: those are the
  // intra-procedure-call scratch registers and may be overwritten by a veneer
  // or by the callee. Instead, undo the +16 adjustment afterwards.
  // NOTE(review): this relies on __arm_tpidr2_restore preserving x0, as the
  // SME ABI requires of that routine -- confirm against its definition.
  smstart za
  add   x0, x0, #16
  bl    __arm_tpidr2_restore
  sub   x0, x0, #16

0:
  // Both paths end with ZA enabled and no lazy save pending, so TPIDR2_EL0 is
  // cleared here rather than only on the path that called
  // __arm_tpidr2_restore.
  smstart za
  msr   TPIDR2_EL0, xzr

  // Check if zt0 needs restoring.
  adrp  x16, CPU_FEATS_SYMBOL
  ldr   x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
  tbz   x16, #FEAT_SME2_BIT, 2f

  // Restore zt0.
  add   x16, x0, #32
  ldr   zt0, [x16]

2:
  // Common return path: tear down the frame.
  // Remember the in-frame CFI state so it can be reinstated for the abort
  // path that follows the `ret`.
  .cfi_remember_state
  .cfi_def_cfa wsp, 16
  ldp   x29, x30, [sp], #16
  .cfi_def_cfa_offset 0
  .cfi_restore w30
  .cfi_restore w29
  ret

3:
  // Abort path: reached with the frame still set up.
  .cfi_restore_state
  b     SYMBOL_NAME(do_abort)
END_COMPILERRT_OUTLINE_FUNCTION(__arm_sme_restore)

NO_EXEC_STACK_DIRECTIVE

// GNU property note for BTI and PAC
Expand Down

0 comments on commit 9cae4ef

Please sign in to comment.