Skip to content

Commit

Permalink
[arm64][fpu] add fp arch extension around inline fpu asm
Browse files Browse the repository at this point in the history
This quiets warnings from clang 18 about the fp arch extension not being
enabled when fp instructions are used in inline assembly.
  • Loading branch information
travisg committed Nov 7, 2024
1 parent e3a95c6 commit eac1561
Showing 1 changed file with 48 additions and 39 deletions.
87 changes: 48 additions & 39 deletions arch/arm64/fpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,49 +29,58 @@ static void arm64_fpu_load_state(struct thread *t) {


STATIC_ASSERT(sizeof(fpstate->regs) == 16 * 32);
__asm__ volatile("ldp q0, q1, [%0, #(0 * 32)]\n"
"ldp q2, q3, [%0, #(1 * 32)]\n"
"ldp q4, q5, [%0, #(2 * 32)]\n"
"ldp q6, q7, [%0, #(3 * 32)]\n"
"ldp q8, q9, [%0, #(4 * 32)]\n"
"ldp q10, q11, [%0, #(5 * 32)]\n"
"ldp q12, q13, [%0, #(6 * 32)]\n"
"ldp q14, q15, [%0, #(7 * 32)]\n"
"ldp q16, q17, [%0, #(8 * 32)]\n"
"ldp q18, q19, [%0, #(9 * 32)]\n"
"ldp q20, q21, [%0, #(10 * 32)]\n"
"ldp q22, q23, [%0, #(11 * 32)]\n"
"ldp q24, q25, [%0, #(12 * 32)]\n"
"ldp q26, q27, [%0, #(13 * 32)]\n"
"ldp q28, q29, [%0, #(14 * 32)]\n"
"ldp q30, q31, [%0, #(15 * 32)]\n"
"msr fpcr, %1\n"
"msr fpsr, %2\n"
:: "r"(fpstate), "r"(fpstate->fpcr), "r"(fpstate->fpsr));
__asm__ volatile(
".arch_extension fp\n"
"ldp q0, q1, [%0, #(0 * 32)]\n"
"ldp q2, q3, [%0, #(1 * 32)]\n"
"ldp q4, q5, [%0, #(2 * 32)]\n"
"ldp q6, q7, [%0, #(3 * 32)]\n"
"ldp q8, q9, [%0, #(4 * 32)]\n"
"ldp q10, q11, [%0, #(5 * 32)]\n"
"ldp q12, q13, [%0, #(6 * 32)]\n"
"ldp q14, q15, [%0, #(7 * 32)]\n"
"ldp q16, q17, [%0, #(8 * 32)]\n"
"ldp q18, q19, [%0, #(9 * 32)]\n"
"ldp q20, q21, [%0, #(10 * 32)]\n"
"ldp q22, q23, [%0, #(11 * 32)]\n"
"ldp q24, q25, [%0, #(12 * 32)]\n"
"ldp q26, q27, [%0, #(13 * 32)]\n"
"ldp q28, q29, [%0, #(14 * 32)]\n"
"ldp q30, q31, [%0, #(15 * 32)]\n"
"msr fpcr, %1\n"
"msr fpsr, %2\n"
".arch_extension nofp\n"
:: "r"(fpstate), "r"((uint64_t)fpstate->fpcr), "r"((uint64_t)fpstate->fpsr));
}

// Save the current FP/SIMD register state into t->arch.fpstate.
//
// Stores q0..q31 as 16 register pairs at byte offsets 0*32 .. 15*32 from the
// fpstate pointer, then reads FPCR and FPSR and records them in
// fpstate->fpcr / fpstate->fpsr.
//
// NOTE(review): this listing contains two asm statements back to back. The
// first writes FPCR/FPSR directly into the fpstate fields; the second is
// wrapped in .arch_extension fp/nofp and goes through 64-bit temporaries.
// This looks like the removed and added sides of a diff shown together --
// confirm that only the second statement exists in the real file.
void arm64_fpu_save_state(struct thread *t) {
struct fpstate *fpstate = &t->arch.fpstate;
// Operand numbering: %0 = fpcr output, %1 = fpsr output, %2 = fpstate base.
// The stores use %2 with offset 0 as the start of the q-register area
// (presumably fpstate->regs is the first member -- TODO confirm).
__asm__ volatile("stp q0, q1, [%2, #(0 * 32)]\n"
"stp q2, q3, [%2, #(1 * 32)]\n"
"stp q4, q5, [%2, #(2 * 32)]\n"
"stp q6, q7, [%2, #(3 * 32)]\n"
"stp q8, q9, [%2, #(4 * 32)]\n"
"stp q10, q11, [%2, #(5 * 32)]\n"
"stp q12, q13, [%2, #(6 * 32)]\n"
"stp q14, q15, [%2, #(7 * 32)]\n"
"stp q16, q17, [%2, #(8 * 32)]\n"
"stp q18, q19, [%2, #(9 * 32)]\n"
"stp q20, q21, [%2, #(10 * 32)]\n"
"stp q22, q23, [%2, #(11 * 32)]\n"
"stp q24, q25, [%2, #(12 * 32)]\n"
"stp q26, q27, [%2, #(13 * 32)]\n"
"stp q28, q29, [%2, #(14 * 32)]\n"
"stp q30, q31, [%2, #(15 * 32)]\n"
"mrs %0, fpcr\n"
"mrs %1, fpsr\n"
: "=r"(fpstate->fpcr), "=r"(fpstate->fpsr)
: "r"(fpstate));
// 64-bit temporaries receive the mrs results; they are narrowed to 32 bits
// below before being stored in fpstate.
uint64_t fpcr, fpsr;
// Same store/read sequence as above, bracketed by ".arch_extension fp" and
// ".arch_extension nofp" so the assembler accepts the FP instructions inside
// this statement and the extension is switched back off afterwards.
// NOTE(review): the asm stores through %2 but declares no "memory" clobber
// or memory output operand -- confirm the compiler cannot cache or reorder
// accesses to the saved register area across this statement.
__asm__ volatile(
".arch_extension fp\n"
"stp q0, q1, [%2, #(0 * 32)]\n"
"stp q2, q3, [%2, #(1 * 32)]\n"
"stp q4, q5, [%2, #(2 * 32)]\n"
"stp q6, q7, [%2, #(3 * 32)]\n"
"stp q8, q9, [%2, #(4 * 32)]\n"
"stp q10, q11, [%2, #(5 * 32)]\n"
"stp q12, q13, [%2, #(6 * 32)]\n"
"stp q14, q15, [%2, #(7 * 32)]\n"
"stp q16, q17, [%2, #(8 * 32)]\n"
"stp q18, q19, [%2, #(9 * 32)]\n"
"stp q20, q21, [%2, #(10 * 32)]\n"
"stp q22, q23, [%2, #(11 * 32)]\n"
"stp q24, q25, [%2, #(12 * 32)]\n"
"stp q26, q27, [%2, #(13 * 32)]\n"
"stp q28, q29, [%2, #(14 * 32)]\n"
"stp q30, q31, [%2, #(15 * 32)]\n"
"mrs %0, fpcr\n"
"mrs %1, fpsr\n"
".arch_extension nofp\n"
: "=r"(fpcr), "=r"(fpsr)
: "r"(fpstate));
// Keep only the low 32 bits of each register value in the saved state.
fpstate->fpcr = (uint32_t)fpcr;
fpstate->fpsr = (uint32_t)fpsr;

// Trace the thread name and the control/status values just saved.
LTRACEF("thread %s, fpcr %x, fpsr %x\n", t->name, fpstate->fpcr, fpstate->fpsr);
}
Expand Down

0 comments on commit eac1561

Please sign in to comment.