[AArch64] Fix frame-pointer offset with hazard padding (llvm#118091)
The `-aarch64-stack-hazard-size=<val>` option disables register pairing
(as the hazard padding may mean the offset is too large for STP/LDP).

This broke setting the frame-pointer offset, as the code to find the
frame record looked for a (FP, LR) register pair.

This patch resolves the issue by looking for FP and LR as two unpaired
registers when hazard padding is enabled.
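
For context: a 64-bit STP/LDP with a signed immediate can only encode a 7-bit offset scaled by 8, i.e. -512 to +504 bytes, so callee-saves placed past a large hazard-padding region may not be reachable with the paired form. The standalone sketch below is only an illustration of that range check (a hypothetical helper, not LLVM code; the 1040-byte offset is an arbitrary example for `-aarch64-stack-hazard-size=1024`).

```cpp
// Minimal sketch (not LLVM code): checks whether a byte offset is encodable
// in the scaled signed 7-bit immediate of a 64-bit STP/LDP.
#include <cstdint>
#include <cstdio>

static bool fitsStpLdpImm64(int64_t OffsetBytes) {
  // imm7 scaled by 8 for X registers: multiples of 8 in [-512, 504].
  return OffsetBytes % 8 == 0 && OffsetBytes >= -512 && OffsetBytes <= 504;
}

int main() {
  std::printf("offset   16: %s\n",
              fitsStpLdpImm64(16) ? "STP/LDP ok" : "unpaired STR/LDR");
  // With e.g. -aarch64-stack-hazard-size=1024, saves past the padding can
  // fall outside the paired range, which is why pairing is disabled.
  std::printf("offset 1040: %s\n",
              fitsStpLdpImm64(1040) ? "STP/LDP ok" : "unpaired STR/LDR");
  return 0;
}
```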
MacDue authored Dec 2, 2024
1 parent 7a7a426 commit 5248e1d
Showing 3 changed files with 265 additions and 29 deletions.
19 changes: 16 additions & 3 deletions llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -3167,11 +3167,24 @@ static void computeCalleeSaveRegisterPairs(
             (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
            "Offset out of bounds for LDP/STP immediate");
 
+    auto isFrameRecord = [&] {
+      if (RPI.isPaired())
+        return IsWindows ? RPI.Reg1 == AArch64::FP && RPI.Reg2 == AArch64::LR
+                         : RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP;
+      // Otherwise, look for the frame record as two unpaired registers. This is
+      // needed for -aarch64-stack-hazard-size=<val>, which disables register
+      // pairing (as the padding may be too large for the LDP/STP offset). Note:
+      // On Windows, this check works out as current reg == FP, next reg == LR,
+      // and on other platforms current reg == FP, previous reg == LR. This
+      // works out as the correct pre-increment or post-increment offsets
+      // respectively.
+      return i > 0 && RPI.Reg1 == AArch64::FP &&
+             CSI[i - 1].getReg() == AArch64::LR;
+    };
+
     // Save the offset to frame record so that the FP register can point to the
     // innermost frame record (spilled FP and LR registers).
-    if (NeedsFrameRecord &&
-        ((!IsWindows && RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
-         (IsWindows && RPI.Reg1 == AArch64::FP && RPI.Reg2 == AArch64::LR)))
+    if (NeedsFrameRecord && isFrameRecord())
       AFI->setCalleeSaveBaseToFrameRecordOffset(Offset);
 
     RegPairs.push_back(RPI);
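
As a reading aid, here is a small standalone sketch of the new lookup (a hypothetical `Reg` enum and a plain vector standing in for `CalleeSavedInfo`; not the LLVM API): the frame record is recognised either as a paired (FP, LR) / (LR, FP) entry, or, when hazard padding has forced unpaired saves, as FP with LR in the neighbouring callee-save slot, mirroring the `isFrameRecord` lambda above.

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

enum Reg { FP, LR, X19, X23, NoReg };

// Sketch of the frame-record check from the patch: a paired (FP, LR) on
// Windows / (LR, FP) elsewhere, or FP with LR as the previous callee-save
// entry when pairing has been disabled by hazard padding.
static bool isFrameRecord(const std::vector<Reg> &CSI, std::size_t i,
                          bool IsPaired, Reg Reg2, bool IsWindows) {
  Reg Reg1 = CSI[i];
  if (IsPaired)
    return IsWindows ? (Reg1 == FP && Reg2 == LR)
                     : (Reg1 == LR && Reg2 == FP);
  return i > 0 && Reg1 == FP && CSI[i - 1] == LR;
}

int main() {
  // Unpaired saves (hazard padding): ..., LR, FP, ... still form a record.
  std::vector<Reg> CSI = {X19, LR, FP, X23};
  assert(isFrameRecord(CSI, 2, /*IsPaired=*/false, NoReg, /*IsWindows=*/false));
  assert(!isFrameRecord(CSI, 3, /*IsPaired=*/false, NoReg, /*IsWindows=*/false));
  return 0;
}
```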
118 changes: 118 additions & 0 deletions llvm/test/CodeGen/AArch64/stack-hazard-windows.ll
@@ -0,0 +1,118 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=aarch64-windows-pc-msvc -aarch64-stack-hazard-size=0 | FileCheck %s --check-prefixes=CHECK0
; RUN: llc < %s -mtriple=aarch64-windows-pc-msvc -aarch64-stack-hazard-size=64 | FileCheck %s --check-prefixes=CHECK64
; RUN: llc < %s -mtriple=aarch64-windows-pc-msvc -aarch64-stack-hazard-size=1024 | FileCheck %s --check-prefixes=CHECK1024

define i32 @fpr_csr_stackobj(double %x) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
; CHECK0-LABEL: fpr_csr_stackobj:
; CHECK0: .seh_proc fpr_csr_stackobj
; CHECK0-NEXT: // %bb.0: // %entry
; CHECK0-NEXT: str x23, [sp, #-48]! // 8-byte Folded Spill
; CHECK0-NEXT: .seh_save_reg_x x23, 48
; CHECK0-NEXT: stp x29, x30, [sp, #8] // 16-byte Folded Spill
; CHECK0-NEXT: .seh_save_fplr 8
; CHECK0-NEXT: stp d9, d10, [sp, #24] // 16-byte Folded Spill
; CHECK0-NEXT: .seh_save_fregp d9, 24
; CHECK0-NEXT: add x29, sp, #8
; CHECK0-NEXT: .seh_add_fp 8
; CHECK0-NEXT: .seh_endprologue
; CHECK0-NEXT: mov w0, wzr
; CHECK0-NEXT: //APP
; CHECK0-NEXT: //NO_APP
; CHECK0-NEXT: str d0, [x29, #32]
; CHECK0-NEXT: .seh_startepilogue
; CHECK0-NEXT: ldp d9, d10, [sp, #24] // 16-byte Folded Reload
; CHECK0-NEXT: .seh_save_fregp d9, 24
; CHECK0-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
; CHECK0-NEXT: .seh_save_fplr 8
; CHECK0-NEXT: ldr x23, [sp], #48 // 8-byte Folded Reload
; CHECK0-NEXT: .seh_save_reg_x x23, 48
; CHECK0-NEXT: .seh_endepilogue
; CHECK0-NEXT: ret
; CHECK0-NEXT: .seh_endfunclet
; CHECK0-NEXT: .seh_endproc
;
; CHECK64-LABEL: fpr_csr_stackobj:
; CHECK64: .seh_proc fpr_csr_stackobj
; CHECK64-NEXT: // %bb.0: // %entry
; CHECK64-NEXT: sub sp, sp, #192
; CHECK64-NEXT: .seh_stackalloc 192
; CHECK64-NEXT: str x23, [sp, #80] // 8-byte Folded Spill
; CHECK64-NEXT: .seh_save_reg x23, 80
; CHECK64-NEXT: str x29, [sp, #88] // 8-byte Folded Spill
; CHECK64-NEXT: .seh_save_reg x29, 88
; CHECK64-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
; CHECK64-NEXT: .seh_save_reg x30, 96
; CHECK64-NEXT: str d9, [sp, #168] // 8-byte Folded Spill
; CHECK64-NEXT: .seh_save_freg d9, 168
; CHECK64-NEXT: str d10, [sp, #176] // 8-byte Folded Spill
; CHECK64-NEXT: .seh_save_freg d10, 176
; CHECK64-NEXT: add x29, sp, #88
; CHECK64-NEXT: .seh_add_fp 88
; CHECK64-NEXT: .seh_endprologue
; CHECK64-NEXT: mov w0, wzr
; CHECK64-NEXT: //APP
; CHECK64-NEXT: //NO_APP
; CHECK64-NEXT: stur d0, [x29, #-16]
; CHECK64-NEXT: .seh_startepilogue
; CHECK64-NEXT: ldr d10, [sp, #176] // 8-byte Folded Reload
; CHECK64-NEXT: .seh_save_freg d10, 176
; CHECK64-NEXT: ldr d9, [sp, #168] // 8-byte Folded Reload
; CHECK64-NEXT: .seh_save_freg d9, 168
; CHECK64-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
; CHECK64-NEXT: .seh_save_reg x30, 96
; CHECK64-NEXT: ldr x29, [sp, #88] // 8-byte Folded Reload
; CHECK64-NEXT: .seh_save_reg x29, 88
; CHECK64-NEXT: ldr x23, [sp, #80] // 8-byte Folded Reload
; CHECK64-NEXT: .seh_save_reg x23, 80
; CHECK64-NEXT: add sp, sp, #192
; CHECK64-NEXT: .seh_stackalloc 192
; CHECK64-NEXT: .seh_endepilogue
; CHECK64-NEXT: ret
; CHECK64-NEXT: .seh_endfunclet
; CHECK64-NEXT: .seh_endproc
;
; CHECK1024-LABEL: fpr_csr_stackobj:
; CHECK1024: .seh_proc fpr_csr_stackobj
; CHECK1024-NEXT: // %bb.0: // %entry
; CHECK1024-NEXT: sub sp, sp, #1072
; CHECK1024-NEXT: str x23, [sp] // 8-byte Folded Spill
; CHECK1024-NEXT: str x29, [sp, #8] // 8-byte Folded Spill
; CHECK1024-NEXT: .seh_save_reg x29, 8
; CHECK1024-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK1024-NEXT: .seh_save_reg x30, 16
; CHECK1024-NEXT: str d9, [sp, #1048] // 8-byte Folded Spill
; CHECK1024-NEXT: .seh_save_freg d9, 1048
; CHECK1024-NEXT: str d10, [sp, #1056] // 8-byte Folded Spill
; CHECK1024-NEXT: .seh_save_freg d10, 1056
; CHECK1024-NEXT: add x29, sp, #8
; CHECK1024-NEXT: .seh_add_fp 8
; CHECK1024-NEXT: .seh_endprologue
; CHECK1024-NEXT: sub sp, sp, #1040
; CHECK1024-NEXT: mov w0, wzr
; CHECK1024-NEXT: //APP
; CHECK1024-NEXT: //NO_APP
; CHECK1024-NEXT: stur d0, [x29, #-16]
; CHECK1024-NEXT: .seh_startepilogue
; CHECK1024-NEXT: add sp, sp, #1040
; CHECK1024-NEXT: .seh_stackalloc 1040
; CHECK1024-NEXT: ldr d10, [sp, #1056] // 8-byte Folded Reload
; CHECK1024-NEXT: .seh_save_freg d10, 1056
; CHECK1024-NEXT: ldr d9, [sp, #1048] // 8-byte Folded Reload
; CHECK1024-NEXT: .seh_save_freg d9, 1048
; CHECK1024-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK1024-NEXT: .seh_save_reg x30, 16
; CHECK1024-NEXT: ldr x29, [sp, #8] // 8-byte Folded Reload
; CHECK1024-NEXT: .seh_save_reg x29, 8
; CHECK1024-NEXT: ldr x23, [sp] // 8-byte Folded Reload
; CHECK1024-NEXT: add sp, sp, #1072
; CHECK1024-NEXT: .seh_endepilogue
; CHECK1024-NEXT: ret
; CHECK1024-NEXT: .seh_endfunclet
; CHECK1024-NEXT: .seh_endproc
entry:
%a = alloca double
tail call void asm sideeffect "", "~{x23},~{d9},~{d10}"()
store double %x, ptr %a
ret i32 0
}
157 changes: 131 additions & 26 deletions llvm/test/CodeGen/AArch64/stack-hazard.ll
@@ -337,19 +337,18 @@ define i32 @csr_d8_allocd_framepointer(double %d) "aarch64_pstate_sm_compatible"
; CHECK64-LABEL: csr_d8_allocd_framepointer:
; CHECK64: // %bb.0: // %entry
; CHECK64-NEXT: sub sp, sp, #176
; CHECK64-NEXT: str d8, [sp, #80] // 8-byte Folded Spill
; CHECK64-NEXT: stp d0, d8, [sp, #72] // 8-byte Folded Spill
; CHECK64-NEXT: stp x29, x30, [sp, #152] // 16-byte Folded Spill
; CHECK64-NEXT: add x29, sp, #80
; CHECK64-NEXT: .cfi_def_cfa w29, 96
; CHECK64-NEXT: add x29, sp, #152
; CHECK64-NEXT: .cfi_def_cfa w29, 24
; CHECK64-NEXT: .cfi_offset w30, -16
; CHECK64-NEXT: .cfi_offset w29, -24
; CHECK64-NEXT: .cfi_offset b8, -96
; CHECK64-NEXT: //APP
; CHECK64-NEXT: //NO_APP
; CHECK64-NEXT: stur d0, [x29, #-8]
; CHECK64-NEXT: ldr x29, [sp, #152] // 8-byte Folded Reload
; CHECK64-NEXT: ldr d8, [sp, #80] // 8-byte Folded Reload
; CHECK64-NEXT: mov w0, wzr
; CHECK64-NEXT: ldr d8, [sp, #80] // 8-byte Folded Reload
; CHECK64-NEXT: add sp, sp, #176
; CHECK64-NEXT: ret
;
@@ -358,17 +357,17 @@ define i32 @csr_d8_allocd_framepointer(double %d) "aarch64_pstate_sm_compatible"
; CHECK1024-NEXT: sub sp, sp, #1056
; CHECK1024-NEXT: str d8, [sp] // 8-byte Folded Spill
; CHECK1024-NEXT: str x29, [sp, #1032] // 8-byte Folded Spill
; CHECK1024-NEXT: mov x29, sp
; CHECK1024-NEXT: add x29, sp, #1032
; CHECK1024-NEXT: str x30, [sp, #1040] // 8-byte Folded Spill
; CHECK1024-NEXT: sub sp, sp, #1040
; CHECK1024-NEXT: .cfi_def_cfa w29, 1056
; CHECK1024-NEXT: .cfi_def_cfa w29, 24
; CHECK1024-NEXT: .cfi_offset w30, -16
; CHECK1024-NEXT: .cfi_offset w29, -24
; CHECK1024-NEXT: .cfi_offset b8, -1056
; CHECK1024-NEXT: mov w0, wzr
; CHECK1024-NEXT: //APP
; CHECK1024-NEXT: //NO_APP
; CHECK1024-NEXT: stur d0, [x29, #-8]
; CHECK1024-NEXT: str d0, [sp, #1032]
; CHECK1024-NEXT: add sp, sp, #1040
; CHECK1024-NEXT: ldr x30, [sp, #1040] // 8-byte Folded Reload
; CHECK1024-NEXT: ldr x29, [sp, #1032] // 8-byte Folded Reload
@@ -2893,8 +2892,8 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK64-NEXT: stp x29, x30, [sp, #128] // 16-byte Folded Spill
; CHECK64-NEXT: stp x9, x20, [sp, #144] // 16-byte Folded Spill
; CHECK64-NEXT: str x19, [sp, #160] // 8-byte Folded Spill
; CHECK64-NEXT: mov x29, sp
; CHECK64-NEXT: .cfi_def_cfa w29, 176
; CHECK64-NEXT: add x29, sp, #128
; CHECK64-NEXT: .cfi_def_cfa w29, 48
; CHECK64-NEXT: .cfi_offset w19, -16
; CHECK64-NEXT: .cfi_offset w20, -24
; CHECK64-NEXT: .cfi_offset w30, -40
@@ -2913,11 +2912,11 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK64-NEXT: mov w20, w0
; CHECK64-NEXT: msub x9, x8, x8, x9
; CHECK64-NEXT: mov sp, x9
; CHECK64-NEXT: stur x9, [x29, #-80]
; CHECK64-NEXT: sub x9, x29, #80
; CHECK64-NEXT: sturh wzr, [x29, #-70]
; CHECK64-NEXT: stur wzr, [x29, #-68]
; CHECK64-NEXT: sturh w8, [x29, #-72]
; CHECK64-NEXT: stur x9, [x29, #-208]
; CHECK64-NEXT: sub x9, x29, #208
; CHECK64-NEXT: sturh wzr, [x29, #-198]
; CHECK64-NEXT: stur wzr, [x29, #-196]
; CHECK64-NEXT: sturh w8, [x29, #-200]
; CHECK64-NEXT: msr TPIDR2_EL0, x9
; CHECK64-NEXT: .cfi_offset vg, -32
; CHECK64-NEXT: smstop sm
@@ -2926,14 +2925,14 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK64-NEXT: .cfi_restore vg
; CHECK64-NEXT: smstart za
; CHECK64-NEXT: mrs x8, TPIDR2_EL0
; CHECK64-NEXT: sub x0, x29, #80
; CHECK64-NEXT: sub x0, x29, #208
; CHECK64-NEXT: cbnz x8, .LBB33_2
; CHECK64-NEXT: // %bb.1: // %entry
; CHECK64-NEXT: bl __arm_tpidr2_restore
; CHECK64-NEXT: .LBB33_2: // %entry
; CHECK64-NEXT: mov w0, w20
; CHECK64-NEXT: msr TPIDR2_EL0, xzr
; CHECK64-NEXT: mov sp, x29
; CHECK64-NEXT: sub sp, x29, #128
; CHECK64-NEXT: .cfi_def_cfa wsp, 176
; CHECK64-NEXT: ldp x20, x19, [sp, #152] // 16-byte Folded Reload
; CHECK64-NEXT: ldr d14, [sp, #8] // 8-byte Folded Reload
@@ -2972,8 +2971,8 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK1024-NEXT: str x28, [sp, #1112] // 8-byte Folded Spill
; CHECK1024-NEXT: str x20, [sp, #1120] // 8-byte Folded Spill
; CHECK1024-NEXT: str x19, [sp, #1128] // 8-byte Folded Spill
; CHECK1024-NEXT: mov x29, sp
; CHECK1024-NEXT: .cfi_def_cfa w29, 1136
; CHECK1024-NEXT: add x29, sp, #1088
; CHECK1024-NEXT: .cfi_def_cfa w29, 48
; CHECK1024-NEXT: .cfi_offset w19, -8
; CHECK1024-NEXT: .cfi_offset w20, -16
; CHECK1024-NEXT: .cfi_offset w28, -24
@@ -2993,14 +2992,14 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK1024-NEXT: mov w20, w0
; CHECK1024-NEXT: msub x9, x8, x8, x9
; CHECK1024-NEXT: mov sp, x9
; CHECK1024-NEXT: sub x10, x29, #784
; CHECK1024-NEXT: sub x10, x29, #1872
; CHECK1024-NEXT: stur x9, [x10, #-256]
; CHECK1024-NEXT: sub x9, x29, #774
; CHECK1024-NEXT: sub x10, x29, #772
; CHECK1024-NEXT: sub x9, x29, #1862
; CHECK1024-NEXT: sub x10, x29, #1860
; CHECK1024-NEXT: sturh wzr, [x9, #-256]
; CHECK1024-NEXT: sub x9, x29, #1040
; CHECK1024-NEXT: sub x9, x29, #2128
; CHECK1024-NEXT: stur wzr, [x10, #-256]
; CHECK1024-NEXT: sub x10, x29, #776
; CHECK1024-NEXT: sub x10, x29, #1864
; CHECK1024-NEXT: sturh w8, [x10, #-256]
; CHECK1024-NEXT: msr TPIDR2_EL0, x9
; CHECK1024-NEXT: .cfi_offset vg, -32
@@ -3010,14 +3009,14 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK1024-NEXT: .cfi_restore vg
; CHECK1024-NEXT: smstart za
; CHECK1024-NEXT: mrs x8, TPIDR2_EL0
; CHECK1024-NEXT: sub x0, x29, #1040
; CHECK1024-NEXT: sub x0, x29, #2128
; CHECK1024-NEXT: cbnz x8, .LBB33_2
; CHECK1024-NEXT: // %bb.1: // %entry
; CHECK1024-NEXT: bl __arm_tpidr2_restore
; CHECK1024-NEXT: .LBB33_2: // %entry
; CHECK1024-NEXT: mov w0, w20
; CHECK1024-NEXT: msr TPIDR2_EL0, xzr
; CHECK1024-NEXT: mov sp, x29
; CHECK1024-NEXT: sub sp, x29, #1088
; CHECK1024-NEXT: .cfi_def_cfa wsp, 1136
; CHECK1024-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK1024-NEXT: ldr x19, [sp, #1128] // 8-byte Folded Reload
@@ -3049,3 +3048,109 @@ entry:
ret i32 %x
}
declare void @other()

declare void @bar(ptr noundef) "aarch64_pstate_sm_compatible"

define i32 @sve_stack_object_and_vla(double %d, i64 %sz) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
; CHECK0-LABEL: sve_stack_object_and_vla:
; CHECK0: // %bb.0: // %entry
; CHECK0-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK0-NEXT: stp x28, x19, [sp, #16] // 16-byte Folded Spill
; CHECK0-NEXT: mov x29, sp
; CHECK0-NEXT: addvl sp, sp, #-1
; CHECK0-NEXT: mov x19, sp
; CHECK0-NEXT: .cfi_def_cfa w29, 32
; CHECK0-NEXT: .cfi_offset w19, -8
; CHECK0-NEXT: .cfi_offset w28, -16
; CHECK0-NEXT: .cfi_offset w30, -24
; CHECK0-NEXT: .cfi_offset w29, -32
; CHECK0-NEXT: lsl x9, x0, #2
; CHECK0-NEXT: mov x8, sp
; CHECK0-NEXT: add x9, x9, #15
; CHECK0-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK0-NEXT: sub x0, x8, x9
; CHECK0-NEXT: mov sp, x0
; CHECK0-NEXT: mov z0.s, #0 // =0x0
; CHECK0-NEXT: ptrue p0.s
; CHECK0-NEXT: st1w { z0.s }, p0, [x29, #-1, mul vl]
; CHECK0-NEXT: bl bar
; CHECK0-NEXT: mov w0, wzr
; CHECK0-NEXT: mov sp, x29
; CHECK0-NEXT: ldp x28, x19, [sp, #16] // 16-byte Folded Reload
; CHECK0-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK0-NEXT: ret
;
; CHECK64-LABEL: sve_stack_object_and_vla:
; CHECK64: // %bb.0: // %entry
; CHECK64-NEXT: sub sp, sp, #96
; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK64-NEXT: add x29, sp, #64
; CHECK64-NEXT: stp x28, x19, [sp, #80] // 16-byte Folded Spill
; CHECK64-NEXT: sub sp, sp, #64
; CHECK64-NEXT: addvl sp, sp, #-1
; CHECK64-NEXT: mov x19, sp
; CHECK64-NEXT: .cfi_def_cfa w29, 32
; CHECK64-NEXT: .cfi_offset w19, -8
; CHECK64-NEXT: .cfi_offset w28, -16
; CHECK64-NEXT: .cfi_offset w30, -24
; CHECK64-NEXT: .cfi_offset w29, -32
; CHECK64-NEXT: lsl x9, x0, #2
; CHECK64-NEXT: mov x8, sp
; CHECK64-NEXT: add x9, x9, #15
; CHECK64-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK64-NEXT: sub x0, x8, x9
; CHECK64-NEXT: mov sp, x0
; CHECK64-NEXT: mov z0.s, #0 // =0x0
; CHECK64-NEXT: ptrue p0.s
; CHECK64-NEXT: sub x8, x29, #64
; CHECK64-NEXT: st1w { z0.s }, p0, [x8, #-1, mul vl]
; CHECK64-NEXT: bl bar
; CHECK64-NEXT: mov w0, wzr
; CHECK64-NEXT: sub sp, x29, #64
; CHECK64-NEXT: ldp x28, x19, [sp, #80] // 16-byte Folded Reload
; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK64-NEXT: add sp, sp, #96
; CHECK64-NEXT: ret
;
; CHECK1024-LABEL: sve_stack_object_and_vla:
; CHECK1024: // %bb.0: // %entry
; CHECK1024-NEXT: sub sp, sp, #1056
; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK1024-NEXT: add x29, sp, #1024
; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill
; CHECK1024-NEXT: str x28, [sp, #1040] // 8-byte Folded Spill
; CHECK1024-NEXT: str x19, [sp, #1048] // 8-byte Folded Spill
; CHECK1024-NEXT: sub sp, sp, #1024
; CHECK1024-NEXT: addvl sp, sp, #-1
; CHECK1024-NEXT: mov x19, sp
; CHECK1024-NEXT: .cfi_def_cfa w29, 32
; CHECK1024-NEXT: .cfi_offset w19, -8
; CHECK1024-NEXT: .cfi_offset w28, -16
; CHECK1024-NEXT: .cfi_offset w30, -24
; CHECK1024-NEXT: .cfi_offset w29, -32
; CHECK1024-NEXT: lsl x9, x0, #2
; CHECK1024-NEXT: mov x8, sp
; CHECK1024-NEXT: add x9, x9, #15
; CHECK1024-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK1024-NEXT: sub x0, x8, x9
; CHECK1024-NEXT: mov sp, x0
; CHECK1024-NEXT: mov z0.s, #0 // =0x0
; CHECK1024-NEXT: ptrue p0.s
; CHECK1024-NEXT: sub x8, x29, #1024
; CHECK1024-NEXT: st1w { z0.s }, p0, [x8, #-1, mul vl]
; CHECK1024-NEXT: bl bar
; CHECK1024-NEXT: mov w0, wzr
; CHECK1024-NEXT: sub sp, x29, #1024
; CHECK1024-NEXT: ldr x19, [sp, #1048] // 8-byte Folded Reload
; CHECK1024-NEXT: ldr x28, [sp, #1040] // 8-byte Folded Reload
; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload
; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
; CHECK1024-NEXT: add sp, sp, #1056
; CHECK1024-NEXT: ret
entry:
%a = alloca <vscale x 4 x i32>
%b = alloca i32, i64 %sz, align 4
store <vscale x 4 x i32> zeroinitializer, ptr %a
call void @bar(ptr noundef nonnull %b)
ret i32 0
}
