-
Notifications
You must be signed in to change notification settings - Fork 12.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[GlobalISel] Combine [a,s,z]ext of undef into 0 or undef #117439
base: main
Are you sure you want to change the base?
Conversation
Alternative for llvm#113764. It builds on a minimalistic approach, with the legality check in the match step and a blind apply. The precise patterns are used for better compile time and modularity. It also moves the pattern check into the combiner, whereas unary_undef_to_zero and propagate_undef_any_op rely on custom C++ code for pattern matching. Is there a limit on the number of patterns? G_ANYEXT of undef -> undef G_SEXT of undef -> 0 G_ZEXT of undef -> 0 The combine is not a member of the post-legalizer combiner for AArch64. Test: llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-backend-amdgpu Author: Thorsten Schütt (tschuett) ChangesAlternative for #113764 It builds on a minimalistic approach with the legality check in match and a blind apply. The precise patterns are used for better compile-time and modularity. It also moves the pattern check into combiner. While unary_undef_to_zero and propagate_undef_any_op rely on custom C++ code for pattern matching. Is there a limit on the number of patterns? G_ANYEXT of undef -> undef The combine is not a member of the post legalizer combiner for AArch64. Test: Full diff: https://github.com/llvm/llvm-project/pull/117439.diff 7 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 55c3b72c8e027f..6662c1055aa17a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -150,6 +150,10 @@ class CombinerHelper {
/// is a legal integer constant type on the target.
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const;
+ /// \return true if the combine is running prior to legalization, or if \p Ty
+ /// is a legal undef type on the target.
+ bool isUndefLegalOrBeforeLegalizer(const LLT Ty) const;
+
/// MachineRegisterInfo::replaceRegWith() and inform the observer of the changes
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index b0c63fc7c7b806..fee695c4333d99 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -428,7 +428,7 @@ def unary_undef_to_zero: GICombineRule<
// replaced with undef.
def propagate_undef_any_op: GICombineRule<
(defs root:$root),
- (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC, G_BITCAST, G_ANYEXT):$root,
+ (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC, G_BITCAST):$root,
[{ return Helper.matchAnyExplicitUseIsUndef(*${root}); }]),
(apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
@@ -1857,6 +1857,27 @@ class integer_of_opcode<Instruction castOpcode> : GICombineRule <
def integer_of_truncate : integer_of_opcode<G_TRUNC>;
+def anyext_undef: GICombineRule<
+ (defs root:$root),
+ (match (G_IMPLICIT_DEF $undef),
+ (G_ANYEXT $root, $undef):$Aext,
+ [{ return Helper.isUndefLegalOrBeforeLegalizer(MRI.getType(${Aext}->getOperand(0).getReg())); }]),
+ (apply [{ Helper.replaceInstWithUndef(*${Aext}); }])>;
+
+def zext_undef: GICombineRule<
+ (defs root:$root),
+ (match (G_IMPLICIT_DEF $undef),
+ (G_ZEXT $root, $undef):$Zext,
+ [{ return Helper.isConstantLegalOrBeforeLegalizer(MRI.getType(${Zext}->getOperand(0).getReg())); }]),
+ (apply [{ Helper.replaceInstWithConstant(*${Zext}, 0); }])>;
+
+def sext_undef: GICombineRule<
+ (defs root:$root),
+ (match (G_IMPLICIT_DEF $undef),
+ (G_SEXT $root, $undef):$Sext,
+ [{ return Helper.isConstantLegalOrBeforeLegalizer(MRI.getType(${Sext}->getOperand(0).getReg())); }]),
+ (apply [{ Helper.replaceInstWithConstant(*${Sext}, 0); }])>;
+
def cast_of_cast_combines: GICombineGroup<[
truncate_of_zext,
truncate_of_sext,
@@ -1882,7 +1903,10 @@ def cast_combines: GICombineGroup<[
narrow_binop_and,
narrow_binop_or,
narrow_binop_xor,
- integer_of_truncate
+ integer_of_truncate,
+ anyext_undef,
+ sext_undef,
+ zext_undef
]>;
def canonicalize_icmp : GICombineRule<
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index d95fc8cfbcf558..29074103115f59 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -171,6 +171,10 @@ bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
}
+bool CombinerHelper::isUndefLegalOrBeforeLegalizer(const LLT Ty) const {
+ return isPreLegalize() || isLegal({TargetOpcode::G_IMPLICIT_DEF, {Ty}});
+}
+
void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
Register ToReg) const {
Observer.changingAllUsesOfReg(MRI, FromReg);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
index b045deebc56e03..25161652dafac4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
@@ -217,3 +217,55 @@ body: |
%large:_(<2 x s64>) = G_ANYEXT %bv(<2 x s32>)
$q0 = COPY %large(<2 x s64>)
$d0 = COPY %bv(<2 x s32>)
+...
+---
+name: test_combine_anyext_undef
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-PRE-LABEL: name: test_combine_anyext_undef
+ ; CHECK-PRE: %aext:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-PRE-NEXT: $x0 = COPY %aext(s64)
+ ;
+ ; CHECK-POST-LABEL: name: test_combine_anyext_undef
+ ; CHECK-POST: %undef:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-POST-NEXT: %aext:_(s64) = G_ANYEXT %undef(s32)
+ ; CHECK-POST-NEXT: $x0 = COPY %aext(s64)
+ %undef:_(s32) = G_IMPLICIT_DEF
+ %aext:_(s64) = G_ANYEXT %undef(s32)
+ $x0 = COPY %aext(s64)
+...
+---
+name: test_combine_sext_undef
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-PRE-LABEL: name: test_combine_sext_undef
+ ; CHECK-PRE: %sext:_(s64) = G_CONSTANT i64 0
+ ; CHECK-PRE-NEXT: $x0 = COPY %sext(s64)
+ ;
+ ; CHECK-POST-LABEL: name: test_combine_sext_undef
+ ; CHECK-POST: %undef:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-POST-NEXT: %sext:_(s64) = G_SEXT %undef(s32)
+ ; CHECK-POST-NEXT: $x0 = COPY %sext(s64)
+ %undef:_(s32) = G_IMPLICIT_DEF
+ %sext:_(s64) = G_SEXT %undef(s32)
+ $x0 = COPY %sext(s64)
+...
+---
+name: test_combine_zext_undef
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-PRE-LABEL: name: test_combine_zext_undef
+ ; CHECK-PRE: %zext:_(s64) = G_CONSTANT i64 0
+ ; CHECK-PRE-NEXT: $x0 = COPY %zext(s64)
+ ;
+ ; CHECK-POST-LABEL: name: test_combine_zext_undef
+ ; CHECK-POST: %undef:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-POST-NEXT: %zext:_(s64) = G_ZEXT %undef(s32)
+ ; CHECK-POST-NEXT: $x0 = COPY %zext(s64)
+ %undef:_(s32) = G_IMPLICIT_DEF
+ %zext:_(s64) = G_ZEXT %undef(s32)
+ $x0 = COPY %zext(s64)
+...
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
index 5e5fdd6d317057..e89e1516fb1f54 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
@@ -8,17 +8,10 @@
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for extract_v4i32_vector_extract_const
define i64 @extract_v2i64_undef_index(<2 x i64> %a, i32 %c) {
-; CHECK-SD-LABEL: extract_v2i64_undef_index:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: extract_v2i64_undef_index:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: str q0, [sp, #-16]!
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: ldr x0, [sp], #16
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: extract_v2i64_undef_index:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
entry:
%d = extractelement <2 x i64> %a, i32 undef
ret i64 %d
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir
index 7893bfa1d38f08..9b39afd32ac378 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir
@@ -261,8 +261,7 @@ body: |
; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_16
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %shift:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %shift(s16)
+ ; CHECK-NEXT: %zext:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext
; CHECK-NEXT: $vgpr0 = COPY %result(s32)
%arg:_(s32) = COPY $vgpr0
@@ -284,8 +283,7 @@ body: |
; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_24
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %shift:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %shift(s16)
+ ; CHECK-NEXT: %zext:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext
; CHECK-NEXT: $vgpr0 = COPY %result(s32)
%arg:_(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
index a1a466fb04440d..384a2c63122b85 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
@@ -4074,14 +4074,12 @@ define amdgpu_kernel void @v_test_v2i16_x_add_undef_neg32(ptr addrspace(1) %out,
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_not_b32_e32 v2, 31
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
-; VI-GISEL-NEXT: s_and_b32 s0, 0xffff, s0
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-GISEL-NEXT: v_or_b32_e32 v2, s0, v2
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
@@ -4191,15 +4189,12 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_undef(ptr addrspace(1) %out,
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
-; VI-GISEL-NEXT: s_and_b32 s2, 0xffff, s0
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
-; VI-GISEL-NEXT: s_lshl_b32 s0, s2, 16
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffe0, v3
-; VI-GISEL-NEXT: v_or_b32_e32 v2, s0, v2
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
|
@llvm/pr-subscribers-llvm-globalisel Author: Thorsten Schütt (tschuett) ChangesAlternative for #113764. It builds on a minimalistic approach, with the legality check in the match step and a blind apply. The precise patterns are used for better compile time and modularity. It also moves the pattern check into the combiner, whereas unary_undef_to_zero and propagate_undef_any_op rely on custom C++ code for pattern matching. Is there a limit on the number of patterns? G_ANYEXT of undef -> undef The combine is not a member of the post-legalizer combiner for AArch64. Test: Full diff: https://github.com/llvm/llvm-project/pull/117439.diff 7 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 55c3b72c8e027f..6662c1055aa17a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -150,6 +150,10 @@ class CombinerHelper {
/// is a legal integer constant type on the target.
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const;
+ /// \return true if the combine is running prior to legalization, or if \p Ty
+ /// is a legal undef type on the target.
+ bool isUndefLegalOrBeforeLegalizer(const LLT Ty) const;
+
/// MachineRegisterInfo::replaceRegWith() and inform the observer of the changes
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index b0c63fc7c7b806..fee695c4333d99 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -428,7 +428,7 @@ def unary_undef_to_zero: GICombineRule<
// replaced with undef.
def propagate_undef_any_op: GICombineRule<
(defs root:$root),
- (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC, G_BITCAST, G_ANYEXT):$root,
+ (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC, G_BITCAST):$root,
[{ return Helper.matchAnyExplicitUseIsUndef(*${root}); }]),
(apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
@@ -1857,6 +1857,27 @@ class integer_of_opcode<Instruction castOpcode> : GICombineRule <
def integer_of_truncate : integer_of_opcode<G_TRUNC>;
+def anyext_undef: GICombineRule<
+ (defs root:$root),
+ (match (G_IMPLICIT_DEF $undef),
+ (G_ANYEXT $root, $undef):$Aext,
+ [{ return Helper.isUndefLegalOrBeforeLegalizer(MRI.getType(${Aext}->getOperand(0).getReg())); }]),
+ (apply [{ Helper.replaceInstWithUndef(*${Aext}); }])>;
+
+def zext_undef: GICombineRule<
+ (defs root:$root),
+ (match (G_IMPLICIT_DEF $undef),
+ (G_ZEXT $root, $undef):$Zext,
+ [{ return Helper.isConstantLegalOrBeforeLegalizer(MRI.getType(${Zext}->getOperand(0).getReg())); }]),
+ (apply [{ Helper.replaceInstWithConstant(*${Zext}, 0); }])>;
+
+def sext_undef: GICombineRule<
+ (defs root:$root),
+ (match (G_IMPLICIT_DEF $undef),
+ (G_SEXT $root, $undef):$Sext,
+ [{ return Helper.isConstantLegalOrBeforeLegalizer(MRI.getType(${Sext}->getOperand(0).getReg())); }]),
+ (apply [{ Helper.replaceInstWithConstant(*${Sext}, 0); }])>;
+
def cast_of_cast_combines: GICombineGroup<[
truncate_of_zext,
truncate_of_sext,
@@ -1882,7 +1903,10 @@ def cast_combines: GICombineGroup<[
narrow_binop_and,
narrow_binop_or,
narrow_binop_xor,
- integer_of_truncate
+ integer_of_truncate,
+ anyext_undef,
+ sext_undef,
+ zext_undef
]>;
def canonicalize_icmp : GICombineRule<
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index d95fc8cfbcf558..29074103115f59 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -171,6 +171,10 @@ bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
}
+bool CombinerHelper::isUndefLegalOrBeforeLegalizer(const LLT Ty) const {
+ return isPreLegalize() || isLegal({TargetOpcode::G_IMPLICIT_DEF, {Ty}});
+}
+
void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
Register ToReg) const {
Observer.changingAllUsesOfReg(MRI, FromReg);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
index b045deebc56e03..25161652dafac4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
@@ -217,3 +217,55 @@ body: |
%large:_(<2 x s64>) = G_ANYEXT %bv(<2 x s32>)
$q0 = COPY %large(<2 x s64>)
$d0 = COPY %bv(<2 x s32>)
+...
+---
+name: test_combine_anyext_undef
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-PRE-LABEL: name: test_combine_anyext_undef
+ ; CHECK-PRE: %aext:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-PRE-NEXT: $x0 = COPY %aext(s64)
+ ;
+ ; CHECK-POST-LABEL: name: test_combine_anyext_undef
+ ; CHECK-POST: %undef:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-POST-NEXT: %aext:_(s64) = G_ANYEXT %undef(s32)
+ ; CHECK-POST-NEXT: $x0 = COPY %aext(s64)
+ %undef:_(s32) = G_IMPLICIT_DEF
+ %aext:_(s64) = G_ANYEXT %undef(s32)
+ $x0 = COPY %aext(s64)
+...
+---
+name: test_combine_sext_undef
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-PRE-LABEL: name: test_combine_sext_undef
+ ; CHECK-PRE: %sext:_(s64) = G_CONSTANT i64 0
+ ; CHECK-PRE-NEXT: $x0 = COPY %sext(s64)
+ ;
+ ; CHECK-POST-LABEL: name: test_combine_sext_undef
+ ; CHECK-POST: %undef:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-POST-NEXT: %sext:_(s64) = G_SEXT %undef(s32)
+ ; CHECK-POST-NEXT: $x0 = COPY %sext(s64)
+ %undef:_(s32) = G_IMPLICIT_DEF
+ %sext:_(s64) = G_SEXT %undef(s32)
+ $x0 = COPY %sext(s64)
+...
+---
+name: test_combine_zext_undef
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-PRE-LABEL: name: test_combine_zext_undef
+ ; CHECK-PRE: %zext:_(s64) = G_CONSTANT i64 0
+ ; CHECK-PRE-NEXT: $x0 = COPY %zext(s64)
+ ;
+ ; CHECK-POST-LABEL: name: test_combine_zext_undef
+ ; CHECK-POST: %undef:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-POST-NEXT: %zext:_(s64) = G_ZEXT %undef(s32)
+ ; CHECK-POST-NEXT: $x0 = COPY %zext(s64)
+ %undef:_(s32) = G_IMPLICIT_DEF
+ %zext:_(s64) = G_ZEXT %undef(s32)
+ $x0 = COPY %zext(s64)
+...
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
index 5e5fdd6d317057..e89e1516fb1f54 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
@@ -8,17 +8,10 @@
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for extract_v4i32_vector_extract_const
define i64 @extract_v2i64_undef_index(<2 x i64> %a, i32 %c) {
-; CHECK-SD-LABEL: extract_v2i64_undef_index:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: extract_v2i64_undef_index:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: str q0, [sp, #-16]!
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: ldr x0, [sp], #16
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: extract_v2i64_undef_index:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
entry:
%d = extractelement <2 x i64> %a, i32 undef
ret i64 %d
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir
index 7893bfa1d38f08..9b39afd32ac378 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir
@@ -261,8 +261,7 @@ body: |
; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_16
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %shift:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %shift(s16)
+ ; CHECK-NEXT: %zext:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext
; CHECK-NEXT: $vgpr0 = COPY %result(s32)
%arg:_(s32) = COPY $vgpr0
@@ -284,8 +283,7 @@ body: |
; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_24
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %shift:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %shift(s16)
+ ; CHECK-NEXT: %zext:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext
; CHECK-NEXT: $vgpr0 = COPY %result(s32)
%arg:_(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
index a1a466fb04440d..384a2c63122b85 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
@@ -4074,14 +4074,12 @@ define amdgpu_kernel void @v_test_v2i16_x_add_undef_neg32(ptr addrspace(1) %out,
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_not_b32_e32 v2, 31
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
-; VI-GISEL-NEXT: s_and_b32 s0, 0xffff, s0
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-GISEL-NEXT: v_or_b32_e32 v2, s0, v2
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
@@ -4191,15 +4189,12 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_undef(ptr addrspace(1) %out,
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
-; VI-GISEL-NEXT: s_and_b32 s2, 0xffff, s0
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
-; VI-GISEL-NEXT: s_lshl_b32 s0, s2, 16
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffe0, v3
-; VI-GISEL-NEXT: v_or_b32_e32 v2, s0, v2
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
|
Alternative for #113764
It builds on a minimalistic approach, with the legality check in the match step and a blind apply. The precise patterns are used for better compile time and modularity. It also moves the pattern check into the combiner, whereas unary_undef_to_zero and propagate_undef_any_op rely on custom C++ code for pattern matching.
Is there a limit on the number of patterns?
G_ANYEXT of undef -> undef
G_SEXT of undef -> 0
G_ZEXT of undef -> 0
The combine is not a member of the post legalizer combiner for AArch64.
Test:
llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir