// Patch summary (descriptive preamble; the patch proper starts at the first
// "diff --git" line below).
//
// GlobalISel: fold extensions of G_IMPLICIT_DEF behind explicit legality checks.
//
// * CombinerHelper.h / CombinerHelper.cpp
//     Add CombinerHelper::isUndefLegalOrBeforeLegalizer(const LLT Ty), which
//     returns true when isPreLegalize() holds or when G_IMPLICIT_DEF with type
//     Ty is legal.
//
// * Combine.td
//     - Drop G_ANYEXT from the opcode list of propagate_undef_any_op.
//     - Add anyext_undef: a G_ANYEXT whose source is a G_IMPLICIT_DEF is
//       replaced through Helper.replaceInstWithUndef, guarded by
//       isUndefLegalOrBeforeLegalizer on the type of the matched instruction's
//       operand 0.
//     - Add zext_undef and sext_undef: a G_ZEXT / G_SEXT whose source is a
//       G_IMPLICIT_DEF is replaced through Helper.replaceInstWithConstant(.., 0),
//       guarded by isConstantLegalOrBeforeLegalizer on the type of the matched
//       instruction's operand 0.
//     - Register anyext_undef, sext_undef and zext_undef in cast_combines.
//
// * Tests
//     - AArch64/GlobalISel/combine-cast.mir: new test_combine_anyext_undef,
//       test_combine_sext_undef and test_combine_zext_undef; the CHECK-PRE lines
//       show the folded form, the CHECK-POST lines show the instructions
//       unchanged.
//     - AArch64/extract-vector-elt.ll: the CHECK-SD / CHECK-GI expectations for
//       extract_v2i64_undef_index are merged under the common CHECK prefix.
//     - AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir: in two tests %zext
//       is now expected to be "G_CONSTANT i32 0".
//     - AMDGPU/shrink-add-sub-constant.ll: the VI-GISEL expectations lose the
//       s_and_b32 / s_lshl_b32 / v_or_b32 instructions.
//
// NOTE(review): hunk headers ("@@ ... @@") and +/- markers appear in the middle
// of physical lines here, so the original line breaks look collapsed; the hunks
// presumably cannot be applied in this form - TODO confirm against the original
// patch before use.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 55c3b72c8e027f..6662c1055aa17a 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -150,6 +150,10 @@ class CombinerHelper { /// is a legal integer constant type on the target. bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const; + /// \return true if the combine is running prior to legalization, or if \p Ty + /// is a legal undef type on the target. + bool isUndefLegalOrBeforeLegalizer(const LLT Ty) const; + /// MachineRegisterInfo::replaceRegWith() and inform the observer of the changes void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const; diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index b0c63fc7c7b806..fee695c4333d99 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -428,7 +428,7 @@ def unary_undef_to_zero: GICombineRule< // replaced with undef. 
def propagate_undef_any_op: GICombineRule< (defs root:$root), - (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC, G_BITCAST, G_ANYEXT):$root, + (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC, G_BITCAST):$root, [{ return Helper.matchAnyExplicitUseIsUndef(*${root}); }]), (apply [{ Helper.replaceInstWithUndef(*${root}); }])>; @@ -1857,6 +1857,27 @@ class integer_of_opcode : GICombineRule < def integer_of_truncate : integer_of_opcode; +def anyext_undef: GICombineRule< + (defs root:$root), + (match (G_IMPLICIT_DEF $undef), + (G_ANYEXT $root, $undef):$Aext, + [{ return Helper.isUndefLegalOrBeforeLegalizer(MRI.getType(${Aext}->getOperand(0).getReg())); }]), + (apply [{ Helper.replaceInstWithUndef(*${Aext}); }])>; + +def zext_undef: GICombineRule< + (defs root:$root), + (match (G_IMPLICIT_DEF $undef), + (G_ZEXT $root, $undef):$Zext, + [{ return Helper.isConstantLegalOrBeforeLegalizer(MRI.getType(${Zext}->getOperand(0).getReg())); }]), + (apply [{ Helper.replaceInstWithConstant(*${Zext}, 0); }])>; + +def sext_undef: GICombineRule< + (defs root:$root), + (match (G_IMPLICIT_DEF $undef), + (G_SEXT $root, $undef):$Sext, + [{ return Helper.isConstantLegalOrBeforeLegalizer(MRI.getType(${Sext}->getOperand(0).getReg())); }]), + (apply [{ Helper.replaceInstWithConstant(*${Sext}, 0); }])>; + def cast_of_cast_combines: GICombineGroup<[ truncate_of_zext, truncate_of_sext, @@ -1882,7 +1903,10 @@ def cast_combines: GICombineGroup<[ narrow_binop_and, narrow_binop_or, narrow_binop_xor, - integer_of_truncate + integer_of_truncate, + anyext_undef, + sext_undef, + zext_undef ]>; def canonicalize_icmp : GICombineRule< diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index d95fc8cfbcf558..29074103115f59 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -171,6 +171,10 @@ bool 
CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const { isLegal({TargetOpcode::G_CONSTANT, {EltTy}}); } +bool CombinerHelper::isUndefLegalOrBeforeLegalizer(const LLT Ty) const { + return isPreLegalize() || isLegal({TargetOpcode::G_IMPLICIT_DEF, {Ty}}); +} + void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const { Observer.changingAllUsesOfReg(MRI, FromReg); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir index b045deebc56e03..25161652dafac4 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir @@ -217,3 +217,55 @@ body: | %large:_(<2 x s64>) = G_ANYEXT %bv(<2 x s32>) $q0 = COPY %large(<2 x s64>) $d0 = COPY %bv(<2 x s32>) +... +--- +name: test_combine_anyext_undef +legalized: true +body: | + bb.1: + ; CHECK-PRE-LABEL: name: test_combine_anyext_undef + ; CHECK-PRE: %aext:_(s64) = G_IMPLICIT_DEF + ; CHECK-PRE-NEXT: $x0 = COPY %aext(s64) + ; + ; CHECK-POST-LABEL: name: test_combine_anyext_undef + ; CHECK-POST: %undef:_(s32) = G_IMPLICIT_DEF + ; CHECK-POST-NEXT: %aext:_(s64) = G_ANYEXT %undef(s32) + ; CHECK-POST-NEXT: $x0 = COPY %aext(s64) + %undef:_(s32) = G_IMPLICIT_DEF + %aext:_(s64) = G_ANYEXT %undef(s32) + $x0 = COPY %aext(s64) +... +--- +name: test_combine_sext_undef +legalized: true +body: | + bb.1: + ; CHECK-PRE-LABEL: name: test_combine_sext_undef + ; CHECK-PRE: %sext:_(s64) = G_CONSTANT i64 0 + ; CHECK-PRE-NEXT: $x0 = COPY %sext(s64) + ; + ; CHECK-POST-LABEL: name: test_combine_sext_undef + ; CHECK-POST: %undef:_(s32) = G_IMPLICIT_DEF + ; CHECK-POST-NEXT: %sext:_(s64) = G_SEXT %undef(s32) + ; CHECK-POST-NEXT: $x0 = COPY %sext(s64) + %undef:_(s32) = G_IMPLICIT_DEF + %sext:_(s64) = G_SEXT %undef(s32) + $x0 = COPY %sext(s64) +... 
+--- +name: test_combine_zext_undef +legalized: true +body: | + bb.1: + ; CHECK-PRE-LABEL: name: test_combine_zext_undef + ; CHECK-PRE: %zext:_(s64) = G_CONSTANT i64 0 + ; CHECK-PRE-NEXT: $x0 = COPY %zext(s64) + ; + ; CHECK-POST-LABEL: name: test_combine_zext_undef + ; CHECK-POST: %undef:_(s32) = G_IMPLICIT_DEF + ; CHECK-POST-NEXT: %zext:_(s64) = G_ZEXT %undef(s32) + ; CHECK-POST-NEXT: $x0 = COPY %zext(s64) + %undef:_(s32) = G_IMPLICIT_DEF + %zext:_(s64) = G_ZEXT %undef(s32) + $x0 = COPY %zext(s64) +... diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll index 5e5fdd6d317057..e89e1516fb1f54 100644 --- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll +++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll @@ -8,17 +8,10 @@ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for extract_v4i32_vector_extract_const define i64 @extract_v2i64_undef_index(<2 x i64> %a, i32 %c) { -; CHECK-SD-LABEL: extract_v2i64_undef_index: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fmov x0, d0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: extract_v2i64_undef_index: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: str q0, [sp, #-16]! 
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: ldr x0, [sp], #16 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: extract_v2i64_undef_index: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret entry: %d = extractelement <2 x i64> %a, i32 undef ret i64 %d diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir index 7893bfa1d38f08..9b39afd32ac378 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir @@ -261,8 +261,7 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_16 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %shift:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %shift(s16) + ; CHECK-NEXT: %zext:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext ; CHECK-NEXT: $vgpr0 = COPY %result(s32) %arg:_(s32) = COPY $vgpr0 @@ -284,8 +283,7 @@ body: | ; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_24 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %shift:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %shift(s16) + ; CHECK-NEXT: %zext:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext ; CHECK-NEXT: $vgpr0 = COPY %result(s32) %arg:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll index a1a466fb04440d..384a2c63122b85 100644 --- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll +++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll @@ -4074,14 +4074,12 @@ define amdgpu_kernel void @v_test_v2i16_x_add_undef_neg32(ptr addrspace(1) %out, ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 ; VI-GISEL-NEXT: 
v_add_u32_e32 v0, vcc, v0, v2 ; VI-GISEL-NEXT: v_not_b32_e32 v2, 31 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 -; VI-GISEL-NEXT: s_and_b32 s0, 0xffff, s0 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) ; VI-GISEL-NEXT: v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-GISEL-NEXT: v_or_b32_e32 v2, s0, v2 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 ; VI-GISEL-NEXT: s_endpgm ; @@ -4191,15 +4189,12 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_undef(ptr addrspace(1) %out, ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] -; VI-GISEL-NEXT: s_and_b32 s2, 0xffff, s0 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 ; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; VI-GISEL-NEXT: s_lshl_b32 s0, s2, 16 ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) ; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffe0, v3 -; VI-GISEL-NEXT: v_or_b32_e32 v2, s0, v2 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 ; VI-GISEL-NEXT: s_endpgm ;