Skip to content

Commit

Permalink
[X86] Add an override of targetShrinkDemandedConstant to limit the damage that shrinkdemandedbits can do to zext_in_reg operations
Browse files Browse the repository at this point in the history

Summary:
This patch adds an implementation of targetShrinkDemandedConstant that tries to keep shrinkdemandedbits from removing bits that would otherwise have been recognized as a movzx.

We still need a follow-up patch to stop moving ands across srl if the and could be represented as a movzx before the shift but not after. I think this should help with some of the cases that D42088 ended up removing during isel.

Reviewers: spatel, RKSimon

Reviewed By: spatel

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D42265

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323048 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
topperc committed Jan 20, 2018
1 parent f91a072 commit 10fbe29
Show file tree
Hide file tree
Showing 7 changed files with 88 additions and 26 deletions.
59 changes: 59 additions & 0 deletions lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27884,6 +27884,65 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// X86 Optimization Hooks
//===----------------------------------------------------------------------===//

bool
X86TargetLowering::targetShrinkDemandedConstant(SDValue Op,
                                                const APInt &Demanded,
                                                TargetLoweringOpt &TLO) const {
  // Restrict this hook to AND nodes: shrinking other constants is harmless,
  // but shrinking an AND mask can destroy a pattern that would otherwise
  // have been selected as a movzx.
  if (Op.getOpcode() != ISD::AND)
    return false;

  EVT VT = Op.getValueType();

  // Scalar integers only; vector masks are not handled here.
  if (VT.isVector())
    return false;

  // The RHS must be a constant mask for us to reason about it.
  auto *MaskC = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!MaskC)
    return false;

  unsigned BitWidth = VT.getSizeInBits();
  const APInt &OldMask = MaskC->getAPIntValue();

  // Drop every mask bit the caller says is not demanded.
  APInt ShrunkMask = OldMask & Demanded;

  // No demanded bits survive -> nothing useful to do here.
  unsigned ActiveBits = ShrunkMask.getActiveBits();
  if (ActiveBits == 0)
    return false;

  // Round the active width up to the next power of 2, with a floor of one
  // byte, then clamp to the type width so illegal types stay in range.
  unsigned ZExtWidth = PowerOf2Ceil(std::max(ActiveBits, 8U));
  ZExtWidth = std::min(ZExtWidth, BitWidth);

  // The mask a zero extend of that width would apply.
  APInt ZExtMask = APInt::getLowBitsSet(BitWidth, ZExtWidth);

  // The constant already has zero-extend shape: claim success without
  // rewriting so the caller keeps the mask intact.
  if (ZExtMask == OldMask)
    return true;

  // Only widen to the zext mask when every bit it sets was already set in
  // the original mask.
  // TODO: We should be able to set bits that aren't demanded too.
  if (!ZExtMask.isSubsetOf(OldMask))
    return false;

  // Rebuild the AND with the zero-extend-shaped constant.
  SDLoc DL(Op);
  SDValue NewMask = TLO.DAG.getConstant(ZExtMask, DL, VT);
  SDValue NewAnd =
      TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewMask);
  return TLO.CombineTo(Op, NewAnd);
}

void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
Expand Down
3 changes: 3 additions & 0 deletions lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,9 @@ namespace llvm {
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;

bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
TargetLoweringOpt &TLO) const override;

/// Determine which of the bits specified in Mask are known to be either
/// zero or one and return them in the KnownZero/KnownOne bitsets.
void computeKnownBitsForTargetNode(const SDValue Op,
Expand Down
6 changes: 5 additions & 1 deletion lib/Target/X86/X86InstrCompiler.td
Original file line number Diff line number Diff line change
Expand Up @@ -1514,6 +1514,10 @@ def : Pat<(i8 (trunc GR16:$src)),
(EXTRACT_SUBREG GR16:$src, sub_8bit)>,
Requires<[In64BitMode]>;

// Matches i32 immediates in the inclusive range [0xff00, 0xffff], i.e. masks
// whose set bits lie in bits 15:8 and up through bit 15 — the shapes used by
// the h-register extraction pattern below.
def immff00_ffff : ImmLeaf<i32, [{
return Imm >= 0xff00 && Imm <= 0xffff;
}]>;

// h-register tricks
def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
(EXTRACT_SUBREG GR16:$src, sub_8bit_hi)>,
Expand All @@ -1534,7 +1538,7 @@ def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
(MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>;
def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
(MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>;
def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
def : Pat<(srl (and_su GR32:$src, immff00_ffff), (i8 8)),
(MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>;

// h-register tricks.
Expand Down
10 changes: 6 additions & 4 deletions test/CodeGen/X86/3addr-or.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,18 @@ define i32 @test1(i32 %x) nounwind ssp {
ret i32 %t1
}

; This test no longer requires or to be converted to 3 addr form because we
; are able to use a zero extend instead of an 'and' which gives the register
; allocator freedom.
define i64 @test2(i8 %A, i8 %B) nounwind {
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def %esi killed %esi def %rsi
; CHECK-NEXT: # kill: def %edi killed %edi def %rdi
; CHECK-NEXT: shll $4, %edi
; CHECK-NEXT: andl $48, %edi
; CHECK-NEXT: andl $240, %esi
; CHECK-NEXT: shrq $4, %rsi
; CHECK-NEXT: leaq (%rsi,%rdi), %rax
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: shrq $4, %rax
; CHECK-NEXT: orq %rdi, %rax
; CHECK-NEXT: retq
%C = zext i8 %A to i64
%D = shl i64 %C, 4
Expand Down
2 changes: 0 additions & 2 deletions test/CodeGen/X86/popcnt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ define i16 @cnt16(i16 %x) nounwind readnone {
; X32-NEXT: andl $13107, %eax # imm = 0x3333
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: andl $-16, %ecx
; X32-NEXT: shrl $4, %ecx
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: andl $3855, %ecx # imm = 0xF0F
Expand All @@ -94,7 +93,6 @@ define i16 @cnt16(i16 %x) nounwind readnone {
; X64-NEXT: andl $13107, %edi # imm = 0x3333
; X64-NEXT: addl %eax, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $-16, %eax
; X64-NEXT: shrl $4, %eax
; X64-NEXT: addl %edi, %eax
; X64-NEXT: andl $3855, %eax # imm = 0xF0F
Expand Down
21 changes: 10 additions & 11 deletions test/CodeGen/X86/pr21792.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,18 @@ define void @func(<4 x float> %vx) {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
; CHECK-NEXT: pextrq $1, %xmm0, %rdx
; CHECK-NEXT: movq %rdx, %rcx
; CHECK-NEXT: shrq $32, %rcx
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: movq %rax, %r9
; CHECK-NEXT: pextrq $1, %xmm0, %rax
; CHECK-NEXT: movzwl %ax, %ecx
; CHECK-NEXT: shrq $32, %rax
; CHECK-NEXT: movq %xmm0, %rdx
; CHECK-NEXT: movzwl %dx, %r8d
; CHECK-NEXT: movq %rdx, %r9
; CHECK-NEXT: shrq $32, %r9
; CHECK-NEXT: andl $2032, %eax # imm = 0x7F0
; CHECK-NEXT: leaq stuff(%rax), %rdi
; CHECK-NEXT: leaq stuff(%r8), %rdi
; CHECK-NEXT: leaq stuff(%r9), %rsi
; CHECK-NEXT: andl $2032, %edx # imm = 0x7F0
; CHECK-NEXT: leaq stuff(%rdx), %rdx
; CHECK-NEXT: leaq stuff(%rcx), %rcx
; CHECK-NEXT: leaq stuff+8(%rax), %r8
; CHECK-NEXT: leaq stuff(%rcx), %rdx
; CHECK-NEXT: leaq stuff(%rax), %rcx
; CHECK-NEXT: leaq stuff+8(%r8), %r8
; CHECK-NEXT: leaq stuff+8(%r9), %r9
; CHECK-NEXT: callq toto
; CHECK-NEXT: popq %rax
Expand Down
13 changes: 5 additions & 8 deletions test/CodeGen/X86/zext-demanded.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,22 @@
; demanded bits shortcomings.

; The backend will insert a zext to promote the shift to i32.
; TODO: we should be able to use movzx here.
define i16 @test1(i16 %x) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
; CHECK-NEXT: andl $65534, %edi # imm = 0xFFFE
; CHECK-NEXT: shrl %edi
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: movzwl %di, %eax
; CHECK-NEXT: shrl %eax
; CHECK-NEXT: # kill: def %ax killed %ax killed %eax
; CHECK-NEXT: retq
%y = lshr i16 %x, 1
ret i16 %y
}

; TODO: we should be able to use movzx here.
define i32 @test2(i32 %x) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
; CHECK-NEXT: andl $65534, %edi # imm = 0xFFFE
; CHECK-NEXT: shrl %edi
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: movzwl %di, %eax
; CHECK-NEXT: shrl %eax
; CHECK-NEXT: retq
%y = and i32 %x, 65535
%z = lshr i32 %y, 1
Expand Down

0 comments on commit 10fbe29

Please sign in to comment.