Skip to content

Commit

Permalink
[X86] LowerSELECTWithCmpZero - extend branchless OR/XOR select codegen to handle ADD/SUB as well (llvm#107612)
Browse files Browse the repository at this point in the history

Extend the "SELECT ((AND X, 1) != 0), Y, (OR/XOR Y, Z) -> (OR/XOR Y, (AND (NEG(AND X, 1)), Z))" fold to also handle ADD/SUB.

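As SUB is not commutative, we have to be more careful and only accept matches where Y is the LHS (first) operand of the SUB: the fold relies on the masked operand becoming 0, and (SUB Y, 0) is Y whereas (SUB 0, Y) is not.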
  • Loading branch information
RKSimon authored Sep 7, 2024
1 parent 78cf9b8 commit 6df1291
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 85 deletions.
32 changes: 23 additions & 9 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24084,18 +24084,32 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
if (!CmpVT.isScalarInteger() || !VT.isScalarInteger())
return SDValue();

// Convert OR/XOR 'identity' patterns (iff X is 0 or 1):
// select (X != 0), Y, (OR Y, Z) -> (OR Y, (AND (0 - X), Z))
// select (X != 0), Y, (XOR Y, Z) -> (XOR Y, (AND (0 - X), Z))
// Convert 'identity' patterns (iff X is 0 or 1):
// SELECT (X != 0), Y, (OR Y, Z) -> (OR Y, (AND (0 - X), Z))
// SELECT (X != 0), Y, (XOR Y, Z) -> (XOR Y, (AND (0 - X), Z))
// SELECT (X != 0), Y, (ADD Y, Z) -> (ADD Y, (AND (0 - X), Z))
// SELECT (X != 0), Y, (SUB Y, Z) -> (SUB Y, (AND (0 - X), Z))
if (!Subtarget.canUseCMOV() && X86CC == X86::COND_E &&
CmpVal.getOpcode() == ISD::AND && isOneConstant(CmpVal.getOperand(1))) {
SDValue Src1, Src2;
auto isIdentityPattern = [&]() {
if ((RHS.getOpcode() == ISD::XOR || RHS.getOpcode() == ISD::OR) &&
(RHS.getOperand(0) == LHS || RHS.getOperand(1) == LHS)) {
Src1 = RHS.getOperand(RHS.getOperand(0) == LHS ? 1 : 0);
Src2 = LHS;
return true;
switch (RHS.getOpcode()) {
case ISD::OR:
case ISD::XOR:
case ISD::ADD:
if (RHS.getOperand(0) == LHS || RHS.getOperand(1) == LHS) {
Src1 = RHS.getOperand(RHS.getOperand(0) == LHS ? 1 : 0);
Src2 = LHS;
return true;
}
break;
case ISD::SUB:
if (RHS.getOperand(0) == LHS) {
Src1 = RHS.getOperand(1);
Src2 = LHS;
return true;
}
break;
}
return false;
};
Expand All @@ -24113,7 +24127,7 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
DAG.getConstant(1, DL, VT));
SDValue Mask = DAG.getNegative(Neg, DL, VT); // -(and (x, 0x1))
SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z
return DAG.getNode(RHS.getOpcode(), DL, VT, And, Src2); // And Op y
return DAG.getNode(RHS.getOpcode(), DL, VT, Src2, And); // y Op And
}
}

Expand Down
66 changes: 30 additions & 36 deletions llvm/test/CodeGen/X86/pull-conditional-binop-through-shift.ll
Original file line number Diff line number Diff line change
Expand Up @@ -191,12 +191,11 @@ define i32 @add_signbit_select_shl(i32 %x, i1 %cond, ptr %dst) {
; X86-LABEL: add_signbit_select_shl:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-NEXT: je .LBB6_2
; X86-NEXT: # %bb.1:
; X86-NEXT: addl $-65536, %eax # imm = 0xFFFF0000
; X86-NEXT: .LBB6_2:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $1, %eax
; X86-NEXT: negl %eax
; X86-NEXT: andl $16711680, %eax # imm = 0xFF0000
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll $8, %eax
; X86-NEXT: movl %eax, (%ecx)
; X86-NEXT: retl
Expand All @@ -220,12 +219,11 @@ define i32 @add_nosignbit_select_shl(i32 %x, i1 %cond, ptr %dst) {
; X86-LABEL: add_nosignbit_select_shl:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-NEXT: je .LBB7_2
; X86-NEXT: # %bb.1:
; X86-NEXT: addl $2147418112, %eax # imm = 0x7FFF0000
; X86-NEXT: .LBB7_2:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $1, %eax
; X86-NEXT: negl %eax
; X86-NEXT: andl $16711680, %eax # imm = 0xFF0000
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll $8, %eax
; X86-NEXT: movl %eax, (%ecx)
; X86-NEXT: retl
Expand Down Expand Up @@ -425,12 +423,11 @@ define i32 @add_signbit_select_lshr(i32 %x, i1 %cond, ptr %dst) {
; X86-LABEL: add_signbit_select_lshr:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-NEXT: je .LBB14_2
; X86-NEXT: # %bb.1:
; X86-NEXT: addl $-65536, %eax # imm = 0xFFFF0000
; X86-NEXT: .LBB14_2:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $1, %eax
; X86-NEXT: negl %eax
; X86-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrl $8, %eax
; X86-NEXT: movl %eax, (%ecx)
; X86-NEXT: retl
Expand All @@ -454,12 +451,11 @@ define i32 @add_nosignbit_select_lshr(i32 %x, i1 %cond, ptr %dst) {
; X86-LABEL: add_nosignbit_select_lshr:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-NEXT: je .LBB15_2
; X86-NEXT: # %bb.1:
; X86-NEXT: addl $2147418112, %eax # imm = 0x7FFF0000
; X86-NEXT: .LBB15_2:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $1, %eax
; X86-NEXT: negl %eax
; X86-NEXT: andl $2147418112, %eax # imm = 0x7FFF0000
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrl $8, %eax
; X86-NEXT: movl %eax, (%ecx)
; X86-NEXT: retl
Expand Down Expand Up @@ -659,12 +655,11 @@ define i32 @add_signbit_select_ashr(i32 %x, i1 %cond, ptr %dst) {
; X86-LABEL: add_signbit_select_ashr:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-NEXT: je .LBB22_2
; X86-NEXT: # %bb.1:
; X86-NEXT: addl $-65536, %eax # imm = 0xFFFF0000
; X86-NEXT: .LBB22_2:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $1, %eax
; X86-NEXT: negl %eax
; X86-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sarl $8, %eax
; X86-NEXT: movl %eax, (%ecx)
; X86-NEXT: retl
Expand All @@ -688,12 +683,11 @@ define i32 @add_nosignbit_select_ashr(i32 %x, i1 %cond, ptr %dst) {
; X86-LABEL: add_nosignbit_select_ashr:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-NEXT: je .LBB23_2
; X86-NEXT: # %bb.1:
; X86-NEXT: addl $2147418112, %eax # imm = 0x7FFF0000
; X86-NEXT: .LBB23_2:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $1, %eax
; X86-NEXT: negl %eax
; X86-NEXT: andl $2147418112, %eax # imm = 0x7FFF0000
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sarl $8, %eax
; X86-NEXT: movl %eax, (%ecx)
; X86-NEXT: retl
Expand Down
72 changes: 32 additions & 40 deletions llvm/test/CodeGen/X86/select.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1725,11 +1725,10 @@ define i32 @select_add(i32 %A, i32 %B, i8 %cond) {
;
; MCU-LABEL: select_add:
; MCU: # %bb.0: # %entry
; MCU-NEXT: testb $1, %cl
; MCU-NEXT: je .LBB36_2
; MCU-NEXT: # %bb.1: # %entry
; MCU-NEXT: addl %edx, %eax
; MCU-NEXT: .LBB36_2: # %entry
; MCU-NEXT: andl $1, %ecx
; MCU-NEXT: negl %ecx
; MCU-NEXT: andl %edx, %ecx
; MCU-NEXT: addl %ecx, %eax
; MCU-NEXT: retl
entry:
%and = and i8 %cond, 1
Expand Down Expand Up @@ -1773,11 +1772,10 @@ define i32 @select_add_b(i32 %A, i32 %B, i8 %cond) {
;
; MCU-LABEL: select_add_b:
; MCU: # %bb.0: # %entry
; MCU-NEXT: testb $1, %cl
; MCU-NEXT: je .LBB37_2
; MCU-NEXT: # %bb.1:
; MCU-NEXT: addl %edx, %eax
; MCU-NEXT: .LBB37_2: # %entry
; MCU-NEXT: andl $1, %ecx
; MCU-NEXT: negl %ecx
; MCU-NEXT: andl %edx, %ecx
; MCU-NEXT: addl %ecx, %eax
; MCU-NEXT: retl
entry:
%and = and i8 %cond, 1
Expand Down Expand Up @@ -1819,11 +1817,10 @@ define i32 @select_add_1(i32 %A, i32 %B, i32 %cond) {
;
; MCU-LABEL: select_add_1:
; MCU: # %bb.0: # %entry
; MCU-NEXT: testb $1, %cl
; MCU-NEXT: je .LBB38_2
; MCU-NEXT: # %bb.1: # %entry
; MCU-NEXT: addl %edx, %eax
; MCU-NEXT: .LBB38_2: # %entry
; MCU-NEXT: andl $1, %ecx
; MCU-NEXT: negl %ecx
; MCU-NEXT: andl %edx, %ecx
; MCU-NEXT: addl %ecx, %eax
; MCU-NEXT: retl
entry:
%and = and i32 %cond, 1
Expand Down Expand Up @@ -1867,11 +1864,10 @@ define i32 @select_add_1b(i32 %A, i32 %B, i32 %cond) {
;
; MCU-LABEL: select_add_1b:
; MCU: # %bb.0: # %entry
; MCU-NEXT: testb $1, %cl
; MCU-NEXT: je .LBB39_2
; MCU-NEXT: # %bb.1:
; MCU-NEXT: addl %edx, %eax
; MCU-NEXT: .LBB39_2: # %entry
; MCU-NEXT: andl $1, %ecx
; MCU-NEXT: negl %ecx
; MCU-NEXT: andl %edx, %ecx
; MCU-NEXT: addl %ecx, %eax
; MCU-NEXT: retl
entry:
%and = and i32 %cond, 1
Expand Down Expand Up @@ -1901,11 +1897,10 @@ define i32 @select_sub(i32 %A, i32 %B, i8 %cond) {
;
; MCU-LABEL: select_sub:
; MCU: # %bb.0: # %entry
; MCU-NEXT: testb $1, %cl
; MCU-NEXT: je .LBB40_2
; MCU-NEXT: # %bb.1: # %entry
; MCU-NEXT: subl %eax, %edx
; MCU-NEXT: .LBB40_2: # %entry
; MCU-NEXT: andl $1, %ecx
; MCU-NEXT: negl %ecx
; MCU-NEXT: andl %eax, %ecx
; MCU-NEXT: subl %ecx, %edx
; MCU-NEXT: movl %edx, %eax
; MCU-NEXT: retl
entry:
Expand Down Expand Up @@ -1938,11 +1933,10 @@ define i32 @select_sub_b(i32 %A, i32 %B, i8 %cond) {
;
; MCU-LABEL: select_sub_b:
; MCU: # %bb.0: # %entry
; MCU-NEXT: testb $1, %cl
; MCU-NEXT: je .LBB41_2
; MCU-NEXT: # %bb.1:
; MCU-NEXT: subl %eax, %edx
; MCU-NEXT: .LBB41_2: # %entry
; MCU-NEXT: andl $1, %ecx
; MCU-NEXT: negl %ecx
; MCU-NEXT: andl %eax, %ecx
; MCU-NEXT: subl %ecx, %edx
; MCU-NEXT: movl %edx, %eax
; MCU-NEXT: retl
entry:
Expand Down Expand Up @@ -1973,11 +1967,10 @@ define i32 @select_sub_1(i32 %A, i32 %B, i32 %cond) {
;
; MCU-LABEL: select_sub_1:
; MCU: # %bb.0: # %entry
; MCU-NEXT: testb $1, %cl
; MCU-NEXT: je .LBB42_2
; MCU-NEXT: # %bb.1: # %entry
; MCU-NEXT: subl %eax, %edx
; MCU-NEXT: .LBB42_2: # %entry
; MCU-NEXT: andl $1, %ecx
; MCU-NEXT: negl %ecx
; MCU-NEXT: andl %eax, %ecx
; MCU-NEXT: subl %ecx, %edx
; MCU-NEXT: movl %edx, %eax
; MCU-NEXT: retl
entry:
Expand Down Expand Up @@ -2010,11 +2003,10 @@ define i32 @select_sub_1b(i32 %A, i32 %B, i32 %cond) {
;
; MCU-LABEL: select_sub_1b:
; MCU: # %bb.0: # %entry
; MCU-NEXT: testb $1, %cl
; MCU-NEXT: je .LBB43_2
; MCU-NEXT: # %bb.1:
; MCU-NEXT: subl %eax, %edx
; MCU-NEXT: .LBB43_2: # %entry
; MCU-NEXT: andl $1, %ecx
; MCU-NEXT: negl %ecx
; MCU-NEXT: andl %eax, %ecx
; MCU-NEXT: subl %ecx, %edx
; MCU-NEXT: movl %edx, %eax
; MCU-NEXT: retl
entry:
Expand Down

0 comments on commit 6df1291

Please sign in to comment.