Skip to content

Commit 88bd507

Browse files
committed
[X86] Handle shifts and AND in LowerSELECTWithCmpZero
Shifts are handled the same way as SUB, where RHS == 0 is the identity. AND is the inverted case: `SELECT (AND(X,1) == 0), (AND Y, Z), Y` -> `(AND Y, (OR NEG(AND(X, 1)), Z))`, with -1 as the identity. Closes #107910
1 parent d148a1a commit 88bd507

File tree

2 files changed

+68
-47
lines changed

2 files changed

+68
-47
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 45 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -24086,36 +24086,38 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
2408624086

2408724087
if (X86CC == X86::COND_E && CmpVal.getOpcode() == ISD::AND &&
2408824088
isOneConstant(CmpVal.getOperand(1))) {
24089-
auto SplatLSB = [&]() {
24089+
auto SplatLSB = [&](EVT SplatVT) {
2409024090
// we need mask of all zeros or ones with same size of the other
2409124091
// operands.
2409224092
SDValue Neg = CmpVal;
24093-
if (CmpVT.bitsGT(VT))
24094-
Neg = DAG.getNode(ISD::TRUNCATE, DL, VT, CmpVal);
24095-
else if (CmpVT.bitsLT(VT))
24093+
if (CmpVT.bitsGT(SplatVT))
24094+
Neg = DAG.getNode(ISD::TRUNCATE, DL, SplatVT, CmpVal);
24095+
else if (CmpVT.bitsLT(SplatVT))
2409624096
Neg = DAG.getNode(
24097-
ISD::AND, DL, VT,
24098-
DAG.getNode(ISD::ANY_EXTEND, DL, VT, CmpVal.getOperand(0)),
24099-
DAG.getConstant(1, DL, VT));
24100-
return DAG.getNegative(Neg, DL, VT); // -(and (x, 0x1))
24097+
ISD::AND, DL, SplatVT,
24098+
DAG.getNode(ISD::ANY_EXTEND, DL, SplatVT, CmpVal.getOperand(0)),
24099+
DAG.getConstant(1, DL, SplatVT));
24100+
return DAG.getNegative(Neg, DL, SplatVT); // -(and (x, 0x1))
2410124101
};
2410224102

2410324103
// SELECT (AND(X,1) == 0), 0, -1 -> NEG(AND(X,1))
2410424104
if (isNullConstant(LHS) && isAllOnesConstant(RHS))
24105-
return SplatLSB();
24105+
return SplatLSB(VT);
2410624106

2410724107
// SELECT (AND(X,1) == 0), C1, C2 -> XOR(C1,AND(NEG(AND(X,1)),XOR(C1,C2))
2410824108
if (!Subtarget.canUseCMOV() && isa<ConstantSDNode>(LHS) &&
2410924109
isa<ConstantSDNode>(RHS)) {
24110-
SDValue Mask = SplatLSB();
24110+
SDValue Mask = SplatLSB(VT);
2411124111
SDValue Diff = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS);
2411224112
SDValue Flip = DAG.getNode(ISD::AND, DL, VT, Mask, Diff);
2411324113
return DAG.getNode(ISD::XOR, DL, VT, LHS, Flip);
2411424114
}
2411524115

2411624116
SDValue Src1, Src2;
24117-
auto isIdentityPattern = [&]() {
24117+
auto isIdentityPatternZero = [&]() {
2411824118
switch (RHS.getOpcode()) {
24119+
default:
24120+
break;
2411924121
case ISD::OR:
2412024122
case ISD::XOR:
2412124123
case ISD::ADD:
@@ -24125,6 +24127,9 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
2412524127
return true;
2412624128
}
2412724129
break;
24130+
case ISD::SHL:
24131+
case ISD::SRA:
24132+
case ISD::SRL:
2412824133
case ISD::SUB:
2412924134
if (RHS.getOperand(0) == LHS) {
2413024135
Src1 = RHS.getOperand(1);
@@ -24136,15 +24141,40 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
2413624141
return false;
2413724142
};
2413824143

24144+
auto isIdentityPatternOnes = [&]() {
24145+
switch (LHS.getOpcode()) {
24146+
default:
24147+
break;
24148+
case ISD::AND:
24149+
if (LHS.getOperand(0) == RHS || LHS.getOperand(1) == RHS) {
24150+
Src1 = LHS.getOperand(LHS.getOperand(0) == RHS ? 1 : 0);
24151+
Src2 = RHS;
24152+
return true;
24153+
}
24154+
break;
24155+
}
24156+
return false;
24157+
};
24158+
2413924159
// Convert 'identity' patterns (iff X is 0 or 1):
2414024160
// SELECT (AND(X,1) == 0), Y, (OR Y, Z) -> (OR Y, (AND NEG(AND(X,1)), Z))
2414124161
// SELECT (AND(X,1) == 0), Y, (XOR Y, Z) -> (XOR Y, (AND NEG(AND(X,1)), Z))
2414224162
// SELECT (AND(X,1) == 0), Y, (ADD Y, Z) -> (ADD Y, (AND NEG(AND(X,1)), Z))
2414324163
// SELECT (AND(X,1) == 0), Y, (SUB Y, Z) -> (SUB Y, (AND NEG(AND(X,1)), Z))
24144-
if (!Subtarget.canUseCMOV() && isIdentityPattern()) {
24145-
SDValue Mask = SplatLSB();
24146-
SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z
24147-
return DAG.getNode(RHS.getOpcode(), DL, VT, Src2, And); // y Op And
24164+
// SELECT (AND(X,1) == 0), Y, (SHL Y, Z) -> (SHL Y, (AND NEG(AND(X,1)), Z))
24165+
// SELECT (AND(X,1) == 0), Y, (SRA Y, Z) -> (SRA Y, (AND NEG(AND(X,1)), Z))
24166+
// SELECT (AND(X,1) == 0), Y, (SRL Y, Z) -> (SRL Y, (AND NEG(AND(X,1)), Z))
24167+
if (!Subtarget.canUseCMOV() && isIdentityPatternZero()) {
24168+
SDValue Mask = SplatLSB(Src1.getValueType());
24169+
SDValue And = DAG.getNode(ISD::AND, DL, Src1.getValueType(), Mask,
24170+
Src1); // Mask & z
24171+
return DAG.getNode(RHS.getOpcode(), DL, VT, Src2, And); // y Op And
24172+
}
24173+
// SELECT (AND(X,1) == 0), (AND Y, Z), Y -> (AND Y, (OR NEG(AND(X, 1)), Z))
24174+
if (!Subtarget.canUseCMOV() && isIdentityPatternOnes()) {
24175+
SDValue Mask = SplatLSB(VT);
24176+
SDValue Or = DAG.getNode(ISD::OR, DL, VT, Mask, Src1); // Mask | z
24177+
return DAG.getNode(LHS.getOpcode(), DL, VT, Src2, Or); // y Op Or
2414824178
}
2414924179
}
2415024180

llvm/test/CodeGen/X86/pull-conditional-binop-through-shift.ll

Lines changed: 23 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -711,15 +711,15 @@ define i32 @shl_signbit_select_add(i32 %x, i1 %cond, ptr %dst) {
711711
;
712712
; X86-LABEL: shl_signbit_select_add:
713713
; X86: # %bb.0:
714-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
714+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
715+
; X86-NEXT: andb $1, %cl
716+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
715717
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
716-
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
717-
; X86-NEXT: je .LBB24_2
718-
; X86-NEXT: # %bb.1:
719-
; X86-NEXT: shll $4, %eax
720-
; X86-NEXT: .LBB24_2:
718+
; X86-NEXT: negb %cl
719+
; X86-NEXT: andb $4, %cl
720+
; X86-NEXT: shll %cl, %eax
721721
; X86-NEXT: addl $123456, %eax # imm = 0x1E240
722-
; X86-NEXT: movl %eax, (%ecx)
722+
; X86-NEXT: movl %eax, (%edx)
723723
; X86-NEXT: retl
724724
%t0 = shl i32 %x, 4
725725
%t1 = select i1 %cond, i32 %t0, i32 %x
@@ -772,23 +772,15 @@ define i32 @lshr_signbit_select_add(i32 %x, i1 %cond, ptr %dst, i32 %y) {
772772
;
773773
; X86-LABEL: lshr_signbit_select_add:
774774
; X86: # %bb.0:
775-
; X86-NEXT: pushl %esi
776-
; X86-NEXT: .cfi_def_cfa_offset 8
777-
; X86-NEXT: .cfi_offset %esi, -8
778-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
779775
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
780-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
781-
; X86-NEXT: movl %esi, %eax
776+
; X86-NEXT: andb $1, %cl
777+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
778+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
779+
; X86-NEXT: negb %cl
780+
; X86-NEXT: andb {{[0-9]+}}(%esp), %cl
782781
; X86-NEXT: shrl %cl, %eax
783-
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
784-
; X86-NEXT: jne .LBB26_2
785-
; X86-NEXT: # %bb.1:
786-
; X86-NEXT: movl %esi, %eax
787-
; X86-NEXT: .LBB26_2:
788782
; X86-NEXT: addl $123456, %eax # imm = 0x1E240
789783
; X86-NEXT: movl %eax, (%edx)
790-
; X86-NEXT: popl %esi
791-
; X86-NEXT: .cfi_def_cfa_offset 4
792784
; X86-NEXT: retl
793785
%t0 = lshr i32 %x, %y
794786
%t1 = select i1 %cond, i32 %t0, i32 %x
@@ -810,15 +802,15 @@ define i32 @ashr_signbit_select_add(i32 %x, i1 %cond, ptr %dst) {
810802
;
811803
; X86-LABEL: ashr_signbit_select_add:
812804
; X86: # %bb.0:
813-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
805+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
806+
; X86-NEXT: andb $1, %cl
807+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
814808
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
815-
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
816-
; X86-NEXT: je .LBB27_2
817-
; X86-NEXT: # %bb.1:
818-
; X86-NEXT: sarl $4, %eax
819-
; X86-NEXT: .LBB27_2:
809+
; X86-NEXT: negb %cl
810+
; X86-NEXT: andb $4, %cl
811+
; X86-NEXT: sarl %cl, %eax
820812
; X86-NEXT: addl $123456, %eax # imm = 0x1E240
821-
; X86-NEXT: movl %eax, (%ecx)
813+
; X86-NEXT: movl %eax, (%edx)
822814
; X86-NEXT: retl
823815
%t0 = ashr i32 %x, 4
824816
%t1 = select i1 %cond, i32 %t0, i32 %x
@@ -841,12 +833,11 @@ define i32 @and_signbit_select_add(i32 %x, i1 %cond, ptr %dst, i32 %y) {
841833
; X86-LABEL: and_signbit_select_add:
842834
; X86: # %bb.0:
843835
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
844-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
845-
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
846-
; X86-NEXT: jne .LBB28_2
847-
; X86-NEXT: # %bb.1:
836+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
837+
; X86-NEXT: andl $1, %eax
838+
; X86-NEXT: negl %eax
839+
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
848840
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
849-
; X86-NEXT: .LBB28_2:
850841
; X86-NEXT: addl $123456, %eax # imm = 0x1E240
851842
; X86-NEXT: movl %eax, (%ecx)
852843
; X86-NEXT: retl

0 commit comments

Comments
 (0)