Skip to content

Commit f530899

Browse files
committed
[X86] Fold some (truncate (srl (add X, C1), C2)) patterns to (add (truncate (srl X, C2)), C1')
C1' will be smaller than C1 so we are able to avoid generating code with MOVABS and large constants in certain cases.
1 parent 3d73525 commit f530899

File tree

2 files changed

+76
-20
lines changed

2 files changed

+76
-20
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48472,6 +48472,64 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
4847248472
return SDValue();
4847348473
}
4847448474

48475+
// Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to
48476+
// (add (truncate (srl X, C2), C1')). C1' will be smaller than C1 so we are able
48477+
// to avoid generating code with MOVABS and large constants in certain cases.
48478+
static SDValue combineSetCCTruncAdd(SDValue EFLAGS, X86::CondCode &CC,
48479+
SelectionDAG &DAG) {
48480+
if (!(CC == X86::COND_E || CC == X86::COND_NE || CC == X86::COND_AE ||
48481+
CC == X86::COND_B))
48482+
return SDValue();
48483+
48484+
EVT VT = EFLAGS.getValueType();
48485+
if (EFLAGS.getOpcode() == X86ISD::SUB && VT == MVT::i32) {
48486+
SDValue CmpLHS = EFLAGS.getOperand(0);
48487+
auto *CmpConstant = dyn_cast<ConstantSDNode>(EFLAGS.getOperand(1));
48488+
48489+
if (CmpLHS.getOpcode() != ISD::TRUNCATE || !CmpConstant)
48490+
return SDValue();
48491+
48492+
SDValue Srl = CmpLHS.getOperand(0);
48493+
EVT SrlVT = Srl.getValueType();
48494+
if (Srl.getOpcode() != ISD::SRL || SrlVT != MVT::i64)
48495+
return SDValue();
48496+
48497+
SDValue Add = Srl.getOperand(0);
48498+
// Avoid changing the ADD if it is used elsewhere.
48499+
if (Add.getOpcode() != ISD::ADD || !Add.hasOneUse())
48500+
return SDValue();
48501+
48502+
auto *AddConstant = dyn_cast<ConstantSDNode>(Add.getOperand(1));
48503+
auto *SrlConstant = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
48504+
if (!AddConstant || !SrlConstant)
48505+
return SDValue();
48506+
48507+
APInt AddConstVal = AddConstant->getAPIntValue();
48508+
APInt SrlConstVal = SrlConstant->getAPIntValue();
48509+
if (!SrlConstVal.ugt(VT.getSizeInBits()))
48510+
return SDValue();
48511+
48512+
APInt CmpConstVal = CmpConstant->getAPIntValue();
48513+
APInt ShiftedAddConst = AddConstVal.lshr(SrlConstVal);
48514+
if (!CmpConstVal.ult(ShiftedAddConst.trunc(VT.getSizeInBits())) ||
48515+
(ShiftedAddConst.shl(SrlConstVal)) != AddConstVal)
48516+
return SDValue();
48517+
48518+
SDLoc DL(EFLAGS);
48519+
SDValue AddLHSSrl =
48520+
DAG.getNode(ISD::SRL, DL, SrlVT, Add.getOperand(0), Srl.getOperand(1));
48521+
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);
48522+
48523+
APInt NewAddConstVal =
48524+
(~((~AddConstVal).lshr(SrlConstVal))).trunc(VT.getSizeInBits());
48525+
SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
48526+
SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);
48527+
return DAG.getNode(X86ISD::CMP, DL, VT, NewAddNode, EFLAGS.getOperand(1));
48528+
}
48529+
48530+
return SDValue();
48531+
}
48532+
4847548533
/// Optimize an EFLAGS definition used according to the condition code \p CC
4847648534
/// into a simpler EFLAGS value, potentially returning a new \p CC and replacing
4847748535
/// uses of chain values.
@@ -48494,6 +48552,9 @@ static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
4849448552
if (SDValue R = combineSetCCMOVMSK(EFLAGS, CC, DAG, Subtarget))
4849548553
return R;
4849648554

48555+
if (SDValue R = combineSetCCTruncAdd(EFLAGS, CC, DAG))
48556+
return R;
48557+
4849748558
return combineSetCCAtomicArith(EFLAGS, CC, DAG, Subtarget);
4849848559
}
4849948560

llvm/test/CodeGen/X86/combine-setcc-trunc-add.ll

Lines changed: 15 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,9 @@
66
define i1 @test_ult_trunc_add(i64 %x) {
77
; X64-LABEL: test_ult_trunc_add:
88
; X64: # %bb.0: # %entry
9-
; X64-NEXT: movabsq $3940649673949184, %rax # imm = 0xE000000000000
10-
; X64-NEXT: addq %rdi, %rax
11-
; X64-NEXT: shrq $48, %rax
12-
; X64-NEXT: cmpl $3, %eax
9+
; X64-NEXT: shrq $48, %rdi
10+
; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
11+
; X64-NEXT: cmpl $3, %edi
1312
; X64-NEXT: setb %al
1413
; X64-NEXT: retq
1514
entry:
@@ -23,10 +22,9 @@ entry:
2322
define i1 @test_ult_add(i64 %x) {
2423
; X64-LABEL: test_ult_add:
2524
; X64: # %bb.0: # %entry
26-
; X64-NEXT: movabsq $3940649673949184, %rax # imm = 0xE000000000000
27-
; X64-NEXT: addq %rdi, %rax
28-
; X64-NEXT: shrq $48, %rax
29-
; X64-NEXT: cmpl $3, %eax
25+
; X64-NEXT: shrq $48, %rdi
26+
; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
27+
; X64-NEXT: cmpl $3, %edi
3028
; X64-NEXT: setb %al
3129
; X64-NEXT: retq
3230
entry:
@@ -38,10 +36,9 @@ entry:
3836
define i1 @test_ugt_trunc_add(i64 %x) {
3937
; X64-LABEL: test_ugt_trunc_add:
4038
; X64: # %bb.0: # %entry
41-
; X64-NEXT: movabsq $3940649673949184, %rax # imm = 0xE000000000000
42-
; X64-NEXT: addq %rdi, %rax
43-
; X64-NEXT: shrq $48, %rax
44-
; X64-NEXT: cmpl $4, %eax
39+
; X64-NEXT: shrq $48, %rdi
40+
; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
41+
; X64-NEXT: cmpl $4, %edi
4542
; X64-NEXT: setae %al
4643
; X64-NEXT: retq
4744
entry:
@@ -70,10 +67,9 @@ entry:
7067
define i1 @test_eq_trunc_add(i64 %x) {
7168
; X64-LABEL: test_eq_trunc_add:
7269
; X64: # %bb.0: # %entry
73-
; X64-NEXT: movabsq $3940649673949184, %rax # imm = 0xE000000000000
74-
; X64-NEXT: addq %rdi, %rax
75-
; X64-NEXT: shrq $48, %rax
76-
; X64-NEXT: cmpl $3, %eax
70+
; X64-NEXT: shrq $48, %rdi
71+
; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
72+
; X64-NEXT: cmpl $3, %edi
7773
; X64-NEXT: sete %al
7874
; X64-NEXT: retq
7975
entry:
@@ -100,10 +96,9 @@ entry:
10096
define i1 @test_ne_trunc_add(i64 %x) {
10197
; X64-LABEL: test_ne_trunc_add:
10298
; X64: # %bb.0: # %entry
103-
; X64-NEXT: movabsq $3940649673949184, %rax # imm = 0xE000000000000
104-
; X64-NEXT: addq %rdi, %rax
105-
; X64-NEXT: shrq $48, %rax
106-
; X64-NEXT: cmpl $3, %eax
99+
; X64-NEXT: shrq $48, %rdi
100+
; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
101+
; X64-NEXT: cmpl $3, %edi
107102
; X64-NEXT: setne %al
108103
; X64-NEXT: retq
109104
entry:

0 commit comments

Comments
 (0)