Skip to content

Commit b773400

Browse files
committed
[X86] Generalize fold
1 parent bd8a2ef commit b773400

File tree

2 files changed

+48
-63
lines changed

2 files changed

+48
-63
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+37-52
Original file line numberDiff line numberDiff line change
@@ -48472,55 +48472,6 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
4847248472
return SDValue();
4847348473
}
4847448474

48475-
// Attempt to fold some (setcc (sub (truncate (srl (add X, C1), C2)), C3), CC)
48476-
// patterns to (setcc (cmp (add (truncate (srl X, C2)), C1'), C3), CC). C1' will
48477-
// be smaller than C1 so we are able to avoid generating code with MOVABS and
48478-
// large constants in certain cases. Returns an empty SDValue if the fold does not apply.
48479-
static SDValue combineSetCCTruncAdd(SDValue EFLAGS, X86::CondCode &CC,
48480-
SelectionDAG &DAG) { // CC is only read here; it is never updated.
48481-
using namespace llvm::SDPatternMatch;
48482-
if (!(CC == X86::COND_E || CC == X86::COND_NE || CC == X86::COND_AE || // Only E/NE/AE/B are accepted.
48483-
CC == X86::COND_B))
48484-
return SDValue();
48485-
48486-
SDValue AddLhs;
48487-
APInt AddConst, SrlConst, CmpConst;
48488-
if (!sd_match(EFLAGS, // Match (X86ISD::SUB (trunc (srl (add AddLhs, AddConst), SrlConst)), CmpConst).
48489-
m_AllOf(m_SpecificVT(MVT::i32),
48490-
m_BinOp(X86ISD::SUB,
48491-
m_Trunc(m_Srl(m_Add(m_Value(AddLhs),
48492-
m_ConstInt(AddConst)),
48493-
m_ConstInt(SrlConst))),
48494-
m_ConstInt(CmpConst)))))
48495-
return SDValue();
48496-
48497-
SDValue Srl;
48498-
if (!sd_match(EFLAGS.getOperand(0).getOperand(0), // Operand of the trunc: must be an i64 value; capture it as Srl.
48499-
m_AllOf(m_SpecificVT(MVT::i64), m_Value(Srl))))
48500-
return SDValue();
48501-
48502-
// Avoid changing the ADD if it is used elsewhere.
48503-
if (!Srl.getOperand(0).hasOneUse())
48504-
return SDValue();
48505-
48506-
EVT VT = EFLAGS.getValueType();
48507-
APInt ShiftedAddConst = AddConst.lshr(SrlConst); // AddConst moved down by the shift amount.
48508-
if (!CmpConst.ult(ShiftedAddConst.trunc(VT.getSizeInBits())) || // Bail unless CmpConst is unsigned-less-than the shifted constant...
48509-
(ShiftedAddConst.shl(SrlConst)) != AddConst) // ...and AddConst has no set bits below bit SrlConst.
48510-
return SDValue();
48511-
48512-
SDLoc DL(EFLAGS);
48513-
SDValue AddLHSSrl = // Shift the add's left operand directly, bypassing the wide add.
48514-
DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, Srl.getOperand(1));
48515-
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);
48516-
48517-
APInt NewAddConstVal = // AddConst >> SrlConst with ones shifted in at the top, truncated to VT's width.
48518-
(~((~AddConst).lshr(SrlConst))).trunc(VT.getSizeInBits());
48519-
SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
48520-
SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);
48521-
return DAG.getNode(X86ISD::CMP, DL, VT, NewAddNode, EFLAGS.getOperand(1)); // Compare against the original CmpConst operand.
48522-
}
48523-
4852448475
/// Optimize an EFLAGS definition used according to the condition code \p CC
4852548476
/// into a simpler EFLAGS value, potentially returning a new \p CC and replacing
4852648477
/// uses of chain values.
@@ -48543,9 +48494,6 @@ static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
4854348494
if (SDValue R = combineSetCCMOVMSK(EFLAGS, CC, DAG, Subtarget))
4854448495
return R;
4854548496

48546-
if (SDValue R = combineSetCCTruncAdd(EFLAGS, CC, DAG))
48547-
return R;
48548-
4854948497
return combineSetCCAtomicArith(EFLAGS, CC, DAG, Subtarget);
4855048498
}
4855148499

@@ -53652,6 +53600,40 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG,
5365253600
DAG.getUNDEF(SrcVT)));
5365353601
}
5365453602

53603+
// Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to
53604+
// (add (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are able
53605+
// to avoid generating code with MOVABS and large constants in certain cases. The add is then wrapped in a shl/srl pair by (C2 - 32) that clears its high bits.
53606+
static SDValue combinei64TruncSrlAdd(SDValue N, EVT VT, SelectionDAG &DAG,
53607+
const SDLoc &DL) { // N is the value being truncated, VT the truncated type; returns an empty SDValue if no fold applies.
53608+
using namespace llvm::SDPatternMatch;
53609+
53610+
SDValue AddLhs;
53611+
APInt AddConst, SrlConst;
53612+
if (VT != MVT::i32 || // Only i64 -> i32: N must be an i64 (srl (add AddLhs, AddConst), SrlConst) whose add has one use.
53613+
!sd_match(N, m_AllOf(m_SpecificVT(MVT::i64),
53614+
m_Srl(m_OneUse(m_Add(m_Value(AddLhs),
53615+
m_ConstInt(AddConst))),
53616+
m_ConstInt(SrlConst)))))
53617+
return SDValue();
53618+
53619+
if (!SrlConst.ugt(31) || AddConst.lshr(SrlConst).shl(SrlConst) != AddConst) // Need SrlConst >= 32 and no set bits of AddConst below bit SrlConst.
53620+
return SDValue();
53621+
53622+
SDValue AddLHSSrl = // Shift the add's left operand directly, bypassing the wide add.
53623+
DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, N.getOperand(1));
53624+
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);
53625+
53626+
APInt NewAddConstVal = // AddConst >> SrlConst with ones shifted in at the top, truncated to VT's width.
53627+
(~((~AddConst).lshr(SrlConst))).trunc(VT.getSizeInBits());
53628+
SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
53629+
SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);
53630+
53631+
APInt CleanupSizeConstVal = (SrlConst - 32).zextOrTrunc(VT.getSizeInBits()); // Number of high bits of the i32 result to clear.
53632+
SDValue CleanupSizeConst = DAG.getConstant(CleanupSizeConstVal, DL, VT);
53633+
SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewAddNode, CleanupSizeConst); // shl then srl by the same amount zeroes those high bits.
53634+
return DAG.getNode(ISD::SRL, DL, VT, Shl, CleanupSizeConst);
53635+
}
53636+
5365553637
/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
5365653638
/// the codegen.
5365753639
/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
@@ -53697,6 +53679,9 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
5369753679
if (!Src.hasOneUse())
5369853680
return SDValue();
5369953681

53682+
if (SDValue R = combinei64TruncSrlAdd(Src, VT, DAG, DL))
53683+
return R;
53684+
5370053685
// Only support vector truncation for now.
5370153686
// TODO: i64 scalar math would benefit as well.
5370253687
if (!VT.isVector())

llvm/test/CodeGen/X86/combine-setcc-trunc-add.ll renamed to llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll

+11-11
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ define i1 @test_ult_trunc_add(i64 %x) {
88
; X64: # %bb.0: # %entry
99
; X64-NEXT: shrq $48, %rdi
1010
; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
11-
; X64-NEXT: cmpl $3, %edi
11+
; X64-NEXT: movzwl %di, %eax
12+
; X64-NEXT: cmpl $3, %eax
1213
; X64-NEXT: setb %al
1314
; X64-NEXT: retq
1415
entry:
@@ -24,7 +25,8 @@ define i1 @test_ult_add(i64 %x) {
2425
; X64: # %bb.0: # %entry
2526
; X64-NEXT: shrq $48, %rdi
2627
; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
27-
; X64-NEXT: cmpl $3, %edi
28+
; X64-NEXT: movzwl %di, %eax
29+
; X64-NEXT: cmpl $3, %eax
2830
; X64-NEXT: setb %al
2931
; X64-NEXT: retq
3032
entry:
@@ -38,7 +40,8 @@ define i1 @test_ugt_trunc_add(i64 %x) {
3840
; X64: # %bb.0: # %entry
3941
; X64-NEXT: shrq $48, %rdi
4042
; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
41-
; X64-NEXT: cmpl $4, %edi
43+
; X64-NEXT: movzwl %di, %eax
44+
; X64-NEXT: cmpl $4, %eax
4245
; X64-NEXT: setae %al
4346
; X64-NEXT: retq
4447
entry:
@@ -68,8 +71,7 @@ define i1 @test_eq_trunc_add(i64 %x) {
6871
; X64-LABEL: test_eq_trunc_add:
6972
; X64: # %bb.0: # %entry
7073
; X64-NEXT: shrq $48, %rdi
71-
; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
72-
; X64-NEXT: cmpl $3, %edi
74+
; X64-NEXT: cmpl $65525, %edi # imm = 0xFFF5
7375
; X64-NEXT: sete %al
7476
; X64-NEXT: retq
7577
entry:
@@ -97,8 +99,7 @@ define i1 @test_ne_trunc_add(i64 %x) {
9799
; X64-LABEL: test_ne_trunc_add:
98100
; X64: # %bb.0: # %entry
99101
; X64-NEXT: shrq $48, %rdi
100-
; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
101-
; X64-NEXT: cmpl $3, %edi
102+
; X64-NEXT: cmpl $65525, %edi # imm = 0xFFF5
102103
; X64-NEXT: setne %al
103104
; X64-NEXT: retq
104105
entry:
@@ -125,10 +126,9 @@ entry:
125126
define i32 @test_trunc_add(i64 %x) {
126127
; X64-LABEL: test_trunc_add:
127128
; X64: # %bb.0: # %entry
128-
; X64-NEXT: movabsq $3940649673949184, %rax # imm = 0xE000000000000
129-
; X64-NEXT: addq %rdi, %rax
130-
; X64-NEXT: shrq $48, %rax
131-
; X64-NEXT: # kill: def $eax killed $eax killed $rax
129+
; X64-NEXT: shrq $48, %rdi
130+
; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
131+
; X64-NEXT: movzwl %di, %eax
132132
; X64-NEXT: retq
133133
entry:
134134
%add = add i64 %x, 3940649673949184

0 commit comments

Comments
 (0)