@@ -48472,55 +48472,6 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
48472
48472
return SDValue();
48473
48473
}
48474
48474
48475
- // Attempt to fold some (setcc (sub (truncate (srl (add X, C1), C2)), C3), CC)
48476
- // patterns to (setcc (cmp (add (truncate (srl X, C2)), C1'), C3), CC). C1' will
48477
- // be smaller than C1 so we are able to avoid generating code with MOVABS and
48478
- // large constants in certain cases.
48479
- static SDValue combineSetCCTruncAdd(SDValue EFLAGS, X86::CondCode &CC,
48480
- SelectionDAG &DAG) {
48481
- using namespace llvm::SDPatternMatch;
48482
- if (!(CC == X86::COND_E || CC == X86::COND_NE || CC == X86::COND_AE ||
48483
- CC == X86::COND_B))
48484
- return SDValue();
48485
-
48486
- SDValue AddLhs;
48487
- APInt AddConst, SrlConst, CmpConst;
48488
- if (!sd_match(EFLAGS,
48489
- m_AllOf(m_SpecificVT(MVT::i32),
48490
- m_BinOp(X86ISD::SUB,
48491
- m_Trunc(m_Srl(m_Add(m_Value(AddLhs),
48492
- m_ConstInt(AddConst)),
48493
- m_ConstInt(SrlConst))),
48494
- m_ConstInt(CmpConst)))))
48495
- return SDValue();
48496
-
48497
- SDValue Srl;
48498
- if (!sd_match(EFLAGS.getOperand(0).getOperand(0),
48499
- m_AllOf(m_SpecificVT(MVT::i64), m_Value(Srl))))
48500
- return SDValue();
48501
-
48502
- // Avoid changing the ADD if it is used elsewhere.
48503
- if (!Srl.getOperand(0).hasOneUse())
48504
- return SDValue();
48505
-
48506
- EVT VT = EFLAGS.getValueType();
48507
- APInt ShiftedAddConst = AddConst.lshr(SrlConst);
48508
- if (!CmpConst.ult(ShiftedAddConst.trunc(VT.getSizeInBits())) ||
48509
- (ShiftedAddConst.shl(SrlConst)) != AddConst)
48510
- return SDValue();
48511
-
48512
- SDLoc DL(EFLAGS);
48513
- SDValue AddLHSSrl =
48514
- DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, Srl.getOperand(1));
48515
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);
48516
-
48517
- APInt NewAddConstVal =
48518
- (~((~AddConst).lshr(SrlConst))).trunc(VT.getSizeInBits());
48519
- SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
48520
- SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);
48521
- return DAG.getNode(X86ISD::CMP, DL, VT, NewAddNode, EFLAGS.getOperand(1));
48522
- }
48523
-
48524
48475
/// Optimize an EFLAGS definition used according to the condition code \p CC
48525
48476
/// into a simpler EFLAGS value, potentially returning a new \p CC and replacing
48526
48477
/// uses of chain values.
@@ -48543,9 +48494,6 @@ static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
48543
48494
if (SDValue R = combineSetCCMOVMSK(EFLAGS, CC, DAG, Subtarget))
48544
48495
return R;
48545
48496
48546
- if (SDValue R = combineSetCCTruncAdd(EFLAGS, CC, DAG))
48547
- return R;
48548
-
48549
48497
return combineSetCCAtomicArith(EFLAGS, CC, DAG, Subtarget);
48550
48498
}
48551
48499
@@ -53652,6 +53600,40 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG,
53652
53600
DAG.getUNDEF(SrcVT)));
53653
53601
}
53654
53602
53603
+ // Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to
53604
+ // (add (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are able
53605
+ // to avoid generating code with MOVABS and large constants in certain cases.
53606
+ static SDValue combinei64TruncSrlAdd(SDValue N, EVT VT, SelectionDAG &DAG,
53607
+ const SDLoc &DL) {
53608
+ using namespace llvm::SDPatternMatch;
53609
+
53610
+ SDValue AddLhs;
53611
+ APInt AddConst, SrlConst;
53612
+ if (VT != MVT::i32 ||
53613
+ !sd_match(N, m_AllOf(m_SpecificVT(MVT::i64),
53614
+ m_Srl(m_OneUse(m_Add(m_Value(AddLhs),
53615
+ m_ConstInt(AddConst))),
53616
+ m_ConstInt(SrlConst)))))
53617
+ return SDValue();
53618
+
53619
+ if (!SrlConst.ugt(31) || AddConst.lshr(SrlConst).shl(SrlConst) != AddConst)
53620
+ return SDValue();
53621
+
53622
+ SDValue AddLHSSrl =
53623
+ DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, N.getOperand(1));
53624
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);
53625
+
53626
+ APInt NewAddConstVal =
53627
+ (~((~AddConst).lshr(SrlConst))).trunc(VT.getSizeInBits());
53628
+ SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
53629
+ SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);
53630
+
53631
+ APInt CleanupSizeConstVal = (SrlConst - 32).zextOrTrunc(VT.getSizeInBits());
53632
+ SDValue CleanupSizeConst = DAG.getConstant(CleanupSizeConstVal, DL, VT);
53633
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewAddNode, CleanupSizeConst);
53634
+ return DAG.getNode(ISD::SRL, DL, VT, Shl, CleanupSizeConst);
53635
+ }
53636
+
53655
53637
/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
53656
53638
/// the codegen.
53657
53639
/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
@@ -53697,6 +53679,9 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
53697
53679
if (!Src.hasOneUse())
53698
53680
return SDValue();
53699
53681
53682
+ if (SDValue R = combinei64TruncSrlAdd(Src, VT, DAG, DL))
53683
+ return R;
53684
+
53700
53685
// Only support vector truncation for now.
53701
53686
// TODO: i64 scalar math would benefit as well.
53702
53687
if (!VT.isVector())
0 commit comments