@@ -231,12 +231,19 @@ class LoopIdiomRecognize {
231
231
bool recognizePopcount ();
232
232
void transformLoopToPopcount (BasicBlock *PreCondBB, Instruction *CntInst,
233
233
PHINode *CntPhi, Value *Var);
234
+ bool isProfitableToInsertFFS (Intrinsic::ID IntrinID, Value *InitX,
235
+ bool ZeroCheck, size_t CanonicalSize);
236
+ bool insertFFSIfProfitable (Intrinsic::ID IntrinID, Value *InitX,
237
+ Instruction *DefX, PHINode *CntPhi,
238
+ Instruction *CntInst);
234
239
bool recognizeAndInsertFFS (); // / Find First Set: ctlz or cttz
240
+ bool recognizeShiftUntilLessThan ();
235
241
void transformLoopToCountable (Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
236
242
Instruction *CntInst, PHINode *CntPhi,
237
243
Value *Var, Instruction *DefX,
238
244
const DebugLoc &DL, bool ZeroCheck,
239
- bool IsCntPhiUsedOutsideLoop);
245
+ bool IsCntPhiUsedOutsideLoop,
246
+ bool InsertSub = false );
240
247
241
248
bool recognizeShiftUntilBitTest ();
242
249
bool recognizeShiftUntilZero ();
@@ -1482,7 +1489,8 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
1482
1489
<< CurLoop->getHeader ()->getName () << " \n " );
1483
1490
1484
1491
return recognizePopcount () || recognizeAndInsertFFS () ||
1485
- recognizeShiftUntilBitTest () || recognizeShiftUntilZero ();
1492
+ recognizeShiftUntilBitTest () || recognizeShiftUntilZero () ||
1493
+ recognizeShiftUntilLessThan ();
1486
1494
}
1487
1495
1488
1496
// / Check if the given conditional branch is based on the comparison between
@@ -1517,6 +1525,34 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
1517
1525
return nullptr ;
1518
1526
}
1519
1527
1528
+ // / Check if the given conditional branch is based on an unsigned less-than
1529
+ // / comparison between a variable and a constant, and if the comparison is false
1530
+ // / the control yields to the loop entry. If the branch matches the behaviour,
1531
+ // / the variable involved in the comparison is returned.
1532
+ static Value *matchShiftULTCondition (BranchInst *BI, BasicBlock *LoopEntry,
1533
+ uint64_t &Threshold) {
1534
+ if (!BI || !BI->isConditional ())
1535
+ return nullptr ;
1536
+
1537
+ ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition ());
1538
+ if (!Cond)
1539
+ return nullptr ;
1540
+
1541
+ ConstantInt *CmpConst = dyn_cast<ConstantInt>(Cond->getOperand (1 ));
1542
+ if (!CmpConst)
1543
+ return nullptr ;
1544
+
1545
+ BasicBlock *FalseSucc = BI->getSuccessor (1 );
1546
+ ICmpInst::Predicate Pred = Cond->getPredicate ();
1547
+
1548
+ if (Pred == ICmpInst::ICMP_ULT && FalseSucc == LoopEntry) {
1549
+ Threshold = CmpConst->getZExtValue ();
1550
+ return Cond->getOperand (0 );
1551
+ }
1552
+
1553
+ return nullptr ;
1554
+ }
1555
+
1520
1556
// Check if the recurrence variable `VarX` is in the right form to create
1521
1557
// the idiom. Returns the value coerced to a PHINode if so.
1522
1558
static PHINode *getRecurrenceVar (Value *VarX, Instruction *DefX,
@@ -1528,6 +1564,107 @@ static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
1528
1564
return nullptr ;
1529
1565
}
1530
1566
1567
+ // / Return true if the idiom is detected in the loop.
1568
+ // /
1569
+ // / Additionally:
1570
+ // / 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
1571
+ // / or nullptr if there is no such.
1572
+ // / 2) \p CntPhi is set to the corresponding phi node
1573
+ // / or nullptr if there is no such.
1574
+ // / 3) \p InitX is set to the value whose CTLZ could be used.
1575
+ // / 4) \p DefX is set to the instruction calculating Loop exit condition.
1576
+ // / 5) \p Threshold is set to the constant involved in the unsigned less-than
1577
+ // / comparison.
1578
+ // /
1579
+ // / The core idiom we are trying to detect is:
1580
+ // / \code
1581
+ // / if (x0 < 2)
1582
+ // / goto loop-exit // the precondition of the loop
1583
+ // / cnt0 = init-val
1584
+ // / do {
1585
+ // / x = phi (x0, x.next); //PhiX
1586
+ // / cnt = phi (cnt0, cnt.next)
1587
+ // /
1588
+ // / cnt.next = cnt + 1;
1589
+ // / ...
1590
+ // / x.next = x >> 1; // DefX
1591
+ // / } while (x >= 4)
1592
+ // / loop-exit:
1593
+ // / \endcode
1594
+ static bool detectShiftUntilLessThanIdiom (Loop *CurLoop, const DataLayout &DL,
1595
+ Intrinsic::ID &IntrinID,
1596
+ Value *&InitX, Instruction *&CntInst,
1597
+ PHINode *&CntPhi, Instruction *&DefX,
1598
+ uint64_t &Threshold) {
1599
+ BasicBlock *LoopEntry;
1600
+
1601
+ DefX = nullptr ;
1602
+ CntInst = nullptr ;
1603
+ CntPhi = nullptr ;
1604
+ LoopEntry = *(CurLoop->block_begin ());
1605
+
1606
+ // step 1: Check if the loop-back branch is in desirable form.
1607
+ if (Value *T = matchShiftULTCondition (
1608
+ dyn_cast<BranchInst>(LoopEntry->getTerminator ()), LoopEntry,
1609
+ Threshold))
1610
+ DefX = dyn_cast<Instruction>(T);
1611
+ else
1612
+ return false ;
1613
+
1614
+ // step 2: Check the recurrence of variable X
1615
+ if (!DefX || !isa<PHINode>(DefX))
1616
+ return false ;
1617
+
1618
+ PHINode *VarPhi = cast<PHINode>(DefX);
1619
+ int Idx = VarPhi->getBasicBlockIndex (LoopEntry);
1620
+ if (Idx == -1 )
1621
+ return false ;
1622
+
1623
+ DefX = dyn_cast<Instruction>(VarPhi->getIncomingValue (Idx));
1624
+ if (!DefX || DefX->getNumOperands () == 0 || DefX->getOperand (0 ) != VarPhi)
1625
+ return false ;
1626
+
1627
+ // step 3: detect instructions corresponding to "x.next = x >> 1"
1628
+ if (DefX->getOpcode () != Instruction::LShr)
1629
+ return false ;
1630
+
1631
+ IntrinID = Intrinsic::ctlz;
1632
+ ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand (1 ));
1633
+ if (!Shft || !Shft->isOne ())
1634
+ return false ;
1635
+
1636
+ InitX = VarPhi->getIncomingValueForBlock (CurLoop->getLoopPreheader ());
1637
+
1638
+ // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
1639
+ // or cnt.next = cnt + -1.
1640
+ // TODO: We can skip the step. If loop trip count is known (CTLZ),
1641
+ // then all uses of "cnt.next" could be optimized to the trip count
1642
+ // plus "cnt0". Currently it is not optimized.
1643
+ // This step could be used to detect POPCNT instruction:
1644
+ // cnt.next = cnt + (x.next & 1)
1645
+ for (Instruction &Inst : llvm::make_range (
1646
+ LoopEntry->getFirstNonPHI ()->getIterator (), LoopEntry->end ())) {
1647
+ if (Inst.getOpcode () != Instruction::Add)
1648
+ continue ;
1649
+
1650
+ ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand (1 ));
1651
+ if (!Inc || (!Inc->isOne () && !Inc->isMinusOne ()))
1652
+ continue ;
1653
+
1654
+ PHINode *Phi = getRecurrenceVar (Inst.getOperand (0 ), &Inst, LoopEntry);
1655
+ if (!Phi)
1656
+ continue ;
1657
+
1658
+ CntInst = &Inst;
1659
+ CntPhi = Phi;
1660
+ break ;
1661
+ }
1662
+ if (!CntInst)
1663
+ return false ;
1664
+
1665
+ return true ;
1666
+ }
1667
+
1531
1668
// / Return true iff the idiom is detected in the loop.
1532
1669
// /
1533
1670
// / Additionally:
@@ -1756,27 +1893,35 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
1756
1893
return true ;
1757
1894
}
1758
1895
1759
- // / Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
1760
- // / to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
1761
- // / trip count returns true; otherwise, returns false.
1762
- bool LoopIdiomRecognize::recognizeAndInsertFFS () {
1763
- // Give up if the loop has multiple blocks or multiple backedges.
1764
- if (CurLoop-> getNumBackEdges () != 1 || CurLoop-> getNumBlocks () != 1 )
1765
- return false ;
1896
+ // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
1897
+ // profitable if we delete the loop.
1898
+ bool LoopIdiomRecognize::isProfitableToInsertFFS (Intrinsic::ID IntrinID,
1899
+ Value *InitX, bool ZeroCheck,
1900
+ size_t CanonicalSize) {
1901
+ const Value *Args[] = {InitX,
1902
+ ConstantInt::getBool (InitX-> getContext (), ZeroCheck)} ;
1766
1903
1767
- Intrinsic::ID IntrinID;
1768
- Value *InitX;
1769
- Instruction *DefX = nullptr ;
1770
- PHINode *CntPhi = nullptr ;
1771
- Instruction *CntInst = nullptr ;
1772
- // Help decide if transformation is profitable. For ShiftUntilZero idiom,
1773
- // this is always 6.
1774
- size_t IdiomCanonicalSize = 6 ;
1904
+ // @llvm.dbg doesn't count as they have no semantic effect.
1905
+ auto InstWithoutDebugIt = CurLoop->getHeader ()->instructionsWithoutDebug ();
1906
+ uint32_t HeaderSize =
1907
+ std::distance (InstWithoutDebugIt.begin (), InstWithoutDebugIt.end ());
1775
1908
1776
- if (!detectShiftUntilZeroIdiom (CurLoop, *DL, IntrinID, InitX,
1777
- CntInst, CntPhi, DefX))
1909
+ IntrinsicCostAttributes Attrs (IntrinID, InitX->getType (), Args);
1910
+ InstructionCost Cost = TTI->getIntrinsicInstrCost (
1911
+ Attrs, TargetTransformInfo::TCK_SizeAndLatency);
1912
+ if (HeaderSize != CanonicalSize && Cost > TargetTransformInfo::TCC_Basic)
1778
1913
return false ;
1779
1914
1915
+ return true ;
1916
+ }
1917
+
1918
+ // / Convert CTLZ / CTTZ idiom loop into countable loop.
1919
+ // / If CTLZ / CTTZ inserted as a new trip count returns true; otherwise,
1920
+ // / returns false.
1921
+ bool LoopIdiomRecognize::insertFFSIfProfitable (Intrinsic::ID IntrinID,
1922
+ Value *InitX, Instruction *DefX,
1923
+ PHINode *CntPhi,
1924
+ Instruction *CntInst) {
1780
1925
bool IsCntPhiUsedOutsideLoop = false ;
1781
1926
for (User *U : CntPhi->users ())
1782
1927
if (!CurLoop->contains (cast<Instruction>(U))) {
@@ -1818,35 +1963,107 @@ bool LoopIdiomRecognize::recognizeAndInsertFFS() {
1818
1963
ZeroCheck = true ;
1819
1964
}
1820
1965
1821
- // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
1822
- // profitable if we delete the loop.
1823
-
1824
- // the loop has only 6 instructions:
1966
+ // FFS idiom loop has only 6 instructions:
1825
1967
// %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
1826
1968
// %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
1827
1969
// %shr = ashr %n.addr.0, 1
1828
1970
// %tobool = icmp eq %shr, 0
1829
1971
// %inc = add nsw %i.0, 1
1830
1972
// br i1 %tobool
1973
+ size_t IdiomCanonicalSize = 6 ;
1974
+ if (!isProfitableToInsertFFS (IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
1975
+ return false ;
1831
1976
1832
- const Value *Args[] = {InitX,
1833
- ConstantInt::getBool (InitX->getContext (), ZeroCheck)};
1977
+ transformLoopToCountable (IntrinID, PH, CntInst, CntPhi, InitX, DefX,
1978
+ DefX->getDebugLoc (), ZeroCheck,
1979
+ IsCntPhiUsedOutsideLoop);
1980
+ return true ;
1981
+ }
1834
1982
1835
- // @llvm.dbg doesn't count as they have no semantic effect.
1836
- auto InstWithoutDebugIt = CurLoop->getHeader ()->instructionsWithoutDebug ();
1837
- uint32_t HeaderSize =
1838
- std::distance (InstWithoutDebugIt.begin (), InstWithoutDebugIt.end ());
1983
+ // / Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
1984
+ // / to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
1985
+ // / trip count returns true; otherwise, returns false.
1986
+ bool LoopIdiomRecognize::recognizeAndInsertFFS () {
1987
+ // Give up if the loop has multiple blocks or multiple backedges.
1988
+ if (CurLoop->getNumBackEdges () != 1 || CurLoop->getNumBlocks () != 1 )
1989
+ return false ;
1839
1990
1840
- IntrinsicCostAttributes Attrs (IntrinID, InitX->getType (), Args);
1841
- InstructionCost Cost =
1842
- TTI->getIntrinsicInstrCost (Attrs, TargetTransformInfo::TCK_SizeAndLatency);
1843
- if (HeaderSize != IdiomCanonicalSize &&
1844
- Cost > TargetTransformInfo::TCC_Basic)
1991
+ Intrinsic::ID IntrinID;
1992
+ Value *InitX;
1993
+ Instruction *DefX = nullptr ;
1994
+ PHINode *CntPhi = nullptr ;
1995
+ Instruction *CntInst = nullptr ;
1996
+
1997
+ if (!detectShiftUntilZeroIdiom (CurLoop, *DL, IntrinID, InitX, CntInst, CntPhi,
1998
+ DefX))
1999
+ return false ;
2000
+
2001
+ return insertFFSIfProfitable (IntrinID, InitX, DefX, CntPhi, CntInst);
2002
+ }
2003
+
2004
+ bool LoopIdiomRecognize::recognizeShiftUntilLessThan () {
2005
+ // Give up if the loop has multiple blocks or multiple backedges.
2006
+ if (CurLoop->getNumBackEdges () != 1 || CurLoop->getNumBlocks () != 1 )
2007
+ return false ;
2008
+
2009
+ Intrinsic::ID IntrinID;
2010
+ Value *InitX;
2011
+ Instruction *DefX = nullptr ;
2012
+ PHINode *CntPhi = nullptr ;
2013
+ Instruction *CntInst = nullptr ;
2014
+
2015
+ uint64_t LoopThreshold;
2016
+ if (!detectShiftUntilLessThanIdiom (CurLoop, *DL, IntrinID, InitX, CntInst,
2017
+ CntPhi, DefX, LoopThreshold))
2018
+ return false ;
2019
+
2020
+ if (LoopThreshold == 2 ) {
2021
+ // Treat as regular FFS.
2022
+ return insertFFSIfProfitable (IntrinID, InitX, DefX, CntPhi, CntInst);
2023
+ }
2024
+
2025
+ // Look for Floor Log2 Idiom.
2026
+ if (LoopThreshold != 4 )
2027
+ return false ;
2028
+
2029
+ // Abort if CntPhi is used outside of the loop.
2030
+ for (User *U : CntPhi->users ())
2031
+ if (!CurLoop->contains (cast<Instruction>(U)))
2032
+ return false ;
2033
+
2034
+ // It is safe to assume Preheader exist as it was checked in
2035
+ // parent function RunOnLoop.
2036
+ BasicBlock *PH = CurLoop->getLoopPreheader ();
2037
+ auto *PreCondBB = PH->getSinglePredecessor ();
2038
+ if (!PreCondBB)
2039
+ return false ;
2040
+ auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator ());
2041
+ if (!PreCondBI)
2042
+ return false ;
2043
+
2044
+ uint64_t PreLoopThreshold;
2045
+ if (matchShiftULTCondition (PreCondBI, PH, PreLoopThreshold) != InitX ||
2046
+ PreLoopThreshold != 2 )
1845
2047
return false ;
1846
2048
2049
+ bool ZeroCheck = true ;
2050
+
2051
+ // the loop has only 6 instructions:
2052
+ // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
2053
+ // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
2054
+ // %shr = lshr %n.addr.0, 1
2055
+ // %tobool = icmp ult %n.addr.0, C
2056
+ // %inc = add nsw %i.0, 1
2057
+ // br i1 %tobool
2058
+ size_t IdiomCanonicalSize = 6 ;
2059
+ if (!isProfitableToInsertFFS (IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
2060
+ return false ;
2061
+
2062
+ // log2(x) = w − 1 − clz(x)
1847
2063
transformLoopToCountable (IntrinID, PH, CntInst, CntPhi, InitX, DefX,
1848
2064
DefX->getDebugLoc (), ZeroCheck,
1849
- IsCntPhiUsedOutsideLoop);
2065
+ /* IsCntPhiUsedOutsideLoop=*/ false ,
2066
+ /* InsertSub=*/ true );
1850
2067
return true ;
1851
2068
}
1852
2069
@@ -1961,7 +2178,7 @@ static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
1961
2178
void LoopIdiomRecognize::transformLoopToCountable (
1962
2179
Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
1963
2180
PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
1964
- bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
2181
+ bool ZeroCheck, bool IsCntPhiUsedOutsideLoop, bool InsertSub ) {
1965
2182
BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator ());
1966
2183
1967
2184
// Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
@@ -1991,6 +2208,8 @@ void LoopIdiomRecognize::transformLoopToCountable(
1991
2208
Type *CountTy = Count->getType ();
1992
2209
Count = Builder.CreateSub (
1993
2210
ConstantInt::get (CountTy, CountTy->getIntegerBitWidth ()), Count);
2211
+ if (InsertSub)
2212
+ Count = Builder.CreateSub (Count, ConstantInt::get (CountTy, 1 ));
1994
2213
Value *NewCount = Count;
1995
2214
if (IsCntPhiUsedOutsideLoop)
1996
2215
Count = Builder.CreateAdd (Count, ConstantInt::get (CountTy, 1 ));
0 commit comments