35
35
#include " llvm/CodeGen/PseudoSourceValueManager.h"
36
36
#include " llvm/CodeGen/SlotIndexes.h"
37
37
#include " llvm/CodeGen/TargetOpcodes.h"
38
+ #include " llvm/CodeGen/TargetSubtargetInfo.h"
38
39
#include " llvm/CodeGen/WinEHFuncInfo.h"
39
40
#include " llvm/Config/llvm-config.h"
40
41
#include " llvm/IR/Constants.h"
@@ -99,7 +100,6 @@ static cl::opt<unsigned> MaxCandidatesOpt(
99
100
" Max number of candidates that will be evaluated, 0 means no limit" ));
100
101
101
102
STATISTIC (NumMarkerSeen, " Number of lifetime markers found." );
102
- STATISTIC (GeneratedWorse, " Number of times worse layout were generated" );
103
103
STATISTIC (StackSpaceSaved, " Number of bytes saved due to merging slots." );
104
104
STATISTIC (StackSlotMerged, " Number of stack slot merged." );
105
105
STATISTIC (EscapedAllocas, " Number of allocas that escaped the lifetime region" );
@@ -400,7 +400,9 @@ class StackColoring {
400
400
// Used to make overlap queries faster
401
401
SmallVector<unsigned , 4 > StartLiveness;
402
402
403
- uint64_t SlotPriority = 0 ;
403
+ int64_t SlotPriority = 0 ;
404
+
405
+ unsigned UseCount = 0 ;
404
406
405
407
unsigned Offset = InvalidIdx;
406
408
@@ -653,9 +655,11 @@ StackColoring::SlotInfo::dump(const StackColoring *State) const {
653
655
dbgs () << " :" ;
654
656
if (Offset != InvalidIdx)
655
657
dbgs () << " offset=" << Offset;
658
+ dbgs () << " uses=" << UseCount;
659
+ dbgs () << " prio=" << SlotPriority;
656
660
if (State) {
657
661
if (State->MFI ->getObjectAllocation (Slot))
658
- dbgs () << " \" " << State->MFI ->getObjectAllocation (Slot)->getName ()
662
+ dbgs () << " alloca= \" " << State->MFI ->getObjectAllocation (Slot)->getName ()
659
663
<< " \" " ;
660
664
if (State->MFI ->isSpillSlotObjectIndex (Slot))
661
665
dbgs () << " spill" ;
@@ -803,6 +807,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
803
807
int Slot = MO.getIndex ();
804
808
if (Slot < 0 )
805
809
continue ;
810
+ Slot2Info[Slot].UseCount ++;
806
811
if (!BetweenStartEnd.test (Slot)) {
807
812
ConservativeSlots.set (Slot);
808
813
}
@@ -1525,35 +1530,24 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
1525
1530
if (SlotStack.size () <= 1 )
1526
1531
return InvalidIdx;
1527
1532
1528
- // This Whole block is only used to try and order the stack, such that the
1529
- // Slots are processed in an order that helps getting good packing
1530
- {
1531
- // Find how much usage of every livepoint there is.
1532
- SmallVector<unsigned > CumulatedUsage;
1533
- CumulatedUsage.resize (LivenessSize, 0 );
1534
-
1535
- for (unsigned Idx = 0 ; Idx < SlotStack.size (); Idx++) {
1536
- SlotInfo &Info = Slot2Info[SlotStack[Idx]];
1537
- for (unsigned Pt : Info.Liveness .set_bits ()) {
1538
- CumulatedUsage[Pt] += Info.Size ;
1539
- }
1540
- }
1533
+ // This logic is optimized for x86_64, it probably needs to be adapted to
1534
+ // other targets to get good code-size/stack-size balance.
1535
+ // It is inspired by X86FrameLowering::orderFrameObjects, but with the weight
1536
+ // given to alignment modified to help with stack size.
1537
+ auto IsLower = [&](unsigned Lhs, unsigned Rhs) {
1538
+ SlotInfo &L = Slot2Info[Lhs];
1539
+ SlotInfo &R = Slot2Info[Rhs];
1540
+ uint64_t DensityLScaled = static_cast <uint64_t >(L.UseCount ) *
1541
+ static_cast <uint64_t >(R.Size + Log2 (R.Align ));
1542
+ uint64_t DensityRScaled = static_cast <uint64_t >(R.UseCount ) *
1543
+ static_cast <uint64_t >(L.Size + Log2 (L.Align ));
1544
+ return DensityLScaled < DensityRScaled;
1545
+ };
1546
+ std::stable_sort (SlotStack.begin (), SlotStack.end (), IsLower);
1541
1547
1542
- for (unsigned Idx = 0 ; Idx < SlotStack.size (); Idx++) {
1543
- SlotInfo &Info = Slot2Info[SlotStack[Idx]];
1544
- for (unsigned Pt : Info.Liveness .set_bits ()) {
1545
- // Since the goal is to minimize the max usage, blocks that are in high
1546
- // contention areas are given more priority
1547
- Info.SlotPriority +=
1548
- (uint64_t )CumulatedUsage[Pt] * (uint64_t )CumulatedUsage[Pt] +
1549
- (uint64_t )Info.Size * (uint64_t )Info.Align .value ();
1550
- }
1551
- }
1552
- std::stable_sort (
1553
- SlotStack.begin (), SlotStack.end (), [&](unsigned Lhs, unsigned Rhs) {
1554
- return Slot2Info[Lhs].SlotPriority < Slot2Info[Rhs].SlotPriority ;
1555
- });
1556
- }
1548
+ int Prio = 0 ;
1549
+ for (int Slot : SlotStack)
1550
+ Slot2Info[Slot].SlotPriority = Prio++;
1557
1551
1558
1552
SlotInfo *LastQueryLhs = nullptr ;
1559
1553
SlotInfo *LastQueryRhs = nullptr ;
@@ -1666,24 +1660,27 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
1666
1660
1667
1661
Offset = alignTo (Offset, Info.Align );
1668
1662
1669
- LLVM_DEBUG (dbgs () << " fi#" << Candidates[K] << " @" << Offset << " ->" ;
1670
- if (PrevSlot == InvalidIdx) dbgs () << " bottom" ;
1671
- else dbgs () << " fi#" << PrevSlot; dbgs () << " , " ;);
1663
+ LLVM_DEBUG ({
1664
+ dbgs () << " fi#" << Candidates[K] << " @" << Offset;
1665
+ if (PrevSlot != InvalidIdx)
1666
+ dbgs () << " ->" << " fi#" << PrevSlot;
1667
+ dbgs () << " , " ;
1668
+ });
1672
1669
1673
1670
bool IsBetter = [&] {
1671
+ if (BestIdx == InvalidIdx)
1672
+ return true ;
1673
+ SlotInfo &Best = Slot2Info[Candidates[BestIdx]];
1674
1674
if (BestOffset != Offset)
1675
1675
return BestOffset > Offset;
1676
- SlotInfo &Other = Slot2Info[Candidates[K]];
1677
- if (Other.Align != Info.Align )
1678
- return Other.Align < Info.Align ;
1679
- if (Other.Size != Info.Size )
1680
- return Other.Size < Info.Size ;
1681
- if (Other.SlotPriority != Info.SlotPriority )
1682
- return Other.SlotPriority < Info.SlotPriority ;
1676
+ if (Best.SlotPriority != Info.SlotPriority )
1677
+ return Best.SlotPriority < Info.SlotPriority ;
1678
+ if (Best.Align != Info.Align )
1679
+ return Best.Align < Info.Align ;
1683
1680
1684
1681
// Both are always stored in Slot2Info, so this is equivalent to
1685
1682
// FrameIndex comparison
1686
- return &Other < &Info;
1683
+ return &Best < &Info;
1687
1684
}();
1688
1685
1689
1686
if (IsBetter) {
@@ -1726,7 +1723,6 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
1726
1723
LLVM_DEBUG (dbgs () << " MergedSize=" << FinalSize << " OrigPesSize="
1727
1724
<< OrigPesSize << " OrigOptSize" << OrigOptSize << " \n " );
1728
1725
if (FinalSize >= OrigPesSize) {
1729
- GeneratedWorse++;
1730
1726
return InvalidIdx;
1731
1727
}
1732
1728
@@ -1774,6 +1770,7 @@ bool StackColoring::run(MachineFunction &Func) {
1774
1770
Intervals.reserve (NumSlots);
1775
1771
LiveStarts.resize (NumSlots);
1776
1772
1773
+ Slot2Info.resize (NumSlots);
1777
1774
unsigned NumMarkers = collectMarkers (NumSlots);
1778
1775
1779
1776
unsigned TotalSize = 0 ;
@@ -1792,7 +1789,6 @@ bool StackColoring::run(MachineFunction &Func) {
1792
1789
return removeAllMarkers ();
1793
1790
}
1794
1791
1795
- Slot2Info.resize (NumSlots);
1796
1792
for (unsigned i = 0 ; i < NumSlots; ++i) {
1797
1793
std::unique_ptr<LiveRange> LI (new LiveRange ());
1798
1794
LI->getNextValue (Indexes->getZeroIndex (), VNInfoAllocator);
0 commit comments