Skip to content

Commit f7ae304

Browse files
committed
Update selection heuristics to avoid code-size regression on average
1 parent df2fb92 commit f7ae304

File tree

2 files changed

+41
-45
lines changed

2 files changed

+41
-45
lines changed

llvm/include/llvm/CodeGen/MachineFrameInfo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -772,7 +772,7 @@ class MachineFrameInfo {
772772
// If ID == 0, MaxAlignment will need to be updated separately.
773773
}
774774

775-
int getUnderlyingSlot(int ObjectIdx) {
775+
int getUnderlyingSlot(int ObjectIdx) const {
776776
assert(unsigned(ObjectIdx + NumFixedObjects) < Objects.size() &&
777777
"Invalid Object Idx!");
778778
return Objects[ObjectIdx + NumFixedObjects].UnderlyingSlot;

llvm/lib/CodeGen/StackColoring.cpp

Lines changed: 40 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "llvm/CodeGen/PseudoSourceValueManager.h"
3636
#include "llvm/CodeGen/SlotIndexes.h"
3737
#include "llvm/CodeGen/TargetOpcodes.h"
38+
#include "llvm/CodeGen/TargetSubtargetInfo.h"
3839
#include "llvm/CodeGen/WinEHFuncInfo.h"
3940
#include "llvm/Config/llvm-config.h"
4041
#include "llvm/IR/Constants.h"
@@ -99,7 +100,6 @@ static cl::opt<unsigned> MaxCandidatesOpt(
99100
"Max number of candidates that will be evaluated, 0 means no limit"));
100101

101102
STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
102-
STATISTIC(GeneratedWorse, "Number of times worse layout were generated");
103103
STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
104104
STATISTIC(StackSlotMerged, "Number of stack slot merged.");
105105
STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
@@ -400,7 +400,9 @@ class StackColoring {
400400
// Use to make overlap queries faster
401401
SmallVector<unsigned, 4> StartLiveness;
402402

403-
uint64_t SlotPriority = 0;
403+
int64_t SlotPriority = 0;
404+
405+
unsigned UseCount = 0;
404406

405407
unsigned Offset = InvalidIdx;
406408

@@ -653,9 +655,11 @@ StackColoring::SlotInfo::dump(const StackColoring *State) const {
653655
dbgs() << ":";
654656
if (Offset != InvalidIdx)
655657
dbgs() << " offset=" << Offset;
658+
dbgs() << " uses=" << UseCount;
659+
dbgs() << " prio=" << SlotPriority;
656660
if (State) {
657661
if (State->MFI->getObjectAllocation(Slot))
658-
dbgs() << " \"" << State->MFI->getObjectAllocation(Slot)->getName()
662+
dbgs() << " alloca=\"" << State->MFI->getObjectAllocation(Slot)->getName()
659663
<< "\"";
660664
if (State->MFI->isSpillSlotObjectIndex(Slot))
661665
dbgs() << " spill";
@@ -803,6 +807,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
803807
int Slot = MO.getIndex();
804808
if (Slot < 0)
805809
continue;
810+
Slot2Info[Slot].UseCount++;
806811
if (!BetweenStartEnd.test(Slot)) {
807812
ConservativeSlots.set(Slot);
808813
}
@@ -1525,35 +1530,24 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
15251530
if (SlotStack.size() <= 1)
15261531
return InvalidIdx;
15271532

1528-
// This Whole block is only used to try and order the stack, such that the
1529-
// Slots are processed in an order that helps getting good packing
1530-
{
1531-
// Find how much usage of every livepoint there is.
1532-
SmallVector<unsigned> CumulatedUsage;
1533-
CumulatedUsage.resize(LivenessSize, 0);
1534-
1535-
for (unsigned Idx = 0; Idx < SlotStack.size(); Idx++) {
1536-
SlotInfo &Info = Slot2Info[SlotStack[Idx]];
1537-
for (unsigned Pt : Info.Liveness.set_bits()) {
1538-
CumulatedUsage[Pt] += Info.Size;
1539-
}
1540-
}
1533+
// This logic is optimized for x86_64, it probably needs to be adapted to
1534+
// other targets to get good code-size/stack-size balance.
1535+
// It is inspired by X86FrameLowering::orderFrameObjects, but with a modified
1536+
// weight for alignment, which helps with stack size
1537+
auto IsLower = [&](unsigned Lhs, unsigned Rhs) {
1538+
SlotInfo &L = Slot2Info[Lhs];
1539+
SlotInfo &R = Slot2Info[Rhs];
1540+
uint64_t DensityLScaled = static_cast<uint64_t>(L.UseCount) *
1541+
static_cast<uint64_t>(R.Size + Log2(R.Align));
1542+
uint64_t DensityRScaled = static_cast<uint64_t>(R.UseCount) *
1543+
static_cast<uint64_t>(L.Size + Log2(L.Align));
1544+
return DensityLScaled < DensityRScaled;
1545+
};
1546+
std::stable_sort(SlotStack.begin(), SlotStack.end(), IsLower);
15411547

1542-
for (unsigned Idx = 0; Idx < SlotStack.size(); Idx++) {
1543-
SlotInfo &Info = Slot2Info[SlotStack[Idx]];
1544-
for (unsigned Pt : Info.Liveness.set_bits()) {
1545-
// Since the goal is to minimize the max usage, blocks that are in high
1546-
// contention areas are given more priority
1547-
Info.SlotPriority +=
1548-
(uint64_t)CumulatedUsage[Pt] * (uint64_t)CumulatedUsage[Pt] +
1549-
(uint64_t)Info.Size * (uint64_t)Info.Align.value();
1550-
}
1551-
}
1552-
std::stable_sort(
1553-
SlotStack.begin(), SlotStack.end(), [&](unsigned Lhs, unsigned Rhs) {
1554-
return Slot2Info[Lhs].SlotPriority < Slot2Info[Rhs].SlotPriority;
1555-
});
1556-
}
1548+
int Prio = 0;
1549+
for (int Slot : SlotStack)
1550+
Slot2Info[Slot].SlotPriority = Prio++;
15571551

15581552
SlotInfo *LastQueryLhs = nullptr;
15591553
SlotInfo *LastQueryRhs = nullptr;
@@ -1666,24 +1660,27 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
16661660

16671661
Offset = alignTo(Offset, Info.Align);
16681662

1669-
LLVM_DEBUG(dbgs() << "fi#" << Candidates[K] << "@" << Offset << "->";
1670-
if (PrevSlot == InvalidIdx) dbgs() << "bottom";
1671-
else dbgs() << "fi#" << PrevSlot; dbgs() << ", ";);
1663+
LLVM_DEBUG({
1664+
dbgs() << "fi#" << Candidates[K] << "@" << Offset;
1665+
if (PrevSlot != InvalidIdx)
1666+
dbgs() << "->" << "fi#" << PrevSlot;
1667+
dbgs() << ", ";
1668+
});
16721669

16731670
bool IsBetter = [&] {
1671+
if (BestIdx == InvalidIdx)
1672+
return true;
1673+
SlotInfo &Best = Slot2Info[Candidates[BestIdx]];
16741674
if (BestOffset != Offset)
16751675
return BestOffset > Offset;
1676-
SlotInfo &Other = Slot2Info[Candidates[K]];
1677-
if (Other.Align != Info.Align)
1678-
return Other.Align < Info.Align;
1679-
if (Other.Size != Info.Size)
1680-
return Other.Size < Info.Size;
1681-
if (Other.SlotPriority != Info.SlotPriority)
1682-
return Other.SlotPriority < Info.SlotPriority;
1676+
if (Best.SlotPriority != Info.SlotPriority)
1677+
return Best.SlotPriority < Info.SlotPriority;
1678+
if (Best.Align != Info.Align)
1679+
return Best.Align < Info.Align;
16831680

16841681
// Both are always stored in Slot2Info, so this is equivalent to
16851682
// FrameIndex comparison
1686-
return &Other < &Info;
1683+
return &Best < &Info;
16871684
}();
16881685

16891686
if (IsBetter) {
@@ -1726,7 +1723,6 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
17261723
LLVM_DEBUG(dbgs() << "MergedSize=" << FinalSize << " OrigPesSize="
17271724
<< OrigPesSize << " OrigOptSize" << OrigOptSize << "\n");
17281725
if (FinalSize >= OrigPesSize) {
1729-
GeneratedWorse++;
17301726
return InvalidIdx;
17311727
}
17321728

@@ -1774,6 +1770,7 @@ bool StackColoring::run(MachineFunction &Func) {
17741770
Intervals.reserve(NumSlots);
17751771
LiveStarts.resize(NumSlots);
17761772

1773+
Slot2Info.resize(NumSlots);
17771774
unsigned NumMarkers = collectMarkers(NumSlots);
17781775

17791776
unsigned TotalSize = 0;
@@ -1792,7 +1789,6 @@ bool StackColoring::run(MachineFunction &Func) {
17921789
return removeAllMarkers();
17931790
}
17941791

1795-
Slot2Info.resize(NumSlots);
17961792
for (unsigned i = 0; i < NumSlots; ++i) {
17971793
std::unique_ptr<LiveRange> LI(new LiveRange());
17981794
LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);

0 commit comments

Comments
 (0)