Skip to content

Commit 4d8cf2a

Browse files
committed
Resubmit with fix: [NFC] Refactor MBB hotness/coldness into templated PSI functions.
In D152399, we calculate BPI->BFI in the MachineFunctionSplit pass just to use PSI->isFunctionHotInCallGraph, which is expensive. Instead, we can implement this directly with MBFI. A reviewer mentioned in a comment that machine_size_opts already has isFunctionColdInCallGraph, isFunctionHotInCallGraphNthPercentile, etc. implemented. These can be refactored and reused across MFS and machine size opts. This CL does that: it refactors those internal static functions out into PSI as templated functions, so they can be accessed easily. Differential Revision: https://reviews.llvm.org/D153927
1 parent dcef530 commit 4d8cf2a

File tree

5 files changed

+217
-362
lines changed

5 files changed

+217
-362
lines changed

llvm/include/llvm/Analysis/ProfileSummaryInfo.h

Lines changed: 197 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515
#define LLVM_ANALYSIS_PROFILESUMMARYINFO_H
1616

1717
#include "llvm/ADT/DenseMap.h"
18+
#include "llvm/Analysis/BlockFrequencyInfo.h"
19+
#include "llvm/CodeGen/MachineFunction.h"
20+
#include "llvm/IR/Function.h"
21+
#include "llvm/IR/Instructions.h"
1822
#include "llvm/IR/PassManager.h"
1923
#include "llvm/IR/ProfileSummary.h"
2024
#include "llvm/Pass.h"
@@ -23,9 +27,7 @@
2327

2428
namespace llvm {
2529
class BasicBlock;
26-
class BlockFrequencyInfo;
2730
class CallBase;
28-
class Function;
2931

3032
/// Analysis providing profile information.
3133
///
@@ -107,28 +109,77 @@ class ProfileSummaryInfo {
107109
bool hasHugeWorkingSetSize() const;
108110
/// Returns true if the working set size of the code is considered large.
109111
bool hasLargeWorkingSetSize() const;
110-
/// Returns true if \p F has hot function entry.
111-
bool isFunctionEntryHot(const Function *F) const;
112+
/// Returns true if \p F has hot function entry. If it returns false, it
113+
/// either means it is not hot or it is unknown whether it is hot or not (for
114+
/// example, no profile data is available).
115+
template <typename FuncT> bool isFunctionEntryHot(const FuncT *F) const {
116+
if (!F || !hasProfileSummary())
117+
return false;
118+
std::optional<Function::ProfileCount> FunctionCount = getEntryCount(F);
119+
// FIXME: The heuristic used below for determining hotness is based on
120+
// preliminary SPEC tuning for inliner. This will eventually be a
121+
// convenience method that calls isHotCount.
122+
return FunctionCount && isHotCount(FunctionCount->getCount());
123+
}
124+
112125
/// Returns true if \p F contains hot code.
113-
bool isFunctionHotInCallGraph(const Function *F,
114-
BlockFrequencyInfo &BFI) const;
126+
template <typename FuncT, typename BFIT>
127+
bool isFunctionHotInCallGraph(const FuncT *F, BFIT &BFI) const {
128+
if (!F || !hasProfileSummary())
129+
return false;
130+
if (auto FunctionCount = getEntryCount(F))
131+
if (isHotCount(FunctionCount->getCount()))
132+
return true;
133+
134+
if (auto TotalCallCount = getTotalCallCount(F)) {
135+
if (isHotCount(*TotalCallCount))
136+
return true;
137+
}
138+
139+
for (const auto &BB : *F)
140+
if (isHotBlock(&BB, &BFI))
141+
return true;
142+
return false;
143+
}
115144
/// Returns true if \p F has cold function entry.
116145
bool isFunctionEntryCold(const Function *F) const;
117146
/// Returns true if \p F contains only cold code.
118-
bool isFunctionColdInCallGraph(const Function *F,
119-
BlockFrequencyInfo &BFI) const;
147+
template <typename FuncT, typename BFIT>
148+
bool isFunctionColdInCallGraph(const FuncT *F, BFIT &BFI) const {
149+
if (!F || !hasProfileSummary())
150+
return false;
151+
if (auto FunctionCount = getEntryCount(F))
152+
if (!isColdCount(FunctionCount->getCount()))
153+
return false;
154+
155+
if (auto TotalCallCount = getTotalCallCount(F)) {
156+
if (!isColdCount(*TotalCallCount))
157+
return false;
158+
}
159+
160+
for (const auto &BB : *F)
161+
if (!isColdBlock(&BB, &BFI))
162+
return false;
163+
return true;
164+
}
120165
/// Returns true if the hotness of \p F is unknown.
121166
bool isFunctionHotnessUnknown(const Function &F) const;
122167
/// Returns true if \p F contains hot code with regard to a given hot
123168
/// percentile cutoff value.
169+
template <typename FuncT, typename BFIT>
124170
bool isFunctionHotInCallGraphNthPercentile(int PercentileCutoff,
125-
const Function *F,
126-
BlockFrequencyInfo &BFI) const;
171+
const FuncT *F, BFIT &BFI) const {
172+
return isFunctionHotOrColdInCallGraphNthPercentile<true, FuncT, BFIT>(
173+
PercentileCutoff, F, BFI);
174+
}
127175
/// Returns true if \p F contains cold code with regard to a given cold
128176
/// percentile cutoff value.
177+
template <typename FuncT, typename BFIT>
129178
bool isFunctionColdInCallGraphNthPercentile(int PercentileCutoff,
130-
const Function *F,
131-
BlockFrequencyInfo &BFI) const;
179+
const FuncT *F, BFIT &BFI) const {
180+
return isFunctionHotOrColdInCallGraphNthPercentile<false, FuncT, BFIT>(
181+
PercentileCutoff, F, BFI);
182+
}
132183
/// Returns true if count \p C is considered hot.
133184
bool isHotCount(uint64_t C) const;
134185
/// Returns true if count \p C is considered cold.
@@ -143,22 +194,57 @@ class ProfileSummaryInfo {
143194
/// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where
144195
/// the first two digits are the whole part. E.g. 995000 for 99.5 percentile.
145196
bool isColdCountNthPercentile(int PercentileCutoff, uint64_t C) const;
197+
146198
/// Returns true if BasicBlock \p BB is considered hot.
147-
bool isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) const;
199+
template <typename BBType, typename BFIT>
200+
bool isHotBlock(const BBType *BB, BFIT *BFI) const {
201+
auto Count = BFI->getBlockProfileCount(BB);
202+
return Count && isHotCount(*Count);
203+
}
204+
148205
/// Returns true if BasicBlock \p BB is considered cold.
149-
bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) const;
150-
/// Returns true if BasicBlock \p BB is considered hot with regard to a given
151-
/// hot percentile cutoff value.
152-
/// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where
153-
/// the first two digits are the whole part. E.g. 995000 for 99.5 percentile.
154-
bool isHotBlockNthPercentile(int PercentileCutoff, const BasicBlock *BB,
155-
BlockFrequencyInfo *BFI) const;
206+
template <typename BBType, typename BFIT>
207+
bool isColdBlock(const BBType *BB, BFIT *BFI) const {
208+
auto Count = BFI->getBlockProfileCount(BB);
209+
return Count && isColdCount(*Count);
210+
}
211+
212+
template <typename BFIT>
213+
bool isColdBlock(BlockFrequency BlockFreq, const BFIT *BFI) const {
214+
auto Count = BFI->getProfileCountFromFreq(BlockFreq.getFrequency());
215+
return Count && isColdCount(*Count);
216+
}
217+
218+
template <typename BBType, typename BFIT>
219+
bool isHotBlockNthPercentile(int PercentileCutoff, const BBType *BB,
220+
BFIT *BFI) const {
221+
return isHotOrColdBlockNthPercentile<true, BBType, BFIT>(PercentileCutoff,
222+
BB, BFI);
223+
}
224+
225+
template <typename BFIT>
226+
bool isHotBlockNthPercentile(int PercentileCutoff, BlockFrequency BlockFreq,
227+
BFIT *BFI) const {
228+
return isHotOrColdBlockNthPercentile<true, BFIT>(PercentileCutoff,
229+
BlockFreq, BFI);
230+
}
231+
156232
/// Returns true if BasicBlock \p BB is considered cold with regard to a given
157233
/// cold percentile cutoff value.
158234
/// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where
159235
/// the first two digits are the whole part. E.g. 995000 for 99.5 percentile.
160-
bool isColdBlockNthPercentile(int PercentileCutoff, const BasicBlock *BB,
161-
BlockFrequencyInfo *BFI) const;
236+
template <typename BBType, typename BFIT>
237+
bool isColdBlockNthPercentile(int PercentileCutoff, const BBType *BB,
238+
BFIT *BFI) const {
239+
return isHotOrColdBlockNthPercentile<false, BBType, BFIT>(PercentileCutoff,
240+
BB, BFI);
241+
}
242+
template <typename BFIT>
243+
bool isColdBlockNthPercentile(int PercentileCutoff, BlockFrequency BlockFreq,
244+
BFIT *BFI) const {
245+
return isHotOrColdBlockNthPercentile<false, BFIT>(PercentileCutoff,
246+
BlockFreq, BFI);
247+
}
162248
/// Returns true if the call site \p CB is considered hot.
163249
bool isHotCallSite(const CallBase &CB, BlockFrequencyInfo *BFI) const;
164250
/// Returns true if call site \p CB is considered cold.
@@ -178,18 +264,97 @@ class ProfileSummaryInfo {
178264
return ColdCountThreshold.value_or(0);
179265
}
180266

181-
private:
182-
template <bool isHot>
183-
bool isFunctionHotOrColdInCallGraphNthPercentile(
184-
int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) const;
185-
template <bool isHot>
186-
bool isHotOrColdCountNthPercentile(int PercentileCutoff, uint64_t C) const;
187-
template <bool isHot>
188-
bool isHotOrColdBlockNthPercentile(int PercentileCutoff,
189-
const BasicBlock *BB,
190-
BlockFrequencyInfo *BFI) const;
267+
private:
268+
template <typename FuncT>
269+
std::optional<uint64_t> getTotalCallCount(const FuncT *F) const {
270+
return std::nullopt;
271+
}
272+
273+
template <bool isHot, typename FuncT, typename BFIT>
274+
bool isFunctionHotOrColdInCallGraphNthPercentile(int PercentileCutoff,
275+
const FuncT *F,
276+
BFIT &FI) const {
277+
if (!F || !hasProfileSummary())
278+
return false;
279+
if (auto FunctionCount = getEntryCount(F)) {
280+
if (isHot &&
281+
isHotCountNthPercentile(PercentileCutoff, FunctionCount->getCount()))
282+
return true;
283+
if (!isHot && !isColdCountNthPercentile(PercentileCutoff,
284+
FunctionCount->getCount()))
285+
return false;
286+
}
287+
if (auto TotalCallCount = getTotalCallCount(F)) {
288+
if (isHot && isHotCountNthPercentile(PercentileCutoff, *TotalCallCount))
289+
return true;
290+
if (!isHot &&
291+
!isColdCountNthPercentile(PercentileCutoff, *TotalCallCount))
292+
return false;
293+
}
294+
for (const auto &BB : *F) {
295+
if (isHot && isHotBlockNthPercentile(PercentileCutoff, &BB, &FI))
296+
return true;
297+
if (!isHot && !isColdBlockNthPercentile(PercentileCutoff, &BB, &FI))
298+
return false;
299+
}
300+
return !isHot;
301+
}
302+
303+
template <bool isHot>
304+
bool isHotOrColdCountNthPercentile(int PercentileCutoff, uint64_t C) const;
305+
306+
template <bool isHot, typename BBType, typename BFIT>
307+
bool isHotOrColdBlockNthPercentile(int PercentileCutoff, const BBType *BB,
308+
BFIT *BFI) const {
309+
auto Count = BFI->getBlockProfileCount(BB);
310+
if (isHot)
311+
return Count && isHotCountNthPercentile(PercentileCutoff, *Count);
312+
else
313+
return Count && isColdCountNthPercentile(PercentileCutoff, *Count);
314+
}
315+
316+
template <bool isHot, typename BFIT>
317+
bool isHotOrColdBlockNthPercentile(int PercentileCutoff,
318+
BlockFrequency BlockFreq,
319+
BFIT *BFI) const {
320+
auto Count = BFI->getProfileCountFromFreq(BlockFreq.getFrequency());
321+
if (isHot)
322+
return Count && isHotCountNthPercentile(PercentileCutoff, *Count);
323+
else
324+
return Count && isColdCountNthPercentile(PercentileCutoff, *Count);
325+
}
326+
327+
template <typename FuncT>
328+
std::optional<Function::ProfileCount> getEntryCount(const FuncT *F) const {
329+
return F->getEntryCount();
330+
}
191331
};
192332

333+
template <>
334+
inline std::optional<uint64_t>
335+
ProfileSummaryInfo::getTotalCallCount<Function>(const Function *F) const {
336+
if (!hasSampleProfile())
337+
return std::nullopt;
338+
uint64_t TotalCallCount = 0;
339+
for (const auto &BB : *F) {
340+
for (const auto &I : BB) {
341+
if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
342+
if (auto CallCount = getProfileCount(cast<CallBase>(I), nullptr)) {
343+
TotalCallCount += *CallCount;
344+
}
345+
}
346+
}
347+
}
348+
return TotalCallCount;
349+
}
350+
351+
template <>
352+
inline std::optional<Function::ProfileCount>
353+
ProfileSummaryInfo::getEntryCount<MachineFunction>(
354+
const MachineFunction *F) const {
355+
return F->getFunction().getEntryCount();
356+
}
357+
193358
/// An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
194359
class ProfileSummaryInfoWrapperPass : public ImmutablePass {
195360
std::unique_ptr<ProfileSummaryInfo> PSI;

llvm/include/llvm/Transforms/Utils/SizeOpts.h

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ static inline bool isPGSOColdCodeOnly(ProfileSummaryInfo *PSI) {
4747
(PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize());
4848
}
4949

50-
template<typename AdapterT, typename FuncT, typename BFIT>
50+
template <typename FuncT, typename BFIT>
5151
bool shouldFuncOptimizeForSizeImpl(const FuncT *F, ProfileSummaryInfo *PSI,
5252
BFIT *BFI, PGSOQueryType QueryType) {
5353
assert(F);
@@ -58,34 +58,34 @@ bool shouldFuncOptimizeForSizeImpl(const FuncT *F, ProfileSummaryInfo *PSI,
5858
if (!EnablePGSO)
5959
return false;
6060
if (isPGSOColdCodeOnly(PSI))
61-
return AdapterT::isFunctionColdInCallGraph(F, PSI, *BFI);
61+
return PSI->isFunctionColdInCallGraph(F, *BFI);
6262
if (PSI->hasSampleProfile())
6363
// The "isCold" check seems to work better for Sample PGO as it could have
6464
// many profile-unannotated functions.
65-
return AdapterT::isFunctionColdInCallGraphNthPercentile(
66-
PgsoCutoffSampleProf, F, PSI, *BFI);
67-
return !AdapterT::isFunctionHotInCallGraphNthPercentile(PgsoCutoffInstrProf,
68-
F, PSI, *BFI);
65+
return PSI->isFunctionColdInCallGraphNthPercentile(PgsoCutoffSampleProf, F,
66+
*BFI);
67+
return !PSI->isFunctionHotInCallGraphNthPercentile(PgsoCutoffInstrProf, F,
68+
*BFI);
6969
}
7070

71-
template<typename AdapterT, typename BlockTOrBlockFreq, typename BFIT>
72-
bool shouldOptimizeForSizeImpl(BlockTOrBlockFreq BBOrBlockFreq, ProfileSummaryInfo *PSI,
73-
BFIT *BFI, PGSOQueryType QueryType) {
71+
template <typename BlockTOrBlockFreq, typename BFIT>
72+
bool shouldOptimizeForSizeImpl(BlockTOrBlockFreq BBOrBlockFreq,
73+
ProfileSummaryInfo *PSI, BFIT *BFI,
74+
PGSOQueryType QueryType) {
7475
if (!PSI || !BFI || !PSI->hasProfileSummary())
7576
return false;
7677
if (ForcePGSO)
7778
return true;
7879
if (!EnablePGSO)
7980
return false;
8081
if (isPGSOColdCodeOnly(PSI))
81-
return AdapterT::isColdBlock(BBOrBlockFreq, PSI, BFI);
82+
return PSI->isColdBlock(BBOrBlockFreq, BFI);
8283
if (PSI->hasSampleProfile())
8384
// The "isCold" check seems to work better for Sample PGO as it could have
8485
// many profile-unannotated functions.
85-
return AdapterT::isColdBlockNthPercentile(PgsoCutoffSampleProf,
86-
BBOrBlockFreq, PSI, BFI);
87-
return !AdapterT::isHotBlockNthPercentile(PgsoCutoffInstrProf, BBOrBlockFreq,
88-
PSI, BFI);
86+
return PSI->isColdBlockNthPercentile(PgsoCutoffSampleProf, BBOrBlockFreq,
87+
BFI);
88+
return !PSI->isHotBlockNthPercentile(PgsoCutoffInstrProf, BBOrBlockFreq, BFI);
8989
}
9090

9191
/// Returns true if function \p F is suggested to be size-optimized based on the

0 commit comments

Comments
 (0)