-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[InlineCost] Cache collectEphemeralValues() to save compile time #130210
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-analysis Author: vporpo (vporpo) Changes
Full diff: https://github.com/llvm/llvm-project/pull/130210.diff 4 Files Affected:
diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h
index 18fb7377ff667..3e03ab36df938 100644
--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -236,6 +236,7 @@ class DefaultInlineAdvisor : public InlineAdvisor {
std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) override;
InlineParams Params;
+ EphValuesCacheT EphValuesCache;
};
/// Used for dynamically registering InlineAdvisors as plugins
diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h
index ed54b0c077b4a..da05a9be84352 100644
--- a/llvm/include/llvm/Analysis/InlineCost.h
+++ b/llvm/include/llvm/Analysis/InlineCost.h
@@ -14,6 +14,7 @@
#define LLVM_ANALYSIS_INLINECOST_H
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/Analysis/InlineModelFeatureMaps.h"
#include "llvm/IR/PassManager.h"
@@ -31,6 +32,9 @@ class Function;
class ProfileSummaryInfo;
class TargetTransformInfo;
class TargetLibraryInfo;
+class Value;
+
+using EphValuesCacheT = MapVector<Function *, SmallPtrSet<const Value *, 32>>;
namespace InlineConstants {
// Various thresholds used by inline cost analysis.
@@ -273,14 +277,13 @@ int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call,
///
/// Also note that calling this function *dynamically* computes the cost of
/// inlining the callsite. It is an expensive, heavyweight call.
-InlineCost
-getInlineCost(CallBase &Call, const InlineParams &Params,
- TargetTransformInfo &CalleeTTI,
- function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
- function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
- function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
- ProfileSummaryInfo *PSI = nullptr,
- OptimizationRemarkEmitter *ORE = nullptr);
+InlineCost getInlineCost(
+ CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
+ function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
+ function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
+ ProfileSummaryInfo *PSI = nullptr, OptimizationRemarkEmitter *ORE = nullptr,
+ EphValuesCacheT *EphValuesCache = nullptr);
/// Get an InlineCost with the callee explicitly specified.
/// This allows you to calculate the cost of inlining a function via a
@@ -294,7 +297,8 @@ getInlineCost(CallBase &Call, Function *Callee, const InlineParams &Params,
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
ProfileSummaryInfo *PSI = nullptr,
- OptimizationRemarkEmitter *ORE = nullptr);
+ OptimizationRemarkEmitter *ORE = nullptr,
+ EphValuesCacheT *EphValuesCache = nullptr);
/// Returns InlineResult::success() if the call site should be always inlined
/// because of user directives, and the inlining is viable. Returns
diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
index 12553dd446a61..0417303cae16e 100644
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -133,7 +133,8 @@ void DefaultInlineAdvice::recordInliningImpl() {
}
std::optional<llvm::InlineCost> static getDefaultInlineAdvice(
- CallBase &CB, FunctionAnalysisManager &FAM, const InlineParams &Params) {
+ CallBase &CB, FunctionAnalysisManager &FAM, const InlineParams &Params,
+ EphValuesCacheT *EphValuesCache = nullptr) {
Function &Caller = *CB.getCaller();
ProfileSummaryInfo *PSI =
FAM.getResult<ModuleAnalysisManagerFunctionProxy>(Caller)
@@ -158,7 +159,8 @@ std::optional<llvm::InlineCost> static getDefaultInlineAdvice(
Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
DEBUG_TYPE);
return getInlineCost(CB, Params, CalleeTTI, GetAssumptionCache, GetTLI,
- GetBFI, PSI, RemarksEnabled ? &ORE : nullptr);
+ GetBFI, PSI, RemarksEnabled ? &ORE : nullptr,
+ EphValuesCache);
};
return llvm::shouldInline(
CB, CalleeTTI, GetInlineCost, ORE,
@@ -167,7 +169,7 @@ std::optional<llvm::InlineCost> static getDefaultInlineAdvice(
std::unique_ptr<InlineAdvice>
DefaultInlineAdvisor::getAdviceImpl(CallBase &CB) {
- auto OIC = getDefaultInlineAdvice(CB, FAM, Params);
+ auto OIC = getDefaultInlineAdvice(CB, FAM, Params, &EphValuesCache);
return std::make_unique<DefaultInlineAdvice>(
this, CB, OIC,
FAM.getResult<OptimizationRemarkEmitterAnalysis>(*CB.getCaller()));
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index c6b927c8eee2f..5c2cdbbf2e333 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -173,6 +173,11 @@ static cl::opt<bool> DisableGEPConstOperand(
"disable-gep-const-evaluation", cl::Hidden, cl::init(false),
cl::desc("Disables evaluation of GetElementPtr with constant operands"));
+static cl::opt<unsigned> EphValuesCacheSizeLimit(
+ "inline-ephvalues-cache-size-limit", cl::Hidden, cl::init(8),
+ cl::desc(
+ "Clear the cache of ephemeral values if it grows larger than this"));
+
namespace llvm {
std::optional<int> getStringFnAttrAsInt(const Attribute &Attr) {
if (Attr.isValid()) {
@@ -269,6 +274,10 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// easily cacheable. Instead, use the cover function paramHasAttr.
CallBase &CandidateCall;
+ /// Collecting the ephemeral values over and over again can be expensive, so
+ /// cache them.
+ EphValuesCacheT *EphValuesCache;
+
/// Extension points for handling callsite features.
// Called before a basic block was analyzed.
virtual void onBlockStart(const BasicBlock *BB) {}
@@ -510,10 +519,11 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
ProfileSummaryInfo *PSI = nullptr,
- OptimizationRemarkEmitter *ORE = nullptr)
+ OptimizationRemarkEmitter *ORE = nullptr,
+ EphValuesCacheT *EphValuesCache = nullptr)
: TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
GetTLI(GetTLI), PSI(PSI), F(Callee), DL(F.getDataLayout()), ORE(ORE),
- CandidateCall(Call) {}
+ CandidateCall(Call), EphValuesCache(EphValuesCache) {}
InlineResult analyze();
@@ -1126,9 +1136,9 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
ProfileSummaryInfo *PSI = nullptr,
OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,
- bool IgnoreThreshold = false)
+ bool IgnoreThreshold = false, EphValuesCacheT *EphValuesCache = nullptr)
: CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI, PSI,
- ORE),
+ ORE, EphValuesCache),
ComputeFullInlineCost(OptComputeFullInlineCost ||
Params.ComputeFullInlineCost || ORE ||
isCostBenefitAnalysisEnabled()),
@@ -2781,11 +2791,27 @@ InlineResult CallAnalyzer::analyze() {
NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
NumAllocaArgs = SROAArgValues.size();
- // FIXME: If a caller has multiple calls to a callee, we end up recomputing
- // the ephemeral values multiple times (and they're completely determined by
- // the callee, so this is purely duplicate work).
- SmallPtrSet<const Value *, 32> EphValues;
- CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
+ // Collecting the ephemeral values of `F` can be expensive, so collect them
+ // once per function and cache them for future reuse.
+ SmallPtrSet<const Value *, 32> EphValuesSet;
+ SmallPtrSet<const Value *, 32> *EphValues;
+ auto &AC = GetAssumptionCache(F);
+ if (EphValuesCache == nullptr) {
+ // No cache is being used, so collect the values every time.
+ CodeMetrics::collectEphemeralValues(&F, &AC, EphValuesSet);
+ EphValues = &EphValuesSet;
+ } else {
+ // If the ephemeral values for `F` are in the cache, then reuse them. Else
+ // collect them from scratch.
+ if (EphValuesCache->size() >= EphValuesCacheSizeLimit)
+ // If the cache grows larger than a limit remove the first element.
+ // NOTE: This is a linear-time operation so keep the cache size small!
+ EphValuesCache->erase(EphValuesCache->begin()->first);
+ auto Pair = EphValuesCache->insert({&F, {}});
+ if (Pair.second)
+ CodeMetrics::collectEphemeralValues(&F, &AC, Pair.first->second);
+ EphValues = &Pair.first->second;
+ }
// The worklist of live basic blocks in the callee *after* inlining. We avoid
// adding basic blocks of the callee which can be proven to be dead for this
@@ -2824,7 +2850,7 @@ InlineResult CallAnalyzer::analyze() {
// Analyze the cost of this block. If we blow through the threshold, this
// returns false, and we can bail on out.
- InlineResult IR = analyzeBlock(BB, EphValues);
+ InlineResult IR = analyzeBlock(BB, *EphValues);
if (!IR.isSuccess())
return IR;
@@ -2967,9 +2993,11 @@ InlineCost llvm::getInlineCost(
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
- ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
+ ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
+ EphValuesCacheT *EphValuesCache) {
return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
- GetAssumptionCache, GetTLI, GetBFI, PSI, ORE);
+ GetAssumptionCache, GetTLI, GetBFI, PSI, ORE,
+ EphValuesCache);
}
std::optional<int> llvm::getInliningCostEstimate(
@@ -3089,7 +3117,8 @@ InlineCost llvm::getInlineCost(
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
- ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
+ ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
+ EphValuesCacheT *EphValuesCache) {
auto UserDecision =
llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI);
@@ -3105,7 +3134,9 @@ InlineCost llvm::getInlineCost(
<< ")\n");
InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
- GetAssumptionCache, GetBFI, GetTLI, PSI, ORE);
+ GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
+ /*BoostIndirect=*/true, /*IgnoreThreshold=*/false,
+ EphValuesCache);
InlineResult ShouldInline = CA.analyze();
LLVM_DEBUG(CA.dump());
|
This solves the same problem as #129279 but using caching instead of introducing a hard limit. |
can we make this a proper pass manager analysis and query the analysis manager instead of an inliner-specific cache, just like |
@aeubanks perhaps something along these lines ? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks reasonable to me.
@@ -388,6 +389,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, | |||
Advice->recordUnsuccessfulInlining(IR); | |||
continue; | |||
} | |||
// The caller was modified, so invalidate Ephemeral Values. | |||
FAM.getResult<EphemeralValuesAnalysis>(F).clear(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hm, shouldn't we be invalidating all analyses on the function here, rather than just EphemeralValues?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think so, but let's make this a separate patch in case it breaks something. I added a TODO.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, that sounds reasonable.
@aeubanks any thoughts or comments ? |
#define LLVM_ANALYSIS_EPHEMERALVALUESCACHE_H | ||
|
||
#include "llvm/ADT/SmallPtrSet.h" | ||
#include "llvm/Analysis/AssumptionCache.h" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you could forward-declare AssumptionCache?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
llvm/lib/Analysis/InlineCost.cpp
Outdated
// ephemeral values cache if available. | ||
SmallPtrSet<const Value *, 32> EphValuesStorage; | ||
const SmallPtrSetImpl<const Value *> *EphValues = &EphValuesStorage; | ||
auto &AC = GetAssumptionCache(F); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can move the GetAssumptionCache() into the branch that uses it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr, | ||
ProfileSummaryInfo *PSI = nullptr, OptimizationRemarkEmitter *ORE = nullptr, | ||
std::optional<function_ref<EphemeralValuesCache &(Function &)>> | ||
GetEphValuesCache = std::nullopt); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
function_ref accepts nullptr, so I think it would be better to use function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache = nullptr
here, to save the std::optional wrapper. See GetBFI above.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
@@ -388,6 +389,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, | |||
Advice->recordUnsuccessfulInlining(IR); | |||
continue; | |||
} | |||
// The caller was modified, so invalidate Ephemeral Values. | |||
FAM.getResult<EphemeralValuesAnalysis>(F).clear(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, that sounds reasonable.
…sis pass This analysis pass collects the ephemeral values of a function and caches them for fast lookups. The collection of the ephemeral values is done lazily when the user calls `EphemeralValuesCache::ephValues()`.
`CallAnalyzer::analyze()` can take several hours to run if the function contains thousands of @llvm.assume() calls and there are thousands of callsites. The time is spent in `collectEphemeralvalues()`. This patch adds caching to InlineCost and will only collect the ephemeral values once per function.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sorry, I wasn't getting email updates for this PR for some reason
EphValues.clear(); | ||
Collected = false; | ||
} | ||
const SmallPtrSetImpl<const Value *> &ephValues() { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why do we need an extra layer of indirection? can't we just eagerly calculate the ephemeral values in run()
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you mean removing the indirection by incorporating the functionality of the EphemeralValuesCache
inside the EphemeralValuesAnalysis
class? Isn't it better to follow the two-class design where you implement the analysis in one class and have a separate wrapper class for the pass in case someone needs to use the analysis without a pass ?
As for the eager vs lazy approach, my guess is that lazy is preferable because it may save you some compile time if you don't end up calling the analysis, but I don't feel strongly about it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CodeMetrics::collectEphemeralValues()
is the API if someone wants to calculate ephemeral values, I don't think we need another layer on top of that. I think EphemeralValuesAnalysis::Result
could just be SmallPtrSet<const Value *, 32>
the pass should only call FAM.getAnalysis
if it's going to use it, so the laziness should be done at the caller level
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/64/builds/2541 Here is the relevant piece of the build log for the reference
|
… class This is a follow-up to llvm#130210. The EphemeralValuesAnalysis pass used to return an EphemeralValuesCache object which used to hold the ephemeral values and used to provide a lazy collection of the ephemeral values, and an invalidation using the `clear()` function. This patch removes the EphemeralValuesCache class completely and instead returns the SmallVector containing the ephemeral values.
… class (#132454) This is a follow-up to #130210. The EphemeralValuesAnalysis pass used to return an EphemeralValuesCache object which used to hold the ephemeral values and used to provide a lazy collection of the ephemeral values, and an invalidation using the `clear()` function. This patch removes the EphemeralValuesCache class completely and instead returns the SmallVector containing the ephemeral values.
…ValuesCache class (#132454) This is a follow-up to llvm/llvm-project#130210. The EphemeralValuesAnalysis pass used to return an EphemeralValuesCache object which used to hold the ephemeral values and used to provide a lazy collection of the ephemeral values, and an invalidation using the `clear()` function. This patch removes the EphemeralValuesCache class completely and instead returns the SmallVector containing the ephemeral values.
CallAnalyzer::analyze()
can take several hours to run if the function contains thousands of @llvm.assume() calls and there are thousands of callsites. The time is spent incollectEphemeralvalues()
.This patch adds caching and will only collect the ephemeral values once per function.