Skip to content

Commit 4c76540

Browse files
committed
[Coro] Use DebugInfoCache to speed up cloning in CoroSplitPass
Summary: We can use a DebugInfoFinder from DebugInfoCache, which is already primed on a compile unit, to speed up collection of global debug info.

The pass could likely be another 2x+ faster if we avoided rebuilding the set of global debug info. That needs further massaging of CloneFunction and ValueMapper, though, and can be done incrementally on top of this change.

The table below compares the performance of CoroSplitPass at various points in this stack. This is anecdotal data from a sample cpp file compiled with full debug info:

|                 | Baseline | IdentityMD set | Prebuilt GlobalDI | Cached CU DIFinder (current) |
|-----------------|----------|----------------|-------------------|------------------------------|
| CoroSplitPass   | 306ms    | 221ms          | 68ms              | 17ms                         |
| CoroCloner      | 101ms    | 72ms           | 0.5ms             | 0.5ms                        |
| CollectGlobalDI | -        | -              | 63ms              | 13ms                         |
| Speed up        | 1x       | 1.4x           | 4.5x              | 18x                          |

Test Plan:
- ninja check-llvm-unit
- ninja check-llvm
- Compiled a sample cpp file with time trace to get the average duration of the pass and its inner scopes.
1 parent d1479b9 commit 4c76540

13 files changed

+63
-13
lines changed

llvm/lib/Analysis/CGSCCPassManager.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/ADT/SmallPtrSet.h"
1515
#include "llvm/ADT/SmallVector.h"
1616
#include "llvm/ADT/iterator_range.h"
17+
#include "llvm/Analysis/DebugInfoCache.h"
1718
#include "llvm/Analysis/LazyCallGraph.h"
1819
#include "llvm/IR/Constant.h"
1920
#include "llvm/IR/InstIterator.h"
@@ -141,6 +142,11 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
141142
// Get the call graph for this module.
142143
LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
143144

145+
// Prime DebugInfoCache.
146+
// TODO: Currently, the only user is CoroSplitPass. Consider running
147+
// conditionally.
148+
AM.getResult<DebugInfoCacheAnalysis>(M);
149+
144150
// Get Function analysis manager from its proxy.
145151
FunctionAnalysisManager &FAM =
146152
AM.getCachedResult<FunctionAnalysisManagerModuleProxy>(M)->getManager();
@@ -352,6 +358,7 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
352358
// analysis proxies by handling them above and in any nested pass managers.
353359
PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>();
354360
PA.preserve<LazyCallGraphAnalysis>();
361+
PA.preserve<DebugInfoCacheAnalysis>();
355362
PA.preserve<CGSCCAnalysisManagerModuleProxy>();
356363
PA.preserve<FunctionAnalysisManagerModuleProxy>();
357364
return PA;

llvm/lib/Transforms/Coroutines/CoroSplit.cpp

+43-12
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "llvm/Analysis/CFG.h"
3232
#include "llvm/Analysis/CallGraph.h"
3333
#include "llvm/Analysis/ConstantFolding.h"
34+
#include "llvm/Analysis/DebugInfoCache.h"
3435
#include "llvm/Analysis/LazyCallGraph.h"
3536
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
3637
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -82,15 +83,39 @@ using namespace llvm;
8283

8384
namespace {
8485

86+
/// Look up the cached DebugInfoFinder for the compile unit that owns \p F.
///
/// \param F        Function whose subprogram's compile unit is used as the
///                 lookup key.
/// \param DICache  Optional cache to search; may be null.
/// \returns A pointer to the finder stored inside \p DICache for F's compile
///          unit, or nullptr when \p DICache is null, when \p F has no
///          subprogram or that subprogram has no compile unit, or when the
///          cache holds no entry for the unit.
const DebugInfoFinder *cachedDIFinder(Function &F,
87+
const DebugInfoCache *DICache) {
88+
// No cache was supplied, so there is nothing to look up.
if (!DICache)
89+
return nullptr;
90+

91+
// The cache is keyed by compile unit, reached through F's subprogram.
auto *SP = F.getSubprogram();
92+
auto *CU = SP ? SP->getUnit() : nullptr;
93+
if (!CU)
94+
return nullptr;
95+

96+
auto Found = DICache->Result.find(CU);
97+
if (Found == DICache->Result.end())
98+
return nullptr;
99+

100+
// Return the address of the cached entry itself; no copy is made here.
return &Found->getSecond();
101+
}
102+
85103
/// Collect (a known) subset of global debug info metadata potentially used by
86104
/// the function \p F.
87105
///
88106
/// This metadata set can be used to avoid cloning debug info not owned by \p F
89107
/// and is shared among all potential clones \p F.
90-
void collectGlobalDebugInfo(Function &F, MetadataSetTy &GlobalDebugInfo) {
108+
void collectGlobalDebugInfo(Function &F, MetadataSetTy &GlobalDebugInfo,
109+
const DebugInfoCache *DICache) {
91110
TimeTraceScope FunctionScope("CollectGlobalDebugInfo");
92111

93112
DebugInfoFinder DIFinder;
113+
114+
// Copy DIFinder from cache which is primed on F's compile unit when available
115+
auto *PrimedDIFinder = cachedDIFinder(F, DICache);
116+
if (PrimedDIFinder)
117+
DIFinder = *PrimedDIFinder;
118+
94119
DISubprogram *SPClonedWithinModule = ProcessSubprogramAttachment(
95120
F, CloneFunctionChangeType::LocalChangesOnly, DIFinder);
96121

@@ -1514,11 +1539,11 @@ namespace {
15141539
struct SwitchCoroutineSplitter {
15151540
static void split(Function &F, coro::Shape &Shape,
15161541
SmallVectorImpl<Function *> &Clones,
1517-
TargetTransformInfo &TTI) {
1542+
TargetTransformInfo &TTI, const DebugInfoCache *DICache) {
15181543
assert(Shape.ABI == coro::ABI::Switch);
15191544

15201545
MetadataSetTy GlobalDebugInfo;
1521-
collectGlobalDebugInfo(F, GlobalDebugInfo);
1546+
collectGlobalDebugInfo(F, GlobalDebugInfo, DICache);
15221547

15231548
// Create a resume clone by cloning the body of the original function,
15241549
// setting new entry block and replacing coro.suspend an appropriate value
@@ -1832,7 +1857,8 @@ CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
18321857

18331858
static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
18341859
SmallVectorImpl<Function *> &Clones,
1835-
TargetTransformInfo &TTI) {
1860+
TargetTransformInfo &TTI,
1861+
const DebugInfoCache *DICache) {
18361862
assert(Shape.ABI == coro::ABI::Async);
18371863
assert(Clones.empty());
18381864
// Reset various things that the optimizer might have decided it
@@ -1919,7 +1945,7 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
19191945
assert(Clones.size() == Shape.CoroSuspends.size());
19201946

19211947
MetadataSetTy GlobalDebugInfo;
1922-
collectGlobalDebugInfo(F, GlobalDebugInfo);
1948+
collectGlobalDebugInfo(F, GlobalDebugInfo, DICache);
19231949

19241950
for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) {
19251951
auto *Suspend = Shape.CoroSuspends[Idx];
@@ -1932,7 +1958,8 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
19321958

19331959
static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
19341960
SmallVectorImpl<Function *> &Clones,
1935-
TargetTransformInfo &TTI) {
1961+
TargetTransformInfo &TTI,
1962+
const DebugInfoCache *DICache) {
19361963
assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
19371964
assert(Clones.empty());
19381965

@@ -2053,7 +2080,7 @@ static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
20532080
assert(Clones.size() == Shape.CoroSuspends.size());
20542081

20552082
MetadataSetTy GlobalDebugInfo;
2056-
collectGlobalDebugInfo(F, GlobalDebugInfo);
2083+
collectGlobalDebugInfo(F, GlobalDebugInfo, DICache);
20572084

20582085
for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
20592086
auto Suspend = Shape.CoroSuspends[i];
@@ -2108,7 +2135,8 @@ static bool hasSafeElideCaller(Function &F) {
21082135
static coro::Shape
21092136
splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
21102137
TargetTransformInfo &TTI, bool OptimizeFrame,
2111-
std::function<bool(Instruction &)> MaterializableCallback) {
2138+
std::function<bool(Instruction &)> MaterializableCallback,
2139+
const DebugInfoCache *DICache) {
21122140
PrettyStackTraceFunction prettyStackTrace(F);
21132141

21142142
// The suspend-crossing algorithm in buildCoroutineFrame get tripped
@@ -2138,14 +2166,14 @@ splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
21382166
} else {
21392167
switch (Shape.ABI) {
21402168
case coro::ABI::Switch:
2141-
SwitchCoroutineSplitter::split(F, Shape, Clones, TTI);
2169+
SwitchCoroutineSplitter::split(F, Shape, Clones, TTI, DICache);
21422170
break;
21432171
case coro::ABI::Async:
2144-
splitAsyncCoroutine(F, Shape, Clones, TTI);
2172+
splitAsyncCoroutine(F, Shape, Clones, TTI, DICache);
21452173
break;
21462174
case coro::ABI::Retcon:
21472175
case coro::ABI::RetconOnce:
2148-
splitRetconCoroutine(F, Shape, Clones, TTI);
2176+
splitRetconCoroutine(F, Shape, Clones, TTI, DICache);
21492177
break;
21502178
}
21512179
}
@@ -2282,6 +2310,9 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
22822310
auto &FAM =
22832311
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
22842312

2313+
const auto &MAMProxy = AM.getResult<ModuleAnalysisManagerCGSCCProxy>(C, CG);
2314+
const auto *DICache = MAMProxy.getCachedResult<DebugInfoCacheAnalysis>(M);
2315+
22852316
// Check for uses of llvm.coro.prepare.retcon/async.
22862317
SmallVector<Function *, 2> PrepareFns;
22872318
addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon");
@@ -2307,7 +2338,7 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
23072338
SmallVector<Function *, 4> Clones;
23082339
coro::Shape Shape =
23092340
splitCoroutine(F, Clones, FAM.getResult<TargetIRAnalysis>(F),
2310-
OptimizeFrame, MaterializableCallback);
2341+
OptimizeFrame, MaterializableCallback, DICache);
23112342
CurrentSCC = &updateCallGraphAfterCoroutineSplit(
23122343
*N, Shape, Clones, *CurrentSCC, CG, AM, UR, FAM);
23132344

llvm/test/Other/new-pass-manager.ll

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
; CHECK-CGSCC-PASS-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*(FunctionAnalysisManager|AnalysisManager<.*Function.*>).*}},{{.*}}Module>
2424
; CHECK-CGSCC-PASS-NEXT: Running analysis: LazyCallGraphAnalysis
2525
; CHECK-CGSCC-PASS-NEXT: Running analysis: TargetLibraryAnalysis
26+
; CHECK-CGSCC-PASS-NEXT: Running analysis: DebugInfoCacheAnalysis
2627
; CHECK-CGSCC-PASS-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
2728
; CHECK-CGSCC-PASS-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
2829
; CHECK-CGSCC-PASS-NEXT: Running pass: NoOpCGSCCPass

llvm/test/Other/new-pm-defaults.ll

+1
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@
138138
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
139139
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
140140
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
141+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
141142
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
142143
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
143144
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-lto-defaults.ll

+1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC
4848
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
4949
; CHECK-O1-NEXT: Running analysis: TargetLibraryAnalysis
50+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
5051
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
5152
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}>
5253
; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass

llvm/test/Other/new-pm-pgo-preinline.ll

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
; CHECK-Osz-NEXT: Running analysis: InlineAdvisorAnalysis
66
; CHECK-Osz-NEXT: Running analysis: InnerAnalysisManagerProxy
77
; CHECK-Osz-NEXT: Running analysis: LazyCallGraphAnalysis
8+
; CHECK-Osz-NEXT: Running analysis: DebugInfoCacheAnalysis
89
; CHECK-Osz-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy on (foo)
910
; CHECK-Osz-NEXT: Running analysis: OuterAnalysisManagerProxy
1011
; CHECK-Osz-NEXT: Running pass: InlinerPass on (foo)

llvm/test/Other/new-pm-thinlto-postlink-defaults.ll

+1
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
7474
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
7575
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
76+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
7677
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
7778
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
7879
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll

+1
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
6262
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
6363
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
64+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
6465
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
6566
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
6667
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll

+1
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
; CHECK-O-NEXT: Invalidating analysis: AAManager
7171
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
7272
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
73+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
7374
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
7475
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
7576
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-thinlto-prelink-defaults.ll

+1
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@
105105
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
106106
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
107107
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
108+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
108109
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
109110
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
110111
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll

+1
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
6262
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
6363
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
64+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
6465
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy on (foo)
6566
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
6667
; CHECK-O-NEXT: Running pass: InlinerPass on (foo)

llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll

+1
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
; CHECK-O-NEXT: Invalidating analysis: AAManager
7676
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
7777
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
78+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
7879
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
7980
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
8081
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/unittests/Analysis/CGSCCPassManagerTest.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "llvm/Analysis/CGSCCPassManager.h"
10+
#include "llvm/Analysis/DebugInfoCache.h"
1011
#include "llvm/Analysis/LazyCallGraph.h"
1112
#include "llvm/Analysis/TargetLibraryInfo.h"
1213
#include "llvm/AsmParser/Parser.h"
@@ -16,8 +17,8 @@
1617
#include "llvm/IR/Instructions.h"
1718
#include "llvm/IR/LLVMContext.h"
1819
#include "llvm/IR/Module.h"
19-
#include "llvm/IR/PassManager.h"
2020
#include "llvm/IR/PassInstrumentation.h"
21+
#include "llvm/IR/PassManager.h"
2122
#include "llvm/IR/Verifier.h"
2223
#include "llvm/Support/SourceMgr.h"
2324
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
@@ -255,6 +256,7 @@ class CGSCCPassManagerTest : public ::testing::Test {
255256
"}\n")) {
256257
FAM.registerPass([&] { return TargetLibraryAnalysis(); });
257258
MAM.registerPass([&] { return LazyCallGraphAnalysis(); });
259+
MAM.registerPass([&] { return DebugInfoCacheAnalysis(); });
258260
MAM.registerPass([&] { return FunctionAnalysisManagerModuleProxy(FAM); });
259261

260262
// Register required pass instrumentation analysis.

0 commit comments

Comments
 (0)