Skip to content

Commit f43a8bc

Browse files
committed
[Coro] Use DebugInfoCache to speed up cloning in CoroSplitPass
Summary:
We can use a DebugInfoFinder from DebugInfoCache, which is already primed on a compile unit, to speed up collection of global debug info.

The pass could likely be another 2x+ faster if we avoid rebuilding the set of global debug info. This needs further massaging of CloneFunction and ValueMapper, though, and can be done incrementally on top of this.

Comparing performance of CoroSplitPass at various points in this stack, this is anecdata from a sample cpp file compiled with full debug info:

|                 | Baseline | IdentityMD set | Prebuilt GlobalDI | Cached CU DIFinder (cur.) |
|-----------------+----------+----------------+-------------------+---------------------------|
| CoroSplitPass   | 306ms    | 221ms          | 68ms              | 17ms                      |
| CoroCloner      | 101ms    | 72ms           | 0.5ms             | 0.5ms                     |
| CollectGlobalDI | -        | -              | 63ms              | 13ms                      |
|-----------------+----------+----------------+-------------------+---------------------------|
| Speed up        | 1x       | 1.4x           | 4.5x              | 18x                       |

Test Plan:
ninja check-llvm-unit
ninja check-llvm
Compiled a sample cpp file with time trace to get the avg. duration of the pass and inner scopes.
1 parent 3982c34 commit f43a8bc

14 files changed

+73
-17
lines changed

llvm/include/llvm/Transforms/Coroutines/ABI.h

+9-4
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#ifndef LLVM_TRANSFORMS_COROUTINES_ABI_H
1616
#define LLVM_TRANSFORMS_COROUTINES_ABI_H
1717

18+
#include "llvm/Analysis/DebugInfoCache.h"
1819
#include "llvm/Analysis/TargetTransformInfo.h"
1920
#include "llvm/Transforms/Coroutines/CoroShape.h"
2021
#include "llvm/Transforms/Coroutines/MaterializationUtils.h"
@@ -53,7 +54,8 @@ class BaseABI {
5354
// Perform the function splitting according to the ABI.
5455
virtual void splitCoroutine(Function &F, coro::Shape &Shape,
5556
SmallVectorImpl<Function *> &Clones,
56-
TargetTransformInfo &TTI) = 0;
57+
TargetTransformInfo &TTI,
58+
const DebugInfoCache *DICache) = 0;
5759

5860
Function &F;
5961
coro::Shape &Shape;
@@ -73,7 +75,8 @@ class SwitchABI : public BaseABI {
7375

7476
void splitCoroutine(Function &F, coro::Shape &Shape,
7577
SmallVectorImpl<Function *> &Clones,
76-
TargetTransformInfo &TTI) override;
78+
TargetTransformInfo &TTI,
79+
const DebugInfoCache *DICache) override;
7780
};
7881

7982
class AsyncABI : public BaseABI {
@@ -86,7 +89,8 @@ class AsyncABI : public BaseABI {
8689

8790
void splitCoroutine(Function &F, coro::Shape &Shape,
8891
SmallVectorImpl<Function *> &Clones,
89-
TargetTransformInfo &TTI) override;
92+
TargetTransformInfo &TTI,
93+
const DebugInfoCache *DICache) override;
9094
};
9195

9296
class AnyRetconABI : public BaseABI {
@@ -99,7 +103,8 @@ class AnyRetconABI : public BaseABI {
99103

100104
void splitCoroutine(Function &F, coro::Shape &Shape,
101105
SmallVectorImpl<Function *> &Clones,
102-
TargetTransformInfo &TTI) override;
106+
TargetTransformInfo &TTI,
107+
const DebugInfoCache *DICache) override;
103108
};
104109

105110
} // end namespace coro

llvm/lib/Analysis/CGSCCPassManager.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/ADT/SmallPtrSet.h"
1515
#include "llvm/ADT/SmallVector.h"
1616
#include "llvm/ADT/iterator_range.h"
17+
#include "llvm/Analysis/DebugInfoCache.h"
1718
#include "llvm/Analysis/LazyCallGraph.h"
1819
#include "llvm/IR/Constant.h"
1920
#include "llvm/IR/InstIterator.h"
@@ -139,6 +140,11 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
139140
// Get the call graph for this module.
140141
LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
141142

143+
// Prime DebugInfoCache.
144+
// TODO: Currently, the only user is CoroSplitPass. Consider running
145+
// conditionally.
146+
AM.getResult<DebugInfoCacheAnalysis>(M);
147+
142148
// Get Function analysis manager from its proxy.
143149
FunctionAnalysisManager &FAM =
144150
AM.getCachedResult<FunctionAnalysisManagerModuleProxy>(M)->getManager();
@@ -350,6 +356,7 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
350356
// analysis proxies by handling them above and in any nested pass managers.
351357
PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>();
352358
PA.preserve<LazyCallGraphAnalysis>();
359+
PA.preserve<DebugInfoCacheAnalysis>();
353360
PA.preserve<CGSCCAnalysisManagerModuleProxy>();
354361
PA.preserve<FunctionAnalysisManagerModuleProxy>();
355362
return PA;

llvm/lib/Transforms/Coroutines/CoroSplit.cpp

+44-12
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "llvm/Analysis/CFG.h"
3232
#include "llvm/Analysis/CallGraph.h"
3333
#include "llvm/Analysis/ConstantFolding.h"
34+
#include "llvm/Analysis/DebugInfoCache.h"
3435
#include "llvm/Analysis/LazyCallGraph.h"
3536
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
3637
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -85,15 +86,39 @@ using namespace llvm;
8586

8687
namespace {
8788

89+
/// Look up the DebugInfoFinder that \p DICache holds for the compile unit of
/// \p F.
///
/// Returns nullptr when \p DICache is null, when \p F has no subprogram or the
/// subprogram has no compile unit, or when \p DICache has no entry for that
/// compile unit. Otherwise returns a pointer to the entry stored in
/// \p DICache->Result.
const DebugInfoFinder *cachedDIFinder(Function &F,
90+
const DebugInfoCache *DICache) {
91+
if (!DICache)
92+
return nullptr;
93+
94+
auto *SP = F.getSubprogram();
95+
auto *CU = SP ? SP->getUnit() : nullptr; // F may have no subprogram attached.
96+
if (!CU)
97+
return nullptr;
98+
99+
auto Found = DICache->Result.find(CU);
100+
if (Found == DICache->Result.end())
101+
return nullptr;
102+
103+
return &Found->getSecond(); // Address of the cached entry, not a copy.
104+
}
105+
88106
/// Collect (a known) subset of global debug info metadata potentially used by
89107
/// the function \p F.
90108
///
91109
/// This metadata set can be used to avoid cloning debug info not owned by \p F
92110
/// and is shared among all potential clones \p F.
93-
void collectGlobalDebugInfo(Function &F, MetadataSetTy &GlobalDebugInfo) {
111+
void collectGlobalDebugInfo(Function &F, MetadataSetTy &GlobalDebugInfo,
112+
const DebugInfoCache *DICache) {
94113
TimeTraceScope FunctionScope("CollectGlobalDebugInfo");
95114

96115
DebugInfoFinder DIFinder;
116+
117+
// Copy DIFinder from cache which is primed on F's compile unit when available
118+
auto *PrimedDIFinder = cachedDIFinder(F, DICache);
119+
if (PrimedDIFinder)
120+
DIFinder = *PrimedDIFinder;
121+
97122
DISubprogram *SPClonedWithinModule = CollectDebugInfoForCloning(
98123
F, CloneFunctionChangeType::LocalChangesOnly, DIFinder);
99124

@@ -1517,11 +1542,11 @@ namespace {
15171542
struct SwitchCoroutineSplitter {
15181543
static void split(Function &F, coro::Shape &Shape,
15191544
SmallVectorImpl<Function *> &Clones,
1520-
TargetTransformInfo &TTI) {
1545+
TargetTransformInfo &TTI, const DebugInfoCache *DICache) {
15211546
assert(Shape.ABI == coro::ABI::Switch);
15221547

15231548
MetadataSetTy GlobalDebugInfo;
1524-
collectGlobalDebugInfo(F, GlobalDebugInfo);
1549+
collectGlobalDebugInfo(F, GlobalDebugInfo, DICache);
15251550

15261551
// Create a resume clone by cloning the body of the original function,
15271552
// setting new entry block and replacing coro.suspend an appropriate value
@@ -1835,7 +1860,8 @@ CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
18351860

18361861
void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
18371862
SmallVectorImpl<Function *> &Clones,
1838-
TargetTransformInfo &TTI) {
1863+
TargetTransformInfo &TTI,
1864+
const DebugInfoCache *DICache) {
18391865
assert(Shape.ABI == coro::ABI::Async);
18401866
assert(Clones.empty());
18411867
// Reset various things that the optimizer might have decided it
@@ -1922,7 +1948,7 @@ void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
19221948
assert(Clones.size() == Shape.CoroSuspends.size());
19231949

19241950
MetadataSetTy GlobalDebugInfo;
1925-
collectGlobalDebugInfo(F, GlobalDebugInfo);
1951+
collectGlobalDebugInfo(F, GlobalDebugInfo, DICache);
19261952

19271953
for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
19281954
auto *Suspend = CS;
@@ -1935,7 +1961,8 @@ void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
19351961

19361962
void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape,
19371963
SmallVectorImpl<Function *> &Clones,
1938-
TargetTransformInfo &TTI) {
1964+
TargetTransformInfo &TTI,
1965+
const DebugInfoCache *DICache) {
19391966
assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
19401967
assert(Clones.empty());
19411968

@@ -2057,7 +2084,7 @@ void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape,
20572084
assert(Clones.size() == Shape.CoroSuspends.size());
20582085

20592086
MetadataSetTy GlobalDebugInfo;
2060-
collectGlobalDebugInfo(F, GlobalDebugInfo);
2087+
collectGlobalDebugInfo(F, GlobalDebugInfo, DICache);
20612088

20622089
for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
20632090
auto Suspend = CS;
@@ -2111,13 +2138,15 @@ static bool hasSafeElideCaller(Function &F) {
21112138

21122139
void coro::SwitchABI::splitCoroutine(Function &F, coro::Shape &Shape,
21132140
SmallVectorImpl<Function *> &Clones,
2114-
TargetTransformInfo &TTI) {
2115-
SwitchCoroutineSplitter::split(F, Shape, Clones, TTI);
2141+
TargetTransformInfo &TTI,
2142+
const DebugInfoCache *DICache) {
2143+
SwitchCoroutineSplitter::split(F, Shape, Clones, TTI, DICache);
21162144
}
21172145

21182146
static void doSplitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
21192147
coro::BaseABI &ABI, TargetTransformInfo &TTI,
2120-
bool OptimizeFrame) {
2148+
bool OptimizeFrame,
2149+
const DebugInfoCache *DICache) {
21212150
PrettyStackTraceFunction prettyStackTrace(F);
21222151

21232152
auto &Shape = ABI.Shape;
@@ -2142,7 +2171,7 @@ static void doSplitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
21422171
if (isNoSuspendCoroutine) {
21432172
handleNoSuspendCoroutine(Shape);
21442173
} else {
2145-
ABI.splitCoroutine(F, Shape, Clones, TTI);
2174+
ABI.splitCoroutine(F, Shape, Clones, TTI, DICache);
21462175
}
21472176

21482177
// Replace all the swifterror operations in the original function.
@@ -2339,6 +2368,9 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
23392368
auto &FAM =
23402369
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
23412370

2371+
const auto &MAMProxy = AM.getResult<ModuleAnalysisManagerCGSCCProxy>(C, CG);
2372+
const auto *DICache = MAMProxy.getCachedResult<DebugInfoCacheAnalysis>(M);
2373+
23422374
// Check for uses of llvm.coro.prepare.retcon/async.
23432375
SmallVector<Function *, 2> PrepareFns;
23442376
addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon");
@@ -2375,7 +2407,7 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
23752407

23762408
SmallVector<Function *, 4> Clones;
23772409
auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
2378-
doSplitCoroutine(F, Clones, *ABI, TTI, OptimizeFrame);
2410+
doSplitCoroutine(F, Clones, *ABI, TTI, OptimizeFrame, DICache);
23792411
CurrentSCC = &updateCallGraphAfterCoroutineSplit(
23802412
*N, Shape, Clones, *CurrentSCC, CG, AM, UR, FAM);
23812413

llvm/test/Other/new-pass-manager.ll

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
; CHECK-CGSCC-PASS-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*(FunctionAnalysisManager|AnalysisManager<.*Function.*>).*}},{{.*}}Module>
2424
; CHECK-CGSCC-PASS-NEXT: Running analysis: LazyCallGraphAnalysis
2525
; CHECK-CGSCC-PASS-NEXT: Running analysis: TargetLibraryAnalysis
26+
; CHECK-CGSCC-PASS-NEXT: Running analysis: DebugInfoCacheAnalysis
2627
; CHECK-CGSCC-PASS-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
2728
; CHECK-CGSCC-PASS-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
2829
; CHECK-CGSCC-PASS-NEXT: Running pass: NoOpCGSCCPass

llvm/test/Other/new-pm-defaults.ll

+1
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@
139139
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
140140
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
141141
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
142+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
142143
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
143144
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
144145
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-lto-defaults.ll

+1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
; CHECK-O23SZ-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
4444
; CHECK-O23SZ-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC
4545
; CHECK-O23SZ-NEXT: Running analysis: LazyCallGraphAnalysis
46+
; CHECK-O23SZ-NEXT: Running analysis: DebugInfoCacheAnalysis
4647
; CHECK-O23SZ-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
4748
; CHECK-O23SZ-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}>
4849
; CHECK-O23SZ-NEXT: Running pass: PostOrderFunctionAttrsPass

llvm/test/Other/new-pm-pgo-preinline.ll

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
; CHECK-Osz-NEXT: Running analysis: InlineAdvisorAnalysis
66
; CHECK-Osz-NEXT: Running analysis: InnerAnalysisManagerProxy
77
; CHECK-Osz-NEXT: Running analysis: LazyCallGraphAnalysis
8+
; CHECK-Osz-NEXT: Running analysis: DebugInfoCacheAnalysis
89
; CHECK-Osz-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy on (foo)
910
; CHECK-Osz-NEXT: Running analysis: OuterAnalysisManagerProxy
1011
; CHECK-Osz-NEXT: Running pass: InlinerPass on (foo)

llvm/test/Other/new-pm-thinlto-postlink-defaults.ll

+1
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
7575
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
7676
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
77+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
7778
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
7879
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
7980
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll

+1
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
6363
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
6464
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
65+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
6566
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
6667
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
6768
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll

+1
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
; CHECK-O-NEXT: Invalidating analysis: AAManager
7272
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
7373
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
74+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
7475
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
7576
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
7677
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-thinlto-prelink-defaults.ll

+1
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@
106106
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
107107
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
108108
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
109+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
109110
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
110111
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
111112
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll

+1
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
6363
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
6464
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
65+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
6566
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy on (foo)
6667
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
6768
; CHECK-O-NEXT: Running pass: InlinerPass on (foo)

llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll

+1
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
; CHECK-O-NEXT: Invalidating analysis: AAManager
7777
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
7878
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
79+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
7980
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
8081
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
8182
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/unittests/Analysis/CGSCCPassManagerTest.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "llvm/Analysis/CGSCCPassManager.h"
10+
#include "llvm/Analysis/DebugInfoCache.h"
1011
#include "llvm/Analysis/LazyCallGraph.h"
1112
#include "llvm/Analysis/TargetLibraryInfo.h"
1213
#include "llvm/AsmParser/Parser.h"
@@ -16,8 +17,8 @@
1617
#include "llvm/IR/Instructions.h"
1718
#include "llvm/IR/LLVMContext.h"
1819
#include "llvm/IR/Module.h"
19-
#include "llvm/IR/PassManager.h"
2020
#include "llvm/IR/PassInstrumentation.h"
21+
#include "llvm/IR/PassManager.h"
2122
#include "llvm/IR/Verifier.h"
2223
#include "llvm/Support/SourceMgr.h"
2324
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
@@ -255,6 +256,7 @@ class CGSCCPassManagerTest : public ::testing::Test {
255256
"}\n")) {
256257
FAM.registerPass([&] { return TargetLibraryAnalysis(); });
257258
MAM.registerPass([&] { return LazyCallGraphAnalysis(); });
259+
MAM.registerPass([&] { return DebugInfoCacheAnalysis(); });
258260
MAM.registerPass([&] { return FunctionAnalysisManagerModuleProxy(FAM); });
259261

260262
// Register required pass instrumentation analysis.

0 commit comments

Comments
 (0)