Skip to content

Commit d646191

Browse files
committed
[Coro] Use DebugInfoCache to speed up cloning in CoroSplitPass
Summary:
We can use a DebugInfoFinder from DebugInfoCache, which is already primed on a compile unit, to speed up collection of global debug info.

The pass could likely be another 2x+ faster if we avoided rebuilding the set of global debug info. This needs further massaging of CloneFunction and ValueMapper, though, and can be done incrementally on top of this.

Comparing the performance of CoroSplitPass at various points in this stack, this is anecdata from a sample cpp file compiled with full debug info:

|                 | Baseline | IdentityMD set | Prebuilt GlobalDI | Cached CU DIFinder (cur.) |
|-----------------+----------+----------------+-------------------+---------------------------|
| CoroSplitPass   | 306ms    | 221ms          | 68ms              | 17ms                      |
| CoroCloner      | 101ms    | 72ms           | 0.5ms             | 0.5ms                     |
| CollectGlobalDI | -        | -              | 63ms              | 13ms                      |
|-----------------+----------+----------------+-------------------+---------------------------|
| Speed up        | 1x       | 1.4x           | 4.5x              | 18x                       |

Test Plan:
ninja check-llvm-unit
ninja check-llvm
Compiled a sample cpp file with time trace to get the avg. duration of the pass and inner scopes.
1 parent 2cde791 commit d646191

14 files changed

+73
-17
lines changed

llvm/include/llvm/Transforms/Coroutines/ABI.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#ifndef LLVM_TRANSFORMS_COROUTINES_ABI_H
1616
#define LLVM_TRANSFORMS_COROUTINES_ABI_H
1717

18+
#include "llvm/Analysis/DebugInfoCache.h"
1819
#include "llvm/Analysis/TargetTransformInfo.h"
1920
#include "llvm/Transforms/Coroutines/CoroShape.h"
2021
#include "llvm/Transforms/Coroutines/MaterializationUtils.h"
@@ -53,7 +54,8 @@ class BaseABI {
5354
// Perform the function splitting according to the ABI.
5455
virtual void splitCoroutine(Function &F, coro::Shape &Shape,
5556
SmallVectorImpl<Function *> &Clones,
56-
TargetTransformInfo &TTI) = 0;
57+
TargetTransformInfo &TTI,
58+
const DebugInfoCache *DICache) = 0;
5759

5860
Function &F;
5961
coro::Shape &Shape;
@@ -73,7 +75,8 @@ class SwitchABI : public BaseABI {
7375

7476
void splitCoroutine(Function &F, coro::Shape &Shape,
7577
SmallVectorImpl<Function *> &Clones,
76-
TargetTransformInfo &TTI) override;
78+
TargetTransformInfo &TTI,
79+
const DebugInfoCache *DICache) override;
7780
};
7881

7982
class AsyncABI : public BaseABI {
@@ -86,7 +89,8 @@ class AsyncABI : public BaseABI {
8689

8790
void splitCoroutine(Function &F, coro::Shape &Shape,
8891
SmallVectorImpl<Function *> &Clones,
89-
TargetTransformInfo &TTI) override;
92+
TargetTransformInfo &TTI,
93+
const DebugInfoCache *DICache) override;
9094
};
9195

9296
class AnyRetconABI : public BaseABI {
@@ -99,7 +103,8 @@ class AnyRetconABI : public BaseABI {
99103

100104
void splitCoroutine(Function &F, coro::Shape &Shape,
101105
SmallVectorImpl<Function *> &Clones,
102-
TargetTransformInfo &TTI) override;
106+
TargetTransformInfo &TTI,
107+
const DebugInfoCache *DICache) override;
103108
};
104109

105110
} // end namespace coro

llvm/lib/Analysis/CGSCCPassManager.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/ADT/SmallPtrSet.h"
1515
#include "llvm/ADT/SmallVector.h"
1616
#include "llvm/ADT/iterator_range.h"
17+
#include "llvm/Analysis/DebugInfoCache.h"
1718
#include "llvm/Analysis/LazyCallGraph.h"
1819
#include "llvm/IR/Constant.h"
1920
#include "llvm/IR/InstIterator.h"
@@ -141,6 +142,11 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
141142
// Get the call graph for this module.
142143
LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
143144

145+
// Prime DebugInfoCache.
146+
// TODO: Currently, the only user is CoroSplitPass. Consider running
147+
// conditionally.
148+
AM.getResult<DebugInfoCacheAnalysis>(M);
149+
144150
// Get Function analysis manager from its proxy.
145151
FunctionAnalysisManager &FAM =
146152
AM.getCachedResult<FunctionAnalysisManagerModuleProxy>(M)->getManager();
@@ -352,6 +358,7 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
352358
// analysis proxies by handling them above and in any nested pass managers.
353359
PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>();
354360
PA.preserve<LazyCallGraphAnalysis>();
361+
PA.preserve<DebugInfoCacheAnalysis>();
355362
PA.preserve<CGSCCAnalysisManagerModuleProxy>();
356363
PA.preserve<FunctionAnalysisManagerModuleProxy>();
357364
return PA;

llvm/lib/Transforms/Coroutines/CoroSplit.cpp

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "llvm/Analysis/CFG.h"
3131
#include "llvm/Analysis/CallGraph.h"
3232
#include "llvm/Analysis/ConstantFolding.h"
33+
#include "llvm/Analysis/DebugInfoCache.h"
3334
#include "llvm/Analysis/LazyCallGraph.h"
3435
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
3536
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -84,15 +85,39 @@ using namespace llvm;
8485

8586
namespace {
8687

88+
/// Look up the DebugInfoFinder stored in \p DICache for the compile unit that
/// owns \p F's subprogram.
///
/// \returns a pointer to the finder held inside \p DICache->Result, or nullptr
/// when \p DICache is null, when \p F has no subprogram or that subprogram has
/// no compile unit, or when the cache has no entry for that compile unit.
const DebugInfoFinder *cachedDIFinder(Function &F,
89+
const DebugInfoCache *DICache) {
90+
if (!DICache) // No cache was supplied by the caller.
91+
return nullptr;
92+
93+
auto *SP = F.getSubprogram();
94+
auto *CU = SP ? SP->getUnit() : nullptr; // Guard against a missing subprogram.
95+
if (!CU)
96+
return nullptr;
97+
98+
auto Found = DICache->Result.find(CU); // Cache entries are looked up by compile unit.
99+
if (Found == DICache->Result.end())
100+
return nullptr;
101+
102+
return &Found->getSecond(); // Address of the finder stored in the cache entry.
103+
}
104+
87105
/// Collect (a known) subset of global debug info metadata potentially used by
88106
/// the function \p F.
89107
///
90108
/// This metadata set can be used to avoid cloning debug info not owned by \p F
91109
/// and is shared among all potential clones of \p F.
92-
void collectGlobalDebugInfo(Function &F, MetadataSetTy &GlobalDebugInfo) {
110+
void collectGlobalDebugInfo(Function &F, MetadataSetTy &GlobalDebugInfo,
111+
const DebugInfoCache *DICache) {
93112
TimeTraceScope FunctionScope("CollectGlobalDebugInfo");
94113

95114
DebugInfoFinder DIFinder;
115+
116+
// When available, start from a copy of the cached DIFinder that is already
// primed on F's compile unit.
117+
auto *PrimedDIFinder = cachedDIFinder(F, DICache);
118+
if (PrimedDIFinder)
119+
DIFinder = *PrimedDIFinder;
120+
96121
DISubprogram *SPClonedWithinModule = ProcessSubprogramAttachment(
97122
F, CloneFunctionChangeType::LocalChangesOnly, DIFinder);
98123

@@ -1516,11 +1541,11 @@ namespace {
15161541
struct SwitchCoroutineSplitter {
15171542
static void split(Function &F, coro::Shape &Shape,
15181543
SmallVectorImpl<Function *> &Clones,
1519-
TargetTransformInfo &TTI) {
1544+
TargetTransformInfo &TTI, const DebugInfoCache *DICache) {
15201545
assert(Shape.ABI == coro::ABI::Switch);
15211546

15221547
MetadataSetTy GlobalDebugInfo;
1523-
collectGlobalDebugInfo(F, GlobalDebugInfo);
1548+
collectGlobalDebugInfo(F, GlobalDebugInfo, DICache);
15241549

15251550
// Create a resume clone by cloning the body of the original function,
15261551
// setting new entry block and replacing coro.suspend an appropriate value
@@ -1834,7 +1859,8 @@ CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
18341859

18351860
void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
18361861
SmallVectorImpl<Function *> &Clones,
1837-
TargetTransformInfo &TTI) {
1862+
TargetTransformInfo &TTI,
1863+
const DebugInfoCache *DICache) {
18381864
assert(Shape.ABI == coro::ABI::Async);
18391865
assert(Clones.empty());
18401866
// Reset various things that the optimizer might have decided it
@@ -1921,7 +1947,7 @@ void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
19211947
assert(Clones.size() == Shape.CoroSuspends.size());
19221948

19231949
MetadataSetTy GlobalDebugInfo;
1924-
collectGlobalDebugInfo(F, GlobalDebugInfo);
1950+
collectGlobalDebugInfo(F, GlobalDebugInfo, DICache);
19251951

19261952
for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) {
19271953
auto *Suspend = Shape.CoroSuspends[Idx];
@@ -1934,7 +1960,8 @@ void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
19341960

19351961
void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape,
19361962
SmallVectorImpl<Function *> &Clones,
1937-
TargetTransformInfo &TTI) {
1963+
TargetTransformInfo &TTI,
1964+
const DebugInfoCache *DICache) {
19381965
assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
19391966
assert(Clones.empty());
19401967

@@ -2055,7 +2082,7 @@ void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape,
20552082
assert(Clones.size() == Shape.CoroSuspends.size());
20562083

20572084
MetadataSetTy GlobalDebugInfo;
2058-
collectGlobalDebugInfo(F, GlobalDebugInfo);
2085+
collectGlobalDebugInfo(F, GlobalDebugInfo, DICache);
20592086

20602087
for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
20612088
auto Suspend = Shape.CoroSuspends[i];
@@ -2109,13 +2136,15 @@ static bool hasSafeElideCaller(Function &F) {
21092136

21102137
void coro::SwitchABI::splitCoroutine(Function &F, coro::Shape &Shape,
21112138
SmallVectorImpl<Function *> &Clones,
2112-
TargetTransformInfo &TTI) {
2113-
SwitchCoroutineSplitter::split(F, Shape, Clones, TTI);
2139+
TargetTransformInfo &TTI,
2140+
const DebugInfoCache *DICache) {
2141+
SwitchCoroutineSplitter::split(F, Shape, Clones, TTI, DICache);
21142142
}
21152143

21162144
static void doSplitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
21172145
coro::BaseABI &ABI, TargetTransformInfo &TTI,
2118-
bool OptimizeFrame) {
2146+
bool OptimizeFrame,
2147+
const DebugInfoCache *DICache) {
21192148
PrettyStackTraceFunction prettyStackTrace(F);
21202149

21212150
auto &Shape = ABI.Shape;
@@ -2140,7 +2169,7 @@ static void doSplitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
21402169
if (isNoSuspendCoroutine) {
21412170
handleNoSuspendCoroutine(Shape);
21422171
} else {
2143-
ABI.splitCoroutine(F, Shape, Clones, TTI);
2172+
ABI.splitCoroutine(F, Shape, Clones, TTI, DICache);
21442173
}
21452174

21462175
// Replace all the swifterror operations in the original function.
@@ -2337,6 +2366,9 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
23372366
auto &FAM =
23382367
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
23392368

2369+
const auto &MAMProxy = AM.getResult<ModuleAnalysisManagerCGSCCProxy>(C, CG);
2370+
const auto *DICache = MAMProxy.getCachedResult<DebugInfoCacheAnalysis>(M);
2371+
23402372
// Check for uses of llvm.coro.prepare.retcon/async.
23412373
SmallVector<Function *, 2> PrepareFns;
23422374
addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon");
@@ -2373,7 +2405,7 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
23732405

23742406
SmallVector<Function *, 4> Clones;
23752407
auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
2376-
doSplitCoroutine(F, Clones, *ABI, TTI, OptimizeFrame);
2408+
doSplitCoroutine(F, Clones, *ABI, TTI, OptimizeFrame, DICache);
23772409
CurrentSCC = &updateCallGraphAfterCoroutineSplit(
23782410
*N, Shape, Clones, *CurrentSCC, CG, AM, UR, FAM);
23792411

llvm/test/Other/new-pass-manager.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
; CHECK-CGSCC-PASS-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*(FunctionAnalysisManager|AnalysisManager<.*Function.*>).*}},{{.*}}Module>
2424
; CHECK-CGSCC-PASS-NEXT: Running analysis: LazyCallGraphAnalysis
2525
; CHECK-CGSCC-PASS-NEXT: Running analysis: TargetLibraryAnalysis
26+
; CHECK-CGSCC-PASS-NEXT: Running analysis: DebugInfoCacheAnalysis
2627
; CHECK-CGSCC-PASS-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
2728
; CHECK-CGSCC-PASS-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
2829
; CHECK-CGSCC-PASS-NEXT: Running pass: NoOpCGSCCPass

llvm/test/Other/new-pm-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@
138138
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
139139
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
140140
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
141+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
141142
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
142143
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
143144
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-lto-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC
4848
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
4949
; CHECK-O1-NEXT: Running analysis: TargetLibraryAnalysis
50+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
5051
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
5152
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}>
5253
; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass

llvm/test/Other/new-pm-pgo-preinline.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
; CHECK-Osz-NEXT: Running analysis: InlineAdvisorAnalysis
66
; CHECK-Osz-NEXT: Running analysis: InnerAnalysisManagerProxy
77
; CHECK-Osz-NEXT: Running analysis: LazyCallGraphAnalysis
8+
; CHECK-Osz-NEXT: Running analysis: DebugInfoCacheAnalysis
89
; CHECK-Osz-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy on (foo)
910
; CHECK-Osz-NEXT: Running analysis: OuterAnalysisManagerProxy
1011
; CHECK-Osz-NEXT: Running pass: InlinerPass on (foo)

llvm/test/Other/new-pm-thinlto-postlink-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
7474
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
7575
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
76+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
7677
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
7778
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
7879
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
6262
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
6363
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
64+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
6465
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
6566
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
6667
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
; CHECK-O-NEXT: Invalidating analysis: AAManager
7171
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
7272
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
73+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
7374
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
7475
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
7576
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-thinlto-prelink-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@
105105
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
106106
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
107107
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
108+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
108109
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
109110
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
110111
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
6262
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
6363
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
64+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
6465
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy on (foo)
6566
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
6667
; CHECK-O-NEXT: Running pass: InlinerPass on (foo)

llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
; CHECK-O-NEXT: Invalidating analysis: AAManager
7676
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
7777
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
78+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
7879
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
7980
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
8081
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/unittests/Analysis/CGSCCPassManagerTest.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "llvm/Analysis/CGSCCPassManager.h"
10+
#include "llvm/Analysis/DebugInfoCache.h"
1011
#include "llvm/Analysis/LazyCallGraph.h"
1112
#include "llvm/Analysis/TargetLibraryInfo.h"
1213
#include "llvm/AsmParser/Parser.h"
@@ -16,8 +17,8 @@
1617
#include "llvm/IR/Instructions.h"
1718
#include "llvm/IR/LLVMContext.h"
1819
#include "llvm/IR/Module.h"
19-
#include "llvm/IR/PassManager.h"
2020
#include "llvm/IR/PassInstrumentation.h"
21+
#include "llvm/IR/PassManager.h"
2122
#include "llvm/IR/Verifier.h"
2223
#include "llvm/Support/SourceMgr.h"
2324
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
@@ -255,6 +256,7 @@ class CGSCCPassManagerTest : public ::testing::Test {
255256
"}\n")) {
256257
FAM.registerPass([&] { return TargetLibraryAnalysis(); });
257258
MAM.registerPass([&] { return LazyCallGraphAnalysis(); });
259+
MAM.registerPass([&] { return DebugInfoCacheAnalysis(); });
258260
MAM.registerPass([&] { return FunctionAnalysisManagerModuleProxy(FAM); });
259261

260262
// Register required pass instrumentation analysis.

0 commit comments

Comments
 (0)