Skip to content

Commit 81f1380

Browse files
committed
[Coro] Use DebugInfoCache to speed up cloning in CoroSplitPass
Summary: We can use a DebugInfoFinder from DebugInfoCache which is already primed on a compile unit to speed up collection of module-level debug info.

The pass could likely be another 2x+ faster if we avoid rebuilding the set of common debug info. This needs further massaging of CloneFunction and ValueMapper, though, and can be done incrementally on top of this.

Comparing performance of CoroSplitPass at various points in this stack, this is anecdata from a sample cpp file compiled with full debug info:

|                 | Baseline | IdentityMD set | Prebuilt CommonDI | Cached CU DIFinder (cur.) |
|-----------------|----------|----------------|-------------------|---------------------------|
| CoroSplitPass   | 306ms    | 221ms          | 68ms              | 17ms                      |
| CoroCloner      | 101ms    | 72ms           | 0.5ms             | 0.5ms                     |
| CollectCommonDI | -        | -              | 63ms              | 13ms                      |
| Speed up        | 1x       | 1.4x           | 4.5x              | 18x                       |

Test Plan:
ninja check-llvm-unit
ninja check-llvm
Compiled a sample cpp file with time trace to get the avg. duration of the pass and inner scopes.

stack-info: PR: #118630, branch: users/artempyanykh/fast-coro-upstream/11
1 parent f727512 commit 81f1380

14 files changed

+72
-17
lines changed

llvm/include/llvm/Transforms/Coroutines/ABI.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#ifndef LLVM_TRANSFORMS_COROUTINES_ABI_H
1616
#define LLVM_TRANSFORMS_COROUTINES_ABI_H
1717

18+
#include "llvm/Analysis/DebugInfoCache.h"
1819
#include "llvm/Analysis/TargetTransformInfo.h"
1920
#include "llvm/Transforms/Coroutines/CoroShape.h"
2021
#include "llvm/Transforms/Coroutines/MaterializationUtils.h"
@@ -53,7 +54,8 @@ class BaseABI {
5354
// Perform the function splitting according to the ABI.
5455
virtual void splitCoroutine(Function &F, coro::Shape &Shape,
5556
SmallVectorImpl<Function *> &Clones,
56-
TargetTransformInfo &TTI) = 0;
57+
TargetTransformInfo &TTI,
58+
const DebugInfoCache *DICache) = 0;
5759

5860
Function &F;
5961
coro::Shape &Shape;
@@ -73,7 +75,8 @@ class SwitchABI : public BaseABI {
7375

7476
void splitCoroutine(Function &F, coro::Shape &Shape,
7577
SmallVectorImpl<Function *> &Clones,
76-
TargetTransformInfo &TTI) override;
78+
TargetTransformInfo &TTI,
79+
const DebugInfoCache *DICache) override;
7780
};
7881

7982
class AsyncABI : public BaseABI {
@@ -86,7 +89,8 @@ class AsyncABI : public BaseABI {
8689

8790
void splitCoroutine(Function &F, coro::Shape &Shape,
8891
SmallVectorImpl<Function *> &Clones,
89-
TargetTransformInfo &TTI) override;
92+
TargetTransformInfo &TTI,
93+
const DebugInfoCache *DICache) override;
9094
};
9195

9296
class AnyRetconABI : public BaseABI {
@@ -99,7 +103,8 @@ class AnyRetconABI : public BaseABI {
99103

100104
void splitCoroutine(Function &F, coro::Shape &Shape,
101105
SmallVectorImpl<Function *> &Clones,
102-
TargetTransformInfo &TTI) override;
106+
TargetTransformInfo &TTI,
107+
const DebugInfoCache *DICache) override;
103108
};
104109

105110
} // end namespace coro

llvm/lib/Analysis/CGSCCPassManager.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/ADT/SmallPtrSet.h"
1515
#include "llvm/ADT/SmallVector.h"
1616
#include "llvm/ADT/iterator_range.h"
17+
#include "llvm/Analysis/DebugInfoCache.h"
1718
#include "llvm/Analysis/LazyCallGraph.h"
1819
#include "llvm/IR/Constant.h"
1920
#include "llvm/IR/InstIterator.h"
@@ -139,6 +140,11 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
139140
// Get the call graph for this module.
140141
LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
141142

143+
// Prime DebugInfoCache.
144+
// TODO: Currently, the only user is CoroSplitPass. Consider running
145+
// conditionally.
146+
AM.getResult<DebugInfoCacheAnalysis>(M);
147+
142148
// Get Function analysis manager from its proxy.
143149
FunctionAnalysisManager &FAM =
144150
AM.getCachedResult<FunctionAnalysisManagerModuleProxy>(M)->getManager();
@@ -350,6 +356,7 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
350356
// analysis proxies by handling them above and in any nested pass managers.
351357
PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>();
352358
PA.preserve<LazyCallGraphAnalysis>();
359+
PA.preserve<DebugInfoCacheAnalysis>();
353360
PA.preserve<CGSCCAnalysisManagerModuleProxy>();
354361
PA.preserve<FunctionAnalysisManagerModuleProxy>();
355362
return PA;

llvm/lib/Transforms/Coroutines/CoroSplit.cpp

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "llvm/Analysis/CFG.h"
3333
#include "llvm/Analysis/CallGraph.h"
3434
#include "llvm/Analysis/ConstantFolding.h"
35+
#include "llvm/Analysis/DebugInfoCache.h"
3536
#include "llvm/Analysis/LazyCallGraph.h"
3637
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
3738
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -79,15 +80,38 @@ using namespace llvm;
7980
#define DEBUG_TYPE "coro-split"
8081

8182
namespace {
83+
/// Look up the DebugInfoFinder stored in \p DICache for the compile unit of
/// \p F's subprogram.
///
/// \returns a pointer to the entry held inside DICache->Result (not a copy),
/// or nullptr when \p DICache is null, when \p F has no subprogram or that
/// subprogram has no compile unit, or when the cache has no entry for the
/// compile unit.
const DebugInfoFinder *cachedDIFinder(Function &F,
84+
const DebugInfoCache *DICache) {
85+
// The cache is optional; callers may pass a null pointer.
if (!DICache)
86+
return nullptr;
87+
88+
// The cache is keyed by compile unit, reached here through F's subprogram.
auto *SP = F.getSubprogram();
89+
auto *CU = SP ? SP->getUnit() : nullptr;
90+
if (!CU)
91+
return nullptr;
92+
93+
// Hand back the address of the cached finder; no copy is made here.
if (auto Found = DICache->Result.find(CU); Found != DICache->Result.end())
94+
return &Found->getSecond();
95+
96+
// No entry was recorded for this compile unit.
return nullptr;
97+
}
98+
8299
/// Collect (a known) subset of global debug info metadata potentially used by
83100
/// the function \p F.
84101
///
85102
/// This metadata set can be used to avoid cloning debug info not owned by \p F
86103
/// and is shared among all potential clones \p F.
87-
MetadataSetTy collectCommonDebugInfo(Function &F) {
104+
MetadataSetTy collectCommonDebugInfo(Function &F,
105+
const DebugInfoCache *DICache) {
88106
TimeTraceScope FunctionScope("CollectCommonDebugInfo");
89107

90108
DebugInfoFinder DIFinder;
109+
110+
// Copy DIFinder from cache which is primed on F's compile unit when available
111+
auto *PrimedDIFinder = cachedDIFinder(F, DICache);
112+
if (PrimedDIFinder)
113+
DIFinder = *PrimedDIFinder;
114+
91115
DISubprogram *SPClonedWithinModule = CollectDebugInfoForCloning(
92116
F, CloneFunctionChangeType::LocalChangesOnly, DIFinder);
93117

@@ -1393,10 +1417,10 @@ namespace {
13931417
struct SwitchCoroutineSplitter {
13941418
static void split(Function &F, coro::Shape &Shape,
13951419
SmallVectorImpl<Function *> &Clones,
1396-
TargetTransformInfo &TTI) {
1420+
TargetTransformInfo &TTI, const DebugInfoCache *DICache) {
13971421
assert(Shape.ABI == coro::ABI::Switch);
13981422

1399-
MetadataSetTy CommonDebugInfo{collectCommonDebugInfo(F)};
1423+
MetadataSetTy CommonDebugInfo{collectCommonDebugInfo(F, DICache)};
14001424

14011425
// Create a resume clone by cloning the body of the original function,
14021426
// setting new entry block and replacing coro.suspend an appropriate value
@@ -1710,7 +1734,8 @@ CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
17101734

17111735
void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
17121736
SmallVectorImpl<Function *> &Clones,
1713-
TargetTransformInfo &TTI) {
1737+
TargetTransformInfo &TTI,
1738+
const DebugInfoCache *DICache) {
17141739
assert(Shape.ABI == coro::ABI::Async);
17151740
assert(Clones.empty());
17161741
// Reset various things that the optimizer might have decided it
@@ -1796,7 +1821,7 @@ void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
17961821

17971822
assert(Clones.size() == Shape.CoroSuspends.size());
17981823

1799-
MetadataSetTy CommonDebugInfo{collectCommonDebugInfo(F)};
1824+
MetadataSetTy CommonDebugInfo{collectCommonDebugInfo(F, DICache)};
18001825

18011826
for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
18021827
auto *Suspend = CS;
@@ -1809,7 +1834,8 @@ void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
18091834

18101835
void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape,
18111836
SmallVectorImpl<Function *> &Clones,
1812-
TargetTransformInfo &TTI) {
1837+
TargetTransformInfo &TTI,
1838+
const DebugInfoCache *DICache) {
18131839
assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
18141840
assert(Clones.empty());
18151841

@@ -1930,7 +1956,7 @@ void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape,
19301956

19311957
assert(Clones.size() == Shape.CoroSuspends.size());
19321958

1933-
MetadataSetTy CommonDebugInfo{collectCommonDebugInfo(F)};
1959+
MetadataSetTy CommonDebugInfo{collectCommonDebugInfo(F, DICache)};
19341960

19351961
for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
19361962
auto Suspend = CS;
@@ -1984,13 +2010,15 @@ static bool hasSafeElideCaller(Function &F) {
19842010

19852011
void coro::SwitchABI::splitCoroutine(Function &F, coro::Shape &Shape,
19862012
SmallVectorImpl<Function *> &Clones,
1987-
TargetTransformInfo &TTI) {
1988-
SwitchCoroutineSplitter::split(F, Shape, Clones, TTI);
2013+
TargetTransformInfo &TTI,
2014+
const DebugInfoCache *DICache) {
2015+
SwitchCoroutineSplitter::split(F, Shape, Clones, TTI, DICache);
19892016
}
19902017

19912018
static void doSplitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
19922019
coro::BaseABI &ABI, TargetTransformInfo &TTI,
1993-
bool OptimizeFrame) {
2020+
bool OptimizeFrame,
2021+
const DebugInfoCache *DICache) {
19942022
PrettyStackTraceFunction prettyStackTrace(F);
19952023

19962024
auto &Shape = ABI.Shape;
@@ -2015,7 +2043,7 @@ static void doSplitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
20152043
if (isNoSuspendCoroutine) {
20162044
handleNoSuspendCoroutine(Shape);
20172045
} else {
2018-
ABI.splitCoroutine(F, Shape, Clones, TTI);
2046+
ABI.splitCoroutine(F, Shape, Clones, TTI, DICache);
20192047
}
20202048

20212049
// Replace all the swifterror operations in the original function.
@@ -2212,6 +2240,9 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
22122240
auto &FAM =
22132241
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
22142242

2243+
const auto &MAMProxy = AM.getResult<ModuleAnalysisManagerCGSCCProxy>(C, CG);
2244+
const auto *DICache = MAMProxy.getCachedResult<DebugInfoCacheAnalysis>(M);
2245+
22152246
// Check for uses of llvm.coro.prepare.retcon/async.
22162247
SmallVector<Function *, 2> PrepareFns;
22172248
addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon");
@@ -2248,7 +2279,7 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
22482279

22492280
SmallVector<Function *, 4> Clones;
22502281
auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
2251-
doSplitCoroutine(F, Clones, *ABI, TTI, OptimizeFrame);
2282+
doSplitCoroutine(F, Clones, *ABI, TTI, OptimizeFrame, DICache);
22522283
CurrentSCC = &updateCallGraphAfterCoroutineSplit(
22532284
*N, Shape, Clones, *CurrentSCC, CG, AM, UR, FAM);
22542285

llvm/test/Other/new-pass-manager.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
; CHECK-CGSCC-PASS-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*(FunctionAnalysisManager|AnalysisManager<.*Function.*>).*}},{{.*}}Module>
2424
; CHECK-CGSCC-PASS-NEXT: Running analysis: LazyCallGraphAnalysis
2525
; CHECK-CGSCC-PASS-NEXT: Running analysis: TargetLibraryAnalysis
26+
; CHECK-CGSCC-PASS-NEXT: Running analysis: DebugInfoCacheAnalysis
2627
; CHECK-CGSCC-PASS-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
2728
; CHECK-CGSCC-PASS-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
2829
; CHECK-CGSCC-PASS-NEXT: Running pass: NoOpCGSCCPass

llvm/test/Other/new-pm-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@
139139
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
140140
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
141141
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
142+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
142143
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
143144
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
144145
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-lto-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
; CHECK-O23SZ-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
4444
; CHECK-O23SZ-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC
4545
; CHECK-O23SZ-NEXT: Running analysis: LazyCallGraphAnalysis
46+
; CHECK-O23SZ-NEXT: Running analysis: DebugInfoCacheAnalysis
4647
; CHECK-O23SZ-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
4748
; CHECK-O23SZ-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}>
4849
; CHECK-O23SZ-NEXT: Running pass: PostOrderFunctionAttrsPass

llvm/test/Other/new-pm-pgo-preinline.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
; CHECK-Osz-NEXT: Running analysis: InlineAdvisorAnalysis
66
; CHECK-Osz-NEXT: Running analysis: InnerAnalysisManagerProxy
77
; CHECK-Osz-NEXT: Running analysis: LazyCallGraphAnalysis
8+
; CHECK-Osz-NEXT: Running analysis: DebugInfoCacheAnalysis
89
; CHECK-Osz-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy on (foo)
910
; CHECK-Osz-NEXT: Running analysis: OuterAnalysisManagerProxy
1011
; CHECK-Osz-NEXT: Running pass: InlinerPass on (foo)

llvm/test/Other/new-pm-thinlto-postlink-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
7575
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
7676
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
77+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
7778
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
7879
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
7980
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
6363
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
6464
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
65+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
6566
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
6667
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
6768
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
; CHECK-O-NEXT: Invalidating analysis: AAManager
7272
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
7373
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
74+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
7475
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
7576
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
7677
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

0 commit comments

Comments
 (0)