Skip to content

Commit bec4d02

Browse files
committed
[Coro] Use DebugInfoCache to speed up cloning in CoroSplitPass
Summary:
We can use a DebugInfoFinder from DebugInfoCache, which is already primed on a compile unit, to speed up collection of global debug info.

The pass could likely be another 2x+ faster if we avoided rebuilding the set of global debug info. This needs further massaging of CloneFunction and ValueMapper, though, and can be done incrementally on top of this.

Comparing the performance of CoroSplitPass at various points in this stack, this is anecdata from a sample cpp file compiled with full debug info:

|                 | Baseline | IdentityMD set | Prebuilt GlobalDI | Cached CU DIFinder (cur.) |
|-----------------+----------+----------------+-------------------+---------------------------|
| CoroSplitPass   | 306ms    | 221ms          | 68ms              | 17ms                      |
| CoroCloner      | 101ms    | 72ms           | 0.5ms             | 0.5ms                     |
| CollectGlobalDI | -        | -              | 63ms              | 13ms                      |
|-----------------+----------+----------------+-------------------+---------------------------|
| Speed up        | 1x       | 1.4x           | 4.5x              | 18x                       |

Test Plan:
ninja check-llvm-unit
ninja check-llvm
Compiled a sample cpp file with time trace to get the avg. duration of the pass and inner scopes.

stack-info: PR: #118630, branch: users/artempyanykh/fast-coro-upstream/11
1 parent bdb7970 commit bec4d02

14 files changed

+73
-17
lines changed

llvm/include/llvm/Transforms/Coroutines/ABI.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#ifndef LLVM_TRANSFORMS_COROUTINES_ABI_H
1616
#define LLVM_TRANSFORMS_COROUTINES_ABI_H
1717

18+
#include "llvm/Analysis/DebugInfoCache.h"
1819
#include "llvm/Analysis/TargetTransformInfo.h"
1920
#include "llvm/Transforms/Coroutines/CoroShape.h"
2021
#include "llvm/Transforms/Coroutines/MaterializationUtils.h"
@@ -53,7 +54,8 @@ class BaseABI {
5354
// Perform the function splitting according to the ABI.
5455
virtual void splitCoroutine(Function &F, coro::Shape &Shape,
5556
SmallVectorImpl<Function *> &Clones,
56-
TargetTransformInfo &TTI) = 0;
57+
TargetTransformInfo &TTI,
58+
const DebugInfoCache *DICache) = 0;
5759

5860
Function &F;
5961
coro::Shape &Shape;
@@ -73,7 +75,8 @@ class SwitchABI : public BaseABI {
7375

7476
void splitCoroutine(Function &F, coro::Shape &Shape,
7577
SmallVectorImpl<Function *> &Clones,
76-
TargetTransformInfo &TTI) override;
78+
TargetTransformInfo &TTI,
79+
const DebugInfoCache *DICache) override;
7780
};
7881

7982
class AsyncABI : public BaseABI {
@@ -86,7 +89,8 @@ class AsyncABI : public BaseABI {
8689

8790
void splitCoroutine(Function &F, coro::Shape &Shape,
8891
SmallVectorImpl<Function *> &Clones,
89-
TargetTransformInfo &TTI) override;
92+
TargetTransformInfo &TTI,
93+
const DebugInfoCache *DICache) override;
9094
};
9195

9296
class AnyRetconABI : public BaseABI {
@@ -99,7 +103,8 @@ class AnyRetconABI : public BaseABI {
99103

100104
void splitCoroutine(Function &F, coro::Shape &Shape,
101105
SmallVectorImpl<Function *> &Clones,
102-
TargetTransformInfo &TTI) override;
106+
TargetTransformInfo &TTI,
107+
const DebugInfoCache *DICache) override;
103108
};
104109

105110
} // end namespace coro

llvm/lib/Analysis/CGSCCPassManager.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/ADT/SmallPtrSet.h"
1515
#include "llvm/ADT/SmallVector.h"
1616
#include "llvm/ADT/iterator_range.h"
17+
#include "llvm/Analysis/DebugInfoCache.h"
1718
#include "llvm/Analysis/LazyCallGraph.h"
1819
#include "llvm/IR/Constant.h"
1920
#include "llvm/IR/InstIterator.h"
@@ -139,6 +140,11 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
139140
// Get the call graph for this module.
140141
LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
141142

143+
// Prime DebugInfoCache.
144+
// TODO: Currently, the only user is CoroSplitPass. Consider running
145+
// conditionally.
146+
AM.getResult<DebugInfoCacheAnalysis>(M);
147+
142148
// Get Function analysis manager from its proxy.
143149
FunctionAnalysisManager &FAM =
144150
AM.getCachedResult<FunctionAnalysisManagerModuleProxy>(M)->getManager();
@@ -350,6 +356,7 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
350356
// analysis proxies by handling them above and in any nested pass managers.
351357
PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>();
352358
PA.preserve<LazyCallGraphAnalysis>();
359+
PA.preserve<DebugInfoCacheAnalysis>();
353360
PA.preserve<CGSCCAnalysisManagerModuleProxy>();
354361
PA.preserve<FunctionAnalysisManagerModuleProxy>();
355362
return PA;

llvm/lib/Transforms/Coroutines/CoroSplit.cpp

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "llvm/Analysis/CFG.h"
3333
#include "llvm/Analysis/CallGraph.h"
3434
#include "llvm/Analysis/ConstantFolding.h"
35+
#include "llvm/Analysis/DebugInfoCache.h"
3536
#include "llvm/Analysis/LazyCallGraph.h"
3637
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
3738
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -79,15 +80,39 @@ using namespace llvm;
7980
#define DEBUG_TYPE "coro-split"
8081

8182
namespace {
83+
const DebugInfoFinder *cachedDIFinder(Function &F,
84+
const DebugInfoCache *DICache) {
85+
if (!DICache)
86+
return nullptr;
87+
88+
auto *SP = F.getSubprogram();
89+
auto *CU = SP ? SP->getUnit() : nullptr;
90+
if (!CU)
91+
return nullptr;
92+
93+
auto Found = DICache->Result.find(CU);
94+
if (Found == DICache->Result.end())
95+
return nullptr;
96+
97+
return &Found->getSecond();
98+
}
99+
82100
/// Collect (a known) subset of global debug info metadata potentially used by
83101
/// the function \p F.
84102
///
85103
/// This metadata set can be used to avoid cloning debug info not owned by \p F
86104
/// and is shared among all potential clones of \p F.
87-
void collectGlobalDebugInfo(Function &F, MetadataSetTy &GlobalDebugInfo) {
105+
void collectGlobalDebugInfo(Function &F, MetadataSetTy &GlobalDebugInfo,
106+
const DebugInfoCache *DICache) {
88107
TimeTraceScope FunctionScope("CollectGlobalDebugInfo");
89108

90109
DebugInfoFinder DIFinder;
110+
111+
// Copy the DIFinder from the cache, which is primed on F's compile unit, when
// one is available.
112+
auto *PrimedDIFinder = cachedDIFinder(F, DICache);
113+
if (PrimedDIFinder)
114+
DIFinder = *PrimedDIFinder;
115+
91116
DISubprogram *SPClonedWithinModule = CollectDebugInfoForCloning(
92117
F, CloneFunctionChangeType::LocalChangesOnly, DIFinder);
93118

@@ -1394,11 +1419,11 @@ namespace {
13941419
struct SwitchCoroutineSplitter {
13951420
static void split(Function &F, coro::Shape &Shape,
13961421
SmallVectorImpl<Function *> &Clones,
1397-
TargetTransformInfo &TTI) {
1422+
TargetTransformInfo &TTI, const DebugInfoCache *DICache) {
13981423
assert(Shape.ABI == coro::ABI::Switch);
13991424

14001425
MetadataSetTy GlobalDebugInfo;
1401-
collectGlobalDebugInfo(F, GlobalDebugInfo);
1426+
collectGlobalDebugInfo(F, GlobalDebugInfo, DICache);
14021427

14031428
// Create a resume clone by cloning the body of the original function,
14041429
// setting new entry block and replacing coro.suspend an appropriate value
@@ -1712,7 +1737,8 @@ CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
17121737

17131738
void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
17141739
SmallVectorImpl<Function *> &Clones,
1715-
TargetTransformInfo &TTI) {
1740+
TargetTransformInfo &TTI,
1741+
const DebugInfoCache *DICache) {
17161742
assert(Shape.ABI == coro::ABI::Async);
17171743
assert(Clones.empty());
17181744
// Reset various things that the optimizer might have decided it
@@ -1799,7 +1825,7 @@ void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
17991825
assert(Clones.size() == Shape.CoroSuspends.size());
18001826

18011827
MetadataSetTy GlobalDebugInfo;
1802-
collectGlobalDebugInfo(F, GlobalDebugInfo);
1828+
collectGlobalDebugInfo(F, GlobalDebugInfo, DICache);
18031829

18041830
for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
18051831
auto *Suspend = CS;
@@ -1812,7 +1838,8 @@ void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
18121838

18131839
void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape,
18141840
SmallVectorImpl<Function *> &Clones,
1815-
TargetTransformInfo &TTI) {
1841+
TargetTransformInfo &TTI,
1842+
const DebugInfoCache *DICache) {
18161843
assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
18171844
assert(Clones.empty());
18181845

@@ -1934,7 +1961,7 @@ void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape,
19341961
assert(Clones.size() == Shape.CoroSuspends.size());
19351962

19361963
MetadataSetTy GlobalDebugInfo;
1937-
collectGlobalDebugInfo(F, GlobalDebugInfo);
1964+
collectGlobalDebugInfo(F, GlobalDebugInfo, DICache);
19381965

19391966
for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
19401967
auto Suspend = CS;
@@ -1988,13 +2015,15 @@ static bool hasSafeElideCaller(Function &F) {
19882015

19892016
void coro::SwitchABI::splitCoroutine(Function &F, coro::Shape &Shape,
19902017
SmallVectorImpl<Function *> &Clones,
1991-
TargetTransformInfo &TTI) {
1992-
SwitchCoroutineSplitter::split(F, Shape, Clones, TTI);
2018+
TargetTransformInfo &TTI,
2019+
const DebugInfoCache *DICache) {
2020+
SwitchCoroutineSplitter::split(F, Shape, Clones, TTI, DICache);
19932021
}
19942022

19952023
static void doSplitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
19962024
coro::BaseABI &ABI, TargetTransformInfo &TTI,
1997-
bool OptimizeFrame) {
2025+
bool OptimizeFrame,
2026+
const DebugInfoCache *DICache) {
19982027
PrettyStackTraceFunction prettyStackTrace(F);
19992028

20002029
auto &Shape = ABI.Shape;
@@ -2019,7 +2048,7 @@ static void doSplitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
20192048
if (isNoSuspendCoroutine) {
20202049
handleNoSuspendCoroutine(Shape);
20212050
} else {
2022-
ABI.splitCoroutine(F, Shape, Clones, TTI);
2051+
ABI.splitCoroutine(F, Shape, Clones, TTI, DICache);
20232052
}
20242053

20252054
// Replace all the swifterror operations in the original function.
@@ -2216,6 +2245,9 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
22162245
auto &FAM =
22172246
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
22182247

2248+
const auto &MAMProxy = AM.getResult<ModuleAnalysisManagerCGSCCProxy>(C, CG);
2249+
const auto *DICache = MAMProxy.getCachedResult<DebugInfoCacheAnalysis>(M);
2250+
22192251
// Check for uses of llvm.coro.prepare.retcon/async.
22202252
SmallVector<Function *, 2> PrepareFns;
22212253
addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon");
@@ -2252,7 +2284,7 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
22522284

22532285
SmallVector<Function *, 4> Clones;
22542286
auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
2255-
doSplitCoroutine(F, Clones, *ABI, TTI, OptimizeFrame);
2287+
doSplitCoroutine(F, Clones, *ABI, TTI, OptimizeFrame, DICache);
22562288
CurrentSCC = &updateCallGraphAfterCoroutineSplit(
22572289
*N, Shape, Clones, *CurrentSCC, CG, AM, UR, FAM);
22582290

llvm/test/Other/new-pass-manager.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
; CHECK-CGSCC-PASS-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*(FunctionAnalysisManager|AnalysisManager<.*Function.*>).*}},{{.*}}Module>
2424
; CHECK-CGSCC-PASS-NEXT: Running analysis: LazyCallGraphAnalysis
2525
; CHECK-CGSCC-PASS-NEXT: Running analysis: TargetLibraryAnalysis
26+
; CHECK-CGSCC-PASS-NEXT: Running analysis: DebugInfoCacheAnalysis
2627
; CHECK-CGSCC-PASS-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
2728
; CHECK-CGSCC-PASS-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
2829
; CHECK-CGSCC-PASS-NEXT: Running pass: NoOpCGSCCPass

llvm/test/Other/new-pm-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@
139139
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
140140
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
141141
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
142+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
142143
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
143144
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
144145
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-lto-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
; CHECK-O23SZ-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
4444
; CHECK-O23SZ-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC
4545
; CHECK-O23SZ-NEXT: Running analysis: LazyCallGraphAnalysis
46+
; CHECK-O23SZ-NEXT: Running analysis: DebugInfoCacheAnalysis
4647
; CHECK-O23SZ-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
4748
; CHECK-O23SZ-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}>
4849
; CHECK-O23SZ-NEXT: Running pass: PostOrderFunctionAttrsPass

llvm/test/Other/new-pm-pgo-preinline.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
; CHECK-Osz-NEXT: Running analysis: InlineAdvisorAnalysis
66
; CHECK-Osz-NEXT: Running analysis: InnerAnalysisManagerProxy
77
; CHECK-Osz-NEXT: Running analysis: LazyCallGraphAnalysis
8+
; CHECK-Osz-NEXT: Running analysis: DebugInfoCacheAnalysis
89
; CHECK-Osz-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy on (foo)
910
; CHECK-Osz-NEXT: Running analysis: OuterAnalysisManagerProxy
1011
; CHECK-Osz-NEXT: Running pass: InlinerPass on (foo)

llvm/test/Other/new-pm-thinlto-postlink-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
7575
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
7676
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
77+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
7778
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
7879
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
7980
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
6363
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
6464
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
65+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
6566
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
6667
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
6768
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
; CHECK-O-NEXT: Invalidating analysis: AAManager
7272
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
7373
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
74+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
7475
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
7576
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
7677
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

0 commit comments

Comments
 (0)