Skip to content

Commit fa73123

Browse files
committed
[Coro] Use DebugInfoCache to speed up cloning in CoroSplitPass
Summary:
We can use a DebugInfoFinder from DebugInfoCache, which is already primed on a compile unit, to speed up collection of module-level debug info.

The pass could likely be another 2x+ faster if we avoid rebuilding the set of global debug info. This needs further massaging of CloneFunction and ValueMapper, though, and can be done incrementally on top of this.

Comparing performance of CoroSplitPass at various points in this stack, this is anecdata from a sample cpp file compiled with full debug info:

|                 | Baseline | IdentityMD set | Prebuilt CommonDI | Cached CU DIFinder (cur.) |
|-----------------|----------|----------------|-------------------|---------------------------|
| CoroSplitPass   | 306ms    | 221ms          | 68ms              | 17ms                      |
| CoroCloner      | 101ms    | 72ms           | 0.5ms             | 0.5ms                     |
| CollectGlobalDI | -        | -              | 63ms              | 13ms                      |
| Speed up        | 1x       | 1.4x           | 4.5x              | 18x                       |

Test Plan:
ninja check-llvm-unit
ninja check-llvm
Compiled a sample cpp file with time trace to get the avg. duration of the pass and inner scopes.

stack-info: PR: #118630, branch: users/artempyanykh/fast-coro-upstream/11
1 parent 0407e73 commit fa73123

14 files changed

+72
-17
lines changed

llvm/include/llvm/Transforms/Coroutines/ABI.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#ifndef LLVM_TRANSFORMS_COROUTINES_ABI_H
1616
#define LLVM_TRANSFORMS_COROUTINES_ABI_H
1717

18+
#include "llvm/Analysis/DebugInfoCache.h"
1819
#include "llvm/Analysis/TargetTransformInfo.h"
1920
#include "llvm/Transforms/Coroutines/CoroShape.h"
2021
#include "llvm/Transforms/Coroutines/MaterializationUtils.h"
@@ -53,7 +54,8 @@ class BaseABI {
5354
// Perform the function splitting according to the ABI.
5455
virtual void splitCoroutine(Function &F, coro::Shape &Shape,
5556
SmallVectorImpl<Function *> &Clones,
56-
TargetTransformInfo &TTI) = 0;
57+
TargetTransformInfo &TTI,
58+
const DebugInfoCache *DICache) = 0;
5759

5860
Function &F;
5961
coro::Shape &Shape;
@@ -73,7 +75,8 @@ class SwitchABI : public BaseABI {
7375

7476
void splitCoroutine(Function &F, coro::Shape &Shape,
7577
SmallVectorImpl<Function *> &Clones,
76-
TargetTransformInfo &TTI) override;
78+
TargetTransformInfo &TTI,
79+
const DebugInfoCache *DICache) override;
7780
};
7881

7982
class AsyncABI : public BaseABI {
@@ -86,7 +89,8 @@ class AsyncABI : public BaseABI {
8689

8790
void splitCoroutine(Function &F, coro::Shape &Shape,
8891
SmallVectorImpl<Function *> &Clones,
89-
TargetTransformInfo &TTI) override;
92+
TargetTransformInfo &TTI,
93+
const DebugInfoCache *DICache) override;
9094
};
9195

9296
class AnyRetconABI : public BaseABI {
@@ -99,7 +103,8 @@ class AnyRetconABI : public BaseABI {
99103

100104
void splitCoroutine(Function &F, coro::Shape &Shape,
101105
SmallVectorImpl<Function *> &Clones,
102-
TargetTransformInfo &TTI) override;
106+
TargetTransformInfo &TTI,
107+
const DebugInfoCache *DICache) override;
103108
};
104109

105110
} // end namespace coro

llvm/lib/Analysis/CGSCCPassManager.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/ADT/SmallPtrSet.h"
1515
#include "llvm/ADT/SmallVector.h"
1616
#include "llvm/ADT/iterator_range.h"
17+
#include "llvm/Analysis/DebugInfoCache.h"
1718
#include "llvm/Analysis/LazyCallGraph.h"
1819
#include "llvm/IR/Constant.h"
1920
#include "llvm/IR/InstIterator.h"
@@ -139,6 +140,11 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
139140
// Get the call graph for this module.
140141
LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
141142

143+
// Prime DebugInfoCache.
144+
// TODO: Currently, the only user is CoroSplitPass. Consider running
145+
// conditionally.
146+
AM.getResult<DebugInfoCacheAnalysis>(M);
147+
142148
// Get Function analysis manager from its proxy.
143149
FunctionAnalysisManager &FAM =
144150
AM.getCachedResult<FunctionAnalysisManagerModuleProxy>(M)->getManager();
@@ -350,6 +356,7 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
350356
// analysis proxies by handling them above and in any nested pass managers.
351357
PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>();
352358
PA.preserve<LazyCallGraphAnalysis>();
359+
PA.preserve<DebugInfoCacheAnalysis>();
353360
PA.preserve<CGSCCAnalysisManagerModuleProxy>();
354361
PA.preserve<FunctionAnalysisManagerModuleProxy>();
355362
return PA;

llvm/lib/Transforms/Coroutines/CoroSplit.cpp

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "llvm/Analysis/CFG.h"
3333
#include "llvm/Analysis/CallGraph.h"
3434
#include "llvm/Analysis/ConstantFolding.h"
35+
#include "llvm/Analysis/DebugInfoCache.h"
3536
#include "llvm/Analysis/LazyCallGraph.h"
3637
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
3738
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -79,16 +80,39 @@ using namespace llvm;
7980
#define DEBUG_TYPE "coro-split"
8081

8182
namespace {
83+
const DebugInfoFinder *cachedDIFinder(Function &F,
84+
const DebugInfoCache *DICache) {
85+
if (!DICache)
86+
return nullptr;
87+
88+
auto *SP = F.getSubprogram();
89+
auto *CU = SP ? SP->getUnit() : nullptr;
90+
if (!CU)
91+
return nullptr;
92+
93+
if (auto Found = DICache->Result.find(CU); Found != DICache->Result.end())
94+
return &Found->getSecond();
95+
96+
return nullptr;
97+
}
98+
8299
/// Collect (a known) subset of global debug info metadata potentially used by
83100
/// the function \p F.
84101
///
85102
/// This metadata set can be used to avoid cloning debug info not owned by \p F
86103
/// and is shared among all potential clones \p F.
87-
MetadataSetTy collectCommonDebugInfo(Function &F) {
104+
MetadataSetTy collectCommonDebugInfo(Function &F,
105+
const DebugInfoCache *DICache) {
88106
TimeTraceScope FunctionScope("CollectCommonDebugInfo");
89107

90108
MetadataSetTy CommonDebugInfo;
91109
DebugInfoFinder DIFinder;
110+
111+
// Copy DIFinder from cache which is primed on F's compile unit when available
112+
auto *PrimedDIFinder = cachedDIFinder(F, DICache);
113+
if (PrimedDIFinder)
114+
DIFinder = *PrimedDIFinder;
115+
92116
DISubprogram *SPClonedWithinModule = CollectDebugInfoForCloning(
93117
F, CloneFunctionChangeType::LocalChangesOnly, DIFinder);
94118

@@ -1396,10 +1420,10 @@ namespace {
13961420
struct SwitchCoroutineSplitter {
13971421
static void split(Function &F, coro::Shape &Shape,
13981422
SmallVectorImpl<Function *> &Clones,
1399-
TargetTransformInfo &TTI) {
1423+
TargetTransformInfo &TTI, const DebugInfoCache *DICache) {
14001424
assert(Shape.ABI == coro::ABI::Switch);
14011425

1402-
MetadataSetTy CommonDebugInfo{collectCommonDebugInfo(F)};
1426+
MetadataSetTy CommonDebugInfo{collectCommonDebugInfo(F, DICache)};
14031427

14041428
// Create a resume clone by cloning the body of the original function,
14051429
// setting new entry block and replacing coro.suspend an appropriate value
@@ -1713,7 +1737,8 @@ CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
17131737

17141738
void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
17151739
SmallVectorImpl<Function *> &Clones,
1716-
TargetTransformInfo &TTI) {
1740+
TargetTransformInfo &TTI,
1741+
const DebugInfoCache *DICache) {
17171742
assert(Shape.ABI == coro::ABI::Async);
17181743
assert(Clones.empty());
17191744
// Reset various things that the optimizer might have decided it
@@ -1799,7 +1824,7 @@ void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
17991824

18001825
assert(Clones.size() == Shape.CoroSuspends.size());
18011826

1802-
MetadataSetTy CommonDebugInfo{collectCommonDebugInfo(F)};
1827+
MetadataSetTy CommonDebugInfo{collectCommonDebugInfo(F, DICache)};
18031828

18041829
for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
18051830
auto *Suspend = CS;
@@ -1812,7 +1837,8 @@ void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
18121837

18131838
void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape,
18141839
SmallVectorImpl<Function *> &Clones,
1815-
TargetTransformInfo &TTI) {
1840+
TargetTransformInfo &TTI,
1841+
const DebugInfoCache *DICache) {
18161842
assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
18171843
assert(Clones.empty());
18181844

@@ -1933,7 +1959,7 @@ void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape,
19331959

19341960
assert(Clones.size() == Shape.CoroSuspends.size());
19351961

1936-
MetadataSetTy CommonDebugInfo{collectCommonDebugInfo(F)};
1962+
MetadataSetTy CommonDebugInfo{collectCommonDebugInfo(F, DICache)};
19371963

19381964
for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
19391965
auto Suspend = CS;
@@ -1987,13 +2013,15 @@ static bool hasSafeElideCaller(Function &F) {
19872013

19882014
void coro::SwitchABI::splitCoroutine(Function &F, coro::Shape &Shape,
19892015
SmallVectorImpl<Function *> &Clones,
1990-
TargetTransformInfo &TTI) {
1991-
SwitchCoroutineSplitter::split(F, Shape, Clones, TTI);
2016+
TargetTransformInfo &TTI,
2017+
const DebugInfoCache *DICache) {
2018+
SwitchCoroutineSplitter::split(F, Shape, Clones, TTI, DICache);
19922019
}
19932020

19942021
static void doSplitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
19952022
coro::BaseABI &ABI, TargetTransformInfo &TTI,
1996-
bool OptimizeFrame) {
2023+
bool OptimizeFrame,
2024+
const DebugInfoCache *DICache) {
19972025
PrettyStackTraceFunction prettyStackTrace(F);
19982026

19992027
auto &Shape = ABI.Shape;
@@ -2018,7 +2046,7 @@ static void doSplitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
20182046
if (isNoSuspendCoroutine) {
20192047
handleNoSuspendCoroutine(Shape);
20202048
} else {
2021-
ABI.splitCoroutine(F, Shape, Clones, TTI);
2049+
ABI.splitCoroutine(F, Shape, Clones, TTI, DICache);
20222050
}
20232051

20242052
// Replace all the swifterror operations in the original function.
@@ -2215,6 +2243,9 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
22152243
auto &FAM =
22162244
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
22172245

2246+
const auto &MAMProxy = AM.getResult<ModuleAnalysisManagerCGSCCProxy>(C, CG);
2247+
const auto *DICache = MAMProxy.getCachedResult<DebugInfoCacheAnalysis>(M);
2248+
22182249
// Check for uses of llvm.coro.prepare.retcon/async.
22192250
SmallVector<Function *, 2> PrepareFns;
22202251
addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon");
@@ -2251,7 +2282,7 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
22512282

22522283
SmallVector<Function *, 4> Clones;
22532284
auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
2254-
doSplitCoroutine(F, Clones, *ABI, TTI, OptimizeFrame);
2285+
doSplitCoroutine(F, Clones, *ABI, TTI, OptimizeFrame, DICache);
22552286
CurrentSCC = &updateCallGraphAfterCoroutineSplit(
22562287
*N, Shape, Clones, *CurrentSCC, CG, AM, UR, FAM);
22572288

llvm/test/Other/new-pass-manager.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
; CHECK-CGSCC-PASS-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*(FunctionAnalysisManager|AnalysisManager<.*Function.*>).*}},{{.*}}Module>
2424
; CHECK-CGSCC-PASS-NEXT: Running analysis: LazyCallGraphAnalysis
2525
; CHECK-CGSCC-PASS-NEXT: Running analysis: TargetLibraryAnalysis
26+
; CHECK-CGSCC-PASS-NEXT: Running analysis: DebugInfoCacheAnalysis
2627
; CHECK-CGSCC-PASS-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
2728
; CHECK-CGSCC-PASS-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
2829
; CHECK-CGSCC-PASS-NEXT: Running pass: NoOpCGSCCPass

llvm/test/Other/new-pm-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@
139139
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
140140
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
141141
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
142+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
142143
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
143144
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
144145
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-lto-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
; CHECK-O23SZ-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
4444
; CHECK-O23SZ-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC
4545
; CHECK-O23SZ-NEXT: Running analysis: LazyCallGraphAnalysis
46+
; CHECK-O23SZ-NEXT: Running analysis: DebugInfoCacheAnalysis
4647
; CHECK-O23SZ-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
4748
; CHECK-O23SZ-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}>
4849
; CHECK-O23SZ-NEXT: Running pass: PostOrderFunctionAttrsPass

llvm/test/Other/new-pm-pgo-preinline.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
; CHECK-Osz-NEXT: Running analysis: InlineAdvisorAnalysis
66
; CHECK-Osz-NEXT: Running analysis: InnerAnalysisManagerProxy
77
; CHECK-Osz-NEXT: Running analysis: LazyCallGraphAnalysis
8+
; CHECK-Osz-NEXT: Running analysis: DebugInfoCacheAnalysis
89
; CHECK-Osz-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy on (foo)
910
; CHECK-Osz-NEXT: Running analysis: OuterAnalysisManagerProxy
1011
; CHECK-Osz-NEXT: Running pass: InlinerPass on (foo)

llvm/test/Other/new-pm-thinlto-postlink-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
7575
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
7676
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
77+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
7778
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
7879
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
7980
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
6363
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
6464
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
65+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
6566
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
6667
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
6768
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
; CHECK-O-NEXT: Invalidating analysis: AAManager
7272
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
7373
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
74+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
7475
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
7576
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
7677
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-thinlto-prelink-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@
106106
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
107107
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
108108
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
109+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
109110
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
110111
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
111112
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
6363
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
6464
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
65+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
6566
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy on (foo)
6667
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
6768
; CHECK-O-NEXT: Running pass: InlinerPass on (foo)

llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
; CHECK-O-NEXT: Invalidating analysis: AAManager
7777
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
7878
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
79+
; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
7980
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
8081
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
8182
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass

llvm/unittests/Analysis/CGSCCPassManagerTest.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "llvm/Analysis/CGSCCPassManager.h"
10+
#include "llvm/Analysis/DebugInfoCache.h"
1011
#include "llvm/Analysis/LazyCallGraph.h"
1112
#include "llvm/Analysis/TargetLibraryInfo.h"
1213
#include "llvm/AsmParser/Parser.h"
@@ -16,8 +17,8 @@
1617
#include "llvm/IR/Instructions.h"
1718
#include "llvm/IR/LLVMContext.h"
1819
#include "llvm/IR/Module.h"
19-
#include "llvm/IR/PassManager.h"
2020
#include "llvm/IR/PassInstrumentation.h"
21+
#include "llvm/IR/PassManager.h"
2122
#include "llvm/IR/Verifier.h"
2223
#include "llvm/Support/SourceMgr.h"
2324
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
@@ -255,6 +256,7 @@ class CGSCCPassManagerTest : public ::testing::Test {
255256
"}\n")) {
256257
FAM.registerPass([&] { return TargetLibraryAnalysis(); });
257258
MAM.registerPass([&] { return LazyCallGraphAnalysis(); });
259+
MAM.registerPass([&] { return DebugInfoCacheAnalysis(); });
258260
MAM.registerPass([&] { return FunctionAnalysisManagerModuleProxy(FAM); });
259261

260262
// Register required pass instrumentation analysis.

0 commit comments

Comments
 (0)