-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[Coro] Prebuild a module-level debug info set and share it between all coroutine clones #118628
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-coroutines @llvm/pr-subscribers-llvm-transforms
Author: Artem Pianykh (artempyanykh)
Changes
[Coro] Prebuild a global debug info set and share it between all coroutine clones
Summary: This diff builds the data once and shares it between all clones.
Anecdata for a sample cpp source file compiled with full debug info:
| | Baseline | IdentityMD set | Prebuilt GlobalDI (cur.) |
|-----------------|----------|----------------|--------------------------|
| CoroSplitPass | 306ms | 221ms | 68ms |
| CoroCloner | 101ms | 72ms | 0.5ms |
| CollectGlobalDI | - | - | 63ms |
| Speed up | 1x | 1.4x | 4.5x |
Note that CollectGlobalDI happens once per coroutine rather than per clone.
Test Plan: Compiled a sample internal source file, checked time trace output for scope timings.
Full diff: https://github.com/llvm/llvm-project/pull/118628.diff
2 Files Affected:
diff --git a/llvm/lib/Transforms/Coroutines/CoroCloner.h b/llvm/lib/Transforms/Coroutines/CoroCloner.h
index d1887980fb3bcb..e7121d26bd08f3 100644
--- a/llvm/lib/Transforms/Coroutines/CoroCloner.h
+++ b/llvm/lib/Transforms/Coroutines/CoroCloner.h
@@ -48,6 +48,7 @@ class BaseCloner {
CloneKind FKind;
IRBuilder<> Builder;
TargetTransformInfo &TTI;
+ const MetadataSetTy &GlobalDebugInfo;
ValueToValueMapTy VMap;
Function *NewF = nullptr;
@@ -60,12 +61,12 @@ class BaseCloner {
/// Create a cloner for a continuation lowering.
BaseCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
Function *NewF, AnyCoroSuspendInst *ActiveSuspend,
- TargetTransformInfo &TTI)
+ TargetTransformInfo &TTI, const MetadataSetTy &GlobalDebugInfo)
: OrigF(OrigF), Suffix(Suffix), Shape(Shape),
FKind(Shape.ABI == ABI::Async ? CloneKind::Async
: CloneKind::Continuation),
- Builder(OrigF.getContext()), TTI(TTI), NewF(NewF),
- ActiveSuspend(ActiveSuspend) {
+ Builder(OrigF.getContext()), TTI(TTI), GlobalDebugInfo(GlobalDebugInfo),
+ NewF(NewF), ActiveSuspend(ActiveSuspend) {
assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce ||
Shape.ABI == ABI::Async);
assert(NewF && "need existing function for continuation");
@@ -74,9 +75,11 @@ class BaseCloner {
public:
BaseCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
- CloneKind FKind, TargetTransformInfo &TTI)
+ CloneKind FKind, TargetTransformInfo &TTI,
+ const MetadataSetTy &GlobalDebugInfo)
: OrigF(OrigF), Suffix(Suffix), Shape(Shape), FKind(FKind),
- Builder(OrigF.getContext()), TTI(TTI) {}
+ Builder(OrigF.getContext()), TTI(TTI),
+ GlobalDebugInfo(GlobalDebugInfo) {}
virtual ~BaseCloner() {}
@@ -84,12 +87,14 @@ class BaseCloner {
static Function *createClone(Function &OrigF, const Twine &Suffix,
coro::Shape &Shape, Function *NewF,
AnyCoroSuspendInst *ActiveSuspend,
- TargetTransformInfo &TTI) {
+ TargetTransformInfo &TTI,
+ const MetadataSetTy &GlobalDebugInfo) {
assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce ||
Shape.ABI == ABI::Async);
TimeTraceScope FunctionScope("BaseCloner");
- BaseCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI);
+ BaseCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI,
+ GlobalDebugInfo);
Cloner.create();
return Cloner.getFunction();
}
@@ -129,8 +134,9 @@ class SwitchCloner : public BaseCloner {
protected:
/// Create a cloner for a switch lowering.
SwitchCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
- CloneKind FKind, TargetTransformInfo &TTI)
- : BaseCloner(OrigF, Suffix, Shape, FKind, TTI) {}
+ CloneKind FKind, TargetTransformInfo &TTI,
+ const MetadataSetTy &GlobalDebugInfo)
+ : BaseCloner(OrigF, Suffix, Shape, FKind, TTI, GlobalDebugInfo) {}
void create() override;
@@ -138,11 +144,12 @@ class SwitchCloner : public BaseCloner {
/// Create a clone for a switch lowering.
static Function *createClone(Function &OrigF, const Twine &Suffix,
coro::Shape &Shape, CloneKind FKind,
- TargetTransformInfo &TTI) {
+ TargetTransformInfo &TTI,
+ const MetadataSetTy &GlobalDebugInfo) {
assert(Shape.ABI == ABI::Switch);
TimeTraceScope FunctionScope("SwitchCloner");
- SwitchCloner Cloner(OrigF, Suffix, Shape, FKind, TTI);
+ SwitchCloner Cloner(OrigF, Suffix, Shape, FKind, TTI, GlobalDebugInfo);
Cloner.create();
return Cloner.getFunction();
}
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 3808147fc26009..2803b340bd22e0 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -43,6 +43,7 @@
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
@@ -77,6 +78,25 @@ using namespace llvm;
#define DEBUG_TYPE "coro-split"
+namespace {
+/// Collect (a known) subset of global debug info metadata potentially used by
+/// the function \p F.
+///
+/// This metadata set can be used to avoid cloning debug info not owned by \p F
+/// and is shared among all potential clones of \p F.
+void collectGlobalDebugInfo(Function &F, MetadataSetTy &GlobalDebugInfo) {
+ TimeTraceScope FunctionScope("CollectGlobalDebugInfo");
+
+ DebugInfoFinder DIFinder;
+ DISubprogram *SPClonedWithinModule = CollectDebugInfoForCloning(
+ F, CloneFunctionChangeType::LocalChangesOnly, DIFinder);
+
+ FindDebugInfoToIdentityMap(GlobalDebugInfo,
+ CloneFunctionChangeType::LocalChangesOnly,
+ DIFinder, SPClonedWithinModule);
+}
+} // end anonymous namespace
+
// FIXME:
// Lower the intrinsic in CoroEarly phase if coroutine frame doesn't escape
// and it is known that other transformations, for example, sanitizers
@@ -891,8 +911,11 @@ void coro::BaseCloner::create() {
auto savedLinkage = NewF->getLinkage();
NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);
- CloneFunctionInto(NewF, &OrigF, VMap,
- CloneFunctionChangeType::LocalChangesOnly, Returns);
+ CloneFunctionAttributesInto(NewF, &OrigF, VMap, false);
+ CloneFunctionMetadataInto(NewF, &OrigF, VMap, RF_None, nullptr, nullptr,
+ &GlobalDebugInfo);
+ CloneFunctionBodyInto(NewF, &OrigF, VMap, RF_None, Returns, "", nullptr,
+ nullptr, nullptr, &GlobalDebugInfo);
auto &Context = NewF->getContext();
@@ -1374,16 +1397,22 @@ struct SwitchCoroutineSplitter {
TargetTransformInfo &TTI) {
assert(Shape.ABI == coro::ABI::Switch);
+ MetadataSetTy GlobalDebugInfo;
+ collectGlobalDebugInfo(F, GlobalDebugInfo);
+
// Create a resume clone by cloning the body of the original function,
// setting new entry block and replacing coro.suspend with an appropriate value
// to force resume or cleanup pass for every suspend point.
createResumeEntryBlock(F, Shape);
auto *ResumeClone = coro::SwitchCloner::createClone(
- F, ".resume", Shape, coro::CloneKind::SwitchResume, TTI);
+ F, ".resume", Shape, coro::CloneKind::SwitchResume, TTI,
+ GlobalDebugInfo);
auto *DestroyClone = coro::SwitchCloner::createClone(
- F, ".destroy", Shape, coro::CloneKind::SwitchUnwind, TTI);
+ F, ".destroy", Shape, coro::CloneKind::SwitchUnwind, TTI,
+ GlobalDebugInfo);
auto *CleanupClone = coro::SwitchCloner::createClone(
- F, ".cleanup", Shape, coro::CloneKind::SwitchCleanup, TTI);
+ F, ".cleanup", Shape, coro::CloneKind::SwitchCleanup, TTI,
+ GlobalDebugInfo);
postSplitCleanup(*ResumeClone);
postSplitCleanup(*DestroyClone);
@@ -1768,12 +1797,16 @@ void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
}
assert(Clones.size() == Shape.CoroSuspends.size());
+
+ MetadataSetTy GlobalDebugInfo;
+ collectGlobalDebugInfo(F, GlobalDebugInfo);
+
for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
auto *Suspend = CS;
auto *Clone = Clones[Idx];
coro::BaseCloner::createClone(F, "resume." + Twine(Idx), Shape, Clone,
- Suspend, TTI);
+ Suspend, TTI, GlobalDebugInfo);
}
}
@@ -1899,12 +1932,16 @@ void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape,
}
assert(Clones.size() == Shape.CoroSuspends.size());
+
+ MetadataSetTy GlobalDebugInfo;
+ collectGlobalDebugInfo(F, GlobalDebugInfo);
+
for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
auto Suspend = CS;
auto Clone = Clones[Idx];
coro::BaseCloner::createClone(F, "resume." + Twine(Idx), Shape, Clone,
- Suspend, TTI);
+ Suspend, TTI, GlobalDebugInfo);
}
}
|
0a3a9ea
to
ecbbd61
Compare
77d348f
to
aa6401c
Compare
nit: Perhaps 'Common' is a better word than 'Global'. Global made me think of global variables but I realized in the patch that is not what you are doing. Seems you are just creating debug info based on the original function and sharing that with the continuations / splits? Or perhaps just 'CoroDebugInfo'? |
…utine clones Summary: CoroCloner, by calling into CloneFunctionInto, does a lot of repeated work priming DIFinder and building a list of global debug info metadata. For programs compiled with full debug info this gets very expensive. This diff builds the data once and shares it between all clones. Anecdata for a sample cpp source file compiled with full debug info: | | Baseline | IdentityMD set | Prebuilt GlobalDI (cur.) | |-----------------+----------+----------------+--------------------------| | CoroSplitPass | 306ms | 221ms | 68ms | | CoroCloner | 101ms | 72ms | 0.5ms | | CollectGlobalDI | - | - | 63ms | |-----------------+----------+----------------+--------------------------| | Speed up | 1x | 1.4x | 4.5x | Note that CollectGlobalDI happens once *per coroutine* rather than per clone. Test Plan: ninja check-llvm-unit ninja check-llvm Compiled a sample internal source file, checked time trace output for scope timings. stack-info: PR: llvm/llvm-project#118628, branch: users/artempyanykh/fast-coro-upstream/9
ecbbd61
to
e80a2a7
Compare
…utine clones Summary: CoroCloner, by calling into CloneFunctionInto, does a lot of repeated work priming DIFinder and building a list of global debug info metadata. For programs compiled with full debug info this gets very expensive. This diff builds the data once and shares it between all clones. Anecdata for a sample cpp source file compiled with full debug info: | | Baseline | IdentityMD set | Prebuilt GlobalDI (cur.) | |-----------------+----------+----------------+--------------------------| | CoroSplitPass | 306ms | 221ms | 68ms | | CoroCloner | 101ms | 72ms | 0.5ms | | CollectGlobalDI | - | - | 63ms | |-----------------+----------+----------------+--------------------------| | Speed up | 1x | 1.4x | 4.5x | Note that CollectGlobalDI happens once *per coroutine* rather than per clone. Test Plan: ninja check-llvm-unit ninja check-llvm Compiled a sample internal source file, checked time trace output for scope timings. stack-info: PR: #118628, branch: users/artempyanykh/fast-coro-upstream/9
aa6401c
to
24860c1
Compare
24860c1
to
48abbb2
Compare
…utine clones Summary: CoroCloner, by calling into CloneFunctionInto, does a lot of repeated work priming DIFinder and building a list of global debug info metadata. For programs compiled with full debug info this gets very expensive. This diff builds the data once and shares it between all clones. Anecdata for a sample cpp source file compiled with full debug info: | | Baseline | IdentityMD set | Prebuilt GlobalDI (cur.) | |-----------------+----------+----------------+--------------------------| | CoroSplitPass | 306ms | 221ms | 68ms | | CoroCloner | 101ms | 72ms | 0.5ms | | CollectGlobalDI | - | - | 63ms | |-----------------+----------+----------------+--------------------------| | Speed up | 1x | 1.4x | 4.5x | Note that CollectGlobalDI happens once *per coroutine* rather than per clone. Test Plan: ninja check-llvm-unit ninja check-llvm Compiled a sample internal source file, checked time trace output for scope timings. stack-info: PR: llvm/llvm-project#118628, branch: users/artempyanykh/fast-coro-upstream/9
ce1b653
to
6ee8c03
Compare
…utine clones Summary: CoroCloner, by calling into CloneFunctionInto, does a lot of repeated work priming DIFinder and building a list of global debug info metadata. For programs compiled with full debug info this gets very expensive. This diff builds the data once and shares it between all clones. Anecdata for a sample cpp source file compiled with full debug info: | | Baseline | IdentityMD set | Prebuilt GlobalDI (cur.) | |-----------------+----------+----------------+--------------------------| | CoroSplitPass | 306ms | 221ms | 68ms | | CoroCloner | 101ms | 72ms | 0.5ms | | CollectGlobalDI | - | - | 63ms | |-----------------+----------+----------------+--------------------------| | Speed up | 1x | 1.4x | 4.5x | Note that CollectGlobalDI happens once *per coroutine* rather than per clone. Test Plan: ninja check-llvm-unit ninja check-llvm Compiled a sample internal source file, checked time trace output for scope timings. stack-info: PR: #118628, branch: users/artempyanykh/fast-coro-upstream/9
48abbb2
to
77892ea
Compare
…l coroutine clones Summary: CoroCloner, by calling into CloneFunctionInto, does a lot of repeated work priming DIFinder and building a list of common module-level debug info metadata. For programs compiled with full debug info this can get very expensive. This diff builds the data once and shares it between all clones. Anecdata for a sample cpp source file compiled with full debug info: | | Baseline | IdentityMD set | Prebuilt CommonDI (cur.) | |-----------------|----------|----------------|--------------------------| | CoroSplitPass | 306ms | 221ms | 68ms | | CoroCloner | 101ms | 72ms | 0.5ms | | CollectGlobalDI | - | - | 63ms | | Speed up | 1x | 1.4x | 4.5x | Note that CollectCommonDebugInfo happens once *per coroutine* rather than per clone. Test Plan: ninja check-llvm-unit ninja check-llvm Compiled a sample internal source file, checked time trace output for scope timings. stack-info: PR: llvm/llvm-project#118628, branch: users/artempyanykh/fast-coro-upstream/9
28527bc
to
fbe503b
Compare
…l coroutine clones Summary: CoroCloner, by calling into CloneFunctionInto, does a lot of repeated work priming DIFinder and building a list of common module-level debug info metadata. For programs compiled with full debug info this can get very expensive. This diff builds the data once and shares it between all clones. Anecdata for a sample cpp source file compiled with full debug info: | | Baseline | IdentityMD set | Prebuilt CommonDI (cur.) | |-----------------|----------|----------------|--------------------------| | CoroSplitPass | 306ms | 221ms | 68ms | | CoroCloner | 101ms | 72ms | 0.5ms | | CollectGlobalDI | - | - | 63ms | | Speed up | 1x | 1.4x | 4.5x | Note that CollectCommonDebugInfo happens once *per coroutine* rather than per clone. Test Plan: ninja check-llvm-unit ninja check-llvm Compiled a sample internal source file, checked time trace output for scope timings. stack-info: PR: #118628, branch: users/artempyanykh/fast-coro-upstream/9
9188f0e
to
f58b71d
Compare
fbe503b
to
d9e79cc
Compare
…l coroutine clones Summary: CoroCloner, by calling into CloneFunctionInto, does a lot of repeated work priming DIFinder and building a list of common module-level debug info metadata. For programs compiled with full debug info this can get very expensive. This diff builds the data once and shares it between all clones. Anecdata for a sample cpp source file compiled with full debug info: | | Baseline | IdentityMD set | Prebuilt CommonDI (cur.) | |-----------------|----------|----------------|--------------------------| | CoroSplitPass | 306ms | 221ms | 68ms | | CoroCloner | 101ms | 72ms | 0.5ms | | CollectGlobalDI | - | - | 63ms | | Speed up | 1x | 1.4x | 4.5x | Note that CollectCommonDebugInfo happens once *per coroutine* rather than per clone. Test Plan: ninja check-llvm-unit ninja check-llvm Compiled a sample internal source file, checked time trace output for scope timings. stack-info: PR: #118628, branch: users/artempyanykh/fast-coro-upstream/9
f58b71d
to
c613ae3
Compare
d9e79cc
to
52d4c5e
Compare
…l coroutine clones Summary: CoroCloner, by calling into CloneFunctionInto, does a lot of repeated work priming DIFinder and building a list of common module-level debug info metadata. For programs compiled with full debug info this can get very expensive. This diff builds the data once and shares it between all clones. Anecdata for a sample cpp source file compiled with full debug info: | | Baseline | IdentityMD set | Prebuilt CommonDI (cur.) | |-----------------|----------|----------------|--------------------------| | CoroSplitPass | 306ms | 221ms | 68ms | | CoroCloner | 101ms | 72ms | 0.5ms | | CollectGlobalDI | - | - | 63ms | | Speed up | 1x | 1.4x | 4.5x | Note that CollectCommonDebugInfo happens once *per coroutine* rather than per clone. Test Plan: ninja check-llvm-unit ninja check-llvm Compiled a sample internal source file, checked time trace output for scope timings. stack-info: PR: #118628, branch: users/artempyanykh/fast-coro-upstream/9
c613ae3
to
a49247f
Compare
a49247f
to
1554948
Compare
…l coroutine clones Summary: CoroCloner, by calling into CloneFunctionInto, does a lot of repeated work priming DIFinder and building a list of common module-level debug info metadata. For programs compiled with full debug info this can get very expensive. This diff builds the data once and shares it between all clones. Anecdata for a sample cpp source file compiled with full debug info: | | Baseline | IdentityMD set | Prebuilt CommonDI (cur.) | |-----------------|----------|----------------|--------------------------| | CoroSplitPass | 306ms | 221ms | 68ms | | CoroCloner | 101ms | 72ms | 0.5ms | | CollectGlobalDI | - | - | 63ms | | Speed up | 1x | 1.4x | 4.5x | Note that CollectCommonDebugInfo happens once *per coroutine* rather than per clone. Test Plan: ninja check-llvm-unit ninja check-llvm Compiled a sample internal source file, checked time trace output for scope timings. stack-info: PR: llvm/llvm-project#118628, branch: users/artempyanykh/fast-coro-upstream/9
1554948
to
63a4780
Compare
63a4780
to
83dea70
Compare
…l coroutine clones Summary: CoroCloner, by calling into CloneFunctionInto, does a lot of repeated work priming DIFinder and building a list of common module-level debug info metadata. For programs compiled with full debug info this can get very expensive. This diff builds the data once and shares it between all clones. Anecdata for a sample cpp source file compiled with full debug info: | | Baseline | IdentityMD set | Prebuilt CommonDI (cur.) | |-----------------|----------|----------------|--------------------------| | CoroSplitPass | 306ms | 221ms | 68ms | | CoroCloner | 101ms | 72ms | 0.5ms | | CollectGlobalDI | - | - | 63ms | | Speed up | 1x | 1.4x | 4.5x | Note that CollectCommonDebugInfo happens once *per coroutine* rather than per clone. Test Plan: ninja check-llvm-unit ninja check-llvm Compiled a sample internal source file, checked time trace output for scope timings. stack-info: PR: llvm/llvm-project#118628, branch: users/artempyanykh/fast-coro-upstream/9
…l coroutine clones Summary: CoroCloner, by calling into CloneFunctionInto, does a lot of repeated work priming DIFinder and building a list of common module-level debug info metadata. For programs compiled with full debug info this can get very expensive. This diff builds the data once and shares it between all clones. Anecdata for a sample cpp source file compiled with full debug info: | | Baseline | IdentityMD set | Prebuilt CommonDI (cur.) | |-----------------|----------|----------------|--------------------------| | CoroSplitPass | 306ms | 221ms | 68ms | | CoroCloner | 101ms | 72ms | 0.5ms | | CollectGlobalDI | - | - | 63ms | | Speed up | 1x | 1.4x | 4.5x | Note that CollectCommonDebugInfo happens once *per coroutine* rather than per clone. Test Plan: ninja check-llvm-unit ninja check-llvm Compiled a sample internal source file, checked time trace output for scope timings. stack-info: PR: #118628, branch: users/artempyanykh/fast-coro-upstream/9
83dea70
to
5bbda30
Compare
[Coro] Prebuild a module-level debug info set and share it between all coroutine clones
Summary: CoroCloner, by calling into CloneFunctionInto, does a lot of repeated work priming DIFinder and building a list of common module-level debug info metadata. For programs compiled with full debug info this can get very expensive. This diff builds the data once and shares it between all clones.
Anecdata for a sample cpp source file compiled with full debug info:
| | Baseline | IdentityMD set | Prebuilt CommonDI (cur.) |
|-----------------|----------|----------------|--------------------------|
| CoroSplitPass | 306ms | 221ms | 68ms |
| CoroCloner | 101ms | 72ms | 0.5ms |
| CollectCommonDI | - | - | 63ms |
| Speed up | 1x | 1.4x | 4.5x |
Note that CollectCommonDebugInfo happens once *per coroutine* rather than per clone.
Test Plan: ninja check-llvm-unit ninja check-llvm Compiled a sample internal source file, checked time trace output for scope timings.
stack-info: PR: #118628, branch: users/artempyanykh/fast-coro-upstream/9
5bbda30
to
3802460
Compare
Stacked PRs:
[Coro] Prebuild a module-level debug info set and share it between all coroutine clones
Summary:
CoroCloner, by calling into CloneFunctionInto, does a lot of repeated
work priming DIFinder and building a list of common module-level debug
info metadata. For programs compiled with full debug info this can get
very expensive.
This diff builds the data once and shares it between all clones.
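Anecdata for a sample cpp source file compiled with full debug info:
| | Baseline | IdentityMD set | Prebuilt CommonDI (cur.) |
|-----------------|----------|----------------|--------------------------|
| CoroSplitPass | 306ms | 221ms | 68ms |
| CoroCloner | 101ms | 72ms | 0.5ms |
| CollectCommonDI | - | - | 63ms |
| Speed up | 1x | 1.4x | 4.5x |
Note that CollectCommonDebugInfo happens once per coroutine rather than per clone.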
Test Plan:
ninja check-llvm-unit
ninja check-llvm
Compiled a sample internal source file, checked time trace output for scope timings.