Skip to content

Commit d9e79cc

Browse files
committed
[Utils] Identity map module-level debug info on first use in CloneFunction*
Summary: To avoid cloning module-level debug info (owned by the module rather than the function), CloneFunction implementation used to eagerly identity map such debug info into ValueMap's MD map. In larger modules with meaningful volume of debug info this gets very expensive. By passing such debug info metadata via an IdentityMD set for the ValueMapper to map on first use, we get several benefits: 1. Mapping metadata is not cheap, particularly because of tracking. When cloning a Function we identity map lots of global module-level metadata to avoid cloning it, while only a fraction of it is actually used by the function. Mapping on first use is a lot faster for modules with meaningful amount of debug info. 2. Eagerly identity mapping metadata makes it harder to cache module-level data (e.g. a set of metadata nodes in a \a DICompileUnit). With this patch we can cache certain module-level metadata calculations to speed things up further. Anecdata from compiling a sample cpp file with full debug info shows that this moderately speeds up CoroSplitPass which is one of the heavier users of cloning: | | Baseline | IdentityMD set | |-----------------|----------|----------------| | CoroSplitPass | 306ms | 221ms | | CoroCloner | 101ms | 72ms | | Speed up | 1x | 1.4x | Test Plan: ninja check-llvm-unit ninja check-llvm stack-info: PR: #118627, branch: users/artempyanykh/fast-coro-upstream/8
1 parent 8f32caf commit d9e79cc

File tree

4 files changed

+104
-61
lines changed

4 files changed

+104
-61
lines changed

llvm/include/llvm/Transforms/Utils/Cloning.h

+10-9
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,8 @@ void CloneFunctionAttributesInto(Function *NewFunc, const Function *OldFunc,
193193
void CloneFunctionMetadataInto(Function &NewFunc, const Function &OldFunc,
194194
ValueToValueMapTy &VMap, RemapFlags RemapFlag,
195195
ValueMapTypeRemapper *TypeMapper = nullptr,
196-
ValueMaterializer *Materializer = nullptr);
196+
ValueMaterializer *Materializer = nullptr,
197+
const MetadataSetTy *IdentityMD = nullptr);
197198

198199
/// Clone OldFunc's body into NewFunc.
199200
void CloneFunctionBodyInto(Function &NewFunc, const Function &OldFunc,
@@ -202,7 +203,8 @@ void CloneFunctionBodyInto(Function &NewFunc, const Function &OldFunc,
202203
const char *NameSuffix = "",
203204
ClonedCodeInfo *CodeInfo = nullptr,
204205
ValueMapTypeRemapper *TypeMapper = nullptr,
205-
ValueMaterializer *Materializer = nullptr);
206+
ValueMaterializer *Materializer = nullptr,
207+
const MetadataSetTy *IdentityMD = nullptr);
206208

207209
void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
208210
const Instruction *StartingInst,
@@ -242,13 +244,12 @@ DISubprogram *CollectDebugInfoForCloning(const Function &F,
242244
CloneFunctionChangeType Changes,
243245
DebugInfoFinder &DIFinder);
244246

245-
/// Build a map of debug info to use during Metadata cloning.
246-
/// Returns true if cloning would need module level changes and false if there
247-
/// would only be local changes.
248-
bool BuildDebugInfoMDMap(DenseMap<const Metadata *, TrackingMDRef> &MD,
249-
CloneFunctionChangeType Changes,
250-
DebugInfoFinder &DIFinder,
251-
DISubprogram *SPClonedWithinModule);
247+
/// Based on \p Changes and \p DIFinder populate \p MD with debug info that
248+
/// needs to be identity mapped during Metadata cloning.
249+
void FindDebugInfoToIdentityMap(MetadataSetTy &MD,
250+
CloneFunctionChangeType Changes,
251+
DebugInfoFinder &DIFinder,
252+
DISubprogram *SPClonedWithinModule);
252253

253254
/// This class captures the data input to the InlineFunction call, and records
254255
/// the auxiliary results produced by it.

llvm/include/llvm/Transforms/Utils/ValueMapper.h

+49-18
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#define LLVM_TRANSFORMS_UTILS_VALUEMAPPER_H
1616

1717
#include "llvm/ADT/ArrayRef.h"
18+
#include "llvm/ADT/SmallPtrSet.h"
1819
#include "llvm/ADT/simple_ilist.h"
1920
#include "llvm/IR/ValueHandle.h"
2021
#include "llvm/IR/ValueMap.h"
@@ -35,6 +36,7 @@ class Value;
3536

3637
using ValueToValueMapTy = ValueMap<const Value *, WeakTrackingVH>;
3738
using DbgRecordIterator = simple_ilist<DbgRecord>::iterator;
39+
using MetadataSetTy = SmallPtrSet<const Metadata *, 16>;
3840

3941
/// This is a class that can be implemented by clients to remap types when
4042
/// cloning constants and instructions.
@@ -136,6 +138,18 @@ inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) {
136138
/// alternate \a ValueToValueMapTy and \a ValueMaterializer and returns a ID to
137139
/// pass into the schedule*() functions.
138140
///
141+
/// NOTE: \c IdentityMD is used by CloneFunction* to directly specify metadata
142+
/// that should be identity mapped (and hence not cloned). The metadata will be
143+
/// identity mapped in \c VM on first use. There are several reasons for doing
144+
/// it this way rather than eagerly identity mapping metadata nodes in \c VM:
145+
/// 1. Mapping metadata is not cheap, particularly because of tracking.
146+
/// 2. When cloning a Function we identity map lots of global module-level
147+
/// metadata to avoid cloning it, while only a fraction of it is actually
148+
/// used by the function. Mapping on first use is a lot faster for modules
149+
/// with meaningful amount of debug info.
150+
/// 3. Eagerly identity mapping metadata makes it harder to cache module-level
151+
/// data (e.g. a set of metadata nodes in a \a DICompileUnit).
152+
///
139153
/// TODO: lib/Linker really doesn't need the \a ValueHandle in the \a
140154
/// ValueToValueMapTy. We should template \a ValueMapper (and its
141155
/// implementation classes), and explicitly instantiate on two concrete
@@ -152,7 +166,8 @@ class ValueMapper {
152166
public:
153167
ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags = RF_None,
154168
ValueMapTypeRemapper *TypeMapper = nullptr,
155-
ValueMaterializer *Materializer = nullptr);
169+
ValueMaterializer *Materializer = nullptr,
170+
const MetadataSetTy *IdentityMD = nullptr);
156171
ValueMapper(ValueMapper &&) = delete;
157172
ValueMapper(const ValueMapper &) = delete;
158173
ValueMapper &operator=(ValueMapper &&) = delete;
@@ -218,8 +233,10 @@ class ValueMapper {
218233
inline Value *MapValue(const Value *V, ValueToValueMapTy &VM,
219234
RemapFlags Flags = RF_None,
220235
ValueMapTypeRemapper *TypeMapper = nullptr,
221-
ValueMaterializer *Materializer = nullptr) {
222-
return ValueMapper(VM, Flags, TypeMapper, Materializer).mapValue(*V);
236+
ValueMaterializer *Materializer = nullptr,
237+
const MetadataSetTy *IdentityMD = nullptr) {
238+
return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
239+
.mapValue(*V);
223240
}
224241

225242
/// Lookup or compute a mapping for a piece of metadata.
@@ -231,7 +248,9 @@ inline Value *MapValue(const Value *V, ValueToValueMapTy &VM,
231248
/// \c MD.
232249
/// 3. Else if \c MD is a \a ConstantAsMetadata, call \a MapValue() and
233250
/// re-wrap its return (returning nullptr on nullptr).
234-
/// 4. Else, \c MD is an \a MDNode. These are remapped, along with their
251+
/// 4. Else if \c MD is in \c IdentityMD then add an identity mapping for it
252+
/// and return it.
253+
/// 5. Else, \c MD is an \a MDNode. These are remapped, along with their
235254
/// transitive operands. Distinct nodes are duplicated or moved depending
236255
/// on \a RF_MoveDistinctNodes. Uniqued nodes are remapped like constants.
237256
///
@@ -240,16 +259,20 @@ inline Value *MapValue(const Value *V, ValueToValueMapTy &VM,
240259
inline Metadata *MapMetadata(const Metadata *MD, ValueToValueMapTy &VM,
241260
RemapFlags Flags = RF_None,
242261
ValueMapTypeRemapper *TypeMapper = nullptr,
243-
ValueMaterializer *Materializer = nullptr) {
244-
return ValueMapper(VM, Flags, TypeMapper, Materializer).mapMetadata(*MD);
262+
ValueMaterializer *Materializer = nullptr,
263+
const MetadataSetTy *IdentityMD = nullptr) {
264+
return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
265+
.mapMetadata(*MD);
245266
}
246267

247268
/// Version of MapMetadata with type safety for MDNode.
248269
inline MDNode *MapMetadata(const MDNode *MD, ValueToValueMapTy &VM,
249270
RemapFlags Flags = RF_None,
250271
ValueMapTypeRemapper *TypeMapper = nullptr,
251-
ValueMaterializer *Materializer = nullptr) {
252-
return ValueMapper(VM, Flags, TypeMapper, Materializer).mapMDNode(*MD);
272+
ValueMaterializer *Materializer = nullptr,
273+
const MetadataSetTy *IdentityMD = nullptr) {
274+
return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
275+
.mapMDNode(*MD);
253276
}
254277

255278
/// Convert the instruction operands from referencing the current values into
@@ -263,17 +286,21 @@ inline MDNode *MapMetadata(const MDNode *MD, ValueToValueMapTy &VM,
263286
inline void RemapInstruction(Instruction *I, ValueToValueMapTy &VM,
264287
RemapFlags Flags = RF_None,
265288
ValueMapTypeRemapper *TypeMapper = nullptr,
266-
ValueMaterializer *Materializer = nullptr) {
267-
ValueMapper(VM, Flags, TypeMapper, Materializer).remapInstruction(*I);
289+
ValueMaterializer *Materializer = nullptr,
290+
const MetadataSetTy *IdentityMD = nullptr) {
291+
ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
292+
.remapInstruction(*I);
268293
}
269294

270295
/// Remap the Values used in the DbgRecord \a DR using the value map \a
271296
/// VM.
272297
inline void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM,
273298
RemapFlags Flags = RF_None,
274299
ValueMapTypeRemapper *TypeMapper = nullptr,
275-
ValueMaterializer *Materializer = nullptr) {
276-
ValueMapper(VM, Flags, TypeMapper, Materializer).remapDbgRecord(M, *DR);
300+
ValueMaterializer *Materializer = nullptr,
301+
const MetadataSetTy *IdentityMD = nullptr) {
302+
ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
303+
.remapDbgRecord(M, *DR);
277304
}
278305

279306
/// Remap the Values used in the DbgRecords \a Range using the value map \a
@@ -283,8 +310,9 @@ inline void RemapDbgRecordRange(Module *M,
283310
ValueToValueMapTy &VM,
284311
RemapFlags Flags = RF_None,
285312
ValueMapTypeRemapper *TypeMapper = nullptr,
286-
ValueMaterializer *Materializer = nullptr) {
287-
ValueMapper(VM, Flags, TypeMapper, Materializer)
313+
ValueMaterializer *Materializer = nullptr,
314+
const MetadataSetTy *IdentityMD = nullptr) {
315+
ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
288316
.remapDbgRecordRange(M, Range);
289317
}
290318

@@ -297,16 +325,19 @@ inline void RemapDbgRecordRange(Module *M,
297325
inline void RemapFunction(Function &F, ValueToValueMapTy &VM,
298326
RemapFlags Flags = RF_None,
299327
ValueMapTypeRemapper *TypeMapper = nullptr,
300-
ValueMaterializer *Materializer = nullptr) {
301-
ValueMapper(VM, Flags, TypeMapper, Materializer).remapFunction(F);
328+
ValueMaterializer *Materializer = nullptr,
329+
const MetadataSetTy *IdentityMD = nullptr) {
330+
ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD).remapFunction(F);
302331
}
303332

304333
/// Version of MapValue with type safety for Constant.
305334
inline Constant *MapValue(const Constant *V, ValueToValueMapTy &VM,
306335
RemapFlags Flags = RF_None,
307336
ValueMapTypeRemapper *TypeMapper = nullptr,
308-
ValueMaterializer *Materializer = nullptr) {
309-
return ValueMapper(VM, Flags, TypeMapper, Materializer).mapConstant(*V);
337+
ValueMaterializer *Materializer = nullptr,
338+
const MetadataSetTy *IdentityMD = nullptr) {
339+
return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
340+
.mapConstant(*V);
310341
}
311342

312343
} // end namespace llvm

llvm/lib/Transforms/Utils/CloneFunction.cpp

+30-30
Original file line numberDiff line numberDiff line change
@@ -152,61 +152,54 @@ DISubprogram *llvm::CollectDebugInfoForCloning(const Function &F,
152152
return SPClonedWithinModule;
153153
}
154154

155-
bool llvm::BuildDebugInfoMDMap(DenseMap<const Metadata *, TrackingMDRef> &MD,
156-
CloneFunctionChangeType Changes,
157-
DebugInfoFinder &DIFinder,
158-
DISubprogram *SPClonedWithinModule) {
159-
bool ModuleLevelChanges = Changes > CloneFunctionChangeType::LocalChangesOnly;
155+
void llvm::FindDebugInfoToIdentityMap(MetadataSetTy &MD,
156+
CloneFunctionChangeType Changes,
157+
DebugInfoFinder &DIFinder,
158+
DISubprogram *SPClonedWithinModule) {
160159
if (Changes < CloneFunctionChangeType::DifferentModule &&
161160
DIFinder.subprogram_count() > 0) {
162-
// Turn on module-level changes, since we need to clone (some of) the
163-
// debug info metadata.
161+
// Even if Changes are local only, we turn on module-level changes, since we
162+
// need to clone (some of) the debug info metadata.
164163
//
165164
// FIXME: Metadata effectively owned by a function should be made
166165
// local, and only that local metadata should be cloned.
167-
ModuleLevelChanges = true;
168-
169-
auto mapToSelfIfNew = [&MD](MDNode *N) {
170-
// Avoid clobbering an existing mapping.
171-
(void)MD.try_emplace(N, N);
172-
};
173166

174167
// Avoid cloning types, compile units, and (other) subprograms.
175168
for (DISubprogram *ISP : DIFinder.subprograms()) {
176169
if (ISP != SPClonedWithinModule)
177-
mapToSelfIfNew(ISP);
170+
MD.insert(ISP);
178171
}
179172

180173
// If a subprogram isn't going to be cloned skip its lexical blocks as well.
181174
for (DIScope *S : DIFinder.scopes()) {
182175
auto *LScope = dyn_cast<DILocalScope>(S);
183176
if (LScope && LScope->getSubprogram() != SPClonedWithinModule)
184-
mapToSelfIfNew(S);
177+
MD.insert(S);
185178
}
186179

187180
for (DICompileUnit *CU : DIFinder.compile_units())
188-
mapToSelfIfNew(CU);
181+
MD.insert(CU);
189182

190183
for (DIType *Type : DIFinder.types())
191-
mapToSelfIfNew(Type);
184+
MD.insert(Type);
192185
} else {
193186
assert(!SPClonedWithinModule &&
194187
"Subprogram should be in DIFinder->subprogram_count()...");
195188
}
196-
197-
return ModuleLevelChanges;
198189
}
199190

200191
void llvm::CloneFunctionMetadataInto(Function &NewFunc, const Function &OldFunc,
201192
ValueToValueMapTy &VMap,
202193
RemapFlags RemapFlag,
203194
ValueMapTypeRemapper *TypeMapper,
204-
ValueMaterializer *Materializer) {
195+
ValueMaterializer *Materializer,
196+
const MetadataSetTy *IdentityMD) {
205197
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
206198
OldFunc.getAllMetadata(MDs);
207199
for (auto MD : MDs) {
208-
NewFunc.addMetadata(MD.first, *MapMetadata(MD.second, VMap, RemapFlag,
209-
TypeMapper, Materializer));
200+
NewFunc.addMetadata(MD.first,
201+
*MapMetadata(MD.second, VMap, RemapFlag, TypeMapper,
202+
Materializer, IdentityMD));
210203
}
211204
}
212205

@@ -216,7 +209,8 @@ void llvm::CloneFunctionBodyInto(Function &NewFunc, const Function &OldFunc,
216209
const char *NameSuffix,
217210
ClonedCodeInfo *CodeInfo,
218211
ValueMapTypeRemapper *TypeMapper,
219-
ValueMaterializer *Materializer) {
212+
ValueMaterializer *Materializer,
213+
const MetadataSetTy *IdentityMD) {
220214
if (OldFunc.isDeclaration())
221215
return;
222216

@@ -258,9 +252,10 @@ void llvm::CloneFunctionBodyInto(Function &NewFunc, const Function &OldFunc,
258252
// Loop over all instructions, fixing each one as we find it, and any
259253
// attached debug-info records.
260254
for (Instruction &II : *BB) {
261-
RemapInstruction(&II, VMap, RemapFlag, TypeMapper, Materializer);
255+
RemapInstruction(&II, VMap, RemapFlag, TypeMapper, Materializer,
256+
IdentityMD);
262257
RemapDbgRecordRange(II.getModule(), II.getDbgRecordRange(), VMap,
263-
RemapFlag, TypeMapper, Materializer);
258+
RemapFlag, TypeMapper, Materializer, IdentityMD);
264259
}
265260
}
266261

@@ -322,16 +317,21 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
322317
DISubprogram *SPClonedWithinModule =
323318
CollectDebugInfoForCloning(*OldFunc, Changes, DIFinder);
324319

325-
ModuleLevelChanges =
326-
BuildDebugInfoMDMap(VMap.MD(), Changes, DIFinder, SPClonedWithinModule);
320+
MetadataSetTy IdentityMD;
321+
FindDebugInfoToIdentityMap(IdentityMD, Changes, DIFinder,
322+
SPClonedWithinModule);
327323

328-
const auto RemapFlag = ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges;
324+
// Current implementation always upgrades from local changes to module level
325+
// changes due to the way metadata cloning is done. See
326+
// BuildDebugInfoToIdentityMap for more details.
327+
const auto RemapFlag = RF_None;
329328

330329
CloneFunctionMetadataInto(*NewFunc, *OldFunc, VMap, RemapFlag, TypeMapper,
331-
Materializer);
330+
Materializer, &IdentityMD);
332331

333332
CloneFunctionBodyInto(*NewFunc, *OldFunc, VMap, RemapFlag, Returns,
334-
NameSuffix, CodeInfo, TypeMapper, Materializer);
333+
NameSuffix, CodeInfo, TypeMapper, Materializer,
334+
&IdentityMD);
335335

336336
// Only update !llvm.dbg.cu for DifferentModule (not CloneModule). In the
337337
// same module, the compile unit will already be listed (or not). When

llvm/lib/Transforms/Utils/ValueMapper.cpp

+15-4
Original file line numberDiff line numberDiff line change
@@ -120,12 +120,14 @@ class Mapper {
120120
SmallVector<WorklistEntry, 4> Worklist;
121121
SmallVector<DelayedBasicBlock, 1> DelayedBBs;
122122
SmallVector<Constant *, 16> AppendingInits;
123+
const MetadataSetTy *IdentityMD;
123124

124125
public:
125126
Mapper(ValueToValueMapTy &VM, RemapFlags Flags,
126-
ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer)
127+
ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer,
128+
const MetadataSetTy *IdentityMD)
127129
: Flags(Flags), TypeMapper(TypeMapper),
128-
MCs(1, MappingContext(VM, Materializer)) {}
130+
MCs(1, MappingContext(VM, Materializer)), IdentityMD(IdentityMD) {}
129131

130132
/// ValueMapper should explicitly call \a flush() before destruction.
131133
~Mapper() { assert(!hasWorkToDo() && "Expected to be flushed"); }
@@ -899,6 +901,14 @@ std::optional<Metadata *> Mapper::mapSimpleMetadata(const Metadata *MD) {
899901
return wrapConstantAsMetadata(*CMD, mapValue(CMD->getValue()));
900902
}
901903

904+
// Map metadata from IdentityMD on first use. We need to add these nodes to
905+
// the mapping as otherwise metadata nodes numbering gets messed up. This is
906+
// still economical because the amount of data in IdentityMD may be a lot
907+
// larger than what will actually get used.
908+
if (IdentityMD && IdentityMD->contains(MD)) {
909+
return getVM().MD()[MD] = TrackingMDRef(const_cast<Metadata *>(MD));
910+
}
911+
902912
assert(isa<MDNode>(MD) && "Expected a metadata node");
903913

904914
return std::nullopt;
@@ -1198,8 +1208,9 @@ class FlushingMapper {
11981208

11991209
ValueMapper::ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags,
12001210
ValueMapTypeRemapper *TypeMapper,
1201-
ValueMaterializer *Materializer)
1202-
: pImpl(new Mapper(VM, Flags, TypeMapper, Materializer)) {}
1211+
ValueMaterializer *Materializer,
1212+
const MetadataSetTy *IdentityMD)
1213+
: pImpl(new Mapper(VM, Flags, TypeMapper, Materializer, IdentityMD)) {}
12031214

12041215
ValueMapper::~ValueMapper() { delete getAsMapper(pImpl); }
12051216

0 commit comments

Comments
 (0)