Skip to content

Commit d89914f

Browse files
[memprof] Add Version2 of IndexedMemProfRecord serialization (#87455)
I'm currently developing a new version of the indexed memprof format where we deduplicate call stacks in IndexedAllocationInfo::CallStack and IndexedMemProfRecord::CallSites. We refer to call stacks with integer IDs, namely CallStackId, just as we refer to Frame with FrameId. The deduplication will cut down the profile file size by 80% in a large memprof file of mine.

As a step toward the goal, this patch teaches IndexedMemProfRecord::{serialize,deserialize} to speak Version2. A subsequent patch will add Version2 support to llvm-profdata.

The essence of the patch is to replace the serialization of a call stack, a vector of FrameIDs, with that of a CallStackId. That is:

    const IndexedAllocationInfo &N = ...;
    ...
    LE.write<uint64_t>(N.CallStack.size());
    for (const FrameId &Id : N.CallStack)
      LE.write<FrameId>(Id);

becomes:

    LE.write<CallStackId>(N.CSId);
1 parent 3a7b522 commit d89914f

File tree

5 files changed

+226
-54
lines changed

5 files changed

+226
-54
lines changed

llvm/include/llvm/ProfileData/MemProf.h

Lines changed: 22 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ enum IndexedVersion : uint64_t {
2222
Version0 = 0,
2323
// Version 1: Added a version field to the header.
2424
Version1 = 1,
25+
// Version 2: Added a call stack table. Under development.
26+
Version2 = 2,
2527
};
2628

2729
constexpr uint64_t MinimumSupportedVersion = Version0;
@@ -289,23 +291,14 @@ struct IndexedAllocationInfo {
289291
: CallStack(CS.begin(), CS.end()), CSId(CSId), Info(MB) {}
290292

291293
// Returns the size in bytes when this allocation info struct is serialized.
292-
size_t serializedSize() const {
293-
return sizeof(uint64_t) + // The number of frames to serialize.
294-
sizeof(FrameId) * CallStack.size() + // The callstack frame ids.
295-
PortableMemInfoBlock::serializedSize(); // The size of the payload.
296-
}
294+
size_t serializedSize(IndexedVersion Version) const;
297295

298296
bool operator==(const IndexedAllocationInfo &Other) const {
299297
if (Other.Info != Info)
300298
return false;
301299

302-
if (Other.CallStack.size() != CallStack.size())
300+
if (Other.CSId != CSId)
303301
return false;
304-
305-
for (size_t J = 0; J < Other.CallStack.size(); J++) {
306-
if (Other.CallStack[J] != CallStack[J])
307-
return false;
308-
}
309302
return true;
310303
}
311304

@@ -357,6 +350,9 @@ struct IndexedMemProfRecord {
357350
// inline location list may include additional entries, users should pick
358351
// the last entry in the list with the same function GUID.
359352
llvm::SmallVector<llvm::SmallVector<FrameId>> CallSites;
353+
// Conceptually the same as above. We are going to keep both CallSites and
354+
// CallSiteIds while we are transitioning from CallSites to CallSiteIds.
355+
llvm::SmallVector<CallStackId> CallSiteIds;
360356

361357
void clear() {
362358
AllocSites.clear();
@@ -370,47 +366,31 @@ struct IndexedMemProfRecord {
370366
CallSites.append(Other.CallSites);
371367
}
372368

373-
size_t serializedSize() const {
374-
size_t Result = sizeof(GlobalValue::GUID);
375-
for (const IndexedAllocationInfo &N : AllocSites)
376-
Result += N.serializedSize();
377-
378-
// The number of callsites we have information for.
379-
Result += sizeof(uint64_t);
380-
for (const auto &Frames : CallSites) {
381-
// The number of frame ids to serialize.
382-
Result += sizeof(uint64_t);
383-
Result += Frames.size() * sizeof(FrameId);
384-
}
385-
return Result;
386-
}
369+
size_t serializedSize(IndexedVersion Version) const;
387370

388371
bool operator==(const IndexedMemProfRecord &Other) const {
389372
if (Other.AllocSites.size() != AllocSites.size())
390373
return false;
391374

392-
if (Other.CallSites.size() != CallSites.size())
393-
return false;
394-
395375
for (size_t I = 0; I < AllocSites.size(); I++) {
396376
if (AllocSites[I] != Other.AllocSites[I])
397377
return false;
398378
}
399379

400-
for (size_t I = 0; I < CallSites.size(); I++) {
401-
if (CallSites[I] != Other.CallSites[I])
402-
return false;
403-
}
380+
if (Other.CallSiteIds != CallSiteIds)
381+
return false;
404382
return true;
405383
}
406384

407385
// Serializes the memprof records in \p Records to the ostream \p OS based
408386
// on the schema provided in \p Schema.
409-
void serialize(const MemProfSchema &Schema, raw_ostream &OS);
387+
void serialize(const MemProfSchema &Schema, raw_ostream &OS,
388+
IndexedVersion Version);
410389

411390
// Deserializes memprof records from the Buffer.
412391
static IndexedMemProfRecord deserialize(const MemProfSchema &Schema,
413-
const unsigned char *Buffer);
392+
const unsigned char *Buffer,
393+
IndexedVersion Version);
414394

415395
// Returns the GUID for the function name after canonicalization. For
416396
// memprof, we remove any .llvm suffix added by LTO. MemProfRecords are
@@ -480,7 +460,8 @@ class RecordLookupTrait {
480460
using offset_type = uint64_t;
481461

482462
RecordLookupTrait() = delete;
483-
RecordLookupTrait(const MemProfSchema &S) : Schema(S) {}
463+
RecordLookupTrait(IndexedVersion V, const MemProfSchema &S)
464+
: Version(V), Schema(S) {}
484465

485466
static bool EqualKey(uint64_t A, uint64_t B) { return A == B; }
486467
static uint64_t GetInternalKey(uint64_t K) { return K; }
@@ -507,19 +488,21 @@ class RecordLookupTrait {
507488

508489
data_type ReadData(uint64_t K, const unsigned char *D,
509490
offset_type /*Unused*/) {
510-
Record = IndexedMemProfRecord::deserialize(Schema, D);
491+
Record = IndexedMemProfRecord::deserialize(Schema, D, Version);
511492
return Record;
512493
}
513494

514495
private:
496+
// Holds the MemProf version.
497+
IndexedVersion Version;
515498
// Holds the memprof schema used to deserialize records.
516499
MemProfSchema Schema;
517500
// Holds the records from one function deserialized from the indexed format.
518501
IndexedMemProfRecord Record;
519502
};
520503

521504
// Trait for writing IndexedMemProfRecord data to the on-disk hash table.
522-
class RecordWriterTrait {
505+
template <IndexedVersion Version> class RecordWriterTrait {
523506
public:
524507
using key_type = uint64_t;
525508
using key_type_ref = uint64_t;
@@ -546,7 +529,7 @@ class RecordWriterTrait {
546529
endian::Writer LE(Out, llvm::endianness::little);
547530
offset_type N = sizeof(K);
548531
LE.write<offset_type>(N);
549-
offset_type M = V.serializedSize();
532+
offset_type M = V.serializedSize(Version);
550533
LE.write<offset_type>(M);
551534
return std::make_pair(N, M);
552535
}
@@ -560,7 +543,7 @@ class RecordWriterTrait {
560543
void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
561544
offset_type /*Unused*/) {
562545
assert(Schema != nullptr && "MemProf schema is not initialized!");
563-
V.serialize(*Schema, Out);
546+
V.serialize(*Schema, Out, Version);
564547
// Clear the IndexedMemProfRecord which results in clearing/freeing its
565548
// vectors of allocs and callsites. This is owned by the associated on-disk
566549
// hash table, but unused after this point. See also the comment added to

llvm/lib/ProfileData/InstrProfReader.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1303,7 +1303,7 @@ Error IndexedInstrProfReader::readHeader() {
13031303
MemProfRecordTable.reset(MemProfRecordHashTable::Create(
13041304
/*Buckets=*/Start + RecordTableOffset,
13051305
/*Payload=*/Ptr,
1306-
/*Base=*/Start, memprof::RecordLookupTrait(Schema)));
1306+
/*Base=*/Start, memprof::RecordLookupTrait(memprof::Version1, Schema)));
13071307

13081308
// Initialize the frame table reader with the payload and bucket offsets.
13091309
MemProfFrameTable.reset(MemProfFrameHashTable::Create(

llvm/lib/ProfileData/InstrProfWriter.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -557,9 +557,11 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
557557
OS.write(static_cast<uint64_t>(Id));
558558
}
559559

560-
auto RecordWriter = std::make_unique<memprof::RecordWriterTrait>();
560+
auto RecordWriter =
561+
std::make_unique<memprof::RecordWriterTrait<memprof::Version1>>();
561562
RecordWriter->Schema = &Schema;
562-
OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
563+
OnDiskChainedHashTableGenerator<
564+
memprof::RecordWriterTrait<memprof::Version1>>
563565
RecordTableGenerator;
564566
for (auto &I : MemProfRecordData) {
565567
// Insert the key (func hash) and value (memprof record).

llvm/lib/ProfileData/MemProf.cpp

Lines changed: 161 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,33 +10,139 @@
1010

1111
namespace llvm {
1212
namespace memprof {
13+
namespace {
14+
// Returns the number of bytes one IndexedAllocationInfo occupies in the
// layout used for Version0 and Version1 (IndexedAllocationInfo::serializedSize
// routes both versions here). The total is a uint64_t frame count, one FrameId
// per element of IAI.CallStack, and the PortableMemInfoBlock payload size, so
// the result grows with the length of the call stack.
size_t serializedSizeV0(const IndexedAllocationInfo &IAI) {
15+
size_t Size = 0;
16+
// The number of frames to serialize.
17+
Size += sizeof(uint64_t);
18+
// The callstack frame ids.
19+
Size += sizeof(FrameId) * IAI.CallStack.size();
20+
// The size of the payload.
21+
Size += PortableMemInfoBlock::serializedSize();
22+
return Size;
23+
}
1324

14-
void IndexedMemProfRecord::serialize(const MemProfSchema &Schema,
15-
raw_ostream &OS) {
25+
// Returns the number of bytes one IndexedAllocationInfo occupies in the
// Version2 layout: a single CallStackId plus the PortableMemInfoBlock payload
// size. IAI is not read here, so the result does not depend on the contents
// of the allocation info (in particular not on the call stack length).
size_t serializedSizeV2(const IndexedAllocationInfo &IAI) {
26+
size_t Size = 0;
27+
// The CallStackId
28+
Size += sizeof(CallStackId);
29+
// The size of the payload.
30+
Size += PortableMemInfoBlock::serializedSize();
31+
return Size;
32+
}
33+
} // namespace
34+
35+
// Returns the serialized size in bytes of this allocation info for the given
// format Version. Version0 and Version1 share the same computation
// (serializedSizeV0); Version2 uses serializedSizeV2.
size_t IndexedAllocationInfo::serializedSize(IndexedVersion Version) const {
36+
switch (Version) {
37+
case Version0:
38+
case Version1:
39+
return serializedSizeV0(*this);
40+
case Version2:
41+
return serializedSizeV2(*this);
42+
}
43+
// Only reached when Version matches none of the cases above.
llvm_unreachable("unsupported MemProf version");
44+
}
45+
46+
namespace {
47+
// Returns the number of bytes a whole IndexedMemProfRecord occupies in the
// layout used for Version0 and Version1: a leading 8-byte-sized field, the
// Version0 size of every entry in Record.AllocSites, a uint64_t call-site
// count, and for each entry of Record.CallSites a uint64_t frame count
// followed by its FrameIds.
size_t serializedSizeV0(const IndexedMemProfRecord &Record) {
48+
// NOTE(review): the first field serializeV0 writes is the uint64_t number
// of alloc sites, not a GUID; this initial term presumably stands for that
// count and matches only if sizeof(GlobalValue::GUID) == sizeof(uint64_t)
// -- confirm.
size_t Result = sizeof(GlobalValue::GUID);
49+
for (const IndexedAllocationInfo &N : Record.AllocSites)
50+
Result += N.serializedSize(Version0);
51+
52+
// The number of callsites we have information for.
53+
Result += sizeof(uint64_t);
54+
for (const auto &Frames : Record.CallSites) {
55+
// The number of frame ids to serialize.
56+
Result += sizeof(uint64_t);
57+
Result += Frames.size() * sizeof(FrameId);
58+
}
59+
return Result;
60+
}
61+
62+
// Returns the number of bytes a whole IndexedMemProfRecord occupies in the
// Version2 layout: a leading 8-byte-sized field, the Version2 size of every
// entry in Record.AllocSites, a uint64_t call-site count, and one CallStackId
// per entry of Record.CallSiteIds. Record.CallSites is not consulted.
size_t serializedSizeV2(const IndexedMemProfRecord &Record) {
63+
// NOTE(review): serializeV2 starts by writing the uint64_t number of alloc
// sites; this initial term presumably stands for that count and matches only
// if sizeof(GlobalValue::GUID) == sizeof(uint64_t) -- confirm.
size_t Result = sizeof(GlobalValue::GUID);
64+
for (const IndexedAllocationInfo &N : Record.AllocSites)
65+
Result += N.serializedSize(Version2);
66+
67+
// The number of callsites we have information for.
68+
Result += sizeof(uint64_t);
69+
// The CallStackId
70+
Result += Record.CallSiteIds.size() * sizeof(CallStackId);
71+
return Result;
72+
}
73+
} // namespace
74+
75+
// Returns the serialized size in bytes of this record for the given format
// Version. Version0 and Version1 share the same computation
// (serializedSizeV0); Version2 uses serializedSizeV2.
size_t IndexedMemProfRecord::serializedSize(IndexedVersion Version) const {
76+
switch (Version) {
77+
case Version0:
78+
case Version1:
79+
return serializedSizeV0(*this);
80+
case Version2:
81+
return serializedSizeV2(*this);
82+
}
83+
// Only reached when Version matches none of the cases above.
llvm_unreachable("unsupported MemProf version");
84+
}
85+
86+
namespace {
87+
void serializeV0(const IndexedMemProfRecord &Record,
88+
const MemProfSchema &Schema, raw_ostream &OS) {
1689
using namespace support;
1790

1891
endian::Writer LE(OS, llvm::endianness::little);
1992

20-
LE.write<uint64_t>(AllocSites.size());
21-
for (const IndexedAllocationInfo &N : AllocSites) {
93+
LE.write<uint64_t>(Record.AllocSites.size());
94+
for (const IndexedAllocationInfo &N : Record.AllocSites) {
2295
LE.write<uint64_t>(N.CallStack.size());
2396
for (const FrameId &Id : N.CallStack)
2497
LE.write<FrameId>(Id);
2598
N.Info.serialize(Schema, OS);
2699
}
27100

28101
// Related contexts.
29-
LE.write<uint64_t>(CallSites.size());
30-
for (const auto &Frames : CallSites) {
102+
LE.write<uint64_t>(Record.CallSites.size());
103+
for (const auto &Frames : Record.CallSites) {
31104
LE.write<uint64_t>(Frames.size());
32105
for (const FrameId &Id : Frames)
33106
LE.write<FrameId>(Id);
34107
}
35108
}
36109

37-
IndexedMemProfRecord
38-
IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
39-
const unsigned char *Ptr) {
110+
// Writes Record to OS in the Version2 layout, using a little-endian writer
// for the integer fields. The emitted sequence is:
//   1. uint64_t  number of entries in Record.AllocSites
//   2. for each alloc site: its CallStackId (N.CSId), then the payload
//      produced by N.Info.serialize(Schema, OS)
//   3. uint64_t  number of entries in Record.CallSiteIds
//   4. one CallStackId per entry of Record.CallSiteIds
// The frame vectors (N.CallStack, Record.CallSites) are not written here;
// only the call stack IDs are.
void serializeV2(const IndexedMemProfRecord &Record,
111+
const MemProfSchema &Schema, raw_ostream &OS) {
112+
using namespace support;
113+
114+
endian::Writer LE(OS, llvm::endianness::little);
115+
116+
LE.write<uint64_t>(Record.AllocSites.size());
117+
for (const IndexedAllocationInfo &N : Record.AllocSites) {
118+
LE.write<CallStackId>(N.CSId);
119+
N.Info.serialize(Schema, OS);
120+
}
121+
122+
// Related contexts.
123+
LE.write<uint64_t>(Record.CallSiteIds.size());
124+
for (const auto &CSId : Record.CallSiteIds)
125+
LE.write<CallStackId>(CSId);
126+
}
127+
} // namespace
128+
129+
// Serializes this record to OS in the on-disk layout selected by Version.
// Version0 and Version1 share one layout (serializeV0); Version2 uses
// serializeV2. Schema is forwarded unchanged to the selected helper.
void IndexedMemProfRecord::serialize(const MemProfSchema &Schema,
130+
raw_ostream &OS, IndexedVersion Version) {
131+
switch (Version) {
132+
case Version0:
133+
case Version1:
134+
serializeV0(*this, Schema, OS);
135+
return;
136+
case Version2:
137+
serializeV2(*this, Schema, OS);
138+
return;
139+
}
140+
// Only reached when Version matches none of the cases above.
llvm_unreachable("unsupported MemProf version");
141+
}
142+
143+
namespace {
144+
IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema,
145+
const unsigned char *Ptr) {
40146
using namespace support;
41147

42148
IndexedMemProfRecord Record;
@@ -73,11 +179,57 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
73179
Frames.push_back(Id);
74180
}
75181
Record.CallSites.push_back(Frames);
182+
Record.CallSiteIds.push_back(hashCallStack(Frames));
76183
}
77184

78185
return Record;
79186
}
80187

188+
// Reads one IndexedMemProfRecord in the Version2 layout from the bytes at
// Ptr and returns it by value. The expected sequence is a uint64_t alloc-site
// count, then per alloc site a CallStackId and a PortableMemInfoBlock
// payload, then a uint64_t call-site count followed by that many
// CallStackIds. All integers are read as little-endian, unaligned values.
//
// Only Node.CSId, Node.Info and Record.CallSiteIds are populated here; the
// frame vectors (Node.CallStack, Record.CallSites) are left as default
// constructed. The function takes no buffer length, so the element counts
// read from the buffer are used as-is.
IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema,
189+
const unsigned char *Ptr) {
190+
using namespace support;
191+
192+
IndexedMemProfRecord Record;
193+
194+
// Read the meminfo nodes.
195+
const uint64_t NumNodes =
196+
endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
197+
for (uint64_t I = 0; I < NumNodes; I++) {
198+
IndexedAllocationInfo Node;
199+
Node.CSId =
200+
endian::readNext<CallStackId, llvm::endianness::little, unaligned>(Ptr);
201+
Node.Info.deserialize(Schema, Ptr);
202+
// Ptr is stepped past the payload by hand; presumably Info.deserialize
// does not advance Ptr itself -- confirm against PortableMemInfoBlock.
Ptr += PortableMemInfoBlock::serializedSize();
203+
Record.AllocSites.push_back(Node);
204+
}
205+
206+
// Read the callsite information.
207+
const uint64_t NumCtxs =
208+
endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
209+
for (uint64_t J = 0; J < NumCtxs; J++) {
210+
CallStackId CSId =
211+
endian::readNext<CallStackId, llvm::endianness::little, unaligned>(Ptr);
212+
Record.CallSiteIds.push_back(CSId);
213+
}
214+
215+
return Record;
216+
}
217+
} // namespace
218+
219+
// Deserializes one record from the bytes at Ptr using the on-disk layout
// selected by Version. Version0 and Version1 share one layout
// (deserializeV0); Version2 uses deserializeV2. Schema and Ptr are forwarded
// unchanged to the selected helper, and its result is returned by value.
IndexedMemProfRecord
220+
IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
221+
const unsigned char *Ptr,
222+
IndexedVersion Version) {
223+
switch (Version) {
224+
case Version0:
225+
case Version1:
226+
return deserializeV0(Schema, Ptr);
227+
case Version2:
228+
return deserializeV2(Schema, Ptr);
229+
}
230+
// Only reached when Version matches none of the cases above.
llvm_unreachable("unsupported MemProf version");
231+
}
232+
81233
GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) {
82234
// Canonicalize the function name to drop suffixes such as ".llvm.". Note
83235
// we do not drop any ".__uniq." suffixes, as getCanonicalFnName does not drop

0 commit comments

Comments
 (0)