Skip to content

[memprof] Add Version2 of IndexedMemProfRecord serialization #87455

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 22 additions & 39 deletions llvm/include/llvm/ProfileData/MemProf.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ enum IndexedVersion : uint64_t {
Version0 = 0,
// Version 1: Added a version field to the header.
Version1 = 1,
// Version 2: Added a call stack table. Under development.
Version2 = 2,
};

constexpr uint64_t MinimumSupportedVersion = Version0;
Expand Down Expand Up @@ -289,23 +291,14 @@ struct IndexedAllocationInfo {
: CallStack(CS.begin(), CS.end()), CSId(CSId), Info(MB) {}

// Returns the size in bytes when this allocation info struct is serialized.
size_t serializedSize() const {
return sizeof(uint64_t) + // The number of frames to serialize.
sizeof(FrameId) * CallStack.size() + // The callstack frame ids.
PortableMemInfoBlock::serializedSize(); // The size of the payload.
}
size_t serializedSize(IndexedVersion Version) const;

bool operator==(const IndexedAllocationInfo &Other) const {
if (Other.Info != Info)
return false;

if (Other.CallStack.size() != CallStack.size())
if (Other.CSId != CSId)
return false;

for (size_t J = 0; J < Other.CallStack.size(); J++) {
if (Other.CallStack[J] != CallStack[J])
return false;
}
return true;
}

Expand Down Expand Up @@ -357,6 +350,9 @@ struct IndexedMemProfRecord {
// inline location list may include additional entries, users should pick
// the last entry in the list with the same function GUID.
llvm::SmallVector<llvm::SmallVector<FrameId>> CallSites;
// Conceptually the same as above. We are going to keep both CallSites and
// CallSiteIds while we are transitioning from CallSites to CallSiteIds.
llvm::SmallVector<CallStackId> CallSiteIds;

void clear() {
AllocSites.clear();
Expand All @@ -370,47 +366,31 @@ struct IndexedMemProfRecord {
CallSites.append(Other.CallSites);
}

size_t serializedSize() const {
size_t Result = sizeof(GlobalValue::GUID);
for (const IndexedAllocationInfo &N : AllocSites)
Result += N.serializedSize();

// The number of callsites we have information for.
Result += sizeof(uint64_t);
for (const auto &Frames : CallSites) {
// The number of frame ids to serialize.
Result += sizeof(uint64_t);
Result += Frames.size() * sizeof(FrameId);
}
return Result;
}
size_t serializedSize(IndexedVersion Version) const;

bool operator==(const IndexedMemProfRecord &Other) const {
if (Other.AllocSites.size() != AllocSites.size())
return false;

if (Other.CallSites.size() != CallSites.size())
return false;

for (size_t I = 0; I < AllocSites.size(); I++) {
if (AllocSites[I] != Other.AllocSites[I])
return false;
}

for (size_t I = 0; I < CallSites.size(); I++) {
if (CallSites[I] != Other.CallSites[I])
return false;
}
if (Other.CallSiteIds != CallSiteIds)
return false;
return true;
}

// Serializes this memprof record to the ostream \p OS based on the schema
// provided in \p Schema.
void serialize(const MemProfSchema &Schema, raw_ostream &OS);
void serialize(const MemProfSchema &Schema, raw_ostream &OS,
IndexedVersion Version);

// Deserializes memprof records from the Buffer.
static IndexedMemProfRecord deserialize(const MemProfSchema &Schema,
const unsigned char *Buffer);
const unsigned char *Buffer,
IndexedVersion Version);

// Returns the GUID for the function name after canonicalization. For
// memprof, we remove any .llvm suffix added by LTO. MemProfRecords are
Expand Down Expand Up @@ -480,7 +460,8 @@ class RecordLookupTrait {
using offset_type = uint64_t;

RecordLookupTrait() = delete;
RecordLookupTrait(const MemProfSchema &S) : Schema(S) {}
RecordLookupTrait(IndexedVersion V, const MemProfSchema &S)
: Version(V), Schema(S) {}

static bool EqualKey(uint64_t A, uint64_t B) { return A == B; }
static uint64_t GetInternalKey(uint64_t K) { return K; }
Expand All @@ -507,19 +488,21 @@ class RecordLookupTrait {

data_type ReadData(uint64_t K, const unsigned char *D,
offset_type /*Unused*/) {
Record = IndexedMemProfRecord::deserialize(Schema, D);
Record = IndexedMemProfRecord::deserialize(Schema, D, Version);
return Record;
}

private:
// Holds the MemProf version.
IndexedVersion Version;
// Holds the memprof schema used to deserialize records.
MemProfSchema Schema;
// Holds the records from one function deserialized from the indexed format.
IndexedMemProfRecord Record;
};

// Trait for writing IndexedMemProfRecord data to the on-disk hash table.
class RecordWriterTrait {
template <IndexedVersion Version> class RecordWriterTrait {
public:
using key_type = uint64_t;
using key_type_ref = uint64_t;
Expand All @@ -546,7 +529,7 @@ class RecordWriterTrait {
endian::Writer LE(Out, llvm::endianness::little);
offset_type N = sizeof(K);
LE.write<offset_type>(N);
offset_type M = V.serializedSize();
offset_type M = V.serializedSize(Version);
LE.write<offset_type>(M);
return std::make_pair(N, M);
}
Expand All @@ -560,7 +543,7 @@ class RecordWriterTrait {
void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
offset_type /*Unused*/) {
assert(Schema != nullptr && "MemProf schema is not initialized!");
V.serialize(*Schema, Out);
V.serialize(*Schema, Out, Version);
// Clear the IndexedMemProfRecord which results in clearing/freeing its
// vectors of allocs and callsites. This is owned by the associated on-disk
// hash table, but unused after this point. See also the comment added to
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/ProfileData/InstrProfReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1303,7 +1303,7 @@ Error IndexedInstrProfReader::readHeader() {
MemProfRecordTable.reset(MemProfRecordHashTable::Create(
/*Buckets=*/Start + RecordTableOffset,
/*Payload=*/Ptr,
/*Base=*/Start, memprof::RecordLookupTrait(Schema)));
/*Base=*/Start, memprof::RecordLookupTrait(memprof::Version1, Schema)));

// Initialize the frame table reader with the payload and bucket offsets.
MemProfFrameTable.reset(MemProfFrameHashTable::Create(
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/ProfileData/InstrProfWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -557,9 +557,11 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
OS.write(static_cast<uint64_t>(Id));
}

auto RecordWriter = std::make_unique<memprof::RecordWriterTrait>();
auto RecordWriter =
std::make_unique<memprof::RecordWriterTrait<memprof::Version1>>();
RecordWriter->Schema = &Schema;
OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
OnDiskChainedHashTableGenerator<
memprof::RecordWriterTrait<memprof::Version1>>
RecordTableGenerator;
for (auto &I : MemProfRecordData) {
// Insert the key (func hash) and value (memprof record).
Expand Down
170 changes: 161 additions & 9 deletions llvm/lib/ProfileData/MemProf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,33 +10,139 @@

namespace llvm {
namespace memprof {
namespace {
// Returns the byte count of one IndexedAllocationInfo in the layout shared by
// Version0 and Version1: a 64-bit frame count, the inline frame ids of the
// call stack, and the PortableMemInfoBlock payload.
size_t serializedSizeV0(const IndexedAllocationInfo &IAI) {
  // Length prefix for the call stack.
  const size_t FrameCountBytes = sizeof(uint64_t);
  // One FrameId per entry in the call stack.
  const size_t FrameIdBytes = sizeof(FrameId) * IAI.CallStack.size();
  // The payload size is reported by PortableMemInfoBlock itself.
  const size_t PayloadBytes = PortableMemInfoBlock::serializedSize();
  return FrameCountBytes + FrameIdBytes + PayloadBytes;
}

void IndexedMemProfRecord::serialize(const MemProfSchema &Schema,
raw_ostream &OS) {
size_t serializedSizeV2(const IndexedAllocationInfo &IAI) {
size_t Size = 0;
// The CallStackId
Size += sizeof(CallStackId);
// The size of the payload.
Size += PortableMemInfoBlock::serializedSize();
return Size;
}
} // namespace

// Returns the serialized size in bytes of this allocation info for the given
// MemProf format \p Version. Version0 and Version1 share one layout; Version2
// has its own. The switch deliberately has no default label so that every
// enumerator is handled explicitly.
size_t IndexedAllocationInfo::serializedSize(IndexedVersion Version) const {
  switch (Version) {
  case Version2:
    return serializedSizeV2(*this);
  case Version1:
  case Version0:
    return serializedSizeV0(*this);
  }
  llvm_unreachable("unsupported MemProf version");
}

namespace {
// Returns the number of bytes the Version0/Version1 serializer emits for
// \p Record: a 64-bit alloc-site count, each alloc site in its V0 layout, a
// 64-bit call-site count, and for every call site a 64-bit frame count
// followed by its frame ids.
size_t serializedSizeV0(const IndexedMemProfRecord &Record) {
  // The number of alloc sites to serialize. The serializer writes this count
  // as a uint64_t, so size it with that type rather than GlobalValue::GUID,
  // which is unrelated to this field.
  size_t Result = sizeof(uint64_t);
  for (const IndexedAllocationInfo &N : Record.AllocSites)
    Result += N.serializedSize(Version0);

  // The number of callsites we have information for.
  Result += sizeof(uint64_t);
  for (const auto &Frames : Record.CallSites) {
    // The number of frame ids to serialize.
    Result += sizeof(uint64_t);
    // The frame ids themselves.
    Result += Frames.size() * sizeof(FrameId);
  }
  return Result;
}

// Returns the number of bytes the Version2 serializer emits for \p Record: a
// 64-bit alloc-site count, each alloc site in its V2 layout, a 64-bit
// call-site count, and one CallStackId per call site.
size_t serializedSizeV2(const IndexedMemProfRecord &Record) {
  // The number of alloc sites to serialize. The serializer writes this count
  // as a uint64_t, so size it with that type rather than GlobalValue::GUID,
  // which is unrelated to this field.
  size_t Result = sizeof(uint64_t);
  for (const IndexedAllocationInfo &N : Record.AllocSites)
    Result += N.serializedSize(Version2);

  // The number of callsites we have information for.
  Result += sizeof(uint64_t);
  // One CallStackId per call site.
  Result += Record.CallSiteIds.size() * sizeof(CallStackId);
  return Result;
}
} // namespace

// Returns the serialized size in bytes of this record for the given MemProf
// format \p Version. Version0 and Version1 share one layout; Version2 has its
// own. The switch deliberately has no default label so that every enumerator
// is handled explicitly.
size_t IndexedMemProfRecord::serializedSize(IndexedVersion Version) const {
  switch (Version) {
  case Version2:
    return serializedSizeV2(*this);
  case Version1:
  case Version0:
    return serializedSizeV0(*this);
  }
  llvm_unreachable("unsupported MemProf version");
}

namespace {
void serializeV0(const IndexedMemProfRecord &Record,
const MemProfSchema &Schema, raw_ostream &OS) {
using namespace support;

endian::Writer LE(OS, llvm::endianness::little);

LE.write<uint64_t>(AllocSites.size());
for (const IndexedAllocationInfo &N : AllocSites) {
LE.write<uint64_t>(Record.AllocSites.size());
for (const IndexedAllocationInfo &N : Record.AllocSites) {
LE.write<uint64_t>(N.CallStack.size());
for (const FrameId &Id : N.CallStack)
LE.write<FrameId>(Id);
N.Info.serialize(Schema, OS);
}

// Related contexts.
LE.write<uint64_t>(CallSites.size());
for (const auto &Frames : CallSites) {
LE.write<uint64_t>(Record.CallSites.size());
for (const auto &Frames : Record.CallSites) {
LE.write<uint64_t>(Frames.size());
for (const FrameId &Id : Frames)
LE.write<FrameId>(Id);
}
}

IndexedMemProfRecord
IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
const unsigned char *Ptr) {
void serializeV2(const IndexedMemProfRecord &Record,
const MemProfSchema &Schema, raw_ostream &OS) {
using namespace support;

endian::Writer LE(OS, llvm::endianness::little);

LE.write<uint64_t>(Record.AllocSites.size());
for (const IndexedAllocationInfo &N : Record.AllocSites) {
LE.write<CallStackId>(N.CSId);
N.Info.serialize(Schema, OS);
}

// Related contexts.
LE.write<uint64_t>(Record.CallSiteIds.size());
for (const auto &CSId : Record.CallSiteIds)
LE.write<CallStackId>(CSId);
}
} // namespace

// Serializes this record to \p OS using the layout selected by \p Version,
// with the MemInfoBlock payloads laid out according to \p Schema. Version0 and
// Version1 share one layout; Version2 has its own. The switch deliberately has
// no default label so that every enumerator is handled explicitly.
void IndexedMemProfRecord::serialize(const MemProfSchema &Schema,
                                     raw_ostream &OS, IndexedVersion Version) {
  switch (Version) {
  case Version2:
    serializeV2(*this, Schema, OS);
    return;
  case Version1:
  case Version0:
    serializeV0(*this, Schema, OS);
    return;
  }
  llvm_unreachable("unsupported MemProf version");
}

namespace {
IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema,
const unsigned char *Ptr) {
using namespace support;

IndexedMemProfRecord Record;
Expand Down Expand Up @@ -73,11 +179,57 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
Frames.push_back(Id);
}
Record.CallSites.push_back(Frames);
Record.CallSiteIds.push_back(hashCallStack(Frames));
}

return Record;
}

// Reconstructs an IndexedMemProfRecord from the Version2 layout starting at
// \p Ptr (little-endian, unaligned reads):
//   - a 64-bit alloc-site count, then per alloc site its CallStackId followed
//     by a MemInfoBlock payload decoded according to \p Schema;
//   - a 64-bit call-site count, then one CallStackId per call site.
// Only the call stack ids are populated; the inline CallStack/CallSites frame
// lists are left empty by this function.
IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema,
                                   const unsigned char *Ptr) {
  using namespace support;

  IndexedMemProfRecord Record;

  // Read the meminfo nodes.
  // NOTE(review): AllocSites could be reserved up front as well if its
  // container type supports it; the type is not visible here, so confirm.
  const uint64_t NumNodes =
      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
  for (uint64_t I = 0; I < NumNodes; I++) {
    IndexedAllocationInfo Node;
    Node.CSId =
        endian::readNext<CallStackId, llvm::endianness::little, unaligned>(Ptr);
    Node.Info.deserialize(Schema, Ptr);
    // The payload read is followed by an explicit advance; presumably
    // deserialize() does not move Ptr itself.
    Ptr += PortableMemInfoBlock::serializedSize();
    Record.AllocSites.push_back(Node);
  }

  // Read the callsite information.
  const uint64_t NumCtxs =
      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
  // The element count is known up front, so allocate once instead of growing
  // the vector repeatedly inside the loop.
  Record.CallSiteIds.reserve(NumCtxs);
  for (uint64_t J = 0; J < NumCtxs; J++) {
    CallStackId CSId =
        endian::readNext<CallStackId, llvm::endianness::little, unaligned>(Ptr);
    Record.CallSiteIds.push_back(CSId);
  }

  return Record;
}
} // namespace

// Deserializes a record from the buffer at \p Ptr using the layout selected by
// \p Version, decoding MemInfoBlock payloads according to \p Schema. Version0
// and Version1 share one layout; Version2 has its own. The switch deliberately
// has no default label so that every enumerator is handled explicitly.
IndexedMemProfRecord
IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
                                  const unsigned char *Ptr,
                                  IndexedVersion Version) {
  switch (Version) {
  case Version2:
    return deserializeV2(Schema, Ptr);
  case Version1:
  case Version0:
    return deserializeV0(Schema, Ptr);
  }
  llvm_unreachable("unsupported MemProf version");
}

GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) {
// Canonicalize the function name to drop suffixes such as ".llvm.". Note
// we do not drop any ".__uniq." suffixes, as getCanonicalFnName does not drop
Expand Down
Loading