From a20a826654a0686b3690f46830a9d9e98b3f045b Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Mon, 25 Mar 2024 15:03:38 -0700 Subject: [PATCH 1/7] [memprof] Add Version2 of IndexedMemProfRecord serialization I'm currently developing a new version of the indexed memprof format where we deduplicate call stacks in IndexedAllocationInfo::CallStack and IndexedMemProfRecord::CallSites. We refer to call stacks with integer IDs, namely CallStackId, just as we refer to Frame with FrameId. The deduplication will cut down the profile file size by 80% in a large memprof file of mine. As a step toward the goal, this patch teaches IndexedMemProfRecord::{serialize,deserialize} to speak Version2. A subsequent patch will add Version2 support to llvm-profdata. The essence of the patch is to replace the serialization of a call stack, a vector of FrameIDs, with that of a CallStackId. That is: const IndexedAllocationInfo &N = ...; ... LE.write(N.CallStack.size()); for (const FrameId &Id : N.CallStack) LE.write(Id); becomes: LE.write(N.CSId); --- llvm/include/llvm/ProfileData/MemProf.h | 52 +++++++++++---- llvm/lib/ProfileData/MemProf.cpp | 77 ++++++++++++++-------- llvm/unittests/ProfileData/MemProfTest.cpp | 39 ++++++++++- 3 files changed, 127 insertions(+), 41 deletions(-) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index ff00900a1466a..22a805843f533 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -22,6 +22,8 @@ enum IndexedVersion : uint64_t { Version0 = 0, // Version 1: Added a version field to the header. Version1 = 1, + // Version 2: Added a call stack table. Under development. + Version2 = 2, }; constexpr uint64_t MinimumSupportedVersion = Version0; @@ -289,10 +291,20 @@ struct IndexedAllocationInfo { : CallStack(CS.begin(), CS.end()), CSId(CSId), Info(MB) {} // Returns the size in bytes when this allocation info struct is serialized. 
- size_t serializedSize() const { - return sizeof(uint64_t) + // The number of frames to serialize. - sizeof(FrameId) * CallStack.size() + // The callstack frame ids. - PortableMemInfoBlock::serializedSize(); // The size of the payload. + size_t serializedSize(IndexedVersion Version = Version0) const { + size_t Size = 0; + if (Version <= Version1) { + // The number of frames to serialize. + Size += sizeof(uint64_t); + // The callstack frame ids. + Size += sizeof(FrameId) * CallStack.size(); + } else { + // The CallStackId + Size += sizeof(CallStackId); + } + // The size of the payload. + Size += PortableMemInfoBlock::serializedSize(); + return Size; } bool operator==(const IndexedAllocationInfo &Other) const { @@ -306,6 +318,9 @@ struct IndexedAllocationInfo { if (Other.CallStack[J] != CallStack[J]) return false; } + + if (Other.CSId != CSId) + return false; return true; } @@ -357,6 +372,9 @@ struct IndexedMemProfRecord { // inline location list may include additional entries, users should pick // the last entry in the list with the same function GUID. llvm::SmallVector> CallSites; + // Conceptually the same as above. We are going to keep both CallSites and + // CallSiteIds while we are transitioning from CallSites to CallSitesIds. + llvm::SmallVector CallSiteIds; void clear() { AllocSites.clear(); @@ -370,17 +388,22 @@ struct IndexedMemProfRecord { CallSites.append(Other.CallSites); } - size_t serializedSize() const { + size_t serializedSize(IndexedVersion Version = Version0) const { size_t Result = sizeof(GlobalValue::GUID); for (const IndexedAllocationInfo &N : AllocSites) - Result += N.serializedSize(); + Result += N.serializedSize(Version); // The number of callsites we have information for. Result += sizeof(uint64_t); - for (const auto &Frames : CallSites) { - // The number of frame ids to serialize. 
- Result += sizeof(uint64_t); - Result += Frames.size() * sizeof(FrameId); + if (Version <= Version1) { + for (const auto &Frames : CallSites) { + // The number of frame ids to serialize. + Result += sizeof(uint64_t); + Result += Frames.size() * sizeof(FrameId); + } + } else { + // The CallStackId + Result += CallSiteIds.size() * sizeof(CallStackId); } return Result; } @@ -401,16 +424,21 @@ struct IndexedMemProfRecord { if (CallSites[I] != Other.CallSites[I]) return false; } + + if (Other.CallSiteIds != CallSiteIds) + return false; return true; } // Serializes the memprof records in \p Records to the ostream \p OS based // on the schema provided in \p Schema. - void serialize(const MemProfSchema &Schema, raw_ostream &OS); + void serialize(const MemProfSchema &Schema, raw_ostream &OS, + IndexedVersion Version = Version0); // Deserializes memprof records from the Buffer. static IndexedMemProfRecord deserialize(const MemProfSchema &Schema, - const unsigned char *Buffer); + const unsigned char *Buffer, + IndexedVersion Version = Version0); // Returns the GUID for the function name after canonicalization. For // memprof, we remove any .llvm suffix added by LTO. 
MemProfRecords are diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp index 6c419811d59e2..e5c5fb58c25ab 100644 --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -12,31 +12,42 @@ namespace llvm { namespace memprof { void IndexedMemProfRecord::serialize(const MemProfSchema &Schema, - raw_ostream &OS) { + raw_ostream &OS, IndexedVersion Version) { using namespace support; endian::Writer LE(OS, llvm::endianness::little); LE.write(AllocSites.size()); for (const IndexedAllocationInfo &N : AllocSites) { - LE.write(N.CallStack.size()); - for (const FrameId &Id : N.CallStack) - LE.write(Id); + if (Version <= Version1) { + LE.write(N.CallStack.size()); + for (const FrameId &Id : N.CallStack) + LE.write(Id); + } else { + LE.write(N.CSId); + } N.Info.serialize(Schema, OS); } // Related contexts. - LE.write(CallSites.size()); - for (const auto &Frames : CallSites) { - LE.write(Frames.size()); - for (const FrameId &Id : Frames) - LE.write(Id); + if (Version <= Version1) { + LE.write(CallSites.size()); + for (const auto &Frames : CallSites) { + LE.write(Frames.size()); + for (const FrameId &Id : Frames) + LE.write(Id); + } + } else { + LE.write(CallSiteIds.size()); + for (const auto &CSId : CallSiteIds) + LE.write(CSId); } } IndexedMemProfRecord IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, - const unsigned char *Ptr) { + const unsigned char *Ptr, + IndexedVersion Version) { using namespace support; IndexedMemProfRecord Record; @@ -46,14 +57,20 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, endian::readNext(Ptr); for (uint64_t I = 0; I < NumNodes; I++) { IndexedAllocationInfo Node; - const uint64_t NumFrames = - endian::readNext(Ptr); - for (uint64_t J = 0; J < NumFrames; J++) { - const FrameId Id = - endian::readNext(Ptr); - Node.CallStack.push_back(Id); + if (Version <= Version1) { + const uint64_t NumFrames = + endian::readNext(Ptr); + for (uint64_t J = 0; J < NumFrames; J++) { + 
const FrameId Id = + endian::readNext(Ptr); + Node.CallStack.push_back(Id); + } + Node.CSId = hashCallStack(Node.CallStack); + } else { + Node.CSId = + endian::readNext( + Ptr); } - Node.CSId = hashCallStack(Node.CallStack); Node.Info.deserialize(Schema, Ptr); Ptr += PortableMemInfoBlock::serializedSize(); Record.AllocSites.push_back(Node); @@ -63,16 +80,24 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, const uint64_t NumCtxs = endian::readNext(Ptr); for (uint64_t J = 0; J < NumCtxs; J++) { - const uint64_t NumFrames = - endian::readNext(Ptr); - llvm::SmallVector Frames; - Frames.reserve(NumFrames); - for (uint64_t K = 0; K < NumFrames; K++) { - const FrameId Id = - endian::readNext(Ptr); - Frames.push_back(Id); + if (Version <= Version1) { + const uint64_t NumFrames = + endian::readNext(Ptr); + llvm::SmallVector Frames; + Frames.reserve(NumFrames); + for (uint64_t K = 0; K < NumFrames; K++) { + const FrameId Id = + endian::readNext(Ptr); + Frames.push_back(Id); + } + Record.CallSites.push_back(Frames); + Record.CallSiteIds.push_back(hashCallStack(Frames)); + } else { + CallStackId CSId = + endian::readNext( + Ptr); + Record.CallSiteIds.push_back(CSId); } - Record.CallSites.push_back(Frames); } return Record; diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp index 1cca44e9b0370..ae5015c7dee7b 100644 --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -265,7 +265,7 @@ TEST(MemProf, PortableWrapper) { EXPECT_EQ(3UL, ReadBlock.getAllocCpuId()); } -TEST(MemProf, RecordSerializationRoundTrip) { +TEST(MemProf, RecordSerializationRoundTripVersion0) { const MemProfSchema Schema = getFullSchema(); MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000, @@ -284,14 +284,47 @@ TEST(MemProf, RecordSerializationRoundTrip) { Info); } Record.CallSites.assign(CallSites); + for (const auto &CS : CallSites) + 
Record.CallSiteIds.push_back(llvm::memprof::hashCallStack(CS)); std::string Buffer; llvm::raw_string_ostream OS(Buffer); - Record.serialize(Schema, OS); + Record.serialize(Schema, OS, llvm::memprof::Version0); OS.flush(); const IndexedMemProfRecord GotRecord = IndexedMemProfRecord::deserialize( - Schema, reinterpret_cast(Buffer.data())); + Schema, reinterpret_cast(Buffer.data()), + llvm::memprof::Version0); + + EXPECT_EQ(Record, GotRecord); +} + +TEST(MemProf, RecordSerializationRoundTripVersion2) { + const MemProfSchema Schema = getFullSchema(); + + MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000, + /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3, + /*dealloc_cpu=*/4); + + llvm::SmallVector CallStackIds = {0x123, 0x456}; + + llvm::SmallVector CallSiteIds = {0x333, 0x444}; + + IndexedMemProfRecord Record; + for (const auto &CSId : CallStackIds) { + // Use the same info block for both allocation sites. + Record.AllocSites.emplace_back(llvm::SmallVector(), CSId, Info); + } + Record.CallSiteIds.assign(CallSiteIds); + + std::string Buffer; + llvm::raw_string_ostream OS(Buffer); + Record.serialize(Schema, OS, llvm::memprof::Version2); + OS.flush(); + + const IndexedMemProfRecord GotRecord = IndexedMemProfRecord::deserialize( + Schema, reinterpret_cast(Buffer.data()), + llvm::memprof::Version2); EXPECT_EQ(Record, GotRecord); } From a3f96b5f91401852397a64f82bd7a1b221ed5a8a Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 3 Apr 2024 12:03:33 -0700 Subject: [PATCH 2/7] Removed CallStack-based comparison. Removed CallSites-based comparison. Made Version a required parameter in serialize/deserialize/serializedSize. 
--- llvm/include/llvm/ProfileData/MemProf.h | 30 +++++----------------- llvm/unittests/ProfileData/MemProfTest.cpp | 4 ++- 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 22a805843f533..3e93f086cd0f2 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -291,7 +291,7 @@ struct IndexedAllocationInfo { : CallStack(CS.begin(), CS.end()), CSId(CSId), Info(MB) {} // Returns the size in bytes when this allocation info struct is serialized. - size_t serializedSize(IndexedVersion Version = Version0) const { + size_t serializedSize(IndexedVersion Version) const { size_t Size = 0; if (Version <= Version1) { // The number of frames to serialize. @@ -311,14 +311,6 @@ struct IndexedAllocationInfo { if (Other.Info != Info) return false; - if (Other.CallStack.size() != CallStack.size()) - return false; - - for (size_t J = 0; J < Other.CallStack.size(); J++) { - if (Other.CallStack[J] != CallStack[J]) - return false; - } - if (Other.CSId != CSId) return false; return true; @@ -388,7 +380,7 @@ struct IndexedMemProfRecord { CallSites.append(Other.CallSites); } - size_t serializedSize(IndexedVersion Version = Version0) const { + size_t serializedSize(IndexedVersion Version) const { size_t Result = sizeof(GlobalValue::GUID); for (const IndexedAllocationInfo &N : AllocSites) Result += N.serializedSize(Version); @@ -412,19 +404,11 @@ struct IndexedMemProfRecord { if (Other.AllocSites.size() != AllocSites.size()) return false; - if (Other.CallSites.size() != CallSites.size()) - return false; - for (size_t I = 0; I < AllocSites.size(); I++) { if (AllocSites[I] != Other.AllocSites[I]) return false; } - for (size_t I = 0; I < CallSites.size(); I++) { - if (CallSites[I] != Other.CallSites[I]) - return false; - } - if (Other.CallSiteIds != CallSiteIds) return false; return true; @@ -433,12 +417,12 @@ struct IndexedMemProfRecord { // Serializes 
the memprof records in \p Records to the ostream \p OS based // on the schema provided in \p Schema. void serialize(const MemProfSchema &Schema, raw_ostream &OS, - IndexedVersion Version = Version0); + IndexedVersion Version); // Deserializes memprof records from the Buffer. static IndexedMemProfRecord deserialize(const MemProfSchema &Schema, const unsigned char *Buffer, - IndexedVersion Version = Version0); + IndexedVersion Version); // Returns the GUID for the function name after canonicalization. For // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are @@ -535,7 +519,7 @@ class RecordLookupTrait { data_type ReadData(uint64_t K, const unsigned char *D, offset_type /*Unused*/) { - Record = IndexedMemProfRecord::deserialize(Schema, D); + Record = IndexedMemProfRecord::deserialize(Schema, D, Version1); return Record; } @@ -574,7 +558,7 @@ class RecordWriterTrait { endian::Writer LE(Out, llvm::endianness::little); offset_type N = sizeof(K); LE.write(N); - offset_type M = V.serializedSize(); + offset_type M = V.serializedSize(Version1); LE.write(M); return std::make_pair(N, M); } @@ -588,7 +572,7 @@ class RecordWriterTrait { void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V, offset_type /*Unused*/) { assert(Schema != nullptr && "MemProf schema is not initialized!"); - V.serialize(*Schema, Out); + V.serialize(*Schema, Out, Version1); // Clear the IndexedMemProfRecord which results in clearing/freeing its // vectors of allocs and callsites. This is owned by the associated on-disk // hash table, but unused after this point. 
See also the comment added to diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp index ae5015c7dee7b..f1aa6f37aa399 100644 --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -265,7 +265,9 @@ TEST(MemProf, PortableWrapper) { EXPECT_EQ(3UL, ReadBlock.getAllocCpuId()); } -TEST(MemProf, RecordSerializationRoundTripVersion0) { +// Version0 and Version1 serialize IndexedMemProfRecord in the same format, so +// we share one test. +TEST(MemProf, RecordSerializationRoundTripVersion0And1) { const MemProfSchema Schema = getFullSchema(); MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000, From 803c29fa9602a74943d71d14fe8f05f03f5c2f62 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 3 Apr 2024 12:27:21 -0700 Subject: [PATCH 3/7] Fix a typo. --- llvm/include/llvm/ProfileData/MemProf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 3e93f086cd0f2..ad65d0105b86a 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -365,7 +365,7 @@ struct IndexedMemProfRecord { // the last entry in the list with the same function GUID. llvm::SmallVector> CallSites; // Conceptually the same as above. We are going to keep both CallSites and - // CallSiteIds while we are transitioning from CallSites to CallSitesIds. + // CallSiteIds while we are transitioning from CallSites to CallSiteIds. llvm::SmallVector CallSiteIds; void clear() { From f07603dea7d476a7c6a035097900f552c597fd0a Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 3 Apr 2024 13:42:50 -0700 Subject: [PATCH 4/7] Use separate functions for different versions. 
--- llvm/include/llvm/ProfileData/MemProf.h | 98 +++++++++++----- llvm/lib/ProfileData/MemProf.cpp | 149 ++++++++++++++++-------- 2 files changed, 171 insertions(+), 76 deletions(-) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index ad65d0105b86a..780ed8daa9be7 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -292,19 +292,14 @@ struct IndexedAllocationInfo { // Returns the size in bytes when this allocation info struct is serialized. size_t serializedSize(IndexedVersion Version) const { - size_t Size = 0; - if (Version <= Version1) { - // The number of frames to serialize. - Size += sizeof(uint64_t); - // The callstack frame ids. - Size += sizeof(FrameId) * CallStack.size(); - } else { - // The CallStackId - Size += sizeof(CallStackId); + switch (Version) { + case Version0: + case Version1: + return serializedSizeV0(); + case Version2: + return serializedSizeV2(); } - // The size of the payload. - Size += PortableMemInfoBlock::serializedSize(); - return Size; + llvm_unreachable("unsupported MemProf version"); } bool operator==(const IndexedAllocationInfo &Other) const { @@ -319,6 +314,27 @@ struct IndexedAllocationInfo { bool operator!=(const IndexedAllocationInfo &Other) const { return !operator==(Other); } + +private: + size_t serializedSizeV0() const { + size_t Size = 0; + // The number of frames to serialize. + Size += sizeof(uint64_t); + // The callstack frame ids. + Size += sizeof(FrameId) * CallStack.size(); + // The size of the payload. + Size += PortableMemInfoBlock::serializedSize(); + return Size; + } + + size_t serializedSizeV2() const { + size_t Size = 0; + // The CallStackId + Size += sizeof(CallStackId); + // The size of the payload. + Size += PortableMemInfoBlock::serializedSize(); + return Size; + } }; // Holds allocation information with frame contents inline. 
The type should @@ -381,23 +397,14 @@ struct IndexedMemProfRecord { } size_t serializedSize(IndexedVersion Version) const { - size_t Result = sizeof(GlobalValue::GUID); - for (const IndexedAllocationInfo &N : AllocSites) - Result += N.serializedSize(Version); - - // The number of callsites we have information for. - Result += sizeof(uint64_t); - if (Version <= Version1) { - for (const auto &Frames : CallSites) { - // The number of frame ids to serialize. - Result += sizeof(uint64_t); - Result += Frames.size() * sizeof(FrameId); - } - } else { - // The CallStackId - Result += CallSiteIds.size() * sizeof(CallStackId); + switch (Version) { + case Version0: + case Version1: + return serializedSizeV0(); + case Version2: + return serializedSizeV2(); } - return Result; + llvm_unreachable("unsupported MemProf version"); } bool operator==(const IndexedMemProfRecord &Other) const { @@ -428,6 +435,41 @@ struct IndexedMemProfRecord { // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are // mapped to functions using this GUID. static GlobalValue::GUID getGUID(const StringRef FunctionName); + +private: + size_t serializedSizeV0() const { + size_t Result = sizeof(GlobalValue::GUID); + for (const IndexedAllocationInfo &N : AllocSites) + Result += N.serializedSize(Version0); + + // The number of callsites we have information for. + Result += sizeof(uint64_t); + for (const auto &Frames : CallSites) { + // The number of frame ids to serialize. + Result += sizeof(uint64_t); + Result += Frames.size() * sizeof(FrameId); + } + return Result; + } + + size_t serializedSizeV2() const { + size_t Result = sizeof(GlobalValue::GUID); + for (const IndexedAllocationInfo &N : AllocSites) + Result += N.serializedSize(Version2); + + // The number of callsites we have information for. 
+ Result += sizeof(uint64_t); + // The CallStackId + Result += CallSiteIds.size() * sizeof(CallStackId); + return Result; + } + + void serializeV0(const MemProfSchema &Schema, raw_ostream &OS); + void serializeV2(const MemProfSchema &Schema, raw_ostream &OS); + static IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema, + const unsigned char *Buffer); + static IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema, + const unsigned char *Buffer); }; // Holds the memprof profile information for a function. The internal diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp index e5c5fb58c25ab..69ea345c3eecc 100644 --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -13,41 +13,76 @@ namespace memprof { void IndexedMemProfRecord::serialize(const MemProfSchema &Schema, raw_ostream &OS, IndexedVersion Version) { + switch (Version) { + case Version0: + case Version1: + serializeV0(Schema, OS); + return; + case Version2: + serializeV2(Schema, OS); + return; + } + llvm_unreachable("unsupported MemProf version"); +} + +void IndexedMemProfRecord::serializeV0(const MemProfSchema &Schema, + raw_ostream &OS) { using namespace support; endian::Writer LE(OS, llvm::endianness::little); LE.write(AllocSites.size()); for (const IndexedAllocationInfo &N : AllocSites) { - if (Version <= Version1) { - LE.write(N.CallStack.size()); - for (const FrameId &Id : N.CallStack) - LE.write(Id); - } else { - LE.write(N.CSId); - } + LE.write(N.CallStack.size()); + for (const FrameId &Id : N.CallStack) + LE.write(Id); N.Info.serialize(Schema, OS); } // Related contexts. 
- if (Version <= Version1) { - LE.write(CallSites.size()); - for (const auto &Frames : CallSites) { - LE.write(Frames.size()); - for (const FrameId &Id : Frames) - LE.write(Id); - } - } else { - LE.write(CallSiteIds.size()); - for (const auto &CSId : CallSiteIds) - LE.write(CSId); + LE.write(CallSites.size()); + for (const auto &Frames : CallSites) { + LE.write(Frames.size()); + for (const FrameId &Id : Frames) + LE.write(Id); + } +} + +void IndexedMemProfRecord::serializeV2(const MemProfSchema &Schema, + raw_ostream &OS) { + using namespace support; + + endian::Writer LE(OS, llvm::endianness::little); + + LE.write(AllocSites.size()); + for (const IndexedAllocationInfo &N : AllocSites) { + LE.write(N.CSId); + N.Info.serialize(Schema, OS); } + + // Related contexts. + LE.write(CallSiteIds.size()); + for (const auto &CSId : CallSiteIds) + LE.write(CSId); } IndexedMemProfRecord IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, const unsigned char *Ptr, IndexedVersion Version) { + switch (Version) { + case Version0: + case Version1: + return deserializeV0(Schema, Ptr); + case Version2: + return deserializeV2(Schema, Ptr); + } + llvm_unreachable("unsupported MemProf version"); +} + +IndexedMemProfRecord +IndexedMemProfRecord::deserializeV0(const MemProfSchema &Schema, + const unsigned char *Ptr) { using namespace support; IndexedMemProfRecord Record; @@ -57,20 +92,14 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, endian::readNext(Ptr); for (uint64_t I = 0; I < NumNodes; I++) { IndexedAllocationInfo Node; - if (Version <= Version1) { - const uint64_t NumFrames = - endian::readNext(Ptr); - for (uint64_t J = 0; J < NumFrames; J++) { - const FrameId Id = - endian::readNext(Ptr); - Node.CallStack.push_back(Id); - } - Node.CSId = hashCallStack(Node.CallStack); - } else { - Node.CSId = - endian::readNext( - Ptr); + const uint64_t NumFrames = + endian::readNext(Ptr); + for (uint64_t J = 0; J < NumFrames; J++) { + const FrameId Id = + 
endian::readNext(Ptr); + Node.CallStack.push_back(Id); } + Node.CSId = hashCallStack(Node.CallStack); Node.Info.deserialize(Schema, Ptr); Ptr += PortableMemInfoBlock::serializedSize(); Record.AllocSites.push_back(Node); @@ -80,24 +109,48 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, const uint64_t NumCtxs = endian::readNext(Ptr); for (uint64_t J = 0; J < NumCtxs; J++) { - if (Version <= Version1) { - const uint64_t NumFrames = - endian::readNext(Ptr); - llvm::SmallVector Frames; - Frames.reserve(NumFrames); - for (uint64_t K = 0; K < NumFrames; K++) { - const FrameId Id = - endian::readNext(Ptr); - Frames.push_back(Id); - } - Record.CallSites.push_back(Frames); - Record.CallSiteIds.push_back(hashCallStack(Frames)); - } else { - CallStackId CSId = - endian::readNext( - Ptr); - Record.CallSiteIds.push_back(CSId); + const uint64_t NumFrames = + endian::readNext(Ptr); + llvm::SmallVector Frames; + Frames.reserve(NumFrames); + for (uint64_t K = 0; K < NumFrames; K++) { + const FrameId Id = + endian::readNext(Ptr); + Frames.push_back(Id); } + Record.CallSites.push_back(Frames); + Record.CallSiteIds.push_back(hashCallStack(Frames)); + } + + return Record; +} + +IndexedMemProfRecord +IndexedMemProfRecord::deserializeV2(const MemProfSchema &Schema, + const unsigned char *Ptr) { + using namespace support; + + IndexedMemProfRecord Record; + + // Read the meminfo nodes. + const uint64_t NumNodes = + endian::readNext(Ptr); + for (uint64_t I = 0; I < NumNodes; I++) { + IndexedAllocationInfo Node; + Node.CSId = + endian::readNext(Ptr); + Node.Info.deserialize(Schema, Ptr); + Ptr += PortableMemInfoBlock::serializedSize(); + Record.AllocSites.push_back(Node); + } + + // Read the callsite information. 
+ const uint64_t NumCtxs = + endian::readNext(Ptr); + for (uint64_t J = 0; J < NumCtxs; J++) { + CallStackId CSId = + endian::readNext(Ptr); + Record.CallSiteIds.push_back(CSId); } return Record; From 6b636d39374c987125f1f4a87d103c511193aa6f Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 3 Apr 2024 17:32:46 -0700 Subject: [PATCH 5/7] Move version-specific serializers to .cpp files. --- llvm/include/llvm/ProfileData/MemProf.h | 78 +------------- llvm/lib/ProfileData/MemProf.cpp | 134 ++++++++++++++++++------ 2 files changed, 106 insertions(+), 106 deletions(-) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 780ed8daa9be7..151dc181edf97 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -291,16 +291,7 @@ struct IndexedAllocationInfo { : CallStack(CS.begin(), CS.end()), CSId(CSId), Info(MB) {} // Returns the size in bytes when this allocation info struct is serialized. - size_t serializedSize(IndexedVersion Version) const { - switch (Version) { - case Version0: - case Version1: - return serializedSizeV0(); - case Version2: - return serializedSizeV2(); - } - llvm_unreachable("unsupported MemProf version"); - } + size_t serializedSize(IndexedVersion Version) const; bool operator==(const IndexedAllocationInfo &Other) const { if (Other.Info != Info) @@ -314,27 +305,6 @@ struct IndexedAllocationInfo { bool operator!=(const IndexedAllocationInfo &Other) const { return !operator==(Other); } - -private: - size_t serializedSizeV0() const { - size_t Size = 0; - // The number of frames to serialize. - Size += sizeof(uint64_t); - // The callstack frame ids. - Size += sizeof(FrameId) * CallStack.size(); - // The size of the payload. - Size += PortableMemInfoBlock::serializedSize(); - return Size; - } - - size_t serializedSizeV2() const { - size_t Size = 0; - // The CallStackId - Size += sizeof(CallStackId); - // The size of the payload. 
- Size += PortableMemInfoBlock::serializedSize(); - return Size; - } }; // Holds allocation information with frame contents inline. The type should @@ -396,16 +366,7 @@ struct IndexedMemProfRecord { CallSites.append(Other.CallSites); } - size_t serializedSize(IndexedVersion Version) const { - switch (Version) { - case Version0: - case Version1: - return serializedSizeV0(); - case Version2: - return serializedSizeV2(); - } - llvm_unreachable("unsupported MemProf version"); - } + size_t serializedSize(IndexedVersion Version) const; bool operator==(const IndexedMemProfRecord &Other) const { if (Other.AllocSites.size() != AllocSites.size()) @@ -435,41 +396,6 @@ struct IndexedMemProfRecord { // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are // mapped to functions using this GUID. static GlobalValue::GUID getGUID(const StringRef FunctionName); - -private: - size_t serializedSizeV0() const { - size_t Result = sizeof(GlobalValue::GUID); - for (const IndexedAllocationInfo &N : AllocSites) - Result += N.serializedSize(Version0); - - // The number of callsites we have information for. - Result += sizeof(uint64_t); - for (const auto &Frames : CallSites) { - // The number of frame ids to serialize. - Result += sizeof(uint64_t); - Result += Frames.size() * sizeof(FrameId); - } - return Result; - } - - size_t serializedSizeV2() const { - size_t Result = sizeof(GlobalValue::GUID); - for (const IndexedAllocationInfo &N : AllocSites) - Result += N.serializedSize(Version2); - - // The number of callsites we have information for. 
- Result += sizeof(uint64_t); - // The CallStackId - Result += CallSiteIds.size() * sizeof(CallStackId); - return Result; - } - - void serializeV0(const MemProfSchema &Schema, raw_ostream &OS); - void serializeV2(const MemProfSchema &Schema, raw_ostream &OS); - static IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema, - const unsigned char *Buffer); - static IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema, - const unsigned char *Buffer); }; // Holds the memprof profile information for a function. The internal diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp index 69ea345c3eecc..ac0a8702c3f9c 100644 --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -10,29 +10,88 @@ namespace llvm { namespace memprof { +namespace { +size_t serializedSizeV0(const IndexedAllocationInfo &IAI) { + size_t Size = 0; + // The number of frames to serialize. + Size += sizeof(uint64_t); + // The callstack frame ids. + Size += sizeof(FrameId) * IAI.CallStack.size(); + // The size of the payload. + Size += PortableMemInfoBlock::serializedSize(); + return Size; +} -void IndexedMemProfRecord::serialize(const MemProfSchema &Schema, - raw_ostream &OS, IndexedVersion Version) { +size_t serializedSizeV2(const IndexedAllocationInfo &IAI) { + size_t Size = 0; + // The CallStackId + Size += sizeof(CallStackId); + // The size of the payload. 
+ Size += PortableMemInfoBlock::serializedSize(); + return Size; +} +} // namespace + +size_t IndexedAllocationInfo::serializedSize(IndexedVersion Version) const { switch (Version) { case Version0: case Version1: - serializeV0(Schema, OS); - return; + return serializedSizeV0(*this); case Version2: - serializeV2(Schema, OS); - return; + return serializedSizeV2(*this); } llvm_unreachable("unsupported MemProf version"); } -void IndexedMemProfRecord::serializeV0(const MemProfSchema &Schema, - raw_ostream &OS) { +namespace { +size_t serializedSizeV0(const IndexedMemProfRecord &Record) { + size_t Result = sizeof(GlobalValue::GUID); + for (const IndexedAllocationInfo &N : Record.AllocSites) + Result += N.serializedSize(Version0); + + // The number of callsites we have information for. + Result += sizeof(uint64_t); + for (const auto &Frames : Record.CallSites) { + // The number of frame ids to serialize. + Result += sizeof(uint64_t); + Result += Frames.size() * sizeof(FrameId); + } + return Result; +} + +size_t serializedSizeV2(const IndexedMemProfRecord &Record) { + size_t Result = sizeof(GlobalValue::GUID); + for (const IndexedAllocationInfo &N : Record.AllocSites) + Result += N.serializedSize(Version2); + + // The number of callsites we have information for. 
+ Result += sizeof(uint64_t); + // The CallStackId + Result += Record.CallSiteIds.size() * sizeof(CallStackId); + return Result; +} +} // namespace + +size_t IndexedMemProfRecord::serializedSize(IndexedVersion Version) const { + switch (Version) { + case Version0: + case Version1: + return serializedSizeV0(*this); + case Version2: + return serializedSizeV2(*this); + } + llvm_unreachable("unsupported MemProf version"); +} + +namespace { +void serializeV0(const IndexedMemProfRecord &Record, + const MemProfSchema &Schema, raw_ostream &OS) { using namespace support; endian::Writer LE(OS, llvm::endianness::little); - LE.write(AllocSites.size()); - for (const IndexedAllocationInfo &N : AllocSites) { + LE.write(Record.AllocSites.size()); + for (const IndexedAllocationInfo &N : Record.AllocSites) { LE.write(N.CallStack.size()); for (const FrameId &Id : N.CallStack) LE.write(Id); @@ -40,49 +99,50 @@ void IndexedMemProfRecord::serializeV0(const MemProfSchema &Schema, } // Related contexts. - LE.write(CallSites.size()); - for (const auto &Frames : CallSites) { + LE.write(Record.CallSites.size()); + for (const auto &Frames : Record.CallSites) { LE.write(Frames.size()); for (const FrameId &Id : Frames) LE.write(Id); } } -void IndexedMemProfRecord::serializeV2(const MemProfSchema &Schema, - raw_ostream &OS) { +void serializeV2(const IndexedMemProfRecord &Record, + const MemProfSchema &Schema, raw_ostream &OS) { using namespace support; endian::Writer LE(OS, llvm::endianness::little); - LE.write(AllocSites.size()); - for (const IndexedAllocationInfo &N : AllocSites) { + LE.write(Record.AllocSites.size()); + for (const IndexedAllocationInfo &N : Record.AllocSites) { LE.write(N.CSId); N.Info.serialize(Schema, OS); } // Related contexts. 
- LE.write(CallSiteIds.size()); - for (const auto &CSId : CallSiteIds) + LE.write(Record.CallSiteIds.size()); + for (const auto &CSId : Record.CallSiteIds) LE.write(CSId); } +} // namespace -IndexedMemProfRecord -IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, - const unsigned char *Ptr, - IndexedVersion Version) { +void IndexedMemProfRecord::serialize(const MemProfSchema &Schema, + raw_ostream &OS, IndexedVersion Version) { switch (Version) { case Version0: case Version1: - return deserializeV0(Schema, Ptr); + serializeV0(*this, Schema, OS); + return; case Version2: - return deserializeV2(Schema, Ptr); + serializeV2(*this, Schema, OS); + return; } llvm_unreachable("unsupported MemProf version"); } -IndexedMemProfRecord -IndexedMemProfRecord::deserializeV0(const MemProfSchema &Schema, - const unsigned char *Ptr) { +namespace { +IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema, + const unsigned char *Ptr) { using namespace support; IndexedMemProfRecord Record; @@ -125,9 +185,8 @@ IndexedMemProfRecord::deserializeV0(const MemProfSchema &Schema, return Record; } -IndexedMemProfRecord -IndexedMemProfRecord::deserializeV2(const MemProfSchema &Schema, - const unsigned char *Ptr) { +IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema, + const unsigned char *Ptr) { using namespace support; IndexedMemProfRecord Record; @@ -155,6 +214,21 @@ IndexedMemProfRecord::deserializeV2(const MemProfSchema &Schema, return Record; } +} // namespace + +IndexedMemProfRecord +IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, + const unsigned char *Ptr, + IndexedVersion Version) { + switch (Version) { + case Version0: + case Version1: + return deserializeV0(Schema, Ptr); + case Version2: + return deserializeV2(Schema, Ptr); + } + llvm_unreachable("unsupported MemProf version"); +} GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) { // Canonicalize the function name to drop suffixes such as ".llvm.". 
Note From 28cbed7506f5e1ef1ac8919110e1b9198f7c3eca Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 3 Apr 2024 18:45:06 -0700 Subject: [PATCH 6/7] Include changes to RecordLookupTrait and RecordWriterTrait. --- llvm/include/llvm/ProfileData/MemProf.h | 11 +++++++---- llvm/lib/ProfileData/InstrProfReader.cpp | 2 +- llvm/lib/ProfileData/InstrProfWriter.cpp | 6 ++++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 151dc181edf97..ec06530826ecf 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -460,7 +460,8 @@ class RecordLookupTrait { using offset_type = uint64_t; RecordLookupTrait() = delete; - RecordLookupTrait(const MemProfSchema &S) : Schema(S) {} + RecordLookupTrait(IndexedVersion V, const MemProfSchema &S) + : Version(V), Schema(S) {} static bool EqualKey(uint64_t A, uint64_t B) { return A == B; } static uint64_t GetInternalKey(uint64_t K) { return K; } @@ -487,11 +488,13 @@ class RecordLookupTrait { data_type ReadData(uint64_t K, const unsigned char *D, offset_type /*Unused*/) { - Record = IndexedMemProfRecord::deserialize(Schema, D, Version1); + Record = IndexedMemProfRecord::deserialize(Schema, D, Version); return Record; } private: + // Holds the MemProf version. + IndexedVersion Version; // Holds the memprof schema used to deserialize records. MemProfSchema Schema; // Holds the records from one function deserialized from the indexed format. @@ -499,7 +502,7 @@ class RecordLookupTrait { }; // Trait for writing IndexedMemProfRecord data to the on-disk hash table. 
-class RecordWriterTrait { +template class RecordWriterTrait { public: using key_type = uint64_t; using key_type_ref = uint64_t; @@ -526,7 +529,7 @@ class RecordWriterTrait { endian::Writer LE(Out, llvm::endianness::little); offset_type N = sizeof(K); LE.write(N); - offset_type M = V.serializedSize(Version1); + offset_type M = V.serializedSize(Version); LE.write(M); return std::make_pair(N, M); } diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 7ac5c561dc080..884334ed070e8 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -1303,7 +1303,7 @@ Error IndexedInstrProfReader::readHeader() { MemProfRecordTable.reset(MemProfRecordHashTable::Create( /*Buckets=*/Start + RecordTableOffset, /*Payload=*/Ptr, - /*Base=*/Start, memprof::RecordLookupTrait(Schema))); + /*Base=*/Start, memprof::RecordLookupTrait(memprof::Version1, Schema))); // Initialize the frame table reader with the payload and bucket offsets. MemProfFrameTable.reset(MemProfFrameHashTable::Create( diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index c2c94ba30c658..a1bc180a53ca3 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -557,9 +557,11 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { OS.write(static_cast(Id)); } - auto RecordWriter = std::make_unique(); + auto RecordWriter = + std::make_unique>(); RecordWriter->Schema = &Schema; - OnDiskChainedHashTableGenerator + OnDiskChainedHashTableGenerator< + memprof::RecordWriterTrait> RecordTableGenerator; for (auto &I : MemProfRecordData) { // Insert the key (func hash) and value (memprof record). From dcda56308339887636b38f582404aad0bfa246fd Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 3 Apr 2024 18:52:57 -0700 Subject: [PATCH 7/7] Fix a typo. 
--- llvm/include/llvm/ProfileData/MemProf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index ec06530826ecf..110e697702641 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -543,7 +543,7 @@ template class RecordWriterTrait { void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V, offset_type /*Unused*/) { assert(Schema != nullptr && "MemProf schema is not initialized!"); - V.serialize(*Schema, Out, Version1); + V.serialize(*Schema, Out, Version); // Clear the IndexedMemProfRecord which results in clearing/freeing its // vectors of allocs and callsites. This is owned by the associated on-disk // hash table, but unused after this point. See also the comment added to