From de3f58de91a532ff9be6f44d3e9b55fc49564407 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 1 Jun 2024 08:25:48 -0700 Subject: [PATCH] [memprof] Use std::vector<Frame> instead of llvm::SmallVector<Frame> (NFC) This patch replaces llvm::SmallVector<Frame> with std::vector<Frame>. llvm::SmallVector<Frame> sets aside one inline element. Meanwhile, when I sort all call stacks by their lengths, the length at the first percentile is already 2. That is, 99 percent of call stacks do not take advantage of the inline element. Using std::vector<Frame> reduces the cycle and instruction counts by 4.5% and 10.3%, respectively, with "llvm-profdata show" modified to deserialize all MemProfRecords. --- llvm/include/llvm/ProfileData/MemProf.h | 19 +++++++++---------- llvm/lib/ProfileData/MemProf.cpp | 3 +-- .../Instrumentation/MemProfiler.cpp | 4 ++-- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 406144d9db1e8..667b9fa033d75 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -364,7 +364,7 @@ struct IndexedAllocationInfo { // be used for temporary in-memory instances. struct AllocationInfo { // Same as IndexedAllocationInfo::CallStack with the frame contents inline. - llvm::SmallVector<Frame> CallStack; + std::vector<Frame> CallStack; // Same as IndexedAllocationInfo::Info; PortableMemInfoBlock Info; @@ -446,8 +446,7 @@ struct IndexedMemProfRecord { // Convert IndexedMemProfRecord to MemProfRecord. Callback is used to // translate CallStackId to call stacks with frames inline. MemProfRecord toMemProfRecord( - llvm::function_ref<llvm::SmallVector<Frame>(const CallStackId)> Callback) - const; + llvm::function_ref<std::vector<Frame>(const CallStackId)> Callback) const; // Returns the GUID for the function name after canonicalization. For // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are @@ -462,7 +461,7 @@ struct MemProfRecord { // Same as IndexedMemProfRecord::AllocSites with frame contents inline.
llvm::SmallVector<AllocationInfo> AllocSites; // Same as IndexedMemProfRecord::CallSites with frame contents inline. - llvm::SmallVector<llvm::SmallVector<Frame>> CallSites; + llvm::SmallVector<std::vector<Frame>> CallSites; MemProfRecord() = default; MemProfRecord( @@ -472,7 +471,7 @@ struct MemProfRecord { AllocSites.emplace_back(IndexedAI, IdToFrameCallback); } for (const ArrayRef<FrameId> Site : Record.CallSites) { - llvm::SmallVector<Frame> Frames; + std::vector<Frame> Frames; for (const FrameId Id : Site) { Frames.push_back(IdToFrameCallback(Id)); } @@ -490,7 +489,7 @@ struct MemProfRecord { if (!CallSites.empty()) { OS << " CallSites:\n"; - for (const llvm::SmallVector<Frame> &Frames : CallSites) { + for (const std::vector<Frame> &Frames : CallSites) { for (const Frame &F : Frames) { OS << " -\n"; F.printYAML(OS); @@ -844,8 +843,8 @@ template <typename MapTy> struct CallStackIdConverter { CallStackIdConverter(const CallStackIdConverter &) = delete; CallStackIdConverter &operator=(const CallStackIdConverter &) = delete; - llvm::SmallVector<Frame> operator()(CallStackId CSId) { - llvm::SmallVector<Frame> Frames; + std::vector<Frame> operator()(CallStackId CSId) { + std::vector<Frame> Frames; auto CSIter = Map.find(CSId); if (CSIter == Map.end()) { LastUnmappedId = CSId; @@ -886,8 +885,8 @@ struct LinearCallStackIdConverter { std::function<Frame(LinearFrameId)> FrameIdToFrame) : CallStackBase(CallStackBase), FrameIdToFrame(FrameIdToFrame) {} - llvm::SmallVector<Frame> operator()(LinearCallStackId LinearCSId) { - llvm::SmallVector<Frame> Frames; + std::vector<Frame> operator()(LinearCallStackId LinearCSId) { + std::vector<Frame> Frames; const unsigned char *Ptr = CallStackBase + diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp index 1d9860e0ea7e8..2b227a65c1d8f 100644 --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -338,8 +338,7 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, } MemProfRecord IndexedMemProfRecord::toMemProfRecord( - llvm::function_ref<llvm::SmallVector<Frame>(const CallStackId)> Callback) - const { + llvm::function_ref<std::vector<Frame>(const CallStackId)> Callback) const { MemProfRecord Record;
Record.AllocSites.reserve(AllocSites.size()); diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index c0a3bf8464d2d..d70c6a7a0a152 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -759,7 +759,7 @@ static void readMemprof(Module &M, Function &F, std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo; // For the callsites we need to record the index of the associated frame in // the frame array (see comments below where the map entries are added). - std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>> + std::map<uint64_t, std::set<std::pair<const std::vector<Frame> *, unsigned>>> LocHashToCallSites; for (auto &AI : MemProfRec->AllocSites) { // Associate the allocation info with the leaf frame. The later matching @@ -815,7 +815,7 @@ static void readMemprof(Module &M, Function &F, // and another callsite). std::map<uint64_t, std::set<const AllocationInfo *>>::iterator AllocInfoIter; - std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, + std::map<uint64_t, std::set<std::pair<const std::vector<Frame> *, unsigned>>>::iterator CallSitesIter; for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr; DIL = DIL->getInlinedAt()) {