diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index c0a3bf8464d2d..3a55876cff166 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -149,11 +149,33 @@ static cl::opt<bool> ClMemProfMatchHotColdNew( "Match allocation profiles onto existing hot/cold operator new calls"), cl::Hidden, cl::init(false)); +static cl::opt<bool> + ClPrintMemProfMatchInfo("memprof-print-match-info", + cl::desc("Print matching stats for each allocation " + "context in this module's profiles"), + cl::Hidden, cl::init(false)); + +// Instrumentation statistics STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads"); STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes"); + +// Matching statistics STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile."); +STATISTIC(NumOfMemProfMismatch, + "Number of functions having mismatched memory profile hash."); +STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile."); +STATISTIC(NumOfMemProfAllocContextProfiles, + "Number of alloc contexts in memory profile."); +STATISTIC(NumOfMemProfCallSiteProfiles, + "Number of callsites in memory profile."); +STATISTIC(NumOfMemProfMatchedAllocContexts, + "Number of matched memory profile alloc contexts."); +STATISTIC(NumOfMemProfMatchedAllocs, + "Number of matched memory profile allocs."); +STATISTIC(NumOfMemProfMatchedCallSites, + "Number of matched memory profile callsites."); namespace { @@ -637,8 +659,22 @@ static uint64_t computeStackId(const memprof::Frame &Frame) { return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column); } -static void addCallStack(CallStackTrie &AllocTrie, - const AllocationInfo *AllocInfo) { +// Helper to generate a single hash id for 
a given callstack, used for emitting +// matching statistics and useful for uniquing such statistics across modules. +static uint64_t +computeFullStackId(const SmallVectorImpl<memprof::Frame> &CallStack) { + llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little> + HashBuilder; + for (auto &F : CallStack) + HashBuilder.add(F.Function, F.LineOffset, F.Column); + llvm::BLAKE3Result<8> Hash = HashBuilder.final(); + uint64_t Id; + std::memcpy(&Id, Hash.data(), sizeof(Hash)); + return Id; +} + +static AllocationType addCallStack(CallStackTrie &AllocTrie, + const AllocationInfo *AllocInfo) { SmallVector<uint64_t> StackIds; for (const auto &StackFrame : AllocInfo->CallStack) StackIds.push_back(computeStackId(StackFrame)); @@ -646,6 +682,7 @@ static void addCallStack(CallStackTrie &AllocTrie, AllocInfo->Info.getAllocCount(), AllocInfo->Info.getTotalLifetime()); AllocTrie.addCallStack(AllocType, StackIds); + return AllocType; } // Helper to compare the InlinedCallStack computed from an instruction's debug @@ -701,9 +738,16 @@ static bool isNewWithHotColdVariant(Function *Callee, } } -static void readMemprof(Module &M, Function &F, - IndexedInstrProfReader *MemProfReader, - const TargetLibraryInfo &TLI) { +struct AllocMatchInfo { + uint64_t TotalSize = 0; + AllocationType AllocType = AllocationType::None; + bool Matched = false; +}; + +static void +readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, + const TargetLibraryInfo &TLI, + std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo) { auto &Ctx = M.getContext(); // Previously we used getIRPGOFuncName() here. If F is local linkage, // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. 
But @@ -727,6 +771,7 @@ static void readMemprof(Module &M, Function &F, SkipWarning = !PGOWarnMissing; LLVM_DEBUG(dbgs() << "unknown function"); } else if (Err == instrprof_error::hash_mismatch) { + NumOfMemProfMismatch++; SkipWarning = NoPGOWarnMismatch || (NoPGOWarnMismatchComdatWeak && @@ -748,6 +793,8 @@ static void readMemprof(Module &M, Function &F, return; } + NumOfMemProfFunc++; + // Detect if there are non-zero column numbers in the profile. If not, // treat all column numbers as 0 when matching (i.e. ignore any non-zero // columns in the IR). The profiled binary might have been built with @@ -762,6 +809,7 @@ static void readMemprof(Module &M, Function &F, std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>> LocHashToCallSites; for (auto &AI : MemProfRec->AllocSites) { + NumOfMemProfAllocContextProfiles++; // Associate the allocation info with the leaf frame. The later matching // code will match any inlined call sequences in the IR with a longer prefix // of call stack frames. @@ -770,6 +818,7 @@ static void readMemprof(Module &M, Function &F, ProfileHasColumns |= AI.CallStack[0].Column; } for (auto &CS : MemProfRec->CallSites) { + NumOfMemProfCallSiteProfiles++; // Need to record all frames from leaf up to and including this function, // as any of these may or may not have been inlined at this point. unsigned Idx = 0; @@ -863,13 +912,23 @@ static void readMemprof(Module &M, Function &F, // If we found and thus matched all frames on the call, include // this MIB. if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack, - InlinedCallStack)) - addCallStack(AllocTrie, AllocInfo); + InlinedCallStack)) { + NumOfMemProfMatchedAllocContexts++; + auto AllocType = addCallStack(AllocTrie, AllocInfo); + // Record information about the allocation if match info printing + // was requested. 
+ if (ClPrintMemProfMatchInfo) { + auto FullStackId = computeFullStackId(AllocInfo->CallStack); + FullStackIdToAllocMatchInfo[FullStackId] = { + AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true}; + } + } } // We might not have matched any to the full inlined call stack. // But if we did, create and attach metadata, or a function attribute if // all contexts have identical profiled behavior. if (!AllocTrie.empty()) { + NumOfMemProfMatchedAllocs++; // MemprofMDAttached will be false if a function attribute was // attached. bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI); @@ -897,6 +956,7 @@ static void readMemprof(Module &M, Function &F, // attach call stack metadata. if (stackFrameIncludesInlinedCallStack( *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) { + NumOfMemProfMatchedCallSites++; addCallsiteMetadata(I, InlinedCallStack, Ctx); // Only need to find one with a matching call stack and add a single // callsite metadata. @@ -942,12 +1002,25 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + // Map from the stack hash of each allocation context in the function profiles + // to the total profiled size (bytes), allocation type, and whether we matched + // it to an allocation in the IR. + std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo; + for (auto &F : M) { if (F.isDeclaration()) continue; const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F); - readMemprof(M, F, MemProfReader.get(), TLI); + readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo); + } + + if (ClPrintMemProfMatchInfo) { + for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo) + errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType) + << " context with id " << Id << " has total profiled size " + << Info.TotalSize << (Info.Matched ? 
" is" : " not") + << " matched\n"; } return PreservedAnalyses::none(); diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll index 13f370a4071e8..4a87f4f9d7449 100644 --- a/llvm/test/Transforms/PGOProfile/memprof.ll +++ b/llvm/test/Transforms/PGOProfile/memprof.ll @@ -5,6 +5,8 @@ ; REQUIRES: zlib ;; Avoid failures on big-endian systems that can't read the profile properly ; REQUIRES: x86_64-linux +;; -stats requires asserts +; REQUIRES: asserts ;; TODO: Use text profile inputs once that is available for memprof. ;; # To update the Inputs below, run Inputs/update_memprof_inputs.sh. @@ -25,7 +27,7 @@ ; ALL-NOT: no profile data available for function ;; Using a memprof-only profile for memprof-use should only give memprof metadata -; RUN: opt < %s -passes='memprof-use' -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY +; RUN: opt < %s -passes='memprof-use' -pgo-warn-missing-function -S -memprof-print-match-info -stats 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY,MEMPROFMATCHINFO,MEMPROFSTATS ; There should not be any PGO metadata ; MEMPROFONLY-NOT: !prof @@ -61,6 +63,15 @@ ;; give both memprof and pgo metadata. 
; RUN: opt < %s -passes='pgo-instr-use,memprof-use' -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,PGO +; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched +; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched +; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched +; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched +; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched +; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched +; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched +; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched + ; ModuleID = 'memprof.cc' source_filename = "memprof.cc" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -346,6 +357,14 @@ for.end: ; preds = %for.cond ; MEMPROFNOCOLINFO: ![[C10]] = !{i64 -4535090212904553409} ; MEMPROFNOCOLINFO: ![[C11]] = !{i64 3577763375057267810} +; MEMPROFSTATS: 8 memprof - Number of alloc contexts in memory profile. +; MEMPROFSTATS: 10 memprof - Number of callsites in memory profile. +; MEMPROFSTATS: 6 memprof - Number of functions having valid memory profile. +; MEMPROFSTATS: 8 memprof - Number of matched memory profile alloc contexts. +; MEMPROFSTATS: 3 memprof - Number of matched memory profile allocs. +; MEMPROFSTATS: 10 memprof - Number of matched memory profile callsites. + + ; Function Attrs: argmemonly nofree nounwind willreturn writeonly declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3