Skip to content

Commit 9501405

Browse files
committed
Restore "[MemProf] Refactor memory profile matching into MemProfiler (NFC)"
This restores commit 29252fd, reverted in 3498cf5 because it was thought to cause a bot failure, which ended up being unrelated to this patch set. Differential Revision: https://reviews.llvm.org/D154872
1 parent 9095d5c commit 9501405

File tree

3 files changed

+281
-275
lines changed

3 files changed

+281
-275
lines changed

llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,13 @@ class ModuleMemProfilerPass : public PassInfoMixin<ModuleMemProfilerPass> {
4343
static bool isRequired() { return true; }
4444
};
4545

46+
// TODO: Remove this declaration and make readMemprof static once the matching
47+
// is moved into its own pass.
48+
class IndexedInstrProfReader;
49+
class TargetLibraryInfo;
50+
// Looks up the memory profile record for F through MemProfReader and annotates
// the call instructions in F whose debug locations match the profiled frames
// with memprof/callsite metadata. TLI is used to recognize operator-new-like
// allocation calls. M supplies the LLVMContext and the module name used in
// diagnostics.
void readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
51+
const TargetLibraryInfo &TLI);
52+
4653
} // namespace llvm
4754

4855
#endif

llvm/lib/Transforms/Instrumentation/MemProfiler.cpp

Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,12 @@
1818
#include "llvm/ADT/SmallVector.h"
1919
#include "llvm/ADT/Statistic.h"
2020
#include "llvm/ADT/StringRef.h"
21+
#include "llvm/Analysis/MemoryBuiltins.h"
22+
#include "llvm/Analysis/MemoryProfileInfo.h"
2123
#include "llvm/Analysis/ValueTracking.h"
2224
#include "llvm/IR/Constant.h"
2325
#include "llvm/IR/DataLayout.h"
26+
#include "llvm/IR/DiagnosticInfo.h"
2427
#include "llvm/IR/Function.h"
2528
#include "llvm/IR/GlobalValue.h"
2629
#include "llvm/IR/IRBuilder.h"
@@ -30,16 +33,28 @@
3033
#include "llvm/IR/Type.h"
3134
#include "llvm/IR/Value.h"
3235
#include "llvm/ProfileData/InstrProf.h"
36+
#include "llvm/ProfileData/InstrProfReader.h"
37+
#include "llvm/Support/BLAKE3.h"
3338
#include "llvm/Support/CommandLine.h"
3439
#include "llvm/Support/Debug.h"
40+
#include "llvm/Support/HashBuilder.h"
3541
#include "llvm/TargetParser/Triple.h"
3642
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
3743
#include "llvm/Transforms/Utils/ModuleUtils.h"
44+
#include <map>
45+
#include <set>
3846

3947
using namespace llvm;
48+
using namespace llvm::memprof;
4049

4150
#define DEBUG_TYPE "memprof"
4251

52+
// Command-line flags that are only declared here (extern); their definitions
// live in another translation unit. readMemprof reads them to decide whether a
// profile-read warning (missing function / hash mismatch) should be suppressed.
namespace llvm {
53+
extern cl::opt<bool> PGOWarnMissing;
54+
extern cl::opt<bool> NoPGOWarnMismatch;
55+
extern cl::opt<bool> NoPGOWarnMismatchComdatWeak;
56+
} // namespace llvm
57+
4358
constexpr int LLVM_MEM_PROFILER_VERSION = 1;
4459

4560
// Size of memory mapped to a single shadow location.
@@ -128,6 +143,7 @@ STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
128143
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
129144
STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
130145
STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");
146+
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
131147

132148
namespace {
133149

@@ -601,3 +617,251 @@ bool MemProfiler::instrumentFunction(Function &F) {
601617

602618
return FunctionModified;
603619
}
620+
621+
static void addCallsiteMetadata(Instruction &I,
622+
std::vector<uint64_t> &InlinedCallStack,
623+
LLVMContext &Ctx) {
624+
I.setMetadata(LLVMContext::MD_callsite,
625+
buildCallstackMetadata(InlinedCallStack, Ctx));
626+
}
627+
628+
// Computes the 64-bit stack id for a single frame location.
//
// The function GUID, line offset and column are fed (little-endian) into a
// BLAKE3 hash truncated to 8 bytes, and those 8 bytes are returned as a
// uint64_t.
static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
                               uint32_t Column) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::support::endianness::little>
      Hasher;
  Hasher.add(Function, LineOffset, Column);
  llvm::BLAKE3Result<8> Hash = Hasher.final();
  uint64_t Id;
  // The copy is sized by the destination, and the widths are checked at compile
  // time, so a future change to the truncated hash width cannot silently
  // overflow (or only partially initialize) Id.
  static_assert(sizeof(Hash) == sizeof(Id),
                "truncated hash must be exactly as wide as the stack id");
  std::memcpy(&Id, Hash.data(), sizeof(Id));
  return Id;
}
638+
639+
// Overload that derives the stack id directly from a profiled Frame by
// forwarding its function GUID, line offset and column to the three-argument
// computeStackId.
static uint64_t computeStackId(const memprof::Frame &Frame) {
  const auto FrameFunction = Frame.Function;
  const auto FrameLineOffset = Frame.LineOffset;
  const auto FrameColumn = Frame.Column;
  return computeStackId(FrameFunction, FrameLineOffset, FrameColumn);
}
642+
643+
// Adds one profiled allocation context to AllocTrie.
//
// Every frame of AllocInfo's call stack is converted to its stack id, the
// allocation type is derived from the profiled access density, allocation
// count and lifetime, and both are handed to the trie.
static void addCallStack(CallStackTrie &AllocTrie,
                         const AllocationInfo *AllocInfo) {
  // Stack ids in the same order as the profiled frames.
  SmallVector<uint64_t> FrameIds;
  for (auto FrameIt = AllocInfo->CallStack.begin(),
            FrameEnd = AllocInfo->CallStack.end();
       FrameIt != FrameEnd; ++FrameIt)
    FrameIds.push_back(computeStackId(*FrameIt));

  const auto &Stats = AllocInfo->Info;
  auto Kind = getAllocType(Stats.getTotalLifetimeAccessDensity(),
                           Stats.getAllocCount(), Stats.getTotalLifetime());
  AllocTrie.addCallStack(Kind, FrameIds);
}
653+
654+
// Helper to compare the InlinedCallStack computed from an instruction's debug
655+
// info to a list of Frames from profile data (either the allocation data or a
656+
// callsite). For callsites, the StartIndex to use in the Frame array may be
657+
// non-zero.
658+
static bool
659+
stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
660+
ArrayRef<uint64_t> InlinedCallStack,
661+
unsigned StartIndex = 0) {
662+
auto StackFrame = ProfileCallStack.begin() + StartIndex;
663+
auto InlCallStackIter = InlinedCallStack.begin();
664+
for (; StackFrame != ProfileCallStack.end() &&
665+
InlCallStackIter != InlinedCallStack.end();
666+
++StackFrame, ++InlCallStackIter) {
667+
uint64_t StackId = computeStackId(*StackFrame);
668+
if (StackId != *InlCallStackIter)
669+
return false;
670+
}
671+
// Return true if we found and matched all stack ids from the call
672+
// instruction.
673+
return InlCallStackIter == InlinedCallStack.end();
674+
}
675+
676+
// Annotates the calls in F with information from F's memory profile record.
//
// The record is looked up through MemProfReader using the GUID of F's PGO
// function name. If the lookup fails, a DS_Warning diagnostic may be emitted
// (subject to PGOWarnMissing / NoPGOWarnMismatch / NoPGOWarnMismatchComdatWeak)
// and the function returns without touching F.
//
// Otherwise every non-intrinsic call in F has its debug-location chain (leaf
// location first, then each inlined-at location) converted to stack ids and
// compared with the profiled frames:
//  - calls whose leaf matches an allocation context and that isNewLikeFn
//    accepts get memprof metadata (or a function attribute) via
//    CallStackTrie::buildAndAttachMIBMetadata, plus callsite metadata when
//    memprof metadata was attached;
//  - calls whose leaf matches only a profiled callsite get callsite metadata.
void llvm::readMemprof(Module &M, Function &F,
677+
IndexedInstrProfReader *MemProfReader,
678+
const TargetLibraryInfo &TLI) {
679+
auto &Ctx = M.getContext();
680+
681+
auto FuncName = getPGOFuncName(F);
682+
auto FuncGUID = Function::getGUID(FuncName);
683+
Expected<memprof::MemProfRecord> MemProfResult =
684+
MemProfReader->getMemProfRecord(FuncGUID);
685+
if (Error E = MemProfResult.takeError()) {
686+
handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
687+
auto Err = IPE.get();
688+
bool SkipWarning = false;
689+
LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
690+
<< ": ");
691+
if (Err == instrprof_error::unknown_function) {
692+
NumOfMemProfMissing++;
693+
SkipWarning = !PGOWarnMissing;
694+
LLVM_DEBUG(dbgs() << "unknown function");
695+
} else if (Err == instrprof_error::hash_mismatch) {
696+
SkipWarning =
697+
NoPGOWarnMismatch ||
698+
(NoPGOWarnMismatchComdatWeak &&
699+
(F.hasComdat() ||
700+
F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
701+
LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
702+
}
703+
704+
if (SkipWarning)
705+
return;
706+
707+
std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
708+
Twine(" Hash = ") + std::to_string(FuncGUID))
709+
.str();
710+
711+
Ctx.diagnose(
712+
DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
713+
});
714+
// The record could not be read, so there is nothing to match against.
return;
715+
}
716+
717+
// Build maps of the location hash to all profile data with that leaf location
718+
// (allocation info and the callsites).
719+
std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
720+
// For the callsites we need to record the index of the associated frame in
721+
// the frame array (see comments below where the map entries are added).
722+
std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>>
723+
LocHashToCallSites;
724+
// The maps above store pointers into MemProfRec, so it must stay alive (and
// unmodified) for the rest of this function.
const auto MemProfRec = std::move(MemProfResult.get());
725+
for (auto &AI : MemProfRec.AllocSites) {
726+
// Associate the allocation info with the leaf frame. The later matching
727+
// code will match any inlined call sequences in the IR with a longer prefix
728+
// of call stack frames.
729+
// NOTE(review): assumes every allocation call stack has at least one frame
// -- TODO confirm against the profile reader.
uint64_t StackId = computeStackId(AI.CallStack[0]);
730+
LocHashToAllocInfo[StackId].insert(&AI);
731+
}
732+
for (auto &CS : MemProfRec.CallSites) {
733+
// Need to record all frames from leaf up to and including this function,
734+
// as any of these may or may not have been inlined at this point.
735+
unsigned Idx = 0;
736+
for (auto &StackFrame : CS) {
737+
uint64_t StackId = computeStackId(StackFrame);
738+
LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));
739+
// Once we find this function, we can stop recording.
740+
if (StackFrame.Function == FuncGUID)
741+
break;
742+
}
743+
// NOTE(review): the check below indexes CS[Idx - 1], so it presumes CS is
// non-empty -- TODO confirm.
assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
744+
}
745+
746+
// Line offset of a location relative to the line of its enclosing
// subprogram, keeping only the low 16 bits.
auto GetOffset = [](const DILocation *DIL) {
747+
return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
748+
0xffff;
749+
};
750+
751+
// Now walk the instructions, looking up the associated profile data using
752+
// debug locations.
753+
for (auto &BB : F) {
754+
for (auto &I : BB) {
755+
if (I.isDebugOrPseudoInst())
756+
continue;
757+
// We are only interested in calls (allocation or interior call stack
758+
// context calls).
759+
auto *CI = dyn_cast<CallBase>(&I);
760+
if (!CI)
761+
continue;
762+
auto *CalledFunction = CI->getCalledFunction();
763+
if (CalledFunction && CalledFunction->isIntrinsic())
764+
continue;
765+
// List of call stack ids computed from the location hashes on debug
766+
// locations (leaf to inlined at root).
767+
std::vector<uint64_t> InlinedCallStack;
768+
// Was the leaf location found in one of the profile maps?
769+
bool LeafFound = false;
770+
// If leaf was found in a map, iterators pointing to its location in both
771+
// of the maps. It might exist in neither, one, or both (the latter case
772+
// can happen because we don't currently have discriminators to
773+
// distinguish the case when a single line/col maps to both an allocation
774+
// and another callsite).
775+
std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
776+
AllocInfoIter;
777+
std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *,
778+
unsigned>>>::iterator CallSitesIter;
779+
for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
780+
DIL = DIL->getInlinedAt()) {
781+
// Use C++ linkage name if possible. Need to compile with
782+
// -fdebug-info-for-profiling to get linkage name.
783+
StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
784+
if (Name.empty())
785+
Name = DIL->getScope()->getSubprogram()->getName();
786+
auto CalleeGUID = Function::getGUID(Name);
787+
auto StackId =
788+
computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn());
789+
// LeafFound will only be false on the first iteration, since we either
790+
// set it true or break out of the loop below.
791+
if (!LeafFound) {
792+
AllocInfoIter = LocHashToAllocInfo.find(StackId);
793+
CallSitesIter = LocHashToCallSites.find(StackId);
794+
// Check if the leaf is in one of the maps. If not, no need to look
795+
// further at this call.
796+
if (AllocInfoIter == LocHashToAllocInfo.end() &&
797+
CallSitesIter == LocHashToCallSites.end())
798+
break;
799+
LeafFound = true;
800+
}
801+
InlinedCallStack.push_back(StackId);
802+
}
803+
// If leaf not in either of the maps, skip inst.
804+
if (!LeafFound)
805+
continue;
806+
807+
// First add !memprof metadata from allocation info, if we found the
808+
// instruction's leaf location in that map, and if the rest of the
809+
// instruction's locations match the prefix Frame locations on an
810+
// allocation context with the same leaf.
811+
if (AllocInfoIter != LocHashToAllocInfo.end()) {
812+
// Only consider allocations via new, to reduce unnecessary metadata,
813+
// since those are the only allocations that will be targeted initially.
814+
if (!isNewLikeFn(CI, &TLI))
815+
continue;
816+
// We may match this instruction's location list to multiple MIB
817+
// contexts. Add them to a Trie specialized for trimming the contexts to
818+
// the minimal needed to disambiguate contexts with unique behavior.
819+
CallStackTrie AllocTrie;
820+
for (auto *AllocInfo : AllocInfoIter->second) {
821+
// Check the full inlined call stack against this one.
822+
// If we found and thus matched all frames on the call, include
823+
// this MIB.
824+
if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
825+
InlinedCallStack))
826+
addCallStack(AllocTrie, AllocInfo);
827+
}
828+
// We might not have matched any to the full inlined call stack.
829+
// But if we did, create and attach metadata, or a function attribute if
830+
// all contexts have identical profiled behavior.
831+
if (!AllocTrie.empty()) {
832+
// MemprofMDAttached will be false if a function attribute was
833+
// attached.
834+
bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
835+
assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
836+
if (MemprofMDAttached) {
837+
// Add callsite metadata for the instruction's location list so that
838+
// it is simpler later on to identify which part of the MIB contexts
839+
// are from this particular instruction (including during inlining,
840+
// when the callsite metadata will be updated appropriately).
841+
// FIXME: can this be changed to strip out the matching stack
842+
// context ids from the MIB contexts and not add any callsite
843+
// metadata here to save space?
844+
addCallsiteMetadata(I, InlinedCallStack, Ctx);
845+
}
846+
}
847+
// The allocation-map branch is exclusive: the callsite map is not
// consulted for this instruction.
continue;
848+
}
849+
850+
// Otherwise, add callsite metadata. If we reach here then we found the
851+
// instruction's leaf location in the callsites map and not the allocation
852+
// map.
853+
assert(CallSitesIter != LocHashToCallSites.end());
854+
for (auto CallStackIdx : CallSitesIter->second) {
855+
// If we found and thus matched all frames on the call, create and
856+
// attach call stack metadata.
857+
if (stackFrameIncludesInlinedCallStack(
858+
*CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
859+
addCallsiteMetadata(I, InlinedCallStack, Ctx);
860+
// Only need to find one with a matching call stack and add a single
861+
// callsite metadata.
862+
break;
863+
}
864+
}
865+
}
866+
}
867+
}

0 commit comments

Comments
 (0)