Skip to content

Commit 3498cf5

Browse files
committed
Revert "[MemProf] Refactor memory profile matching into MemProfiler (NFC)"
This reverts commit 29252fd. This broke the AMD GPU OpenMP Offload buildbot.
1 parent 3ab7ef2 commit 3498cf5

File tree

3 files changed

+275
-281
lines changed

3 files changed

+275
-281
lines changed

llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,6 @@ class ModuleMemProfilerPass : public PassInfoMixin<ModuleMemProfilerPass> {
4343
static bool isRequired() { return true; }
4444
};
4545

46-
// TODO: Remove this declaration and make readMemprof static once the matching
47-
// is moved into its own pass.
48-
class IndexedInstrProfReader;
49-
class TargetLibraryInfo;
50-
void readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
51-
const TargetLibraryInfo &TLI);
52-
5346
} // namespace llvm
5447

5548
#endif

llvm/lib/Transforms/Instrumentation/MemProfiler.cpp

Lines changed: 0 additions & 264 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,9 @@
1818
#include "llvm/ADT/SmallVector.h"
1919
#include "llvm/ADT/Statistic.h"
2020
#include "llvm/ADT/StringRef.h"
21-
#include "llvm/Analysis/MemoryBuiltins.h"
22-
#include "llvm/Analysis/MemoryProfileInfo.h"
2321
#include "llvm/Analysis/ValueTracking.h"
2422
#include "llvm/IR/Constant.h"
2523
#include "llvm/IR/DataLayout.h"
26-
#include "llvm/IR/DiagnosticInfo.h"
2724
#include "llvm/IR/Function.h"
2825
#include "llvm/IR/GlobalValue.h"
2926
#include "llvm/IR/IRBuilder.h"
@@ -33,28 +30,16 @@
3330
#include "llvm/IR/Type.h"
3431
#include "llvm/IR/Value.h"
3532
#include "llvm/ProfileData/InstrProf.h"
36-
#include "llvm/ProfileData/InstrProfReader.h"
37-
#include "llvm/Support/BLAKE3.h"
3833
#include "llvm/Support/CommandLine.h"
3934
#include "llvm/Support/Debug.h"
40-
#include "llvm/Support/HashBuilder.h"
4135
#include "llvm/TargetParser/Triple.h"
4236
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
4337
#include "llvm/Transforms/Utils/ModuleUtils.h"
44-
#include <map>
45-
#include <set>
4638

4739
using namespace llvm;
48-
using namespace llvm::memprof;
4940

5041
#define DEBUG_TYPE "memprof"
5142

52-
namespace llvm {
53-
extern cl::opt<bool> PGOWarnMissing;
54-
extern cl::opt<bool> NoPGOWarnMismatch;
55-
extern cl::opt<bool> NoPGOWarnMismatchComdatWeak;
56-
} // namespace llvm
57-
5843
constexpr int LLVM_MEM_PROFILER_VERSION = 1;
5944

6045
// Size of memory mapped to a single shadow location.
@@ -143,7 +128,6 @@ STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
143128
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
144129
STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
145130
STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");
146-
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
147131

148132
namespace {
149133

@@ -617,251 +601,3 @@ bool MemProfiler::instrumentFunction(Function &F) {
617601

618602
return FunctionModified;
619603
}
620-
621-
static void addCallsiteMetadata(Instruction &I,
622-
std::vector<uint64_t> &InlinedCallStack,
623-
LLVMContext &Ctx) {
624-
I.setMetadata(LLVMContext::MD_callsite,
625-
buildCallstackMetadata(InlinedCallStack, Ctx));
626-
}
627-
628-
static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
629-
uint32_t Column) {
630-
llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::support::endianness::little>
631-
HashBuilder;
632-
HashBuilder.add(Function, LineOffset, Column);
633-
llvm::BLAKE3Result<8> Hash = HashBuilder.final();
634-
uint64_t Id;
635-
std::memcpy(&Id, Hash.data(), sizeof(Hash));
636-
return Id;
637-
}
638-
639-
static uint64_t computeStackId(const memprof::Frame &Frame) {
640-
return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
641-
}
642-
643-
static void addCallStack(CallStackTrie &AllocTrie,
644-
const AllocationInfo *AllocInfo) {
645-
SmallVector<uint64_t> StackIds;
646-
for (const auto &StackFrame : AllocInfo->CallStack)
647-
StackIds.push_back(computeStackId(StackFrame));
648-
auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
649-
AllocInfo->Info.getAllocCount(),
650-
AllocInfo->Info.getTotalLifetime());
651-
AllocTrie.addCallStack(AllocType, StackIds);
652-
}
653-
654-
// Helper to compare the InlinedCallStack computed from an instruction's debug
655-
// info to a list of Frames from profile data (either the allocation data or a
656-
// callsite). For callsites, the StartIndex to use in the Frame array may be
657-
// non-zero.
658-
static bool
659-
stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
660-
ArrayRef<uint64_t> InlinedCallStack,
661-
unsigned StartIndex = 0) {
662-
auto StackFrame = ProfileCallStack.begin() + StartIndex;
663-
auto InlCallStackIter = InlinedCallStack.begin();
664-
for (; StackFrame != ProfileCallStack.end() &&
665-
InlCallStackIter != InlinedCallStack.end();
666-
++StackFrame, ++InlCallStackIter) {
667-
uint64_t StackId = computeStackId(*StackFrame);
668-
if (StackId != *InlCallStackIter)
669-
return false;
670-
}
671-
// Return true if we found and matched all stack ids from the call
672-
// instruction.
673-
return InlCallStackIter == InlinedCallStack.end();
674-
}
675-
676-
void llvm::readMemprof(Module &M, Function &F,
677-
IndexedInstrProfReader *MemProfReader,
678-
const TargetLibraryInfo &TLI) {
679-
auto &Ctx = M.getContext();
680-
681-
auto FuncName = getPGOFuncName(F);
682-
auto FuncGUID = Function::getGUID(FuncName);
683-
Expected<memprof::MemProfRecord> MemProfResult =
684-
MemProfReader->getMemProfRecord(FuncGUID);
685-
if (Error E = MemProfResult.takeError()) {
686-
handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
687-
auto Err = IPE.get();
688-
bool SkipWarning = false;
689-
LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
690-
<< ": ");
691-
if (Err == instrprof_error::unknown_function) {
692-
NumOfMemProfMissing++;
693-
SkipWarning = !PGOWarnMissing;
694-
LLVM_DEBUG(dbgs() << "unknown function");
695-
} else if (Err == instrprof_error::hash_mismatch) {
696-
SkipWarning =
697-
NoPGOWarnMismatch ||
698-
(NoPGOWarnMismatchComdatWeak &&
699-
(F.hasComdat() ||
700-
F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
701-
LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
702-
}
703-
704-
if (SkipWarning)
705-
return;
706-
707-
std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
708-
Twine(" Hash = ") + std::to_string(FuncGUID))
709-
.str();
710-
711-
Ctx.diagnose(
712-
DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
713-
});
714-
return;
715-
}
716-
717-
// Build maps of the location hash to all profile data with that leaf location
718-
// (allocation info and the callsites).
719-
std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
720-
// For the callsites we need to record the index of the associated frame in
721-
// the frame array (see comments below where the map entries are added).
722-
std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>>
723-
LocHashToCallSites;
724-
const auto MemProfRec = std::move(MemProfResult.get());
725-
for (auto &AI : MemProfRec.AllocSites) {
726-
// Associate the allocation info with the leaf frame. The later matching
727-
// code will match any inlined call sequences in the IR with a longer prefix
728-
// of call stack frames.
729-
uint64_t StackId = computeStackId(AI.CallStack[0]);
730-
LocHashToAllocInfo[StackId].insert(&AI);
731-
}
732-
for (auto &CS : MemProfRec.CallSites) {
733-
// Need to record all frames from leaf up to and including this function,
734-
// as any of these may or may not have been inlined at this point.
735-
unsigned Idx = 0;
736-
for (auto &StackFrame : CS) {
737-
uint64_t StackId = computeStackId(StackFrame);
738-
LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));
739-
// Once we find this function, we can stop recording.
740-
if (StackFrame.Function == FuncGUID)
741-
break;
742-
}
743-
assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
744-
}
745-
746-
auto GetOffset = [](const DILocation *DIL) {
747-
return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
748-
0xffff;
749-
};
750-
751-
// Now walk the instructions, looking up the associated profile data using
752-
// debug locations.
753-
for (auto &BB : F) {
754-
for (auto &I : BB) {
755-
if (I.isDebugOrPseudoInst())
756-
continue;
757-
// We are only interested in calls (allocation or interior call stack
758-
// context calls).
759-
auto *CI = dyn_cast<CallBase>(&I);
760-
if (!CI)
761-
continue;
762-
auto *CalledFunction = CI->getCalledFunction();
763-
if (CalledFunction && CalledFunction->isIntrinsic())
764-
continue;
765-
// List of call stack ids computed from the location hashes on debug
766-
// locations (leaf to inlined at root).
767-
std::vector<uint64_t> InlinedCallStack;
768-
// Was the leaf location found in one of the profile maps?
769-
bool LeafFound = false;
770-
// If leaf was found in a map, iterators pointing to its location in both
771-
// of the maps. It might exist in neither, one, or both (the latter case
772-
// can happen because we don't currently have discriminators to
773-
// distinguish the case when a single line/col maps to both an allocation
774-
// and another callsite).
775-
std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
776-
AllocInfoIter;
777-
std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *,
778-
unsigned>>>::iterator CallSitesIter;
779-
for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
780-
DIL = DIL->getInlinedAt()) {
781-
// Use C++ linkage name if possible. Need to compile with
782-
// -fdebug-info-for-profiling to get linkage name.
783-
StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
784-
if (Name.empty())
785-
Name = DIL->getScope()->getSubprogram()->getName();
786-
auto CalleeGUID = Function::getGUID(Name);
787-
auto StackId =
788-
computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn());
789-
// LeafFound will only be false on the first iteration, since we either
790-
// set it true or break out of the loop below.
791-
if (!LeafFound) {
792-
AllocInfoIter = LocHashToAllocInfo.find(StackId);
793-
CallSitesIter = LocHashToCallSites.find(StackId);
794-
// Check if the leaf is in one of the maps. If not, no need to look
795-
// further at this call.
796-
if (AllocInfoIter == LocHashToAllocInfo.end() &&
797-
CallSitesIter == LocHashToCallSites.end())
798-
break;
799-
LeafFound = true;
800-
}
801-
InlinedCallStack.push_back(StackId);
802-
}
803-
// If leaf not in either of the maps, skip inst.
804-
if (!LeafFound)
805-
continue;
806-
807-
// First add !memprof metadata from allocation info, if we found the
808-
// instruction's leaf location in that map, and if the rest of the
809-
// instruction's locations match the prefix Frame locations on an
810-
// allocation context with the same leaf.
811-
if (AllocInfoIter != LocHashToAllocInfo.end()) {
812-
// Only consider allocations via new, to reduce unnecessary metadata,
813-
// since those are the only allocations that will be targeted initially.
814-
if (!isNewLikeFn(CI, &TLI))
815-
continue;
816-
// We may match this instruction's location list to multiple MIB
817-
// contexts. Add them to a Trie specialized for trimming the contexts to
818-
// the minimal needed to disambiguate contexts with unique behavior.
819-
CallStackTrie AllocTrie;
820-
for (auto *AllocInfo : AllocInfoIter->second) {
821-
// Check the full inlined call stack against this one.
822-
// If we found and thus matched all frames on the call, include
823-
// this MIB.
824-
if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
825-
InlinedCallStack))
826-
addCallStack(AllocTrie, AllocInfo);
827-
}
828-
// We might not have matched any to the full inlined call stack.
829-
// But if we did, create and attach metadata, or a function attribute if
830-
// all contexts have identical profiled behavior.
831-
if (!AllocTrie.empty()) {
832-
// MemprofMDAttached will be false if a function attribute was
833-
// attached.
834-
bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
835-
assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
836-
if (MemprofMDAttached) {
837-
// Add callsite metadata for the instruction's location list so that
838-
// it is simpler later on to identify which part of the MIB contexts
839-
// are from this particular instruction (including during inlining,
840-
// when the callsite metadata will be updated appropriately).
841-
// FIXME: can this be changed to strip out the matching stack
842-
// context ids from the MIB contexts and not add any callsite
843-
// metadata here to save space?
844-
addCallsiteMetadata(I, InlinedCallStack, Ctx);
845-
}
846-
}
847-
continue;
848-
}
849-
850-
// Otherwise, add callsite metadata. If we reach here then we found the
851-
// instruction's leaf location in the callsites map and not the allocation
852-
// map.
853-
assert(CallSitesIter != LocHashToCallSites.end());
854-
for (auto CallStackIdx : CallSitesIter->second) {
855-
// If we found and thus matched all frames on the call, create and
856-
// attach call stack metadata.
857-
if (stackFrameIncludesInlinedCallStack(
858-
*CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
859-
addCallsiteMetadata(I, InlinedCallStack, Ctx);
860-
// Only need to find one with a matching call stack and add a single
861-
// callsite metadata.
862-
break;
863-
}
864-
}
865-
}
866-
}
867-
}

0 commit comments

Comments
 (0)