Skip to content

[MemProf] Track and report profiled sizes through cloning #98382

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions llvm/include/llvm/Bitcode/LLVMBitCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,8 @@ enum GlobalValueSummarySymtabCodes {
// [valueid, n x stackidindex]
FS_PERMODULE_CALLSITE_INFO = 26,
// Summary of per-module allocation memprof metadata.
// [n x (alloc type, nummib, nummib x stackidindex)]
// [nummib, nummib x (alloc type, numstackids, numstackids x stackidindex),
// [nummib x total size]?]
FS_PERMODULE_ALLOC_INFO = 27,
// Summary of combined index memprof callsite metadata.
// [valueid, numstackindices, numver,
Expand All @@ -316,7 +317,7 @@ enum GlobalValueSummarySymtabCodes {
// Summary of combined index allocation memprof metadata.
// [nummib, numver,
// nummib x (alloc type, numstackids, numstackids x stackidindex),
// numver x version]
// numver x version, [nummib x total size]?]
FS_COMBINED_ALLOC_INFO = 29,
FS_STACK_IDS = 30,
};
Expand Down
16 changes: 15 additions & 1 deletion llvm/include/llvm/IR/ModuleSummaryIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,10 @@ struct AllocInfo {
// Vector of MIBs in this memprof metadata.
std::vector<MIBInfo> MIBs;

// If requested, keep track of total profiled sizes for each MIB. This will be
// a vector of the same length and order as the MIBs vector, if non-empty.
std::vector<uint64_t> TotalSizes;

AllocInfo(std::vector<MIBInfo> MIBs) : MIBs(std::move(MIBs)) {
Versions.push_back(0);
}
Expand All @@ -423,6 +427,16 @@ inline raw_ostream &operator<<(raw_ostream &OS, const AllocInfo &AE) {
for (auto &M : AE.MIBs) {
OS << "\t\t" << M << "\n";
}
if (!AE.TotalSizes.empty()) {
OS << " TotalSizes per MIB:\n\t\t";
First = true;
for (uint64_t TS : AE.TotalSizes) {
if (!First)
OS << ", ";
First = false;
OS << TS << "\n";
}
}
return OS;
}

Expand Down Expand Up @@ -1431,7 +1445,7 @@ class ModuleSummaryIndex {
// in the way some records are interpreted, like flags for instance.
// Note that incrementing this may require changes in both BitcodeReader.cpp
// and BitcodeWriter.cpp.
static constexpr uint64_t BitcodeSummaryVersion = 9;
static constexpr uint64_t BitcodeSummaryVersion = 10;

// Regular LTO module name for ASM writer
static constexpr const char *getRegularLTOModuleName() {
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ extern cl::opt<bool> ScalePartialSampleProfileWorkingSetSize;

extern cl::opt<unsigned> MaxNumVTableAnnotations;

extern cl::opt<bool> MemProfReportHintedSizes;

// Walk through the operands of a given User via worklist iteration and populate
// the set of GlobalValue references encountered. Invoked either on an
// Instruction or a GlobalVariable (which walks its initializer).
Expand Down Expand Up @@ -517,6 +519,7 @@ static void computeFunctionSummary(
auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof);
if (MemProfMD) {
std::vector<MIBInfo> MIBs;
std::vector<uint64_t> TotalSizes;
for (auto &MDOp : MemProfMD->operands()) {
auto *MIBMD = cast<const MDNode>(MDOp);
MDNode *StackNode = getMIBStackNode(MIBMD);
Expand All @@ -536,8 +539,17 @@ static void computeFunctionSummary(
}
MIBs.push_back(
MIBInfo(getMIBAllocType(MIBMD), std::move(StackIdIndices)));
if (MemProfReportHintedSizes) {
auto TotalSize = getMIBTotalSize(MIBMD);
assert(TotalSize);
TotalSizes.push_back(TotalSize);
}
}
Allocs.push_back(AllocInfo(std::move(MIBs)));
if (MemProfReportHintedSizes) {
assert(Allocs.back().MIBs.size() == TotalSizes.size());
Allocs.back().TotalSizes = std::move(TotalSizes);
}
} else if (!InstCallsite.empty()) {
SmallVector<unsigned> StackIdIndices;
for (auto StackId : InstCallsite)
Expand Down
32 changes: 31 additions & 1 deletion llvm/lib/Bitcode/Reader/BitcodeReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7994,7 +7994,12 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
case bitc::FS_PERMODULE_ALLOC_INFO: {
unsigned I = 0;
std::vector<MIBInfo> MIBs;
while (I < Record.size()) {
unsigned NumMIBs = 0;
if (Version >= 10)
NumMIBs = Record[I++];
unsigned MIBsRead = 0;
while ((Version >= 10 && MIBsRead++ < NumMIBs) ||
(Version < 10 && I < Record.size())) {
assert(Record.size() - I >= 2);
AllocationType AllocType = (AllocationType)Record[I++];
unsigned NumStackEntries = Record[I++];
Expand All @@ -8007,7 +8012,19 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
}
MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList)));
}
std::vector<uint64_t> TotalSizes;
// We either have no sizes or NumMIBs of them.
assert(I == Record.size() || Record.size() - I == NumMIBs);
if (I < Record.size()) {
MIBsRead = 0;
while (MIBsRead++ < NumMIBs)
TotalSizes.push_back(Record[I++]);
}
PendingAllocs.push_back(AllocInfo(std::move(MIBs)));
if (!TotalSizes.empty()) {
assert(PendingAllocs.back().MIBs.size() == TotalSizes.size());
PendingAllocs.back().TotalSizes = std::move(TotalSizes);
}
break;
}

Expand All @@ -8034,8 +8051,21 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
SmallVector<uint8_t> Versions;
for (unsigned J = 0; J < NumVersions; J++)
Versions.push_back(Record[I++]);
std::vector<uint64_t> TotalSizes;
// We either have no sizes or NumMIBs of them.
assert(I == Record.size() || Record.size() - I == NumMIBs);
if (I < Record.size()) {
MIBsRead = 0;
while (MIBsRead++ < NumMIBs) {
TotalSizes.push_back(Record[I++]);
}
}
PendingAllocs.push_back(
AllocInfo(std::move(Versions), std::move(MIBs)));
if (!TotalSizes.empty()) {
assert(PendingAllocs.back().MIBs.size() == TotalSizes.size());
PendingAllocs.back().TotalSizes = std::move(TotalSizes);
}
break;
}
}
Expand Down
19 changes: 14 additions & 5 deletions llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4189,10 +4189,9 @@ static void writeFunctionHeapProfileRecords(
// Per module alloc versions should always have a single entry of
// value 0.
assert(!PerModule || (AI.Versions.size() == 1 && AI.Versions[0] == 0));
if (!PerModule) {
Record.push_back(AI.MIBs.size());
Record.push_back(AI.MIBs.size());
if (!PerModule)
Record.push_back(AI.Versions.size());
}
for (auto &MIB : AI.MIBs) {
Record.push_back((uint8_t)MIB.AllocType);
Record.push_back(MIB.StackIdIndices.size());
Expand All @@ -4203,6 +4202,11 @@ static void writeFunctionHeapProfileRecords(
for (auto V : AI.Versions)
Record.push_back(V);
}
assert(AI.TotalSizes.empty() || AI.TotalSizes.size() == AI.MIBs.size());
if (!AI.TotalSizes.empty()) {
for (auto Size : AI.TotalSizes)
Record.push_back(Size);
}
Stream.EmitRecord(PerModule ? bitc::FS_PERMODULE_ALLOC_INFO
: bitc::FS_COMBINED_ALLOC_INFO,
Record, AllocAbbrev);
Expand Down Expand Up @@ -4432,7 +4436,9 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {

Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_ALLOC_INFO));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
// nummib x (alloc type, numstackids, numstackids x stackidindex)
// optional: nummib x total size
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
Expand Down Expand Up @@ -4576,6 +4582,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
// nummib x (alloc type, numstackids, numstackids x stackidindex),
// numver x version
// optional: nummib x total size
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
Expand Down Expand Up @@ -4675,7 +4682,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
writeFunctionHeapProfileRecords(
Stream, FS, CallsiteAbbrev, AllocAbbrev,
/*PerModule*/ false,
/*GetValueId*/ [&](const ValueInfo &VI) -> unsigned {
/*GetValueId*/
[&](const ValueInfo &VI) -> unsigned {
std::optional<unsigned> ValueID = GetValueId(VI);
// This can happen in shared index files for distributed ThinLTO if
// the callee function summary is not included. Record 0 which we
Expand All @@ -4685,7 +4693,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
return 0;
return *ValueID;
},
/*GetStackIndex*/ [&](unsigned I) {
/*GetStackIndex*/
[&](unsigned I) {
// Get the corresponding index into the list of StackIds actually
// being written for this combined index (which may be a subset in
// the case of distributed indexes).
Expand Down
83 changes: 68 additions & 15 deletions llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ cl::opt<bool> SupportsHotColdNew(
cl::desc("Linking with hot/cold operator new interfaces"));
} // namespace llvm

extern cl::opt<bool> MemProfReportHintedSizes;

namespace {
/// CRTP base for graphs built from either IR or ThinLTO summary index.
///
Expand Down Expand Up @@ -172,6 +174,7 @@ class CallsiteContextGraph {

void dump() const;
void print(raw_ostream &OS) const;
void printTotalSizes(raw_ostream &OS) const;

friend raw_ostream &operator<<(raw_ostream &OS,
const CallsiteContextGraph &CCG) {
Expand Down Expand Up @@ -439,7 +442,7 @@ class CallsiteContextGraph {
void addStackNodesForMIB(ContextNode *AllocNode,
CallStack<NodeT, IteratorT> &StackContext,
CallStack<NodeT, IteratorT> &CallsiteContext,
AllocationType AllocType);
AllocationType AllocType, uint64_t TotalSize);

/// Matches all callsite metadata (or summary) to the nodes created for
/// allocation memprof MIB metadata, synthesizing new nodes to reflect any
Expand Down Expand Up @@ -611,6 +614,10 @@ class CallsiteContextGraph {
/// Map from each context ID to the AllocationType assigned to that context.
DenseMap<uint32_t, AllocationType> ContextIdToAllocationType;

/// Map from each context ID to the profiled aggregate allocation size,
/// optionally populated when requested (via MemProfReportHintedSizes).
DenseMap<uint32_t, uint64_t> ContextIdToTotalSize;

/// Identifies the context node created for a stack id when adding the MIB
/// contexts to the graph. This is used to locate the context nodes when
/// trying to assign the corresponding callsites with those stack ids to these
Expand Down Expand Up @@ -1004,18 +1011,36 @@ CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
return AllocNode;
}

/// Returns a printable name for the allocation-type bitmask \p AllocTypes:
/// "None" when no bits are set, otherwise "NotCold" and/or "Cold"
/// concatenated in that order (both bits set yields "NotColdCold").
static std::string getAllocTypeString(uint8_t AllocTypes) {
  if (AllocTypes == 0)
    return "None";
  const bool HasNotCold = (AllocTypes & (uint8_t)AllocationType::NotCold) != 0;
  const bool HasCold = (AllocTypes & (uint8_t)AllocationType::Cold) != 0;
  std::string Name;
  if (HasNotCold)
    Name.append("NotCold");
  if (HasCold)
    Name.append("Cold");
  return Name;
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
template <class NodeT, class IteratorT>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType) {
CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType,
uint64_t TotalSize) {
assert(!MemProfReportHintedSizes || TotalSize > 0);
// Treating the hot alloc type as NotCold before the disambiguation for "hot"
// is done.
if (AllocType == AllocationType::Hot)
AllocType = AllocationType::NotCold;

ContextIdToAllocationType[++LastContextId] = AllocType;

if (MemProfReportHintedSizes) {
assert(TotalSize);
ContextIdToTotalSize[LastContextId] = TotalSize;
}

// Update alloc type and context ids for this MIB.
AllocNode->AllocTypes |= (uint8_t)AllocType;

Expand Down Expand Up @@ -1060,6 +1085,10 @@ CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
assert(ContextIdToAllocationType.count(OldId));
// The new context has the same allocation type as original.
ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
// For now set this to 0 so we don't duplicate sizes. Not clear how to divvy
// up the size. Assume that if we are able to duplicate context ids that we
// will be able to disambiguate all copies.
ContextIdToTotalSize[LastContextId] = 0;
}
return NewContextIds;
}
Expand Down Expand Up @@ -1663,7 +1692,7 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode);
addStackNodesForMIB<MDNode, MDNode::op_iterator>(
AllocNode, StackContext, CallsiteContext,
getMIBAllocType(MIBMD));
getMIBAllocType(MIBMD), getMIBTotalSize(MIBMD));
}
assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
// Memprof and callsite metadata on memory allocations no longer
Expand Down Expand Up @@ -1735,12 +1764,20 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
// stack ids on the allocation call during ModuleSummaryAnalysis.
CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
EmptyContext;
unsigned I = 0;
assert(!MemProfReportHintedSizes ||
AN.TotalSizes.size() == AN.MIBs.size());
// Now add all of the MIBs and their stack nodes.
for (auto &MIB : AN.MIBs) {
CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
StackContext(&MIB);
uint64_t TotalSize = 0;
if (MemProfReportHintedSizes)
TotalSize = AN.TotalSizes[I];
addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
AllocNode, StackContext, EmptyContext, MIB.AllocType);
AllocNode, StackContext, EmptyContext, MIB.AllocType,
TotalSize);
I++;
}
assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
// Initialize version 0 on the summary alloc node to the current alloc
Expand Down Expand Up @@ -2171,17 +2208,6 @@ bool IndexCallsiteContextGraph::calleeMatchesFunc(
return true;
}

/// Returns a printable name for the allocation-type bitmask \p AllocTypes:
/// "None" when no bits are set, otherwise "NotCold" and/or "Cold"
/// concatenated in that order (both bits set yields "NotColdCold").
static std::string getAllocTypeString(uint8_t AllocTypes) {
  if (AllocTypes == 0)
    return "None";
  const bool HasNotCold = (AllocTypes & (uint8_t)AllocationType::NotCold) != 0;
  const bool HasCold = (AllocTypes & (uint8_t)AllocationType::Cold) != 0;
  std::string Name;
  if (HasNotCold)
    Name.append("NotCold");
  if (HasCold)
    Name.append("Cold");
  return Name;
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
const {
Expand Down Expand Up @@ -2261,6 +2287,30 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
}
}

/// Writes one line to \p OS for every context id found on each allocation
/// node that has not been removed. Each line gives the context's recorded
/// allocation type, its id, its recorded total size, and the allocation type
/// string of the node the context now lives on. Ids are emitted in ascending
/// order within a node.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
    raw_ostream &OS) const {
  using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
  for (const auto Node : nodes<GraphType>(this)) {
    // Only allocation nodes that are still live get reported.
    if (Node->isRemoved() || !Node->IsAllocation)
      continue;

    // The id set is unordered; sort a copy so the report order is stable.
    DenseSet<uint32_t> NodeContextIds = Node->getContextIds();
    std::vector<uint32_t> OrderedIds(NodeContextIds.begin(),
                                     NodeContextIds.end());
    std::sort(OrderedIds.begin(), OrderedIds.end());

    // The node-level type string is the same for every context on this node.
    const std::string NodeTypeStr = getAllocTypeString(Node->AllocTypes);

    for (uint32_t ContextId : OrderedIds) {
      // Both maps are expected to hold an entry for every reported id.
      auto SizeIt = ContextIdToTotalSize.find(ContextId);
      assert(SizeIt != ContextIdToTotalSize.end());
      auto TypeIt = ContextIdToAllocationType.find(ContextId);
      assert(TypeIt != ContextIdToAllocationType.end());

      const std::string ContextTypeStr =
          getAllocTypeString((uint8_t)TypeIt->second);
      OS << ContextTypeStr << " context " << ContextId;
      OS << " with total size " << SizeIt->second << " is ";
      OS << NodeTypeStr << " after cloning\n";
    }
  }
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const {
using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
Expand Down Expand Up @@ -3797,6 +3847,9 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
if (ExportToDot)
exportToDot("clonefuncassign");

if (MemProfReportHintedSizes)
printTotalSizes(errs());

return Changed;
}

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Bitcode/summary_version.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
; RUN: opt -module-summary %s -o - | llvm-bcanalyzer -dump | FileCheck %s

; CHECK: <GLOBALVAL_SUMMARY_BLOCK
; CHECK: <VERSION op0=9/>
; CHECK: <VERSION op0=10/>



Expand Down
Loading
Loading