Revert "[MemProf] Reduce cloning overhead by sharing nodes when possible" #102932

Merged · 1 commit · Aug 12, 2024
124 changes: 15 additions & 109 deletions llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -242,16 +242,9 @@ class CallsiteContextGraph {
// recursion.
bool Recursive = false;

// The corresponding allocation or interior call. This is the primary call
// for which we have created this node.
// The corresponding allocation or interior call.
CallInfo Call;

// List of other calls that can be treated the same as the primary call
// through cloning. I.e. located in the same function and have the same
// (possibly pruned) stack ids. They will be updated the same way as the
// primary call when assigning to function clones.
std::vector<CallInfo> MatchingCalls;

// For alloc nodes this is a unique id assigned when constructed, and for
// callsite stack nodes it is the original stack id when the node is
// constructed from the memprof MIB metadata on the alloc nodes. Note that
@@ -464,9 +457,6 @@ class CallsiteContextGraph {
/// iteration.
MapVector<FuncTy *, std::vector<CallInfo>> FuncToCallsWithMetadata;

/// Records the function each call is located in.
DenseMap<CallInfo, const FuncTy *> CallToFunc;

/// Map from callsite node to the enclosing caller function.
std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;

@@ -484,8 +474,7 @@ class CallsiteContextGraph {
/// StackIdToMatchingCalls map.
void assignStackNodesPostOrder(
ContextNode *Node, DenseSet<const ContextNode *> &Visited,
DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls,
DenseMap<CallInfo, CallInfo> &CallToMatchingCall);
DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls);

/// Duplicates the given set of context ids, updating the provided
/// map from each original id with the newly generated context ids,
@@ -1241,11 +1230,10 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
assignStackNodesPostOrder(
ContextNode *Node, DenseSet<const ContextNode *> &Visited,
DenseMap<uint64_t, std::vector<CallContextInfo>>
&StackIdToMatchingCalls,
DenseMap<CallInfo, CallInfo> &CallToMatchingCall) {
assignStackNodesPostOrder(ContextNode *Node,
DenseSet<const ContextNode *> &Visited,
DenseMap<uint64_t, std::vector<CallContextInfo>>
&StackIdToMatchingCalls) {
auto Inserted = Visited.insert(Node);
if (!Inserted.second)
return;
@@ -1258,8 +1246,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
// Skip any that have been removed during the recursion.
if (!Edge)
continue;
assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls,
CallToMatchingCall);
assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls);
}

// If this node's stack id is in the map, update the graph to contain new
@@ -1302,19 +1289,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
// Skip any for which we didn't assign any ids, these don't get a node in
// the graph.
if (SavedContextIds.empty()) {
// If this call has a matching call (located in the same function and
// having the same stack ids), simply add it to the context node created
// for its matching call earlier. These can be treated the same through
// cloning and get updated at the same time.
if (!CallToMatchingCall.contains(Call))
continue;
auto MatchingCall = CallToMatchingCall[Call];
assert(NonAllocationCallToContextNodeMap.contains(MatchingCall));
NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
Call);
if (SavedContextIds.empty())
continue;
}

assert(LastId == Ids.back());

@@ -1446,10 +1422,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
// there is more than one call with the same stack ids. Their (possibly newly
// duplicated) context ids are saved in the StackIdToMatchingCalls map.
DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
// Save a map from each call to any that are found to match it. I.e. located
// in the same function and have the same (possibly pruned) stack ids. We use
// this to avoid creating extra graph nodes as they can be treated the same.
DenseMap<CallInfo, CallInfo> CallToMatchingCall;
for (auto &It : StackIdToMatchingCalls) {
auto &Calls = It.getSecond();
// Skip single calls with a single stack id. These don't need a new node.
@@ -1488,13 +1460,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
assert(!LastNodeContextIds.empty());

// Map from function to the first call from the below list (with matching
// stack ids) found in that function. Note that calls from different
// functions can have the same stack ids because this is the list of stack
// ids that had (possibly pruned) nodes after building the graph from the
// allocation MIBs.
DenseMap<const FuncTy *, CallInfo> FuncToCallMap;

for (unsigned I = 0; I < Calls.size(); I++) {
auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
assert(SavedContextIds.empty());
@@ -1568,18 +1533,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
continue;
}

const FuncTy *CallFunc = CallToFunc[Call];

// If the prior call had the same stack ids this map would not be empty.
// Check if we already have a call that "matches" because it is located
// in the same function.
if (FuncToCallMap.contains(CallFunc)) {
// Record the matching call found for this call, and skip it. We
// will subsequently combine it into the same node.
CallToMatchingCall[Call] = FuncToCallMap[CallFunc];
continue;
}

// Check if the next set of stack ids is the same (since the Calls vector
// of tuples is sorted by the stack ids we can just look at the next one).
bool DuplicateContextIds = false;
@@ -1609,14 +1562,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
set_subtract(LastNodeContextIds, StackSequenceContextIds);
if (LastNodeContextIds.empty())
break;
// No longer possibly in a sequence of calls with duplicate stack ids,
// clear the map.
FuncToCallMap.clear();
} else
// Record the call with its function, so we can locate it the next time
// we find a call from this function when processing the calls with the
// same stack ids.
FuncToCallMap[CallFunc] = Call;
}
}
}

@@ -1633,8 +1579,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
// associated context ids over to the new nodes.
DenseSet<const ContextNode *> Visited;
for (auto &Entry : AllocationCallToContextNodeMap)
assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls,
CallToMatchingCall);
assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls);
if (VerifyCCG)
check();
}
@@ -1734,7 +1679,6 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
continue;
if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) {
CallsWithMetadata.push_back(&I);
CallToFunc[&I] = &F;
auto *AllocNode = addAllocNode(&I, &F);
auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite);
assert(CallsiteMD);
@@ -1756,10 +1700,8 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
I.setMetadata(LLVMContext::MD_callsite, nullptr);
}
// For callsite metadata, add to list for this function for later use.
else if (I.getMetadata(LLVMContext::MD_callsite)) {
else if (I.getMetadata(LLVMContext::MD_callsite))
CallsWithMetadata.push_back(&I);
CallToFunc[&I] = &F;
}
}
}
if (!CallsWithMetadata.empty())
@@ -1814,10 +1756,8 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
// correlate properly in applyImport in the backends.
if (AN.MIBs.empty())
continue;
IndexCall AllocCall(&AN);
CallsWithMetadata.push_back(AllocCall);
CallToFunc[AllocCall] = FS;
auto *AllocNode = addAllocNode(AllocCall, FS);
CallsWithMetadata.push_back({&AN});
auto *AllocNode = addAllocNode({&AN}, FS);
// Pass an empty CallStack to the CallsiteContext (second)
// parameter, since for ThinLTO we already collapsed out the inlined
// stack ids on the allocation call during ModuleSummaryAnalysis.
Expand Down Expand Up @@ -1848,11 +1788,8 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
}
// For callsite metadata, add to list for this function for later use.
if (!FS->callsites().empty())
for (auto &SN : FS->mutableCallsites()) {
IndexCall StackNodeCall(&SN);
CallsWithMetadata.push_back(StackNodeCall);
CallToFunc[StackNodeCall] = FS;
}
for (auto &SN : FS->mutableCallsites())
CallsWithMetadata.push_back({&SN});

if (!CallsWithMetadata.empty())
FuncToCallsWithMetadata[FS] = CallsWithMetadata;
@@ -2288,14 +2225,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
if (Recursive)
OS << " (recursive)";
OS << "\n";
if (!MatchingCalls.empty()) {
OS << "\tMatchingCalls:\n";
for (auto &MatchingCall : MatchingCalls) {
OS << "\t";
MatchingCall.print(OS);
OS << "\n";
}
}
OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n";
OS << "\tContextIds:";
// Make a copy of the computed context ids that we can sort for stability.
@@ -2549,7 +2478,6 @@ CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
std::make_unique<ContextNode>(Node->IsAllocation, Node->Call));
ContextNode *Clone = NodeOwner.back().get();
Node->addClone(Clone);
Clone->MatchingCalls = Node->MatchingCalls;
assert(NodeToCallingFunc.count(Node));
NodeToCallingFunc[Clone] = NodeToCallingFunc[Node];
moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true,
@@ -3093,14 +3021,6 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
if (CallMap.count(Call))
CallClone = CallMap[Call];
CallsiteClone->setCall(CallClone);
// Need to do the same for all matching calls.
for (auto &MatchingCall : Node->MatchingCalls) {
CallInfo CallClone(MatchingCall);
if (CallMap.count(MatchingCall))
CallClone = CallMap[MatchingCall];
// Updates the call in the list.
MatchingCall = CallClone;
}
};

// Keep track of the clones of callsite Node that need to be assigned to
@@ -3267,16 +3187,6 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
CallInfo NewCall(CallMap[OrigCall]);
assert(NewCall);
NewClone->setCall(NewCall);
// Need to do the same for all matching calls.
for (auto &MatchingCall : NewClone->MatchingCalls) {
CallInfo OrigMatchingCall(MatchingCall);
OrigMatchingCall.setCloneNo(0);
assert(CallMap.count(OrigMatchingCall));
CallInfo NewCall(CallMap[OrigMatchingCall]);
assert(NewCall);
// Updates the call in the list.
MatchingCall = NewCall;
}
}
}
// Fall through to handling below to perform the recording of the
@@ -3463,7 +3373,6 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {

if (Node->IsAllocation) {
updateAllocationCall(Node->Call, allocTypeToUse(Node->AllocTypes));
assert(Node->MatchingCalls.empty());
return;
}

@@ -3472,9 +3381,6 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {

auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node];
updateCall(Node->Call, CalleeFunc);
// Update all the matching calls as well.
for (auto &Call : Node->MatchingCalls)
updateCall(Call, CalleeFunc);
};

// Performs DFS traversal starting from allocation nodes to update calls to