diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 66bd786c85df5..64da3dfdd97c4 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -242,9 +242,16 @@ class CallsiteContextGraph { // recursion. bool Recursive = false; - // The corresponding allocation or interior call. + // The corresponding allocation or interior call. This is the primary call + // for which we have created this node. CallInfo Call; + // List of other calls that can be treated the same as the primary call + // through cloning. I.e. located in the same function and have the same + // (possibly pruned) stack ids. They will be updated the same way as the + // primary call when assigning to function clones. + std::vector<CallInfo> MatchingCalls; + // For alloc nodes this is a unique id assigned when constructed, and for // callsite stack nodes it is the original stack id when the node is // constructed from the memprof MIB metadata on the alloc nodes. Note that @@ -457,6 +464,9 @@ class CallsiteContextGraph { /// iteration. MapVector<FuncTy *, std::vector<CallInfo>> FuncToCallsWithMetadata; + /// Records the function each call is located in. + DenseMap<CallInfo, const FuncTy *> CallToFunc; + /// Map from callsite node to the enclosing caller function. std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc; @@ -474,7 +484,8 @@ class CallsiteContextGraph { /// StackIdToMatchingCalls map. 
void assignStackNodesPostOrder( ContextNode *Node, DenseSet<const ContextNode *> &Visited, - DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls); + DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls, + DenseMap<CallInfo, CallInfo> &CallToMatchingCall); /// Duplicates the given set of context ids, updating the provided /// map from each original id with the newly generated context ids, @@ -1230,10 +1241,11 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node, template <typename DerivedCCG, typename FuncTy, typename CallTy> void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: - assignStackNodesPostOrder(ContextNode *Node, - DenseSet<const ContextNode *> &Visited, - DenseMap<uint64_t, std::vector<CallContextInfo>> - &StackIdToMatchingCalls) { + assignStackNodesPostOrder( + ContextNode *Node, DenseSet<const ContextNode *> &Visited, + DenseMap<uint64_t, std::vector<CallContextInfo>> + &StackIdToMatchingCalls, + DenseMap<CallInfo, CallInfo> &CallToMatchingCall) { auto Inserted = Visited.insert(Node); if (!Inserted.second) return; @@ -1246,7 +1258,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: // Skip any that have been removed during the recursion. if (!Edge) continue; - assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls); + assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls, + CallToMatchingCall); } // If this node's stack id is in the map, update the graph to contain new @@ -1289,8 +1302,19 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; // Skip any for which we didn't assign any ids, these don't get a node in // the graph. - if (SavedContextIds.empty()) + if (SavedContextIds.empty()) { + // If this call has a matching call (located in the same function and + // having the same stack ids), simply add it to the context node created + // for its matching call earlier. These can be treated the same through + // cloning and get updated at the same time. 
+ if (!CallToMatchingCall.contains(Call)) + continue; + auto MatchingCall = CallToMatchingCall[Call]; + assert(NonAllocationCallToContextNodeMap.contains(MatchingCall)); + NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back( + Call); continue; + } assert(LastId == Ids.back()); @@ -1422,6 +1446,10 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() { // there is more than one call with the same stack ids. Their (possibly newly // duplicated) context ids are saved in the StackIdToMatchingCalls map. DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds; + // Save a map from each call to any that are found to match it. I.e. located + // in the same function and have the same (possibly pruned) stack ids. We use + // this to avoid creating extra graph nodes as they can be treated the same. + DenseMap<CallInfo, CallInfo> CallToMatchingCall; for (auto &It : StackIdToMatchingCalls) { auto &Calls = It.getSecond(); // Skip single calls with a single stack id. These don't need a new node. @@ -1460,6 +1488,13 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() { DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds(); assert(!LastNodeContextIds.empty()); + // Map from function to the first call from the below list (with matching + // stack ids) found in that function. Note that calls from different + // functions can have the same stack ids because this is the list of stack + // ids that had (possibly pruned) nodes after building the graph from the + // allocation MIBs. + DenseMap<const FuncTy *, CallInfo> FuncToCallMap; + for (unsigned I = 0; I < Calls.size(); I++) { auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; assert(SavedContextIds.empty()); @@ -1533,6 +1568,18 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() { continue; } + const FuncTy *CallFunc = CallToFunc[Call]; + + // If the prior call had the same stack ids this map would not be empty. + // Check if we already have a call that "matches" because it is located + // in the same function. 
+ if (FuncToCallMap.contains(CallFunc)) { + // Record the matching call found for this call, and skip it. We + // will subsequently combine it into the same node. + CallToMatchingCall[Call] = FuncToCallMap[CallFunc]; + continue; + } + // Check if the next set of stack ids is the same (since the Calls vector // of tuples is sorted by the stack ids we can just look at the next one). bool DuplicateContextIds = false; @@ -1562,7 +1609,14 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() { set_subtract(LastNodeContextIds, StackSequenceContextIds); if (LastNodeContextIds.empty()) break; - } + // No longer possibly in a sequence of calls with duplicate stack ids, + // clear the map. + FuncToCallMap.clear(); + } else + // Record the call with its function, so we can locate it the next time + // we find a call from this function when processing the calls with the + // same stack ids. + FuncToCallMap[CallFunc] = Call; } } @@ -1579,7 +1633,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() { // associated context ids over to the new nodes. DenseSet<const ContextNode *> Visited; for (auto &Entry : AllocationCallToContextNodeMap) - assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls); + assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls, + CallToMatchingCall); if (VerifyCCG) check(); } @@ -1679,6 +1734,7 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph( continue; if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) { CallsWithMetadata.push_back(&I); + CallToFunc[&I] = &F; auto *AllocNode = addAllocNode(&I, &F); auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite); assert(CallsiteMD); @@ -1700,8 +1756,10 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph( I.setMetadata(LLVMContext::MD_callsite, nullptr); } // For callsite metadata, add to list for this function for later use. 
- else if (I.getMetadata(LLVMContext::MD_callsite)) + else if (I.getMetadata(LLVMContext::MD_callsite)) { CallsWithMetadata.push_back(&I); + CallToFunc[&I] = &F; + } } } if (!CallsWithMetadata.empty()) @@ -1756,8 +1814,10 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph( // correlate properly in applyImport in the backends. if (AN.MIBs.empty()) continue; - CallsWithMetadata.push_back({&AN}); - auto *AllocNode = addAllocNode({&AN}, FS); + IndexCall AllocCall(&AN); + CallsWithMetadata.push_back(AllocCall); + CallToFunc[AllocCall] = FS; + auto *AllocNode = addAllocNode(AllocCall, FS); // Pass an empty CallStack to the CallsiteContext (second) // parameter, since for ThinLTO we already collapsed out the inlined // stack ids on the allocation call during ModuleSummaryAnalysis. @@ -1788,8 +1848,11 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph( } // For callsite metadata, add to list for this function for later use. if (!FS->callsites().empty()) - for (auto &SN : FS->mutableCallsites()) - CallsWithMetadata.push_back({&SN}); + for (auto &SN : FS->mutableCallsites()) { + IndexCall StackNodeCall(&SN); + CallsWithMetadata.push_back(StackNodeCall); + CallToFunc[StackNodeCall] = FS; + } if (!CallsWithMetadata.empty()) FuncToCallsWithMetadata[FS] = CallsWithMetadata; @@ -2225,6 +2288,14 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print( if (Recursive) OS << " (recursive)"; OS << "\n"; + if (!MatchingCalls.empty()) { + OS << "\tMatchingCalls:\n"; + for (auto &MatchingCall : MatchingCalls) { + OS << "\t"; + MatchingCall.print(OS); + OS << "\n"; + } + } OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n"; OS << "\tContextIds:"; // Make a copy of the computed context ids that we can sort for stability. 
@@ -2478,6 +2549,7 @@ CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone( std::make_unique<ContextNode>(Node->IsAllocation, Node->Call)); ContextNode *Clone = NodeOwner.back().get(); Node->addClone(Clone); + Clone->MatchingCalls = Node->MatchingCalls; assert(NodeToCallingFunc.count(Node)); NodeToCallingFunc[Clone] = NodeToCallingFunc[Node]; moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true, @@ -3021,6 +3093,14 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { if (CallMap.count(Call)) CallClone = CallMap[Call]; CallsiteClone->setCall(CallClone); + // Need to do the same for all matching calls. + for (auto &MatchingCall : Node->MatchingCalls) { + CallInfo CallClone(MatchingCall); + if (CallMap.count(MatchingCall)) + CallClone = CallMap[MatchingCall]; + // Updates the call in the list. + MatchingCall = CallClone; + } }; // Keep track of the clones of callsite Node that need to be assigned to @@ -3187,6 +3267,16 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { CallInfo NewCall(CallMap[OrigCall]); assert(NewCall); NewClone->setCall(NewCall); + // Need to do the same for all matching calls. + for (auto &MatchingCall : NewClone->MatchingCalls) { + CallInfo OrigMatchingCall(MatchingCall); + OrigMatchingCall.setCloneNo(0); + assert(CallMap.count(OrigMatchingCall)); + CallInfo NewCall(CallMap[OrigMatchingCall]); + assert(NewCall); + // Updates the call in the list. + MatchingCall = NewCall; + } } } // Fall through to handling below to perform the recording of the @@ -3373,6 +3463,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { if (Node->IsAllocation) { updateAllocationCall(Node->Call, allocTypeToUse(Node->AllocTypes)); + assert(Node->MatchingCalls.empty()); return; } @@ -3381,6 +3472,9 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node]; updateCall(Node->Call, CalleeFunc); + // Update all the matching calls as well. 
+ for (auto &Call : Node->MatchingCalls) + updateCall(Call, CalleeFunc); }; // Performs DFS traversal starting from allocation nodes to update calls to