diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 66b68d5cd457f..52def8f21312d 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -242,9 +242,16 @@ class CallsiteContextGraph { // recursion. bool Recursive = false; - // The corresponding allocation or interior call. + // The corresponding allocation or interior call. This is the primary call + // for which we have created this node. CallInfo Call; + // List of other calls that can be treated the same as the primary call + // through cloning. I.e. located in the same function and have the same + // (possibly pruned) stack ids. They will be updated the same way as the + // primary call when assigning to function clones. + std::vector MatchingCalls; + // For alloc nodes this is a unique id assigned when constructed, and for // callsite stack nodes it is the original stack id when the node is // constructed from the memprof MIB metadata on the alloc nodes. Note that @@ -457,6 +464,9 @@ class CallsiteContextGraph { /// iteration. MapVector> FuncToCallsWithMetadata; + /// Records the function each call is located in. + DenseMap CallToFunc; + /// Map from callsite node to the enclosing caller function. std::map NodeToCallingFunc; @@ -474,7 +484,8 @@ class CallsiteContextGraph { /// StackIdToMatchingCalls map. void assignStackNodesPostOrder( ContextNode *Node, DenseSet &Visited, - DenseMap> &StackIdToMatchingCalls); + DenseMap> &StackIdToMatchingCalls, + DenseMap &CallToMatchingCall); /// Duplicates the given set of context ids, updating the provided /// map from each original id with the newly generated context ids, @@ -521,6 +532,11 @@ class CallsiteContextGraph { Call, Func, CallerFunc, FoundCalleeChain); } + /// Returns true if both call instructions have the same callee. + bool sameCallee(CallTy Call1, CallTy Call2) { + return static_cast(this)->sameCallee(Call1, Call2); + } + /// Get a list of nodes corresponding to the stack ids in the given /// callsite's context. std::vector getStackIdsWithContextNodesForCall(CallTy Call) { @@ -667,6 +683,7 @@ class ModuleCallsiteContextGraph bool calleeMatchesFunc( Instruction *Call, const Function *Func, const Function *CallerFunc, std::vector> &FoundCalleeChain); + bool sameCallee(Instruction *Call1, Instruction *Call2); bool findProfiledCalleeThroughTailCalls( const Function *ProfiledCallee, Value *CurCallee, unsigned Depth, std::vector> &FoundCalleeChain, @@ -744,6 +761,7 @@ class IndexCallsiteContextGraph IndexCall &Call, const FunctionSummary *Func, const FunctionSummary *CallerFunc, std::vector> &FoundCalleeChain); + bool sameCallee(IndexCall &Call1, IndexCall &Call2); bool findProfiledCalleeThroughTailCalls( ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth, std::vector> &FoundCalleeChain, @@ -1230,10 +1248,11 @@ static void checkNode(const ContextNode *Node, template void CallsiteContextGraph:: - assignStackNodesPostOrder(ContextNode *Node, - DenseSet &Visited, - DenseMap> - &StackIdToMatchingCalls) { + assignStackNodesPostOrder( + ContextNode *Node, DenseSet &Visited, + DenseMap> + &StackIdToMatchingCalls, + DenseMap &CallToMatchingCall) { auto Inserted = Visited.insert(Node); if (!Inserted.second) return; @@ -1246,7 +1265,8 @@ void CallsiteContextGraph:: // Skip any that have been removed during the recursion. 
if (!Edge) continue; - assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls); + assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls, + CallToMatchingCall); } // If this node's stack id is in the map, update the graph to contain new @@ -1289,8 +1309,19 @@ void CallsiteContextGraph:: auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; // Skip any for which we didn't assign any ids, these don't get a node in // the graph. - if (SavedContextIds.empty()) + if (SavedContextIds.empty()) { + // If this call has a matching call (located in the same function and + // having the same stack ids), simply add it to the context node created + // for its matching call earlier. These can be treated the same through + // cloning and get updated at the same time. + if (!CallToMatchingCall.contains(Call)) + continue; + auto MatchingCall = CallToMatchingCall[Call]; + assert(NonAllocationCallToContextNodeMap.contains(MatchingCall)); + NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back( + Call); continue; + } assert(LastId == Ids.back()); @@ -1422,6 +1453,10 @@ void CallsiteContextGraph::updateStackNodes() { // there is more than one call with the same stack ids. Their (possibly newly // duplicated) context ids are saved in the StackIdToMatchingCalls map. DenseMap> OldToNewContextIds; + // Save a map from each call to any that are found to match it. I.e. located + // in the same function and have the same (possibly pruned) stack ids. We use + // this to avoid creating extra graph nodes as they can be treated the same. + DenseMap CallToMatchingCall; for (auto &It : StackIdToMatchingCalls) { auto &Calls = It.getSecond(); // Skip single calls with a single stack id. These don't need a new node. @@ -1460,6 +1495,13 @@ void CallsiteContextGraph::updateStackNodes() { DenseSet LastNodeContextIds = LastNode->getContextIds(); assert(!LastNodeContextIds.empty()); + // Map from function to the first call from the below list (with matching + // stack ids) found in that function. Note that calls from different + // functions can have the same stack ids because this is the list of stack + // ids that had (possibly pruned) nodes after building the graph from the + // allocation MIBs. + DenseMap FuncToCallMap; + for (unsigned I = 0; I < Calls.size(); I++) { auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; assert(SavedContextIds.empty()); @@ -1533,6 +1575,18 @@ void CallsiteContextGraph::updateStackNodes() { continue; } + const FuncTy *CallFunc = CallToFunc[Call]; + + // If the prior call had the same stack ids this map would not be empty. + // Check if we already have a call that "matches" because it is located + // in the same function. + if (FuncToCallMap.contains(CallFunc)) { + // Record the matching call found for this call, and skip it. We + // will subsequently combine it into the same node. + CallToMatchingCall[Call] = FuncToCallMap[CallFunc]; + continue; + } + // Check if the next set of stack ids is the same (since the Calls vector // of tuples is sorted by the stack ids we can just look at the next one). bool DuplicateContextIds = false; @@ -1562,7 +1616,14 @@ void CallsiteContextGraph::updateStackNodes() { set_subtract(LastNodeContextIds, StackSequenceContextIds); if (LastNodeContextIds.empty()) break; - } + // No longer possibly in a sequence of calls with duplicate stack ids, + // clear the map. 
+ FuncToCallMap.clear(); + } else + // Record the call with its function, so we can locate it the next time + // we find a call from this function when processing the calls with the + // same stack ids. + FuncToCallMap[CallFunc] = Call; } } @@ -1579,7 +1640,8 @@ void CallsiteContextGraph::updateStackNodes() { // associated context ids over to the new nodes. DenseSet Visited; for (auto &Entry : AllocationCallToContextNodeMap) - assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls); + assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls, + CallToMatchingCall); if (VerifyCCG) check(); } @@ -1679,6 +1741,7 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph( continue; if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) { CallsWithMetadata.push_back(&I); + CallToFunc[&I] = &F; auto *AllocNode = addAllocNode(&I, &F); auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite); assert(CallsiteMD); @@ -1700,8 +1763,10 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph( I.setMetadata(LLVMContext::MD_callsite, nullptr); } // For callsite metadata, add to list for this function for later use. - else if (I.getMetadata(LLVMContext::MD_callsite)) + else if (I.getMetadata(LLVMContext::MD_callsite)) { CallsWithMetadata.push_back(&I); + CallToFunc[&I] = &F; + } } } if (!CallsWithMetadata.empty()) @@ -1756,8 +1821,10 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph( // correlate properly in applyImport in the backends. if (AN.MIBs.empty()) continue; - CallsWithMetadata.push_back({&AN}); - auto *AllocNode = addAllocNode({&AN}, FS); + IndexCall AllocCall(&AN); + CallsWithMetadata.push_back(AllocCall); + CallToFunc[AllocCall] = FS; + auto *AllocNode = addAllocNode(AllocCall, FS); // Pass an empty CallStack to the CallsiteContext (second) // parameter, since for ThinLTO we already collapsed out the inlined // stack ids on the allocation call during ModuleSummaryAnalysis. @@ -1788,8 +1855,11 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph( } // For callsite metadata, add to list for this function for later use. if (!FS->callsites().empty()) - for (auto &SN : FS->mutableCallsites()) - CallsWithMetadata.push_back({&SN}); + for (auto &SN : FS->mutableCallsites()) { + IndexCall StackNodeCall(&SN); + CallsWithMetadata.push_back(StackNodeCall); + CallToFunc[StackNodeCall] = FS; + } if (!CallsWithMetadata.empty()) FuncToCallsWithMetadata[FS] = CallsWithMetadata; @@ -1829,26 +1899,76 @@ void CallsiteContextGraph TailCallToContextNodeMap; + std::vector> NewCallToNode; for (auto &Entry : NonAllocationCallToContextNodeMap) { auto *Node = Entry.second; assert(Node->Clones.empty()); // Check all node callees and see if in the same function. - auto Call = Node->Call.call(); - for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end(); - ++EI) { - auto Edge = *EI; - if (!Edge->Callee->hasCall()) - continue; - assert(NodeToCallingFunc.count(Edge->Callee)); - // Check if the called function matches that of the callee node. - if (calleesMatch(Call, EI, TailCallToContextNodeMap)) - continue; + // We need to check all of the calls recorded in this Node, because in some + // cases we may have had multiple calls with the same debug info calling + // different callees. This can happen, for example, when an object is + // constructed in the parameter list - the destructor call of the object has + // the same debug info (line/col) as the call the object was passed to. + // Here we will prune any that don't match all callee nodes.
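// For illustration only, a hypothetical C++ source pattern (not part of this
// patch or its tests) that produces two calls sharing one debug location:
//
//   struct Temp { ~Temp(); };
//   void consume(const Temp &);
//   void caller() {
//     consume(Temp()); // The implicit call to ~Temp() for the temporary is
//                      // attributed to the same line/column as the call to
//                      // consume(), so both calls carry identical callsite
//                      // debug info (and !callsite metadata) even though
//                      // their callees differ.
//   }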
+ std::vector AllCalls; + AllCalls.reserve(Node->MatchingCalls.size() + 1); + AllCalls.push_back(Node->Call); + AllCalls.insert(AllCalls.end(), Node->MatchingCalls.begin(), + Node->MatchingCalls.end()); + auto It = AllCalls.begin(); + // Iterate through the calls until we find the first that matches. + for (; It != AllCalls.end(); ++It) { + auto ThisCall = *It; + bool Match = true; + for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end(); + ++EI) { + auto Edge = *EI; + if (!Edge->Callee->hasCall()) + continue; + assert(NodeToCallingFunc.count(Edge->Callee)); + // Check if the called function matches that of the callee node. + if (!calleesMatch(ThisCall.call(), EI, TailCallToContextNodeMap)) { + Match = false; + break; + } + } + // Found a call that matches the callee nodes, we can quit now. + if (Match) { + // If the first match is not the primary call on the Node, update it + // now. We will update the list of matching calls further below. + if (Node->Call != ThisCall) { + Node->setCall(ThisCall); + // We need to update the NonAllocationCallToContextNodeMap, but don't + // want to do this during iteration over that map, so save the calls + // that need updated entries. + NewCallToNode.push_back({ThisCall, Node}); + // We should only have shared this node between calls from the same + // function. + assert(NodeToCallingFunc[Node] == CallToFunc[Node->Call]); + } + break; + } + } + // We will update this list below (or leave it cleared if there was no + // match found above). + Node->MatchingCalls.clear(); + // If we hit the end of the AllCalls vector, no call matching the callee + // nodes was found, clear the call information in the node. + if (It == AllCalls.end()) { RemovedEdgesWithMismatchedCallees++; // Work around by setting Node to have a null call, so it gets // skipped during cloning. Otherwise assignFunctions will assert // because its data structures are not designed to handle this case. Node->setCall(CallInfo()); - break; + continue; + } + // Now add back any matching calls that call the same function as the + // matching primary call on Node. + for (++It; It != AllCalls.end(); ++It) { + auto ThisCall = *It; + if (!sameCallee(Node->Call.call(), ThisCall.call())) + continue; + Node->MatchingCalls.push_back(ThisCall); } } @@ -1856,8 +1976,14 @@ void CallsiteContextGraphhasCall(); }); + // Also remove any entries if we updated the node's primary call above. + NonAllocationCallToContextNodeMap.remove_if([](const auto &it) { + return !it.second->hasCall() || it.second->Call != it.first; + }); + + // Add entries for any new primary calls recorded above. + for (auto &[Call, Node] : NewCallToNode) + NonAllocationCallToContextNodeMap[Call] = Node; // Add the new nodes after the above loop so that the iteration is not // invalidated. 
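// A rough sketch of the re-selection logic above, using stand-in types rather
// than the real CallInfo/ContextNode classes (assumed names, for illustration
// only, not the actual implementation):
//
//   #include <vector>
//
//   template <typename CallT, typename MatchesFn, typename SameCalleeFn>
//   bool reselectPrimaryCall(std::vector<CallT> &AllCalls, CallT &Primary,
//                            std::vector<CallT> &Matching,
//                            MatchesFn MatchesAllCalleeNodes,
//                            SameCalleeFn SameCallee) {
//     // Find the first recorded call whose callee is consistent with every
//     // callee node of this context node.
//     auto It = AllCalls.begin();
//     while (It != AllCalls.end() && !MatchesAllCalleeNodes(*It))
//       ++It;
//     Matching.clear();
//     if (It == AllCalls.end())
//       return false; // The caller then clears the node's call info.
//     Primary = *It;
//     // Keep only the remaining calls that invoke the same callee as the new
//     // primary call; they can be updated identically during cloning.
//     for (++It; It != AllCalls.end(); ++It)
//       if (SameCallee(Primary, *It))
//         Matching.push_back(*It);
//     return true;
//   }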
@@ -2083,6 +2209,21 @@ bool ModuleCallsiteContextGraph::calleeMatchesFunc( return true; } +bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1, + Instruction *Call2) { + auto *CB1 = cast(Call1); + if (!CB1->getCalledOperand() || CB1->isIndirectCall()) + return false; + auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts(); + auto *CalleeFunc1 = dyn_cast(CalleeVal1); + auto *CB2 = cast(Call2); + if (!CB2->getCalledOperand() || CB2->isIndirectCall()) + return false; + auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts(); + auto *CalleeFunc2 = dyn_cast(CalleeVal2); + return CalleeFunc1 == CalleeFunc2; +} + bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls( ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth, std::vector> &FoundCalleeChain, @@ -2209,6 +2350,14 @@ bool IndexCallsiteContextGraph::calleeMatchesFunc( return true; } +bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) { + ValueInfo Callee1 = + dyn_cast_if_present(Call1.getBase())->Callee; + ValueInfo Callee2 = + dyn_cast_if_present(Call2.getBase())->Callee; + return Callee1 == Callee2; +} + template void CallsiteContextGraph::ContextNode::dump() const { @@ -2225,6 +2374,14 @@ void CallsiteContextGraph::ContextNode::print( if (Recursive) OS << " (recursive)"; OS << "\n"; + if (!MatchingCalls.empty()) { + OS << "\tMatchingCalls:\n"; + for (auto &MatchingCall : MatchingCalls) { + OS << "\t"; + MatchingCall.print(OS); + OS << "\n"; + } + } OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n"; OS << "\tContextIds:"; // Make a copy of the computed context ids that we can sort for stability. @@ -2478,6 +2635,7 @@ CallsiteContextGraph::moveEdgeToNewCalleeClone( std::make_unique(Node->IsAllocation, Node->Call)); ContextNode *Clone = NodeOwner.back().get(); Node->addClone(Clone); + Clone->MatchingCalls = Node->MatchingCalls; assert(NodeToCallingFunc.count(Node)); NodeToCallingFunc[Clone] = NodeToCallingFunc[Node]; moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true, @@ -3021,6 +3179,14 @@ bool CallsiteContextGraph::assignFunctions() { if (CallMap.count(Call)) CallClone = CallMap[Call]; CallsiteClone->setCall(CallClone); + // Need to do the same for all matching calls. + for (auto &MatchingCall : Node->MatchingCalls) { + CallInfo CallClone(MatchingCall); + if (CallMap.count(MatchingCall)) + CallClone = CallMap[MatchingCall]; + // Updates the call in the list. + MatchingCall = CallClone; + } }; // Keep track of the clones of callsite Node that need to be assigned to @@ -3187,6 +3353,16 @@ bool CallsiteContextGraph::assignFunctions() { CallInfo NewCall(CallMap[OrigCall]); assert(NewCall); NewClone->setCall(NewCall); + // Need to do the same for all matching calls. + for (auto &MatchingCall : NewClone->MatchingCalls) { + CallInfo OrigMatchingCall(MatchingCall); + OrigMatchingCall.setCloneNo(0); + assert(CallMap.count(OrigMatchingCall)); + CallInfo NewCall(CallMap[OrigMatchingCall]); + assert(NewCall); + // Updates the call in the list. 
+ MatchingCall = NewCall; + } } } // Fall through to handling below to perform the recording of the @@ -3373,6 +3549,7 @@ bool CallsiteContextGraph::assignFunctions() { if (Node->IsAllocation) { updateAllocationCall(Node->Call, allocTypeToUse(Node->AllocTypes)); + assert(Node->MatchingCalls.empty()); return; } @@ -3381,6 +3558,9 @@ bool CallsiteContextGraph::assignFunctions() { auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node]; updateCall(Node->Call, CalleeFunc); + // Update all the matching calls as well. + for (auto &Call : Node->MatchingCalls) + updateCall(Call, CalleeFunc); }; // Performs DFS traversal starting from allocation nodes to update calls to diff --git a/llvm/test/ThinLTO/X86/memprof-aliased-location1.ll b/llvm/test/ThinLTO/X86/memprof-aliased-location1.ll new file mode 100644 index 0000000000000..42819d5421ca0 --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-aliased-location1.ll @@ -0,0 +1,116 @@ +;; Test to ensure a call to a different callee but with the same debug info +;; (and therefore callsite metadata) as a preceding call in the alloc context +;; does not cause missing or incorrect cloning. This test is otherwise the same +;; as memprof-basic.ll. + +;; -stats requires asserts +; REQUIRES: asserts + +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,blah, \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes \ +; RUN: -stats -pass-remarks=memprof-context-disambiguation -save-temps \ +; RUN: -o %t.out 2>&1 | FileCheck %s \ +; RUN: --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS + +; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR + +source_filename = "memprof-aliased-location1.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() #0 { +entry: + %call = call ptr @_Z3foov(), !callsite !0 + %call1 = call ptr @_Z3foov(), !callsite !1 + ret i32 0 +} + +declare void @blah() + +define internal ptr @_Z3barv() #0 { +entry: + %call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7 + ret ptr null +} + +declare ptr @_Znam(i64) + +define internal ptr @_Z3bazv() #0 { +entry: + ;; Preceding call to another callee but with the same debug location / callsite id + call void @blah(), !callsite !8 + %call = call ptr @_Z3barv(), !callsite !8 + ret ptr null +} + +define internal ptr @_Z3foov() #0 { +entry: + %call = call ptr @_Z3bazv(), !callsite !9 + ret ptr null +} + +; uselistorder directives +uselistorder ptr @_Z3foov, { 1, 0 } + +attributes #0 = { noinline optnone } + +!0 = !{i64 8632435727821051414} +!1 = !{i64 -3421689549917153178} +!2 = !{!3, !5} +!3 = !{!4, !"notcold", i64 100} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!5 = !{!6, !"cold", i64 400} +!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!7 = !{i64 9086428284934609951} +!8 = !{i64 -5964873800580613432} +!9 = !{i64 2732490490862098848} + +; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1 +; REMARKS: created clone _Z3barv.memprof.1 +; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold +; REMARKS: call in clone _Z3barv.memprof.1 marked with memprof allocation attribute cold +; REMARKS: created clone _Z3bazv.memprof.1 +; REMARKS: call in 
clone _Z3bazv.memprof.1 assigned to call function clone _Z3barv.memprof.1 +; REMARKS: created clone _Z3foov.memprof.1 +; REMARKS: call in clone _Z3foov.memprof.1 assigned to call function clone _Z3bazv.memprof.1 + + +; IR: define {{.*}} @main +;; The first call to foo does not allocate cold memory. It should call the +;; original functions, which ultimately call the original allocation decorated +;; with a "notcold" attribute. +; IR: call {{.*}} @_Z3foov() +;; The second call to foo allocates cold memory. It should call cloned functions +;; which ultimately call a cloned allocation decorated with a "cold" attribute. +; IR: call {{.*}} @_Z3foov.memprof.1() +; IR: define internal {{.*}} @_Z3barv() +; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]] +; IR: define internal {{.*}} @_Z3bazv() +; IR: call {{.*}} @_Z3barv() +; IR: define internal {{.*}} @_Z3foov() +; IR: call {{.*}} @_Z3bazv() +; IR: define internal {{.*}} @_Z3barv.memprof.1() +; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]] +; IR: define internal {{.*}} @_Z3bazv.memprof.1() +; IR: call {{.*}} @_Z3barv.memprof.1() +; IR: define internal {{.*}} @_Z3foov.memprof.1() +; IR: call {{.*}} @_Z3bazv.memprof.1() +; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" } +; IR: attributes #[[COLD]] = { "memprof"="cold" } + + +; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) +; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend +; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) +; STATS-BE: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend +; STATS-BE: 2 memprof-context-disambiguation - Number of allocation versions (including clones) during ThinLTO backend +; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis +; STATS-BE: 3 memprof-context-disambiguation - Number of function clones created during ThinLTO backend +; STATS-BE: 3 memprof-context-disambiguation - Number of functions that had clones created during ThinLTO backend +; STATS-BE: 2 memprof-context-disambiguation - Maximum number of allocation versions created for an original allocation during ThinLTO backend +; STATS-BE: 1 memprof-context-disambiguation - Number of original (not cloned) allocations with memprof profiles during ThinLTO backend diff --git a/llvm/test/ThinLTO/X86/memprof-aliased-location2.ll b/llvm/test/ThinLTO/X86/memprof-aliased-location2.ll new file mode 100644 index 0000000000000..663f8525043c2 --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-aliased-location2.ll @@ -0,0 +1,116 @@ +;; Test to ensure a call to a different callee but with the same debug info +;; (and therefore callsite metadata) as a subsequent call in the alloc context +;; does not cause missing or incorrect cloning. This test is otherwise the same +;; as memprof-basic.ll. 
+ +;; -stats requires asserts +; REQUIRES: asserts + +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,blah, \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes \ +; RUN: -stats -pass-remarks=memprof-context-disambiguation -save-temps \ +; RUN: -o %t.out 2>&1 | FileCheck %s \ +; RUN: --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS + +; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR + +source_filename = "memprof-aliased-location2.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() #0 { +entry: + %call = call ptr @_Z3foov(), !callsite !0 + %call1 = call ptr @_Z3foov(), !callsite !1 + ret i32 0 +} + +declare void @blah() + +define internal ptr @_Z3barv() #0 { +entry: + %call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7 + ret ptr null +} + +declare ptr @_Znam(i64) + +define internal ptr @_Z3bazv() #0 { +entry: + %call = call ptr @_Z3barv(), !callsite !8 + ;; Subsequent call to another callee but with the same debug location / callsite id + call void @blah(), !callsite !8 + ret ptr null +} + +define internal ptr @_Z3foov() #0 { +entry: + %call = call ptr @_Z3bazv(), !callsite !9 + ret ptr null +} + +; uselistorder directives +uselistorder ptr @_Z3foov, { 1, 0 } + +attributes #0 = { noinline optnone } + +!0 = !{i64 8632435727821051414} +!1 = !{i64 -3421689549917153178} +!2 = !{!3, !5} +!3 = !{!4, !"notcold", i64 100} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!5 = !{!6, !"cold", i64 400} +!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!7 = !{i64 9086428284934609951} +!8 = !{i64 -5964873800580613432} +!9 = !{i64 2732490490862098848} + +; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1 +; REMARKS: created clone _Z3barv.memprof.1 +; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold +; REMARKS: call in clone _Z3barv.memprof.1 marked with memprof allocation attribute cold +; REMARKS: created clone _Z3bazv.memprof.1 +; REMARKS: call in clone _Z3bazv.memprof.1 assigned to call function clone _Z3barv.memprof.1 +; REMARKS: created clone _Z3foov.memprof.1 +; REMARKS: call in clone _Z3foov.memprof.1 assigned to call function clone _Z3bazv.memprof.1 + + +; IR: define {{.*}} @main +;; The first call to foo does not allocate cold memory. It should call the +;; original functions, which ultimately call the original allocation decorated +;; with a "notcold" attribute. +; IR: call {{.*}} @_Z3foov() +;; The second call to foo allocates cold memory. It should call cloned functions +;; which ultimately call a cloned allocation decorated with a "cold" attribute. 
+; IR: call {{.*}} @_Z3foov.memprof.1() +; IR: define internal {{.*}} @_Z3barv() +; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]] +; IR: define internal {{.*}} @_Z3bazv() +; IR: call {{.*}} @_Z3barv() +; IR: define internal {{.*}} @_Z3foov() +; IR: call {{.*}} @_Z3bazv() +; IR: define internal {{.*}} @_Z3barv.memprof.1() +; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]] +; IR: define internal {{.*}} @_Z3bazv.memprof.1() +; IR: call {{.*}} @_Z3barv.memprof.1() +; IR: define internal {{.*}} @_Z3foov.memprof.1() +; IR: call {{.*}} @_Z3bazv.memprof.1() +; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" } +; IR: attributes #[[COLD]] = { "memprof"="cold" } + + +; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) +; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend +; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) +; STATS-BE: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend +; STATS-BE: 2 memprof-context-disambiguation - Number of allocation versions (including clones) during ThinLTO backend +; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis +; STATS-BE: 3 memprof-context-disambiguation - Number of function clones created during ThinLTO backend +; STATS-BE: 3 memprof-context-disambiguation - Number of functions that had clones created during ThinLTO backend +; STATS-BE: 2 memprof-context-disambiguation - Maximum number of allocation versions created for an original allocation during ThinLTO backend +; STATS-BE: 1 memprof-context-disambiguation - Number of original (not cloned) allocations with memprof profiles during ThinLTO backend diff --git a/llvm/test/ThinLTO/X86/memprof-tailcall-aliased-location1.ll b/llvm/test/ThinLTO/X86/memprof-tailcall-aliased-location1.ll new file mode 100644 index 0000000000000..3f5dc7732dc5c --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-tailcall-aliased-location1.ll @@ -0,0 +1,99 @@ +;; Test to ensure a call to a different callee but with the same debug info +;; (and therefore callsite metadata) as a preceding tail call in the alloc +;; context does not cause missing or incorrect cloning. This test is otherwise +;; the same as memprof-tailcall.ll. 
+ +;; -stats requires asserts +; REQUIRES: asserts + +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ +; RUN: -r=%t.o,_Z3barv,plx \ +; RUN: -r=%t.o,_Z3bazv,plx \ +; RUN: -r=%t.o,_Z3foov,plx \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -r=%t.o,blah, \ +; RUN: -stats -save-temps \ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS + +; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR + +; STATS: 2 memprof-context-disambiguation - Number of profiled callees found via tail calls +; STATS: 4 memprof-context-disambiguation - Aggregate depth of profiled callees found via tail calls +; STATS: 2 memprof-context-disambiguation - Maximum depth of profiled callees found via tail calls + +source_filename = "memprof-tailcall-aliased-location1.cc" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: noinline +; IR-LABEL: @_Z3barv() +define ptr @_Z3barv() local_unnamed_addr #0 { +entry: + ; IR: call {{.*}} @_Znam(i64 10) #[[NOTCOLD:[0-9]+]] + %call = tail call ptr @_Znam(i64 10) #2, !memprof !0, !callsite !5 + ret ptr %call +} + +; Function Attrs: nobuiltin allocsize(0) +declare ptr @_Znam(i64) #1 +declare void @blah() + +; Function Attrs: noinline +; IR-LABEL: @_Z3bazv() +define ptr @_Z3bazv() #0 { +entry: + ; IR: call ptr @_Z3barv() + %call = tail call ptr @_Z3barv() + ret ptr %call +} + +; Function Attrs: noinline +; IR-LABEL: @_Z3foov() +define ptr @_Z3foov() #0 { +entry: + ; IR: call ptr @_Z3bazv() + %call = tail call ptr @_Z3bazv() + ret ptr %call +} + +; Function Attrs: noinline +; IR-LABEL: @main() +define i32 @main() #0 { + ;; Preceding call to another callee but with the same debug location / callsite id + call void @blah(), !callsite !6 + ;; The first call to foo is part of a not cold context, and should use the + ;; original functions. + ; IR: call ptr @_Z3foov() + %call = tail call ptr @_Z3foov(), !callsite !6 + ;; The second call to foo is part of a cold context, and should call the + ;; cloned functions.
+ ; IR: call ptr @_Z3foov.memprof.1() + %call1 = tail call ptr @_Z3foov(), !callsite !7 + ret i32 0 +} + +; IR-LABEL: @_Z3barv.memprof.1() +; IR: call {{.*}} @_Znam(i64 10) #[[COLD:[0-9]+]] +; IR-LABEL: @_Z3bazv.memprof.1() +; IR: call ptr @_Z3barv.memprof.1() +; IR-LABEL: @_Z3foov.memprof.1() +; IR: call ptr @_Z3bazv.memprof.1() + +; IR: attributes #[[NOTCOLD]] = { builtin allocsize(0) "memprof"="notcold" } +; IR: attributes #[[COLD]] = { builtin allocsize(0) "memprof"="cold" } + +attributes #0 = { noinline } +attributes #1 = { nobuiltin allocsize(0) } +attributes #2 = { builtin allocsize(0) } + +!0 = !{!1, !3} +!1 = !{!2, !"notcold"} +!2 = !{i64 3186456655321080972, i64 8632435727821051414} +!3 = !{!4, !"cold"} +!4 = !{i64 3186456655321080972, i64 -3421689549917153178} +!5 = !{i64 3186456655321080972} +!6 = !{i64 8632435727821051414} +!7 = !{i64 -3421689549917153178} diff --git a/llvm/test/ThinLTO/X86/memprof-tailcall-aliased-location2.ll b/llvm/test/ThinLTO/X86/memprof-tailcall-aliased-location2.ll new file mode 100644 index 0000000000000..3085b4e41938b --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-tailcall-aliased-location2.ll @@ -0,0 +1,99 @@ +;; Test to ensure a call to a different callee but with the same debug info +;; (and therefore callsite metadata) as a subsequent tail call in the alloc +;; context does not cause missing or incorrect cloning. This test is otherwise +;; the same as memprof-tailcall.ll. + +;; -stats requires asserts +; REQUIRES: asserts + +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ +; RUN: -r=%t.o,_Z3barv,plx \ +; RUN: -r=%t.o,_Z3bazv,plx \ +; RUN: -r=%t.o,_Z3foov,plx \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -r=%t.o,blah, \ +; RUN: -stats -save-temps \ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS + +; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR + +; STATS: 2 memprof-context-disambiguation - Number of profiled callees found via tail calls +; STATS: 4 memprof-context-disambiguation - Aggregate depth of profiled callees found via tail calls +; STATS: 2 memprof-context-disambiguation - Maximum depth of profiled callees found via tail calls + +source_filename = "memprof-tailcall-aliased-location2.cc" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: noinline +; IR-LABEL: @_Z3barv() +define ptr @_Z3barv() local_unnamed_addr #0 { +entry: + ; IR: call {{.*}} @_Znam(i64 10) #[[NOTCOLD:[0-9]+]] + %call = tail call ptr @_Znam(i64 10) #2, !memprof !0, !callsite !5 + ret ptr %call +} + +; Function Attrs: nobuiltin allocsize(0) +declare ptr @_Znam(i64) #1 +declare void @blah() + +; Function Attrs: noinline +; IR-LABEL: @_Z3bazv() +define ptr @_Z3bazv() #0 { +entry: + ; IR: call ptr @_Z3barv() + %call = tail call ptr @_Z3barv() + ret ptr %call +} + +; Function Attrs: noinline +; IR-LABEL: @_Z3foov() +define ptr @_Z3foov() #0 { +entry: + ; IR: call ptr @_Z3bazv() + %call = tail call ptr @_Z3bazv() + ret ptr %call +} + +; Function Attrs: noinline +; IR-LABEL: @main() +define i32 @main() #0 { + ;; The first call to foo is part of a not cold context, and should use the + ;; original functions.
+ ; IR: call ptr @_Z3foov() + %call = tail call ptr @_Z3foov(), !callsite !6 + ;; Subsequent call to another callee but with the same debug location / callsite id + call void @blah(), !callsite !6 + ;; The second call to foo is part of a cold context, and should call the + ;; cloned functions. + ; IR: call ptr @_Z3foov.memprof.1() + %call1 = tail call ptr @_Z3foov(), !callsite !7 + ret i32 0 +} + +; IR-LABEL: @_Z3barv.memprof.1() +; IR: call {{.*}} @_Znam(i64 10) #[[COLD:[0-9]+]] +; IR-LABEL: @_Z3bazv.memprof.1() +; IR: call ptr @_Z3barv.memprof.1() +; IR-LABEL: @_Z3foov.memprof.1() +; IR: call ptr @_Z3bazv.memprof.1() + +; IR: attributes #[[NOTCOLD]] = { builtin allocsize(0) "memprof"="notcold" } +; IR: attributes #[[COLD]] = { builtin allocsize(0) "memprof"="cold" } + +attributes #0 = { noinline } +attributes #1 = { nobuiltin allocsize(0) } +attributes #2 = { builtin allocsize(0) } + +!0 = !{!1, !3} +!1 = !{!2, !"notcold"} +!2 = !{i64 3186456655321080972, i64 8632435727821051414} +!3 = !{!4, !"cold"} +!4 = !{i64 3186456655321080972, i64 -3421689549917153178} +!5 = !{i64 3186456655321080972} +!6 = !{i64 8632435727821051414} +!7 = !{i64 -3421689549917153178} diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location1.ll b/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location1.ll new file mode 100644 index 0000000000000..8f9df20471e41 --- /dev/null +++ b/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location1.ll @@ -0,0 +1,274 @@ +;; Test to ensure a call to a different callee but with the same debug info +;; (and therefore callsite metadata) as a preceding call in the alloc context +;; does not cause missing or incorrect cloning. This test is otherwise the same +;; as basic.ll. + +;; -stats requires asserts +; REQUIRES: asserts + +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes \ +; RUN: -stats -pass-remarks=memprof-context-disambiguation \ +; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=IR \ +; RUN: --check-prefix=STATS --check-prefix=REMARKS + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() #0 { +entry: + %call = call noundef ptr @_Z3foov(), !callsite !0 + %call1 = call noundef ptr @_Z3foov(), !callsite !1 + ret i32 0 +} + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1 + +; Function Attrs: nobuiltin +declare void @_ZdaPv() #2 + +define internal ptr @_Z3barv() #3 { +entry: + %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !2, !callsite !7 + ret ptr null +} + +declare ptr @_Znam(i64) +declare void @blah() + +define internal ptr @_Z3bazv() #4 { +entry: + ;; Preceding call to another callee but with the same debug location / callsite id + call void @blah(), !callsite !8 + %call = call noundef ptr @_Z3barv(), !callsite !8 + ret ptr null +} + +; Function Attrs: noinline +define internal ptr @_Z3foov() #5 { +entry: + %call = call noundef ptr @_Z3bazv(), !callsite !9 + ret ptr null +} + +; uselistorder directives +uselistorder ptr @_Z3foov, { 1, 0 } + +attributes #0 = { "tune-cpu"="generic" } +attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: write) } +attributes #2 = { nobuiltin } +attributes #3 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } +attributes #4 = { 
"stack-protector-buffer-size"="8" } +attributes #5 = { noinline } +attributes #6 = { builtin } + +!0 = !{i64 8632435727821051414} +!1 = !{i64 -3421689549917153178} +!2 = !{!3, !5} +!3 = !{!4, !"notcold", i64 100} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!5 = !{!6, !"cold", i64 400} +!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!7 = !{i64 9086428284934609951} +!8 = !{i64 -5964873800580613432} +!9 = !{i64 2732490490862098848} + + +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR:0x[a-z0-9]+]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 + +; DUMP: Node [[BAZ]] +; DUMP: %call = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 + +; DUMP: Node [[FOO]] +; DUMP: %call = call noundef ptr @_Z3bazv() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 + +; DUMP: Node [[MAIN1]] +; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: CCG after cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR:0x[a-z0-9]+]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Clones: [[BAR2:0x[a-z0-9]+]] + +; DUMP: Node [[BAZ]] +; DUMP: %call = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Clones: [[BAZ2:0x[a-z0-9]+]] + +; DUMP: Node [[FOO]] +; DUMP: %call = call noundef ptr @_Z3bazv() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Clones: [[FOO2:0x[a-z0-9]+]] + +; 
DUMP: Node [[MAIN1]] +; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: Node [[FOO2]] +; DUMP: %call = call noundef ptr @_Z3bazv() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 +; DUMP: Clone of [[FOO]] + +; DUMP: Node [[BAZ2]] +; DUMP: %call = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2 +; DUMP: Clone of [[BAZ]] + +; DUMP: Node [[BAR2]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2 +; DUMP: Clone of [[BAR]] + + +; REMARKS: created clone _Z3barv.memprof.1 +; REMARKS: created clone _Z3bazv.memprof.1 +; REMARKS: created clone _Z3foov.memprof.1 +; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1 +; REMARKS: call in clone _Z3foov.memprof.1 assigned to call function clone _Z3bazv.memprof.1 +; REMARKS: call in clone _Z3bazv.memprof.1 assigned to call function clone _Z3barv.memprof.1 +; REMARKS: call in clone _Z3barv.memprof.1 marked with memprof allocation attribute cold +; REMARKS: call in clone main assigned to call function clone _Z3foov +; REMARKS: call in clone _Z3foov assigned to call function clone _Z3bazv +; REMARKS: call in clone _Z3bazv assigned to call function clone _Z3barv +; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold + +; SIZES: NotCold context 1 with total size 100 is NotCold after cloning +; SIZES: Cold context 2 with total size 400 is Cold after cloning + +; IR: define {{.*}} @main +;; The first call to foo does not allocate cold memory. It should call the +;; original functions, which ultimately call the original allocation decorated +;; with a "notcold" attribute. +; IR: call {{.*}} @_Z3foov() +;; The second call to foo allocates cold memory. It should call cloned functions +;; which ultimately call a cloned allocation decorated with a "cold" attribute. 
+; IR: call {{.*}} @_Z3foov.memprof.1() +; IR: define internal {{.*}} @_Z3barv() +; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]] +; IR: define internal {{.*}} @_Z3bazv() +; IR: call {{.*}} @_Z3barv() +; IR: define internal {{.*}} @_Z3foov() +; IR: call {{.*}} @_Z3bazv() +; IR: define internal {{.*}} @_Z3barv.memprof.1() +; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]] +; IR: define internal {{.*}} @_Z3bazv.memprof.1() +; IR: call {{.*}} @_Z3barv.memprof.1() +; IR: define internal {{.*}} @_Z3foov.memprof.1() +; IR: call {{.*}} @_Z3bazv.memprof.1() +; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" } +; IR: attributes #[[COLD]] = { builtin "memprof"="cold" } + + +; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) +; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) +; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis + + +; DOT: digraph "postbuild" { +; DOT: label="postbuild"; +; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"]; +; DOT: Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"]; +; DOT: Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; +; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"]; +; DOT: Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; +; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"]; +; DOT: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; +; DOT: Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOT: } + + +; DOTCLONED: digraph "cloned" { +; DOTCLONED: label="cloned"; +; DOTCLONED: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"]; +; DOTCLONED: Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"]; +; DOTCLONED: Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOTCLONED: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"]; +; DOTCLONED: Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOTCLONED: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"]; +; DOTCLONED: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOTCLONED: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] 
ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; +; DOTCLONED: Node[[MAIN2]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOTCLONED: Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3foov -\> _Z3bazv}"]; +; DOTCLONED: Node[[FOO2]] -> Node[[BAZ2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOTCLONED: Node[[BAZ2]] [shape=record,tooltip="N[[BAZ2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3bazv -\> _Z3barv}"]; +; DOTCLONED: Node[[BAZ2]] -> Node[[BAR2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOTCLONED: Node[[BAR2]] [shape=record,tooltip="N[[BAR2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"]; +; DOTCLONED: } diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location2.ll new file mode 100644 index 0000000000000..c3c164d492863 --- /dev/null +++ b/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location2.ll @@ -0,0 +1,274 @@ +;; Test to ensure a call to a different callee but with the same debug info +;; (and therefore callsite metadata) as a subsequent call in the alloc context +;; does not cause missing or incorrect cloning. This test is otherwise the same +;; as basic.ll. + +;; -stats requires asserts +; REQUIRES: asserts + +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes \ +; RUN: -stats -pass-remarks=memprof-context-disambiguation \ +; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=IR \ +; RUN: --check-prefix=STATS --check-prefix=REMARKS + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() #0 { +entry: + %call = call noundef ptr @_Z3foov(), !callsite !0 + %call1 = call noundef ptr @_Z3foov(), !callsite !1 + ret i32 0 +} + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1 + +; Function Attrs: nobuiltin +declare void @_ZdaPv() #2 + +define internal ptr @_Z3barv() #3 { +entry: + %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !2, !callsite !7 + ret ptr null +} + +declare ptr @_Znam(i64) +declare void @blah() + +define internal ptr @_Z3bazv() #4 { +entry: + %call = call noundef ptr @_Z3barv(), !callsite !8 + ;; Subsequent call to another callee but with the same debug location / callsite id + call void @blah(), !callsite !8 + ret ptr null +} + +; Function Attrs: noinline +define internal ptr @_Z3foov() #5 { +entry: + %call = call noundef ptr @_Z3bazv(), !callsite !9 + ret ptr null +} + +; uselistorder directives +uselistorder ptr @_Z3foov, { 1, 0 } + +attributes #0 = { "tune-cpu"="generic" } +attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: write) } +attributes #2 = { nobuiltin } +attributes #3 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } +attributes #4 = { "stack-protector-buffer-size"="8" } +attributes #5 = { noinline } +attributes #6 = { builtin } + +!0 = !{i64 8632435727821051414} +!1 = !{i64 -3421689549917153178} +!2 = !{!3, !5} +!3 = !{!4, 
!"notcold", i64 100} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!5 = !{!6, !"cold", i64 400} +!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!7 = !{i64 9086428284934609951} +!8 = !{i64 -5964873800580613432} +!9 = !{i64 2732490490862098848} + + +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR:0x[a-z0-9]+]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 + +; DUMP: Node [[BAZ]] +; DUMP: %call = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 + +; DUMP: Node [[FOO]] +; DUMP: %call = call noundef ptr @_Z3bazv() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 + +; DUMP: Node [[MAIN1]] +; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: CCG after cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR:0x[a-z0-9]+]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Clones: [[BAR2:0x[a-z0-9]+]] + +; DUMP: Node [[BAZ]] +; DUMP: %call = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Clones: [[BAZ2:0x[a-z0-9]+]] + +; DUMP: Node [[FOO]] +; DUMP: %call = call noundef ptr @_Z3bazv() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Clones: [[FOO2:0x[a-z0-9]+]] + +; DUMP: Node [[MAIN1]] +; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to 
Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: Node [[FOO2]] +; DUMP: %call = call noundef ptr @_Z3bazv() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 +; DUMP: Clone of [[FOO]] + +; DUMP: Node [[BAZ2]] +; DUMP: %call = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2 +; DUMP: Clone of [[BAZ]] + +; DUMP: Node [[BAR2]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2 +; DUMP: Clone of [[BAR]] + + +; REMARKS: created clone _Z3barv.memprof.1 +; REMARKS: created clone _Z3bazv.memprof.1 +; REMARKS: created clone _Z3foov.memprof.1 +; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1 +; REMARKS: call in clone _Z3foov.memprof.1 assigned to call function clone _Z3bazv.memprof.1 +; REMARKS: call in clone _Z3bazv.memprof.1 assigned to call function clone _Z3barv.memprof.1 +; REMARKS: call in clone _Z3barv.memprof.1 marked with memprof allocation attribute cold +; REMARKS: call in clone main assigned to call function clone _Z3foov +; REMARKS: call in clone _Z3foov assigned to call function clone _Z3bazv +; REMARKS: call in clone _Z3bazv assigned to call function clone _Z3barv +; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold + +; SIZES: NotCold context 1 with total size 100 is NotCold after cloning +; SIZES: Cold context 2 with total size 400 is Cold after cloning + +; IR: define {{.*}} @main +;; The first call to foo does not allocate cold memory. It should call the +;; original functions, which ultimately call the original allocation decorated +;; with a "notcold" attribute. +; IR: call {{.*}} @_Z3foov() +;; The second call to foo allocates cold memory. It should call cloned functions +;; which ultimately call a cloned allocation decorated with a "cold" attribute. 
+; IR: call {{.*}} @_Z3foov.memprof.1()
+; IR: define internal {{.*}} @_Z3barv()
+; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
+; IR: define internal {{.*}} @_Z3bazv()
+; IR: call {{.*}} @_Z3barv()
+; IR: define internal {{.*}} @_Z3foov()
+; IR: call {{.*}} @_Z3bazv()
+; IR: define internal {{.*}} @_Z3barv.memprof.1()
+; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
+; IR: define internal {{.*}} @_Z3bazv.memprof.1()
+; IR: call {{.*}} @_Z3barv.memprof.1()
+; IR: define internal {{.*}} @_Z3foov.memprof.1()
+; IR: call {{.*}} @_Z3bazv.memprof.1()
+; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" }
+; IR: attributes #[[COLD]] = { builtin "memprof"="cold" }
+
+
+; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
+; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
+; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis
+
+
+; DOT: digraph "postbuild" {
+; DOT: label="postbuild";
+; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
+; DOT: Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"];
+; DOT: Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
+; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"];
+; DOT: Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
+; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
+; DOT: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
+; DOT: Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOT: }
+
+
+; DOTCLONED: digraph "cloned" {
+; DOTCLONED: label="cloned";
+; DOTCLONED: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
+; DOTCLONED: Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"];
+; DOTCLONED: Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"];
+; DOTCLONED: Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
+; DOTCLONED: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
+; DOTCLONED: Node[[MAIN2]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3foov -\> _Z3bazv}"];
+; DOTCLONED: Node[[FOO2]] -> Node[[BAZ2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: Node[[BAZ2]] [shape=record,tooltip="N[[BAZ2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3bazv -\> _Z3barv}"];
+; DOTCLONED: Node[[BAZ2]] -> Node[[BAR2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: Node[[BAR2]] [shape=record,tooltip="N[[BAR2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
+; DOTCLONED: }
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/tailcall-aliased-location1.ll b/llvm/test/Transforms/MemProfContextDisambiguation/tailcall-aliased-location1.ll
new file mode 100644
index 0000000000000..e0bcd284c097c
--- /dev/null
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/tailcall-aliased-location1.ll
@@ -0,0 +1,100 @@
+;; Test to ensure a call to a different callee but with the same debug info
+;; (and therefore callsite metadata) as a preceding tail call in the alloc
+;; context does not cause missing or incorrect cloning. This test is otherwise
+;; the same as tailcall.ll.
+
+;; -stats requires asserts
+; REQUIRES: asserts
+
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+; RUN:   -stats %s -S 2>&1 | FileCheck %s --check-prefix=STATS --check-prefix=IR
+
+source_filename = "tailcall-aliased-location1.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = dso_local global [2 x ptr] [ptr @_Z2a1v, ptr @_Z2a2v], align 16
+
+declare void @_Z2a1v() #0
+
+declare void @_Z2a2v() #0
+
+; Function Attrs: noinline
+; IR-LABEL: @_Z3barv()
+define ptr @_Z3barv() local_unnamed_addr #0 {
+entry:
+  ; IR: call ptr @_Znam(i64 10) #[[NOTCOLD:[0-9]+]]
+  %call = tail call ptr @_Znam(i64 10) #2, !memprof !0, !callsite !5
+  ret ptr %call
+}
+
+; Function Attrs: nobuiltin allocsize(0)
+declare ptr @_Znam(i64) #1
+declare void @blah()
+
+; Function Attrs: noinline
+; IR-LABEL: @_Z3bazv()
+define ptr @_Z3bazv() #0 {
+entry:
+  ; IR: call ptr @_Z3barv()
+  %call = tail call ptr @_Z3barv()
+  ret ptr %call
+}
+
+; Function Attrs: noinline
+; IR-LABEL: @_Z3foov()
+define ptr @_Z3foov() #0 {
+entry:
+  ; IR: call ptr @_Z3bazv()
+  %call = tail call ptr @_Z3bazv()
+  ret ptr %call
+}
+
+; Function Attrs: noinline
+; IR-LABEL: @main()
+define i32 @main() #0 {
+  ;; Preceding call to another callee but with the same debug location / callsite id
+  call void @blah(), !callsite !6
+  ;; The first call to foo is part of a notcold context, and should use the
+  ;; original functions, which ultimately call the original allocation
+  ;; decorated with a "notcold" attribute.
+  ; IR: call ptr @_Z3foov()
+  %call = tail call ptr @_Z3foov(), !callsite !6
+  ;; The second call to foo is part of a cold context, and should call the
+  ;; cloned functions.
+  ; IR: call ptr @_Z3foov.memprof.1()
+  %call1 = tail call ptr @_Z3foov(), !callsite !7
+  %2 = load ptr, ptr @a, align 16
+  call void %2(), !callsite !10
+  ret i32 0
+}
+
+; IR-LABEL: @_Z3barv.memprof.1()
+; IR: call ptr @_Znam(i64 10) #[[COLD:[0-9]+]]
+; IR-LABEL: @_Z3bazv.memprof.1()
+; IR: call ptr @_Z3barv.memprof.1()
+; IR-LABEL: @_Z3foov.memprof.1()
+; IR: call ptr @_Z3bazv.memprof.1()
+
+; IR: attributes #[[NOTCOLD]] = { builtin allocsize(0) "memprof"="notcold" }
+; IR: attributes #[[COLD]] = { builtin allocsize(0) "memprof"="cold" }
+
+; STATS: 2 memprof-context-disambiguation - Number of profiled callees found via tail calls
+; STATS: 4 memprof-context-disambiguation - Aggregate depth of profiled callees found via tail calls
+; STATS: 2 memprof-context-disambiguation - Maximum depth of profiled callees found via tail calls
+
+attributes #0 = { noinline }
+attributes #1 = { nobuiltin allocsize(0) }
+attributes #2 = { builtin allocsize(0) }
+
+!0 = !{!1, !3, !8}
+!1 = !{!2, !"notcold"}
+!2 = !{i64 3186456655321080972, i64 8632435727821051414}
+!3 = !{!4, !"cold"}
+!4 = !{i64 3186456655321080972, i64 -3421689549917153178}
+!5 = !{i64 3186456655321080972}
+!6 = !{i64 8632435727821051414}
+!7 = !{i64 -3421689549917153178}
+!8 = !{!9, !"notcold"}
+!9 = !{i64 3186456655321080972, i64 1}
+!10 = !{i64 1}
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/tailcall-aliased-location2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/tailcall-aliased-location2.ll
new file mode 100644
index 0000000000000..1e76243fe0f48
--- /dev/null
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/tailcall-aliased-location2.ll
@@ -0,0 +1,100 @@
+;; Test to ensure a call to a different callee but with the same debug info
+;; (and therefore callsite metadata) as a subsequent tail call in the alloc
+;; context does not cause missing or incorrect cloning. This test is otherwise
+;; the same as tailcall.ll.
+
+;; -stats requires asserts
+; REQUIRES: asserts
+
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+; RUN:   -stats %s -S 2>&1 | FileCheck %s --check-prefix=STATS --check-prefix=IR
+
+source_filename = "tailcall-aliased-location2.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = dso_local global [2 x ptr] [ptr @_Z2a1v, ptr @_Z2a2v], align 16
+
+declare void @_Z2a1v() #0
+
+declare void @_Z2a2v() #0
+
+; Function Attrs: noinline
+; IR-LABEL: @_Z3barv()
+define ptr @_Z3barv() local_unnamed_addr #0 {
+entry:
+  ; IR: call ptr @_Znam(i64 10) #[[NOTCOLD:[0-9]+]]
+  %call = tail call ptr @_Znam(i64 10) #2, !memprof !0, !callsite !5
+  ret ptr %call
+}
+
+; Function Attrs: nobuiltin allocsize(0)
+declare ptr @_Znam(i64) #1
+declare void @blah()
+
+; Function Attrs: noinline
+; IR-LABEL: @_Z3bazv()
+define ptr @_Z3bazv() #0 {
+entry:
+  ; IR: call ptr @_Z3barv()
+  %call = tail call ptr @_Z3barv()
+  ret ptr %call
+}
+
+; Function Attrs: noinline
+; IR-LABEL: @_Z3foov()
+define ptr @_Z3foov() #0 {
+entry:
+  ; IR: call ptr @_Z3bazv()
+  %call = tail call ptr @_Z3bazv()
+  ret ptr %call
+}
+
+; Function Attrs: noinline
+; IR-LABEL: @main()
+define i32 @main() #0 {
+  ;; The first call to foo is part of a notcold context, and should use the
+  ;; original functions, which ultimately call the original allocation
+  ;; decorated with a "notcold" attribute.
+  ; IR: call ptr @_Z3foov()
+  %call = tail call ptr @_Z3foov(), !callsite !6
+  ;; Subsequent call to another callee but with the same debug location / callsite id
+  call void @blah(), !callsite !6
+  ;; The second call to foo is part of a cold context, and should call the
+  ;; cloned functions.
+  ; IR: call ptr @_Z3foov.memprof.1()
+  %call1 = tail call ptr @_Z3foov(), !callsite !7
+  %2 = load ptr, ptr @a, align 16
+  call void %2(), !callsite !10
+  ret i32 0
+}
+
+; IR-LABEL: @_Z3barv.memprof.1()
+; IR: call ptr @_Znam(i64 10) #[[COLD:[0-9]+]]
+; IR-LABEL: @_Z3bazv.memprof.1()
+; IR: call ptr @_Z3barv.memprof.1()
+; IR-LABEL: @_Z3foov.memprof.1()
+; IR: call ptr @_Z3bazv.memprof.1()
+
+; IR: attributes #[[NOTCOLD]] = { builtin allocsize(0) "memprof"="notcold" }
+; IR: attributes #[[COLD]] = { builtin allocsize(0) "memprof"="cold" }
+
+; STATS: 2 memprof-context-disambiguation - Number of profiled callees found via tail calls
+; STATS: 4 memprof-context-disambiguation - Aggregate depth of profiled callees found via tail calls
+; STATS: 2 memprof-context-disambiguation - Maximum depth of profiled callees found via tail calls
+
+attributes #0 = { noinline }
+attributes #1 = { nobuiltin allocsize(0) }
+attributes #2 = { builtin allocsize(0) }
+
+!0 = !{!1, !3, !8}
+!1 = !{!2, !"notcold"}
+!2 = !{i64 3186456655321080972, i64 8632435727821051414}
+!3 = !{!4, !"cold"}
+!4 = !{i64 3186456655321080972, i64 -3421689549917153178}
+!5 = !{i64 3186456655321080972}
+!6 = !{i64 8632435727821051414}
+!7 = !{i64 -3421689549917153178}
+!8 = !{!9, !"notcold"}
+!9 = !{i64 3186456655321080972, i64 1}
+!10 = !{i64 1}