
Commit a79d1b9

Revert "[MemProf] Reduce cloning overhead by sharing nodes when possible (#99…"
This reverts commit 055e431.
1 parent 38b67c5 commit a79d1b9
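
For context on what is being undone: the reverted change avoided creating a separate context graph node for every matched callsite by letting a single ContextNode stand in for several calls that live in the same function and share the same (possibly pruned) stack ids, recording the extras in a MatchingCalls list on the node (see the removed members and logic in the diff below). The following is a minimal, hedged sketch of that sharing idea only; SimpleCall, SimpleNode, and getOrShareNode are illustrative stand-ins, not the pass's real CallInfo/ContextNode API.

#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

// Illustrative stand-ins for the pass's CallInfo and ContextNode.
struct SimpleCall {
  std::string Func;               // enclosing function
  std::vector<uint64_t> StackIds; // (possibly pruned) callsite stack ids
};

struct SimpleNode {
  SimpleCall Primary;                    // primary call for this node
  std::vector<SimpleCall> MatchingCalls; // other calls sharing the node
};

// Calls agreeing on (function, stack ids) can be cloned and updated together,
// so the reverted scheme gave them one shared node instead of one node each.
using NodeKey = std::pair<std::string, std::vector<uint64_t>>;

SimpleNode *getOrShareNode(std::map<NodeKey, std::unique_ptr<SimpleNode>> &Nodes,
                           const SimpleCall &Call) {
  NodeKey Key{Call.Func, Call.StackIds};
  auto It = Nodes.find(Key);
  if (It != Nodes.end()) {
    // A matching call already has a node: fold this call into it.
    It->second->MatchingCalls.push_back(Call);
    return It->second.get();
  }
  auto NewNode = std::make_unique<SimpleNode>(SimpleNode{Call, {}});
  SimpleNode *Raw = NewNode.get();
  Nodes.emplace(std::move(Key), std::move(NewNode));
  return Raw;
}

After this revert, each matched call once again gets its own node, so the MatchingCalls list and the bookkeeping that kept it up to date during cloning are all removed below.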

File tree

1 file changed: +15 -109 lines changed

llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp

Lines changed: 15 additions & 109 deletions
@@ -242,16 +242,9 @@ class CallsiteContextGraph {
     // recursion.
     bool Recursive = false;
 
-    // The corresponding allocation or interior call. This is the primary call
-    // for which we have created this node.
+    // The corresponding allocation or interior call.
     CallInfo Call;
 
-    // List of other calls that can be treated the same as the primary call
-    // through cloning. I.e. located in the same function and have the same
-    // (possibly pruned) stack ids. They will be updated the same way as the
-    // primary call when assigning to function clones.
-    std::vector<CallInfo> MatchingCalls;
-
     // For alloc nodes this is a unique id assigned when constructed, and for
     // callsite stack nodes it is the original stack id when the node is
     // constructed from the memprof MIB metadata on the alloc nodes. Note that
@@ -464,9 +457,6 @@ class CallsiteContextGraph {
   /// iteration.
   MapVector<FuncTy *, std::vector<CallInfo>> FuncToCallsWithMetadata;
 
-  /// Records the function each call is located in.
-  DenseMap<CallInfo, const FuncTy *> CallToFunc;
-
   /// Map from callsite node to the enclosing caller function.
   std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
 
@@ -484,8 +474,7 @@ class CallsiteContextGraph {
   /// StackIdToMatchingCalls map.
   void assignStackNodesPostOrder(
       ContextNode *Node, DenseSet<const ContextNode *> &Visited,
-      DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls,
-      DenseMap<CallInfo, CallInfo> &CallToMatchingCall);
+      DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls);
 
   /// Duplicates the given set of context ids, updating the provided
   /// map from each original id with the newly generated context ids,
@@ -1241,11 +1230,10 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
 
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
-    assignStackNodesPostOrder(
-        ContextNode *Node, DenseSet<const ContextNode *> &Visited,
-        DenseMap<uint64_t, std::vector<CallContextInfo>>
-            &StackIdToMatchingCalls,
-        DenseMap<CallInfo, CallInfo> &CallToMatchingCall) {
+    assignStackNodesPostOrder(ContextNode *Node,
+                              DenseSet<const ContextNode *> &Visited,
+                              DenseMap<uint64_t, std::vector<CallContextInfo>>
+                                  &StackIdToMatchingCalls) {
   auto Inserted = Visited.insert(Node);
   if (!Inserted.second)
     return;
@@ -1258,8 +1246,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
     // Skip any that have been removed during the recursion.
     if (!Edge)
       continue;
-    assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls,
-                              CallToMatchingCall);
+    assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls);
   }
 
   // If this node's stack id is in the map, update the graph to contain new
@@ -1302,19 +1289,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
     auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
     // Skip any for which we didn't assign any ids, these don't get a node in
     // the graph.
-    if (SavedContextIds.empty()) {
-      // If this call has a matching call (located in the same function and
-      // having the same stack ids), simply add it to the context node created
-      // for its matching call earlier. These can be treated the same through
-      // cloning and get updated at the same time.
-      if (!CallToMatchingCall.contains(Call))
-        continue;
-      auto MatchingCall = CallToMatchingCall[Call];
-      assert(NonAllocationCallToContextNodeMap.contains(MatchingCall));
-      NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
-          Call);
+    if (SavedContextIds.empty())
       continue;
-    }
 
     assert(LastId == Ids.back());
 
@@ -1446,10 +1422,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
   // there is more than one call with the same stack ids. Their (possibly newly
   // duplicated) context ids are saved in the StackIdToMatchingCalls map.
   DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
-  // Save a map from each call to any that are found to match it. I.e. located
-  // in the same function and have the same (possibly pruned) stack ids. We use
-  // this to avoid creating extra graph nodes as they can be treated the same.
-  DenseMap<CallInfo, CallInfo> CallToMatchingCall;
   for (auto &It : StackIdToMatchingCalls) {
     auto &Calls = It.getSecond();
     // Skip single calls with a single stack id. These don't need a new node.
@@ -1488,13 +1460,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
     DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
     assert(!LastNodeContextIds.empty());
 
-    // Map from function to the first call from the below list (with matching
-    // stack ids) found in that function. Note that calls from different
-    // functions can have the same stack ids because this is the list of stack
-    // ids that had (possibly pruned) nodes after building the graph from the
-    // allocation MIBs.
-    DenseMap<const FuncTy *, CallInfo> FuncToCallMap;
-
     for (unsigned I = 0; I < Calls.size(); I++) {
       auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
       assert(SavedContextIds.empty());
@@ -1568,18 +1533,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
         continue;
       }
 
-      const FuncTy *CallFunc = CallToFunc[Call];
-
-      // If the prior call had the same stack ids this map would not be empty.
-      // Check if we already have a call that "matches" because it is located
-      // in the same function.
-      if (FuncToCallMap.contains(CallFunc)) {
-        // Record the matching call found for this call, and skip it. We
-        // will subsequently combine it into the same node.
-        CallToMatchingCall[Call] = FuncToCallMap[CallFunc];
-        continue;
-      }
-
       // Check if the next set of stack ids is the same (since the Calls vector
       // of tuples is sorted by the stack ids we can just look at the next one).
       bool DuplicateContextIds = false;
@@ -1609,14 +1562,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
         set_subtract(LastNodeContextIds, StackSequenceContextIds);
         if (LastNodeContextIds.empty())
           break;
-        // No longer possibly in a sequence of calls with duplicate stack ids,
-        // clear the map.
-        FuncToCallMap.clear();
-      } else
-        // Record the call with its function, so we can locate it the next time
-        // we find a call from this function when processing the calls with the
-        // same stack ids.
-        FuncToCallMap[CallFunc] = Call;
+      }
     }
   }
 
@@ -1633,8 +1579,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
   // associated context ids over to the new nodes.
   DenseSet<const ContextNode *> Visited;
   for (auto &Entry : AllocationCallToContextNodeMap)
-    assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls,
-                              CallToMatchingCall);
+    assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls);
   if (VerifyCCG)
     check();
 }
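
The hunks above in updateStackNodes() drop the matching pass that ran over the Calls vector (which is sorted by stack ids): within each run of identical stack ids the first call seen per function was kept, later calls from the same function were recorded in CallToMatchingCall and skipped, and the per-function map was cleared when the run ended. Below is a rough, hedged sketch of that loop; CallEntry and the int call ids are illustrative stand-ins, not the pass's real tuple type.

#include <cstdint>
#include <map>
#include <string>
#include <vector>

// Stand-in for the (Call, Ids, Func, SavedContextIds) tuples in Calls.
struct CallEntry {
  int Call;                  // stands in for CallInfo
  std::vector<uint64_t> Ids; // (possibly pruned) stack ids
  std::string Func;          // enclosing function
};

// Calls is sorted by Ids, so identical stack-id sequences are adjacent.
void matchWithinRuns(const std::vector<CallEntry> &Calls,
                     std::map<int, int> &CallToMatchingCall) {
  std::map<std::string, int> FuncToCall; // first call per function in this run
  for (size_t I = 0; I < Calls.size(); ++I) {
    // A new run of stack ids begins: matches from the previous run no longer
    // apply, so forget the per-function entries.
    if (I > 0 && Calls[I].Ids != Calls[I - 1].Ids)
      FuncToCall.clear();
    auto [It, Inserted] = FuncToCall.try_emplace(Calls[I].Func, Calls[I].Call);
    if (!Inserted)
      // Same function, same stack ids: reuse the earlier call's node.
      CallToMatchingCall[Calls[I].Call] = It->second;
  }
}

With the revert, none of this matching runs; every call that keeps context ids gets its own stack node in assignStackNodesPostOrder.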
@@ -1734,7 +1679,6 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
         continue;
       if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) {
         CallsWithMetadata.push_back(&I);
-        CallToFunc[&I] = &F;
         auto *AllocNode = addAllocNode(&I, &F);
         auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite);
         assert(CallsiteMD);
@@ -1756,10 +1700,8 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
         I.setMetadata(LLVMContext::MD_callsite, nullptr);
       }
       // For callsite metadata, add to list for this function for later use.
-      else if (I.getMetadata(LLVMContext::MD_callsite)) {
+      else if (I.getMetadata(LLVMContext::MD_callsite))
         CallsWithMetadata.push_back(&I);
-        CallToFunc[&I] = &F;
-      }
     }
   }
   if (!CallsWithMetadata.empty())
@@ -1814,10 +1756,8 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
       // correlate properly in applyImport in the backends.
       if (AN.MIBs.empty())
         continue;
-      IndexCall AllocCall(&AN);
-      CallsWithMetadata.push_back(AllocCall);
-      CallToFunc[AllocCall] = FS;
-      auto *AllocNode = addAllocNode(AllocCall, FS);
+      CallsWithMetadata.push_back({&AN});
+      auto *AllocNode = addAllocNode({&AN}, FS);
       // Pass an empty CallStack to the CallsiteContext (second)
       // parameter, since for ThinLTO we already collapsed out the inlined
       // stack ids on the allocation call during ModuleSummaryAnalysis.
@@ -1848,11 +1788,8 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
     }
     // For callsite metadata, add to list for this function for later use.
     if (!FS->callsites().empty())
-      for (auto &SN : FS->mutableCallsites()) {
-        IndexCall StackNodeCall(&SN);
-        CallsWithMetadata.push_back(StackNodeCall);
-        CallToFunc[StackNodeCall] = FS;
-      }
+      for (auto &SN : FS->mutableCallsites())
+        CallsWithMetadata.push_back({&SN});
 
     if (!CallsWithMetadata.empty())
       FuncToCallsWithMetadata[FS] = CallsWithMetadata;
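
The constructor hunks above (ModuleCallsiteContextGraph and IndexCallsiteContextGraph) drop the CallToFunc side map, which existed only so the matching logic could ask which function an arbitrary call belongs to. A hedged sketch of the bookkeeping being removed, with Call and Func as illustrative stand-ins for the pass's Instruction/IndexCall and Function/FunctionSummary types:

#include <map>
#include <vector>

struct Call {};
struct Func {};

struct GraphState {
  // Kept by the pass in both schemes: calls with memprof/callsite metadata,
  // grouped by enclosing function.
  std::map<const Func *, std::vector<const Call *>> FuncToCallsWithMetadata;
  // Needed only by the reverted sharing scheme, for call -> function lookups
  // while matching calls; removed by this revert.
  std::map<const Call *, const Func *> CallToFunc;
};

void recordCall(GraphState &G, const Call *C, const Func *F) {
  G.FuncToCallsWithMetadata[F].push_back(C);
  G.CallToFunc[C] = F; // the line this revert deletes, conceptually
}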
@@ -2288,14 +2225,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
   if (Recursive)
     OS << " (recursive)";
   OS << "\n";
-  if (!MatchingCalls.empty()) {
-    OS << "\tMatchingCalls:\n";
-    for (auto &MatchingCall : MatchingCalls) {
-      OS << "\t";
-      MatchingCall.print(OS);
-      OS << "\n";
-    }
-  }
   OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n";
   OS << "\tContextIds:";
   // Make a copy of the computed context ids that we can sort for stability.
@@ -2549,7 +2478,6 @@ CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
       std::make_unique<ContextNode>(Node->IsAllocation, Node->Call));
   ContextNode *Clone = NodeOwner.back().get();
   Node->addClone(Clone);
-  Clone->MatchingCalls = Node->MatchingCalls;
   assert(NodeToCallingFunc.count(Node));
   NodeToCallingFunc[Clone] = NodeToCallingFunc[Node];
   moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true,
@@ -3093,14 +3021,6 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
           if (CallMap.count(Call))
             CallClone = CallMap[Call];
           CallsiteClone->setCall(CallClone);
-          // Need to do the same for all matching calls.
-          for (auto &MatchingCall : Node->MatchingCalls) {
-            CallInfo CallClone(MatchingCall);
-            if (CallMap.count(MatchingCall))
-              CallClone = CallMap[MatchingCall];
-            // Updates the call in the list.
-            MatchingCall = CallClone;
-          }
         };
 
   // Keep track of the clones of callsite Node that need to be assigned to
@@ -3267,16 +3187,6 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
             CallInfo NewCall(CallMap[OrigCall]);
             assert(NewCall);
             NewClone->setCall(NewCall);
-            // Need to do the same for all matching calls.
-            for (auto &MatchingCall : NewClone->MatchingCalls) {
-              CallInfo OrigMatchingCall(MatchingCall);
-              OrigMatchingCall.setCloneNo(0);
-              assert(CallMap.count(OrigMatchingCall));
-              CallInfo NewCall(CallMap[OrigMatchingCall]);
-              assert(NewCall);
-              // Updates the call in the list.
-              MatchingCall = NewCall;
-            }
           }
         }
         // Fall through to handling below to perform the recording of the
@@ -3463,7 +3373,6 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
 
     if (Node->IsAllocation) {
       updateAllocationCall(Node->Call, allocTypeToUse(Node->AllocTypes));
-      assert(Node->MatchingCalls.empty());
       return;
     }
 
@@ -3472,9 +3381,6 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
 
     auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node];
     updateCall(Node->Call, CalleeFunc);
-    // Update all the matching calls as well.
-    for (auto &Call : Node->MatchingCalls)
-      updateCall(Call, CalleeFunc);
   };
 
   // Performs DFS traversal starting from allocation nodes to update calls to
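
The final hunks remove the other half of the sharing scheme's cost in assignFunctions(): whenever a node's primary call was swapped for a cloned call or redirected to a cloned callee, the same update had to be replayed over every entry in MatchingCalls. A small, hedged sketch of that replay pattern; the types and updateNodeCalls are illustrative, not the pass's real API:

#include <functional>
#include <vector>

struct Call { unsigned CloneNo = 0; };
struct Node {
  Call Primary;
  std::vector<Call> MatchingCalls; // populated only under the reverted scheme
};

// Under node sharing, any rewrite of the primary call (for example pointing
// it at a function clone) also had to be applied to every matching call.
// After the revert each call owns its node, so only the primary call is
// touched and the loop below disappears.
void updateNodeCalls(Node &N, const std::function<void(Call &)> &Update) {
  Update(N.Primary);
  for (Call &C : N.MatchingCalls)
    Update(C);
}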
