Skip to content

Commit 2fb51fb

Browse files
[FuncSpec] Update function specialization to handle phi-chains (#72903)
When using the LLVM flang compiler with alias analysis (AA) enabled, SPEC2017:548.exchange2_r was running significantly slower than without the AA. This was caused by the GVN pass replacing many of the loads in the pre-AA code with phi-nodes that form a long chain of dependencies, which the function specialization was unable to follow. This adds a function to discover phi-nodes in a transitive set, with some limitations to avoid spending ages analysing phi-nodes. The minimum latency savings also had to be lowered - fewer load instructions means less saving. Adding some more prints to help debugging the isProfitable decision. No significant change in compile time or generated code-size. (A previous attempt to fix this was abandoned: #71442) --------- Co-authored-by: Alexandros Lamprineas <[email protected]>
1 parent 2e09ea6 commit 2fb51fb

File tree

5 files changed

+293
-14
lines changed

5 files changed

+293
-14
lines changed

llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,22 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
217217
Cost estimateSwitchInst(SwitchInst &I);
218218
Cost estimateBranchInst(BranchInst &I);
219219

220+
// Transitively Incoming Values (TIV) is a set of Values that can "feed" a
221+
// value to the initial PHI-node. It is defined like this:
222+
//
223+
// * the initial PHI-node belongs to TIV.
224+
//
225+
// * for every PHI-node in TIV, its operands belong to TIV
226+
//
227+
// If TIV for the initial PHI-node (P) contains more than one constant or a
228+
// value that is not a PHI-node, then P cannot be folded to a constant.
229+
//
230+
// As soon as we detect these cases, we bail, without constructing the
231+
// full TIV.
232+
// Otherwise P can be folded to the one constant in TIV.
233+
bool discoverTransitivelyIncomingValues(Constant *Const, PHINode *Root,
234+
DenseSet<PHINode *> &TransitivePHIs);
235+
220236
Constant *visitInstruction(Instruction &I) { return nullptr; }
221237
Constant *visitPHINode(PHINode &I);
222238
Constant *visitFreezeInst(FreezeInst &I);

llvm/lib/Transforms/IPO/FunctionSpecialization.cpp

Lines changed: 94 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,17 @@ static cl::opt<unsigned> MaxClones(
3939
"The maximum number of clones allowed for a single function "
4040
"specialization"));
4141

42+
static cl::opt<unsigned>
43+
MaxDiscoveryIterations("funcspec-max-discovery-iterations", cl::init(100),
44+
cl::Hidden,
45+
cl::desc("The maximum number of iterations allowed "
46+
"when searching for transitive "
47+
"phis"));
48+
4249
static cl::opt<unsigned> MaxIncomingPhiValues(
43-
"funcspec-max-incoming-phi-values", cl::init(4), cl::Hidden, cl::desc(
44-
"The maximum number of incoming values a PHI node can have to be "
45-
"considered during the specialization bonus estimation"));
50+
"funcspec-max-incoming-phi-values", cl::init(8), cl::Hidden,
51+
cl::desc("The maximum number of incoming values a PHI node can have to be "
52+
"considered during the specialization bonus estimation"));
4653

4754
static cl::opt<unsigned> MaxBlockPredecessors(
4855
"funcspec-max-block-predecessors", cl::init(2), cl::Hidden, cl::desc(
@@ -64,9 +71,9 @@ static cl::opt<unsigned> MinCodeSizeSavings(
6471
"much percent of the original function size"));
6572

6673
static cl::opt<unsigned> MinLatencySavings(
67-
"funcspec-min-latency-savings", cl::init(70), cl::Hidden, cl::desc(
68-
"Reject specializations whose latency savings are less than this"
69-
"much percent of the original function size"));
74+
"funcspec-min-latency-savings", cl::init(40), cl::Hidden,
75+
cl::desc("Reject specializations whose latency savings are less than this"
76+
"much percent of the original function size"));
7077

7178
static cl::opt<unsigned> MinInliningBonus(
7279
"funcspec-min-inlining-bonus", cl::init(300), cl::Hidden, cl::desc(
@@ -262,29 +269,102 @@ Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
262269
return estimateBasicBlocks(WorkList);
263270
}
264271

272+
bool InstCostVisitor::discoverTransitivelyIncomingValues(
273+
Constant *Const, PHINode *Root, DenseSet<PHINode *> &TransitivePHIs) {
274+
275+
SmallVector<PHINode *, 64> WorkList;
276+
WorkList.push_back(Root);
277+
unsigned Iter = 0;
278+
279+
while (!WorkList.empty()) {
280+
PHINode *PN = WorkList.pop_back_val();
281+
282+
if (++Iter > MaxDiscoveryIterations ||
283+
PN->getNumIncomingValues() > MaxIncomingPhiValues)
284+
return false;
285+
286+
if (!TransitivePHIs.insert(PN).second)
287+
continue;
288+
289+
for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
290+
Value *V = PN->getIncomingValue(I);
291+
292+
// Disregard self-references and dead incoming values.
293+
if (auto *Inst = dyn_cast<Instruction>(V))
294+
if (Inst == PN || DeadBlocks.contains(PN->getIncomingBlock(I)))
295+
continue;
296+
297+
if (Constant *C = findConstantFor(V, KnownConstants)) {
298+
// Not all incoming values are the same constant. Bail immediately.
299+
if (C != Const)
300+
return false;
301+
continue;
302+
}
303+
304+
if (auto *Phi = dyn_cast<PHINode>(V)) {
305+
WorkList.push_back(Phi);
306+
continue;
307+
}
308+
309+
// We can't reason about anything else.
310+
return false;
311+
}
312+
}
313+
return true;
314+
}
315+
265316
Constant *InstCostVisitor::visitPHINode(PHINode &I) {
266317
if (I.getNumIncomingValues() > MaxIncomingPhiValues)
267318
return nullptr;
268319

269320
bool Inserted = VisitedPHIs.insert(&I).second;
270321
Constant *Const = nullptr;
322+
bool HaveSeenIncomingPHI = false;
271323

272324
for (unsigned Idx = 0, E = I.getNumIncomingValues(); Idx != E; ++Idx) {
273325
Value *V = I.getIncomingValue(Idx);
326+
327+
// Disregard self-references and dead incoming values.
274328
if (auto *Inst = dyn_cast<Instruction>(V))
275329
if (Inst == &I || DeadBlocks.contains(I.getIncomingBlock(Idx)))
276330
continue;
277-
Constant *C = findConstantFor(V, KnownConstants);
278-
if (!C) {
279-
if (Inserted)
280-
PendingPHIs.push_back(&I);
281-
return nullptr;
331+
332+
if (Constant *C = findConstantFor(V, KnownConstants)) {
333+
if (!Const)
334+
Const = C;
335+
// Not all incoming values are the same constant. Bail immediately.
336+
if (C != Const)
337+
return nullptr;
338+
continue;
282339
}
283-
if (!Const)
284-
Const = C;
285-
else if (C != Const)
340+
341+
if (Inserted) {
342+
// First time we are seeing this phi. We will retry later, after
343+
// all the constant arguments have been propagated. Bail for now.
344+
PendingPHIs.push_back(&I);
286345
return nullptr;
346+
}
347+
348+
if (isa<PHINode>(V)) {
349+
// Perhaps it is a Transitive Phi. We will confirm later.
350+
HaveSeenIncomingPHI = true;
351+
continue;
352+
}
353+
354+
// We can't reason about anything else.
355+
return nullptr;
287356
}
357+
358+
if (!Const)
359+
return nullptr;
360+
361+
if (!HaveSeenIncomingPHI)
362+
return Const;
363+
364+
DenseSet<PHINode *> TransitivePHIs;
365+
if (!discoverTransitivelyIncomingValues(Const, &I, TransitivePHIs))
366+
return nullptr;
367+
288368
return Const;
289369
}
290370

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
;
3+
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=20 -funcspec-for-literal-constant -S < %s | FileCheck %s --check-prefix=FUNCSPEC
4+
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=20 -funcspec-for-literal-constant -funcspec-max-discovery-iterations=16 -S < %s | FileCheck %s --check-prefix=NOFUNCSPEC
5+
6+
define i64 @bar(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10) {
7+
; FUNCSPEC-LABEL: define i64 @bar(
8+
; FUNCSPEC-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i1 [[C5:%.*]], i1 [[C6:%.*]], i1 [[C7:%.*]], i1 [[C8:%.*]], i1 [[C9:%.*]], i1 [[C10:%.*]]) {
9+
; FUNCSPEC-NEXT: entry:
10+
; FUNCSPEC-NEXT: [[F1:%.*]] = call i64 @foo.specialized.1(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0:![0-9]+]]
11+
; FUNCSPEC-NEXT: [[F2:%.*]] = call i64 @foo.specialized.2(i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG1:![0-9]+]]
12+
; FUNCSPEC-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[F1]], [[F2]]
13+
; FUNCSPEC-NEXT: ret i64 [[ADD]]
14+
;
15+
; NOFUNCSPEC-LABEL: define i64 @bar(
16+
; NOFUNCSPEC-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i1 [[C5:%.*]], i1 [[C6:%.*]], i1 [[C7:%.*]], i1 [[C8:%.*]], i1 [[C9:%.*]], i1 [[C10:%.*]]) {
17+
; NOFUNCSPEC-NEXT: entry:
18+
; NOFUNCSPEC-NEXT: [[F1:%.*]] = call i64 @foo(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0:![0-9]+]]
19+
; NOFUNCSPEC-NEXT: [[F2:%.*]] = call i64 @foo(i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0]]
20+
; NOFUNCSPEC-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[F1]], [[F2]]
21+
; NOFUNCSPEC-NEXT: ret i64 [[ADD]]
22+
;
23+
entry:
24+
%f1 = call i64 @foo(i64 3, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10)
25+
%f2 = call i64 @foo(i64 4, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10)
26+
%add = add i64 %f1, %f2
27+
ret i64 %add
28+
}
29+
30+
define internal i64 @foo(i64 %n, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10) {
31+
entry:
32+
br i1 %c1, label %l1, label %l9
33+
34+
l1:
35+
%phi1 = phi i64 [ %n, %entry ], [ %phi2, %l2 ]
36+
%add = add i64 %phi1, 1
37+
%div = sdiv i64 %add, 2
38+
br i1 %c2, label %l1_5, label %exit
39+
40+
l1_5:
41+
br i1 %c3, label %l1_75, label %l6
42+
43+
l1_75:
44+
br i1 %c4, label %l2, label %l3
45+
46+
l2:
47+
%phi2 = phi i64 [ %phi1, %l1_75 ], [ %phi3, %l3 ]
48+
br label %l1
49+
50+
l3:
51+
%phi3 = phi i64 [ %phi1, %l1_75 ], [ %phi4, %l4 ]
52+
br label %l2
53+
54+
l4:
55+
%phi4 = phi i64 [ %phi5, %l5 ], [ %phi6, %l6 ]
56+
br i1 %c5, label %l3, label %l6
57+
58+
l5:
59+
%phi5 = phi i64 [ %phi6, %l6_5 ], [ %phi7, %l7 ]
60+
br label %l4
61+
62+
l6:
63+
%phi6 = phi i64 [ %phi4, %l4 ], [ %phi1, %l1_5 ]
64+
br i1 %c6, label %l4, label %l6_5
65+
66+
l6_5:
67+
br i1 %c7, label %l5, label %l8
68+
69+
l7:
70+
%phi7 = phi i64 [ %phi9, %l9 ], [ %phi8, %l8 ]
71+
br i1 %c8, label %l5, label %l8
72+
73+
l8:
74+
%phi8 = phi i64 [ %phi6, %l6_5 ], [ %phi7, %l7 ]
75+
br i1 %c9, label %l7, label %l9
76+
77+
l9:
78+
%phi9 = phi i64 [ %n, %entry ], [ %phi8, %l8 ]
79+
%sub = sub i64 %phi9, 1
80+
%mul = mul i64 %sub, 2
81+
br i1 %c10, label %l7, label %exit
82+
83+
exit:
84+
%res = phi i64 [ %div, %l1 ], [ %mul, %l9]
85+
ret i64 %res
86+
}
87+
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=10 -funcspec-for-literal-constant -S < %s | FileCheck %s
2+
3+
define i64 @bar(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i64 %x1) {
4+
; CHECK-LABEL: define i64 @bar(
5+
; CHECK-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i64 [[X1:%.*]]) {
6+
; CHECK-NEXT: entry:
7+
; CHECK-NEXT: [[F1:%.*]] = call i64 @foo.specialized.1(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]])
8+
; CHECK-NEXT: [[F2:%.*]] = call i64 @foo(i64 [[X1]], i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]])
9+
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[F1]], [[F2]]
10+
; CHECK-NEXT: ret i64 [[ADD]]
11+
;
12+
entry:
13+
%f1 = call i64 @foo(i64 3, i1 %c1, i1 %c2, i1 %c3, i1 %c4)
14+
%f2 = call i64 @foo(i64 %x1, i1 %c1, i1 %c2, i1 %c3, i1 %c4)
15+
%add = add i64 %f1, %f2
16+
ret i64 %add
17+
}
18+
19+
define internal i64 @foo(i64 %n, i1 %c1, i1 %c2, i1 %c3, i1 %c4) {
20+
entry:
21+
br label %l0
22+
23+
l1:
24+
%phi1 = phi i64 [ %phi0, %l0 ], [ %phi2, %l2 ]
25+
%add = add i64 %phi1, 1
26+
%div = sdiv i64 %add, 2
27+
br i1 %c2, label %l2, label %exit
28+
29+
l2:
30+
%phi2 = phi i64 [ %phi0, %l0 ], [ %phi1, %l1 ]
31+
%sub = sub i64 %phi2, 1
32+
%mul = mul i64 %sub, 2
33+
br i1 %c4, label %l1, label %exit
34+
35+
l0:
36+
%phi0 = phi i64 [ %n, %entry ]
37+
br i1 %c1, label %l1, label %l2
38+
39+
exit:
40+
%res = phi i64 [ %div, %l1 ], [ %mul, %l2]
41+
ret i64 %res
42+
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=10 -funcspec-for-literal-constant -S < %s | FileCheck %s
2+
3+
define i64 @bar(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i64 %x1) {
4+
; CHECK-LABEL: define i64 @bar(
5+
; CHECK-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i64 [[X1:%.*]]) {
6+
; CHECK-NEXT: entry:
7+
; CHECK-NEXT: [[F1:%.*]] = call i64 @foo(i64 3, i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]])
8+
; CHECK-NEXT: [[F2:%.*]] = call i64 @foo(i64 4, i64 [[X1]], i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]])
9+
; CHECK-NEXT: [[F3:%.*]] = call i64 @foo.specialized.1(i64 3, i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]])
10+
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[F1]], [[F2]]
11+
; CHECK-NEXT: [[ADD2:%.*]] = add i64 [[ADD]], [[F3]]
12+
; CHECK-NEXT: ret i64 [[ADD2]]
13+
;
14+
entry:
15+
%f1 = call i64 @foo(i64 3, i64 4, i1 %c1, i1 %c2, i1 %c3, i1 %c4)
16+
%f2 = call i64 @foo(i64 4, i64 %x1, i1 %c1, i1 %c2, i1 %c3, i1 %c4)
17+
%f3 = call i64 @foo(i64 3, i64 3, i1 %c1, i1 %c2, i1 %c3, i1 %c4)
18+
%add = add i64 %f1, %f2
19+
%add2 = add i64 %add, %f3
20+
ret i64 %add2
21+
}
22+
23+
define internal i64 @foo(i64 %n, i64 %m, i1 %c1, i1 %c2, i1 %c3, i1 %c4) {
24+
entry:
25+
br i1 %c1, label %l1, label %l4
26+
27+
l1:
28+
%phi1 = phi i64 [ %n, %entry ], [ %phi2, %l2 ]
29+
%add = add i64 %phi1, 1
30+
%div = sdiv i64 %add, 2
31+
br i1 %c2, label %l1_5, label %exit
32+
33+
l1_5:
34+
br i1 %c3, label %l2, label %l3
35+
36+
l2:
37+
%phi2 = phi i64 [ %phi1, %l1_5 ], [ %phi3, %l3 ]
38+
br label %l1
39+
40+
l3:
41+
%phi3 = phi i64 [ %phi1, %l1_5 ], [ %m, %l4 ]
42+
br i1 %c2, label %l4, label %l2
43+
44+
l4:
45+
%phi4 = phi i64 [ %n, %entry ], [ %phi3, %l3 ]
46+
%sub = sub i64 %phi4, 1
47+
%mul = mul i64 %sub, 2
48+
br i1 %c4, label %l3, label %exit
49+
50+
exit:
51+
%res = phi i64 [ %div, %l1 ], [ %mul, %l4]
52+
ret i64 %res
53+
}
54+

0 commit comments

Comments
 (0)