Skip to content

Commit 76aa370

Browse files
authored
[SystemZ] Remove inlining threshold multiplier. (#106058)
Because of recently reported problems caused by the fairly high inlining threshold multiplier (x3), this patch removes the multiplier and compensates in adjustInliningThreshold() for the regressions that the removal would otherwise cause. The cases that were found to still benefit from inlining, and that therefore need explicit handling, are those with a considerable number of accesses to the same memory in both caller and callee.
1 parent 18d3a5d commit 76aa370

File tree

3 files changed

+207
-9
lines changed

3 files changed

+207
-9
lines changed

llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp

Lines changed: 71 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,17 +53,83 @@ static bool isUsedAsMemCpySource(const Value *V, bool &OtherUse) {
5353
return UsedAsMemCpySource;
5454
}
5555

56+
static void countNumMemAccesses(const Value *Ptr, unsigned &NumStores,
57+
unsigned &NumLoads, const Function *F) {
58+
if (!isa<PointerType>(Ptr->getType()))
59+
return;
60+
for (const User *U : Ptr->users())
61+
if (const Instruction *User = dyn_cast<Instruction>(U)) {
62+
if (User->getParent()->getParent() == F) {
63+
if (const auto *SI = dyn_cast<StoreInst>(User)) {
64+
if (SI->getPointerOperand() == Ptr && !SI->isVolatile())
65+
NumStores++;
66+
} else if (const auto *LI = dyn_cast<LoadInst>(User)) {
67+
if (LI->getPointerOperand() == Ptr && !LI->isVolatile())
68+
NumLoads++;
69+
} else if (const auto *GEP = dyn_cast<GetElementPtrInst>(User)) {
70+
if (GEP->getPointerOperand() == Ptr)
71+
countNumMemAccesses(GEP, NumStores, NumLoads, F);
72+
}
73+
}
74+
}
75+
}
76+
5677
unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
5778
unsigned Bonus = 0;
79+
const Function *Caller = CB->getParent()->getParent();
80+
const Function *Callee = CB->getCalledFunction();
81+
if (!Callee)
82+
return 0;
83+
const Module *M = Caller->getParent();
5884

5985
// Increase the threshold if an incoming argument is used only as a memcpy
6086
// source.
61-
if (Function *Callee = CB->getCalledFunction())
62-
for (Argument &Arg : Callee->args()) {
63-
bool OtherUse = false;
64-
if (isUsedAsMemCpySource(&Arg, OtherUse) && !OtherUse)
65-
Bonus += 150;
87+
for (const Argument &Arg : Callee->args()) {
88+
bool OtherUse = false;
89+
if (isUsedAsMemCpySource(&Arg, OtherUse) && !OtherUse) {
90+
Bonus = 1000;
91+
break;
6692
}
93+
}
94+
95+
// Give bonus for globals used much in both caller and callee.
96+
std::set<const GlobalVariable *> CalleeGlobals;
97+
std::set<const GlobalVariable *> CallerGlobals;
98+
for (const GlobalVariable &Global : M->globals())
99+
for (const User *U : Global.users())
100+
if (const Instruction *User = dyn_cast<Instruction>(U)) {
101+
if (User->getParent()->getParent() == Callee)
102+
CalleeGlobals.insert(&Global);
103+
if (User->getParent()->getParent() == Caller)
104+
CallerGlobals.insert(&Global);
105+
}
106+
for (auto *GV : CalleeGlobals)
107+
if (CallerGlobals.count(GV)) {
108+
unsigned CalleeStores = 0, CalleeLoads = 0;
109+
unsigned CallerStores = 0, CallerLoads = 0;
110+
countNumMemAccesses(GV, CalleeStores, CalleeLoads, Callee);
111+
countNumMemAccesses(GV, CallerStores, CallerLoads, Caller);
112+
if ((CalleeStores + CalleeLoads) > 10 &&
113+
(CallerStores + CallerLoads) > 10) {
114+
Bonus = 1000;
115+
break;
116+
}
117+
}
118+
119+
// Give bonus when Callee accesses an Alloca of Caller heavily.
120+
unsigned NumStores = 0;
121+
unsigned NumLoads = 0;
122+
for (unsigned OpIdx = 0; OpIdx != Callee->arg_size(); ++OpIdx) {
123+
Value *CallerArg = CB->getArgOperand(OpIdx);
124+
Argument *CalleeArg = Callee->getArg(OpIdx);
125+
if (isa<AllocaInst>(CallerArg))
126+
countNumMemAccesses(CalleeArg, NumStores, NumLoads, Callee);
127+
}
128+
if (NumLoads > 10)
129+
Bonus += NumLoads * 50;
130+
if (NumStores > 10)
131+
Bonus += NumStores * 50;
132+
Bonus = std::min(Bonus, unsigned(1000));
67133

68134
LLVM_DEBUG(if (Bonus)
69135
dbgs() << "++ SZTTI Adding inlining bonus: " << Bonus << "\n";);

llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
3838
/// \name Scalar TTI Implementations
3939
/// @{
4040

41-
unsigned getInliningThresholdMultiplier() const { return 3; }
4241
unsigned adjustInliningThreshold(const CallBase *CB) const;
4342

4443
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,

llvm/test/CodeGen/SystemZ/inline-thresh-adjust.ll

Lines changed: 136 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
; RUN: opt < %s -mtriple=systemz-unknown -mcpu=z15 -passes='cgscc(inline)' -disable-output \
22
; RUN: -debug-only=inline,systemztti 2>&1 | FileCheck %s
33
; REQUIRES: asserts
4-
;
4+
55
; Check that the inlining threshold is incremented for a function using an
66
; argument only as a memcpy source.
7-
7+
;
88
; CHECK: Inlining calls in: root_function
99
; CHECK: Inlining {{.*}} Call: call void @leaf_function_A(ptr %Dst)
10-
; CHECK: ++ SZTTI Adding inlining bonus: 150
10+
; CHECK: ++ SZTTI Adding inlining bonus: 1000
1111
; CHECK: Inlining {{.*}} Call: call void @leaf_function_B(ptr %Dst, ptr %Src)
1212

1313
define void @leaf_function_A(ptr %Dst) {
@@ -30,3 +30,136 @@ entry:
3030
}
3131

3232
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
33+
34+
; Check that the inlining threshold is incremented in case of multiple
35+
; accesses of a global variable by both caller and callee (which is true here
36+
; after the first call is inlined).
37+
;
38+
; CHECK: Inlining calls in: Caller1
39+
; CHECK: ++ SZTTI Adding inlining bonus: 1000
40+
41+
@GlobV = external global i32
42+
43+
; Caller1 calls @Callee1 once on each side of the %cond1 branch. It contains
; no direct access to @GlobV itself, so accesses to @GlobV can only appear in
; this function once one of the two calls has been inlined.
define i64 @Caller1(i1 %cond1, i32 %0) #0 {
44+
entry:
45+
br i1 %cond1, label %sw.bb3437, label %fake_end
46+
47+
common.ret: ; preds = %fake_end, %sw.bb3437
48+
ret i64 0
49+
50+
sw.bb3437: ; preds = %entry
51+
%call34652 = call i32 @Callee1(ptr null, i32 %0)
52+
br label %common.ret
53+
54+
fake_end: ; preds = %entry
55+
%call57981 = call i32 @Callee1(ptr null, i32 0)
56+
br label %common.ret
57+
}
58+
59+
; Callee1 accesses @GlobV 11 times in total (5 loads and 6 stores, none of
; them volatile): 6 accesses in %for.body and 5 in %for.end. The remaining
; stores go through the pointer argument %rex and do not touch the global.
define i32 @Callee1(ptr %rex, i32 %parenfloor) #0 {
60+
entry:
61+
%cmp21 = icmp slt i32 %parenfloor, 0
62+
br i1 %cmp21, label %for.body, label %for.end
63+
64+
common.ret: ; preds = %for.end, %for.body
65+
ret i32 0
66+
67+
for.body: ; preds = %entry
68+
%0 = load i32, ptr @GlobV, align 4
69+
%inc = or i32 %0, 1
70+
store i32 %inc, ptr @GlobV, align 4
71+
store i64 0, ptr %rex, align 8
72+
%1 = load i32, ptr @GlobV, align 4
73+
%inc28 = or i32 %1, 1
74+
store i32 %inc28, ptr @GlobV, align 4
75+
store i64 0, ptr %rex, align 8
76+
%2 = load i32, ptr @GlobV, align 4
77+
%inc35 = or i32 %2, 1
78+
store i32 %inc35, ptr @GlobV, align 4
79+
store i32 0, ptr %rex, align 8
80+
br label %common.ret
81+
82+
for.end: ; preds = %entry
83+
store i32 0, ptr @GlobV, align 4
84+
store i32 0, ptr %rex, align 8
85+
%3 = load i32, ptr @GlobV, align 4
86+
%inc42 = or i32 %3, 1
87+
store i32 %inc42, ptr @GlobV, align 4
88+
store i32 0, ptr %rex, align 8
89+
%4 = load i32, ptr @GlobV, align 4
90+
%inc48 = or i32 %4, 1
91+
store i32 %inc48, ptr @GlobV, align 4
92+
br label %common.ret
93+
}
94+
95+
; Check that the inlining threshold is incremented for a function that is
96+
; accessing an alloca of the caller multiple times.
97+
;
98+
; CHECK: Inlining calls in: Caller2
99+
; CHECK: ++ SZTTI Adding inlining bonus: 550
100+
101+
; Caller2 passes its own alloca %A directly as the pointer argument of
; @Callee2.
define i1 @Caller2() {
102+
entry:
103+
%A = alloca [80 x i64], align 8
104+
call void @Callee2(ptr %A)
105+
ret i1 false
106+
}
107+
108+
; Callee2 only reads through %Arg: 3 loads use %Arg directly and 8 loads use a
; GEP based on %Arg, for 11 non-volatile loads and no stores. With a bonus of
; 50 per load this corresponds to the expected inlining bonus of 550.
define void @Callee2(ptr nocapture readonly %Arg) {
109+
entry:
110+
%nonzero = getelementptr i8, ptr %Arg, i64 48
111+
%0 = load i32, ptr %nonzero, align 8
112+
%tobool1.not = icmp eq i32 %0, 0
113+
br i1 %tobool1.not, label %if.else38, label %if.then2
114+
115+
if.then2: ; preds = %entry
116+
%1 = load i32, ptr %Arg, align 4
117+
%tobool4.not = icmp eq i32 %1, 0
118+
br i1 %tobool4.not, label %common.ret, label %if.then5
119+
120+
if.then5: ; preds = %if.then2
121+
%2 = load double, ptr %Arg, align 8
122+
%slab_den = getelementptr i8, ptr %Arg, i64 24
123+
%3 = load double, ptr %slab_den, align 8
124+
%mul = fmul double %2, %3
125+
%cmp = fcmp olt double %mul, 0.000000e+00
126+
br i1 %cmp, label %common.ret, label %if.end55
127+
128+
common.ret: ; preds = %if.end100, %if.else79, %if.end55, %if.else38, %if.then5, %if.then2
129+
ret void
130+
131+
if.else38: ; preds = %entry
132+
%4 = load double, ptr %Arg, align 8
133+
%cmp52 = fcmp ogt double %4, 0.000000e+00
134+
br i1 %cmp52, label %common.ret, label %if.end55
135+
136+
if.end55: ; preds = %if.else38, %if.then5
137+
%arrayidx57 = getelementptr i8, ptr %Arg, i64 52
138+
%5 = load i32, ptr %arrayidx57, align 4
139+
%tobool58.not = icmp eq i32 %5, 0
140+
br i1 %tobool58.not, label %common.ret, label %if.then59
141+
142+
if.then59: ; preds = %if.end55
143+
%arrayidx61 = getelementptr i8, ptr %Arg, i64 64
144+
%6 = load i32, ptr %arrayidx61, align 4
145+
%tobool62.not = icmp eq i32 %6, 0
146+
br i1 %tobool62.not, label %if.else79, label %if.end100
147+
148+
if.else79: ; preds = %if.then59
149+
%arrayidx84 = getelementptr i8, ptr %Arg, i64 8
150+
%7 = load double, ptr %arrayidx84, align 8
151+
%arrayidx87 = getelementptr i8, ptr %Arg, i64 32
152+
%8 = load double, ptr %arrayidx87, align 8
153+
%mul88 = fmul double %7, %8
154+
%9 = fcmp olt double %mul88, 0.000000e+00
155+
br i1 %9, label %common.ret, label %if.end100
156+
157+
if.end100: ; preds = %if.else79, %if.then59
158+
%arrayidx151 = getelementptr i8, ptr %Arg, i64 16
159+
%10 = load double, ptr %arrayidx151, align 8
160+
%arrayidx154 = getelementptr i8, ptr %Arg, i64 40
161+
%11 = load double, ptr %arrayidx154, align 8
162+
%mul155 = fmul double %10, %11
163+
%cmp181 = fcmp olt double %mul155, 0.000000e+00
164+
br label %common.ret
165+
}

0 commit comments

Comments
 (0)