@@ -54,12 +54,12 @@ static bool isUsedAsMemCpySource(const Value *V, bool &OtherUse) {
54
54
}
55
55
56
56
static void countNumMemAccesses (const Value *Ptr , unsigned &NumStores,
57
- unsigned &NumLoads, const Function *F = nullptr ) {
57
+ unsigned &NumLoads, const Function *F) {
58
58
if (!isa<PointerType>(Ptr ->getType ()))
59
59
return ;
60
60
for (const User *U : Ptr ->users ())
61
61
if (const Instruction *User = dyn_cast<Instruction>(U)) {
62
- if (User->getParent ()->getParent () == F || !F ) {
62
+ if (User->getParent ()->getParent () == F) {
63
63
if (const auto *SI = dyn_cast<StoreInst>(User)) {
64
64
if (SI->getPointerOperand () == Ptr && !SI->isVolatile ())
65
65
NumStores++;
@@ -70,220 +70,68 @@ static void countNumMemAccesses(const Value *Ptr, unsigned &NumStores,
70
70
}
71
71
else if (const auto *GEP = dyn_cast<GetElementPtrInst>(User)) {
72
72
if (GEP->getPointerOperand () == Ptr )
73
- countNumMemAccesses (GEP, NumStores, NumLoads);
73
+ countNumMemAccesses (GEP, NumStores, NumLoads, F );
74
74
}
75
75
}
76
76
}
77
77
}
78
78
79
- static unsigned usesAroundCall (const CallBase *CB, const GlobalVariable *GV) {
80
- unsigned Uses = 0 ;
81
- std::set<const Value *> Ptrs;
82
- Ptrs.insert (GV);
83
-
84
- const BasicBlock *BB = CB->getParent ();
85
- const unsigned CutOff = 20 ;
86
- BasicBlock::const_iterator II = CB->getIterator ();
87
- for (unsigned N = 0 ; N < CutOff && II != BB->begin (); N++)
88
- II--;
89
- BasicBlock::const_iterator EE = CB->getIterator ();
90
- for (unsigned N = 0 ; N < CutOff && EE != BB->end (); N++)
91
- EE++;
92
-
93
- for (; II != EE; ++II) {
94
- if (const auto *SI = dyn_cast<StoreInst>(II)) {
95
- if (Ptrs.count (SI->getPointerOperand ()) && !SI->isVolatile ())
96
- Uses++;
97
- }
98
- else if (const auto *LI = dyn_cast<LoadInst>(II)) {
99
- if (Ptrs.count (LI->getPointerOperand ()) && !LI->isVolatile ())
100
- Uses++;
101
- }
102
- else if (const auto *GEP = dyn_cast<GetElementPtrInst>(II)) {
103
- if (Ptrs.count (GEP->getPointerOperand ()))
104
- Ptrs.insert (GEP);
105
- }
106
- }
107
- return Uses;
108
- }
109
-
110
- static unsigned usesEntryExit (const Function *F, const GlobalVariable *GV) {
111
- unsigned Uses = 0 ;
112
- std::set<const Value *> Ptrs;
113
- Ptrs.insert (GV);
114
-
115
- const unsigned CutOff = 100 ;
116
- const BasicBlock *BB = &F->getEntryBlock ();
117
- unsigned N = 0 ;
118
- for (BasicBlock::const_iterator II = BB->begin ();
119
- II != BB->end () && N < CutOff; ++II, N++) {
120
- if (const auto *SI = dyn_cast<StoreInst>(II)) {
121
- if (Ptrs.count (SI->getPointerOperand ()) && !SI->isVolatile ())
122
- Uses++;
123
- }
124
- else if (const auto *LI = dyn_cast<LoadInst>(II)) {
125
- if (Ptrs.count (LI->getPointerOperand ()) && !LI->isVolatile ())
126
- Uses++;
127
- }
128
- else if (const auto *GEP = dyn_cast<GetElementPtrInst>(II)) {
129
- if (Ptrs.count (GEP->getPointerOperand ()))
130
- Ptrs.insert (GEP);
131
- }
132
- }
133
-
134
- Ptrs.clear ();
135
- Ptrs.insert (GV);
136
- unsigned ReturnBlockUses = 0 ;
137
- unsigned NumReturnBlocks = 0 ;
138
- for (auto &BBII : *F) {
139
- if (isa<ReturnInst>(BBII.getTerminator ())) {
140
- if (NumReturnBlocks++ > 0 ) {
141
- ReturnBlockUses = 0 ;
142
- break ;
143
- }
144
- BasicBlock::const_iterator EE = BBII.getTerminator ()->getIterator ();
145
- BasicBlock::const_iterator II = EE;
146
- for (unsigned N = 0 ; N < CutOff && II != BBII.begin (); N++)
147
- II--;
148
- for (; II != EE; ++II) {
149
- if (const auto *SI = dyn_cast<StoreInst>(II)) {
150
- if (Ptrs.count (SI->getPointerOperand ()) && !SI->isVolatile ())
151
- ReturnBlockUses++;
152
- }
153
- else if (const auto *LI = dyn_cast<LoadInst>(II)) {
154
- if (Ptrs.count (LI->getPointerOperand ()) && !LI->isVolatile ())
155
- ReturnBlockUses++;
156
- }
157
- else if (const auto *GEP = dyn_cast<GetElementPtrInst>(II)) {
158
- if (Ptrs.count (GEP->getPointerOperand ()))
159
- Ptrs.insert (GEP);
160
- }
161
- }
162
- }
163
- }
164
-
165
- return Uses + ReturnBlockUses;
166
- }
167
-
168
79
unsigned SystemZTTIImpl::adjustInliningThreshold (const CallBase *CB) const {
169
80
unsigned Bonus = 0 ;
170
-
171
-
172
- // dbgs() << "INSTRCOUNT: " << CB->getCalledFunction()->getInstructionCount()
173
- // << CB->getCalledFunction()->getName() << "\n";
174
- // if (CB->getCalledFunction()->getInstructionCount() == 216)
175
- // Bonus = 300;
176
-
177
- // if (Function *Callee = CB->getCalledFunction()) {
178
- // const char *CallerFunName = CB->getParent()->getParent()->getName().data();
179
- // const char *CalleeFunName = Callee->getName().data();
180
-
181
- // if (std::strcmp(CallerFunName , "S_regmatch") == 0) {
182
- // if (std::strcmp(CalleeFunName, "S_reghopmaybe3") == 0 || // less important
183
- // std::strcmp(CalleeFunName, "S_regcppop") == 0 ||
184
- // std::strcmp(CalleeFunName, "S_regcppush") == 0)
185
- // return 250;
186
- // }
187
- // }
188
-
189
- // Check inlining with memory accesses common to caller and callee
190
- // - Around call in caller? entry/exit blocks in callee?
191
- // - Globals used (much?) in both caller and callee
192
- // - Specific type of pattern: load; inc/dec; store ?
193
- // - non-volatile loads/stores?
194
- // - int/fp loads/stores? ptr?
195
- // - num occurrences in caller?
196
- // - or specifically 2+ functions inlined if many common accesses?
197
- // - specifically 2+ functions getting same address as argument (ptr)?
198
- // - (ptr-args generally?)
199
- if (const Function *Callee = CB->getCalledFunction ()) {
200
- const Function *Caller = CB->getParent ()->getParent ();
201
- const Module *M = Caller->getParent ();
202
- std::set<const GlobalVariable *> CalleeGlobals;
203
- std::set<const GlobalVariable *> CallerGlobals;
204
- for (const GlobalVariable &Global : M->globals ())
205
- for (const User *U : Global.users ())
206
- if (const Instruction *User = dyn_cast<Instruction>(U)) {
207
- if (User->getParent ()->getParent () == Callee)
208
- CalleeGlobals.insert (&Global);
209
- if (User->getParent ()->getParent () == Caller)
210
- CallerGlobals.insert (&Global);
211
- }
212
-
213
- for (auto *GV : CalleeGlobals)
214
- if (CallerGlobals.count (GV)) {
215
- unsigned CalleeStores = 0 , CalleeLoads = 0 ;
216
- unsigned CallerStores = 0 , CallerLoads = 0 ;
217
- countNumMemAccesses (GV, CalleeStores, CalleeLoads, Callee);
218
- countNumMemAccesses (GV, CallerStores, CallerLoads, Caller);
219
- if ((CalleeStores || CalleeLoads) && (CallerStores || CallerLoads)) {
220
- // dbgs() << "GV: @" << GV->getName()
221
- // << " " << *GV->getValueType()
222
- // << " Callee: " << Callee->getName() << " S: " << CalleeStores
223
- // << " L: " << CalleeLoads << " MEE: " << (CalleeStores + CalleeLoads)
224
- // << " Callee-size: " << Callee->getInstructionCount()
225
- // << " Caller: " << Caller->getName() << " S: " << CallerStores
226
- // << " L: " << CallerLoads << " MER: " << (CallerStores + CallerLoads)
227
- // << " Uses-around-call: " << usesAroundCall(CB, GV)
228
- // << " Uses-entry-exit-callee: " << usesEntryExit(Callee, GV)
229
- // << "\n";
230
-
231
- // const char *CallerFunName = CB->getParent()->getParent()->getName().data();
232
- // const char *CalleeFunName = Callee->getName().data();
233
- // if (std::strcmp(CallerFunName , "S_regmatch") == 0) {
234
- // if (std::strcmp(CalleeFunName, "S_regcppop") == 0) {
235
- // return 250;
236
- // }
237
- // if (std::strcmp(CalleeFunName, "S_regcppush") == 0) {
238
- // return 250;
239
- // }
240
- if (// usesEntryExit(Callee, GV) >= 5 &&
241
- Callee->getInstructionCount () < 250 &&
242
-
243
- // (CalleeStores >= 5 && CalleeLoads >= 5) &&
244
- (CalleeStores + CalleeLoads) > 10 &&
245
-
246
- // CallerLoads > 25)
247
- (CallerStores + CallerLoads) > 10 )
248
- return 500 ;
249
-
250
- // if
251
- // if ((CallerStores + CallerLoads) > 25)
252
- // if (CallerLoads) > 25)
253
-
254
- // }
255
- }
256
- }
257
- }
81
+ const Function *Caller = CB->getParent ()->getParent ();
82
+ const Function *Callee = CB->getCalledFunction ();
83
+ if (!Callee)
84
+ return 0 ;
85
+ const Module *M = Caller->getParent ();
258
86
259
87
// Increase the threshold if an incoming argument is used only as a memcpy
260
88
// source.
261
- if (Function *Callee = CB-> getCalledFunction ())
262
- for (Argument &Arg : Callee-> args ()) {
263
- bool OtherUse = false ;
264
- if ( isUsedAsMemCpySource (&Arg, OtherUse) && !OtherUse)
265
- Bonus += 1000 ;
89
+ for ( const Argument &Arg : Callee-> args ()) {
90
+ bool OtherUse = false ;
91
+ if ( isUsedAsMemCpySource (&Arg, OtherUse) && !OtherUse) {
92
+ Bonus = 1000 ;
93
+ break ;
266
94
}
95
+ }
267
96
268
- if (!Bonus) {
269
- if (Function *Callee = CB->getCalledFunction ()) {
270
- unsigned NumStores = 0 ;
271
- unsigned NumLoads = 0 ;
272
- for (unsigned OpIdx = 0 ; OpIdx != Callee->arg_size (); ++OpIdx) {
273
- Value *CallerArg = CB->getArgOperand (OpIdx);
274
- Argument *CalleeArg = Callee->getArg (OpIdx);
275
- if (isa<AllocaInst>(CallerArg))
276
- countNumMemAccesses (CalleeArg, NumStores, NumLoads);
97
+ // Give bonus for globals used much in both caller and callee.
98
+ std::set<const GlobalVariable *> CalleeGlobals;
99
+ std::set<const GlobalVariable *> CallerGlobals;
100
+ for (const GlobalVariable &Global : M->globals ())
101
+ for (const User *U : Global.users ())
102
+ if (const Instruction *User = dyn_cast<Instruction>(U)) {
103
+ if (User->getParent ()->getParent () == Callee)
104
+ CalleeGlobals.insert (&Global);
105
+ if (User->getParent ()->getParent () == Caller)
106
+ CallerGlobals.insert (&Global);
107
+ }
108
+ for (auto *GV : CalleeGlobals)
109
+ if (CallerGlobals.count (GV)) {
110
+ unsigned CalleeStores = 0 , CalleeLoads = 0 ;
111
+ unsigned CallerStores = 0 , CallerLoads = 0 ;
112
+ countNumMemAccesses (GV, CalleeStores, CalleeLoads, Callee);
113
+ countNumMemAccesses (GV, CallerStores, CallerLoads, Caller);
114
+ if ((CalleeStores + CalleeLoads) > 10 &&
115
+ (CallerStores + CallerLoads) > 10 ) {
116
+ Bonus = 1000 ;
117
+ break ;
277
118
}
278
- // dbgs() << "NUM: " << NumStores << " " << NumLoads << "\n";
279
- // Best on povray, but not doing stores slightly better on blender.
280
- if (NumLoads > 10 )
281
- Bonus += NumLoads * 50 ;
282
- if (NumStores > 10 )
283
- Bonus += NumStores * 50 ;
284
- Bonus = std::min (Bonus, unsigned (1000 ));
285
119
}
120
+
121
+ // Give bonus when Callee accesses an Alloca of Caller heavily.
122
+ unsigned NumStores = 0 ;
123
+ unsigned NumLoads = 0 ;
124
+ for (unsigned OpIdx = 0 ; OpIdx != Callee->arg_size (); ++OpIdx) {
125
+ Value *CallerArg = CB->getArgOperand (OpIdx);
126
+ Argument *CalleeArg = Callee->getArg (OpIdx);
127
+ if (isa<AllocaInst>(CallerArg))
128
+ countNumMemAccesses (CalleeArg, NumStores, NumLoads, Callee);
286
129
}
130
+ if (NumLoads > 10 )
131
+ Bonus += NumLoads * 50 ;
132
+ if (NumStores > 10 )
133
+ Bonus += NumStores * 50 ;
134
+ Bonus = std::min (Bonus, unsigned (1000 ));
287
135
288
136
LLVM_DEBUG (if (Bonus)
289
137
dbgs () << " ++ SZTTI Adding inlining bonus: " << Bonus << " \n " ;);
0 commit comments