@@ -53,20 +53,241 @@ static bool isUsedAsMemCpySource(const Value *V, bool &OtherUse) {
53
53
return UsedAsMemCpySource;
54
54
}
55
55
56
+ static void countNumMemAccesses (const Value *Ptr , unsigned &NumStores,
57
+ unsigned &NumLoads, const Function *F = nullptr ) {
58
+ if (!isa<PointerType>(Ptr ->getType ()))
59
+ return ;
60
+ for (const User *U : Ptr ->users ())
61
+ if (const Instruction *User = dyn_cast<Instruction>(U)) {
62
+ if (User->getParent ()->getParent () == F || !F) {
63
+ if (const auto *SI = dyn_cast<StoreInst>(User)) {
64
+ if (SI->getPointerOperand () == Ptr && !SI->isVolatile ())
65
+ NumStores++;
66
+ }
67
+ else if (const auto *LI = dyn_cast<LoadInst>(User)) {
68
+ if (LI->getPointerOperand () == Ptr && !LI->isVolatile ())
69
+ NumLoads++;
70
+ }
71
+ else if (const auto *GEP = dyn_cast<GetElementPtrInst>(User)) {
72
+ if (GEP->getPointerOperand () == Ptr )
73
+ countNumMemAccesses (GEP, NumStores, NumLoads);
74
+ }
75
+ }
76
+ }
77
+ }
78
+
79
+ static unsigned usesAroundCall (const CallBase *CB, const GlobalVariable *GV) {
80
+ unsigned Uses = 0 ;
81
+ std::set<const Value *> Ptrs;
82
+ Ptrs.insert (GV);
83
+
84
+ const BasicBlock *BB = CB->getParent ();
85
+ const unsigned CutOff = 20 ;
86
+ BasicBlock::const_iterator II = CB->getIterator ();
87
+ for (unsigned N = 0 ; N < CutOff && II != BB->begin (); N++)
88
+ II--;
89
+ BasicBlock::const_iterator EE = CB->getIterator ();
90
+ for (unsigned N = 0 ; N < CutOff && EE != BB->end (); N++)
91
+ EE++;
92
+
93
+ for (; II != EE; ++II) {
94
+ if (const auto *SI = dyn_cast<StoreInst>(II)) {
95
+ if (Ptrs.count (SI->getPointerOperand ()) && !SI->isVolatile ())
96
+ Uses++;
97
+ }
98
+ else if (const auto *LI = dyn_cast<LoadInst>(II)) {
99
+ if (Ptrs.count (LI->getPointerOperand ()) && !LI->isVolatile ())
100
+ Uses++;
101
+ }
102
+ else if (const auto *GEP = dyn_cast<GetElementPtrInst>(II)) {
103
+ if (Ptrs.count (GEP->getPointerOperand ()))
104
+ Ptrs.insert (GEP);
105
+ }
106
+ }
107
+ return Uses;
108
+ }
109
+
110
+ static unsigned usesEntryExit (const Function *F, const GlobalVariable *GV) {
111
+ unsigned Uses = 0 ;
112
+ std::set<const Value *> Ptrs;
113
+ Ptrs.insert (GV);
114
+
115
+ const unsigned CutOff = 100 ;
116
+ const BasicBlock *BB = &F->getEntryBlock ();
117
+ unsigned N = 0 ;
118
+ for (BasicBlock::const_iterator II = BB->begin ();
119
+ II != BB->end () && N < CutOff; ++II, N++) {
120
+ if (const auto *SI = dyn_cast<StoreInst>(II)) {
121
+ if (Ptrs.count (SI->getPointerOperand ()) && !SI->isVolatile ())
122
+ Uses++;
123
+ }
124
+ else if (const auto *LI = dyn_cast<LoadInst>(II)) {
125
+ if (Ptrs.count (LI->getPointerOperand ()) && !LI->isVolatile ())
126
+ Uses++;
127
+ }
128
+ else if (const auto *GEP = dyn_cast<GetElementPtrInst>(II)) {
129
+ if (Ptrs.count (GEP->getPointerOperand ()))
130
+ Ptrs.insert (GEP);
131
+ }
132
+ }
133
+
134
+ Ptrs.clear ();
135
+ Ptrs.insert (GV);
136
+ unsigned ReturnBlockUses = 0 ;
137
+ unsigned NumReturnBlocks = 0 ;
138
+ for (auto &BBII : *F) {
139
+ if (isa<ReturnInst>(BBII.getTerminator ())) {
140
+ if (NumReturnBlocks++ > 0 ) {
141
+ ReturnBlockUses = 0 ;
142
+ break ;
143
+ }
144
+ BasicBlock::const_iterator EE = BBII.getTerminator ()->getIterator ();
145
+ BasicBlock::const_iterator II = EE;
146
+ for (unsigned N = 0 ; N < CutOff && II != BBII.begin (); N++)
147
+ II--;
148
+ for (; II != EE; ++II) {
149
+ if (const auto *SI = dyn_cast<StoreInst>(II)) {
150
+ if (Ptrs.count (SI->getPointerOperand ()) && !SI->isVolatile ())
151
+ ReturnBlockUses++;
152
+ }
153
+ else if (const auto *LI = dyn_cast<LoadInst>(II)) {
154
+ if (Ptrs.count (LI->getPointerOperand ()) && !LI->isVolatile ())
155
+ ReturnBlockUses++;
156
+ }
157
+ else if (const auto *GEP = dyn_cast<GetElementPtrInst>(II)) {
158
+ if (Ptrs.count (GEP->getPointerOperand ()))
159
+ Ptrs.insert (GEP);
160
+ }
161
+ }
162
+ }
163
+ }
164
+
165
+ return Uses + ReturnBlockUses;
166
+ }
167
+
56
168
unsigned SystemZTTIImpl::adjustInliningThreshold (const CallBase *CB) const {
57
169
unsigned Bonus = 0 ;
58
170
171
+
172
+ // dbgs() << "INSTRCOUNT: " << CB->getCalledFunction()->getInstructionCount()
173
+ // << CB->getCalledFunction()->getName() << "\n";
174
+ // if (CB->getCalledFunction()->getInstructionCount() == 216)
175
+ // Bonus = 300;
176
+
177
+ // if (Function *Callee = CB->getCalledFunction()) {
178
+ // const char *CallerFunName = CB->getParent()->getParent()->getName().data();
179
+ // const char *CalleeFunName = Callee->getName().data();
180
+
181
+ // if (std::strcmp(CallerFunName , "S_regmatch") == 0) {
182
+ // if (std::strcmp(CalleeFunName, "S_reghopmaybe3") == 0 || // less important
183
+ // std::strcmp(CalleeFunName, "S_regcppop") == 0 ||
184
+ // std::strcmp(CalleeFunName, "S_regcppush") == 0)
185
+ // return 250;
186
+ // }
187
+ // }
188
+
189
+ // Check inlining with memory accesses common to caller and callee
190
+ // - Around call in caller? entry/exit blocks in callee?
191
+ // - Globals used (much?) in both caller and callee
192
+ // - Specific type of pattern: load; inc/dec; store ?
193
+ // - non-volatile loads/stores?
194
+ // - int/fp loads/stores? ptr?
195
+ // - num occurences in caller?
196
+ // - or specifically 2+ functions inlined if many common accesses?
197
+ // - specifically 2+ functions getting same adress as argument (ptr)?
198
+ // - (ptr-args generally?)
199
+ if (const Function *Callee = CB->getCalledFunction ()) {
200
+ const Function *Caller = CB->getParent ()->getParent ();
201
+ const Module *M = Caller->getParent ();
202
+ std::set<const GlobalVariable *> CalleeGlobals;
203
+ std::set<const GlobalVariable *> CallerGlobals;
204
+ for (const GlobalVariable &Global : M->globals ())
205
+ for (const User *U : Global.users ())
206
+ if (const Instruction *User = dyn_cast<Instruction>(U)) {
207
+ if (User->getParent ()->getParent () == Callee)
208
+ CalleeGlobals.insert (&Global);
209
+ if (User->getParent ()->getParent () == Caller)
210
+ CallerGlobals.insert (&Global);
211
+ }
212
+
213
+ for (auto *GV : CalleeGlobals)
214
+ if (CallerGlobals.count (GV)) {
215
+ unsigned CalleeStores = 0 , CalleeLoads = 0 ;
216
+ unsigned CallerStores = 0 , CallerLoads = 0 ;
217
+ countNumMemAccesses (GV, CalleeStores, CalleeLoads, Callee);
218
+ countNumMemAccesses (GV, CallerStores, CallerLoads, Caller);
219
+ if ((CalleeStores || CalleeLoads) && (CallerStores || CallerLoads)) {
220
+ // dbgs() << "GV: @" << GV->getName()
221
+ // << " " << *GV->getValueType()
222
+ // << " Callee: " << Callee->getName() << " S: " << CalleeStores
223
+ // << " L: " << CalleeLoads << " MEE: " << (CalleeStores + CalleeLoads)
224
+ // << " Callee-size: " << Callee->getInstructionCount()
225
+ // << " Caller: " << Caller->getName() << " S: " << CallerStores
226
+ // << " L: " << CallerLoads << " MER: " << (CallerStores + CallerLoads)
227
+ // << " Uses-around-call: " << usesAroundCall(CB, GV)
228
+ // << " Uses-entry-exit-callee: " << usesEntryExit(Callee, GV)
229
+ // << "\n";
230
+
231
+ // const char *CallerFunName = CB->getParent()->getParent()->getName().data();
232
+ // const char *CalleeFunName = Callee->getName().data();
233
+ // if (std::strcmp(CallerFunName , "S_regmatch") == 0) {
234
+ // if (std::strcmp(CalleeFunName, "S_regcppop") == 0) {
235
+ // return 250;
236
+ // }
237
+ // if (std::strcmp(CalleeFunName, "S_regcppush") == 0) {
238
+ // return 250;
239
+ // }
240
+ if (// usesEntryExit(Callee, GV) >= 5 &&
241
+ Callee->getInstructionCount () < 250 &&
242
+
243
+ // (CalleeStores >= 5 && CalleeLoads >= 5) &&
244
+ (CalleeStores + CalleeLoads) > 10 &&
245
+
246
+ // CallerLoads > 25)
247
+ (CallerStores + CallerLoads) > 10 )
248
+ return 500 ;
249
+
250
+ // if
251
+ // if ((CallerStores + CallerLoads) > 25)
252
+ // if (CallerLoads) > 25)
253
+
254
+ // }
255
+ }
256
+ }
257
+ }
258
+
59
259
// Increase the threshold if an incoming argument is used only as a memcpy
60
260
// source.
61
261
if (Function *Callee = CB->getCalledFunction ())
62
262
for (Argument &Arg : Callee->args ()) {
63
263
bool OtherUse = false ;
64
264
if (isUsedAsMemCpySource (&Arg, OtherUse) && !OtherUse)
65
- Bonus += 150 ;
265
+ Bonus += 1000 ;
66
266
}
67
267
268
+ if (!Bonus) {
269
+ if (Function *Callee = CB->getCalledFunction ()) {
270
+ unsigned NumStores = 0 ;
271
+ unsigned NumLoads = 0 ;
272
+ for (unsigned OpIdx = 0 ; OpIdx != Callee->arg_size (); ++OpIdx) {
273
+ Value *CallerArg = CB->getArgOperand (OpIdx);
274
+ Argument *CalleeArg = Callee->getArg (OpIdx);
275
+ if (isa<AllocaInst>(CallerArg))
276
+ countNumMemAccesses (CalleeArg, NumStores, NumLoads);
277
+ }
278
+ // dbgs() << "NUM: " << NumStores << " " << NumLoads << "\n";
279
+ // Best on povray, but not doing stores slightly better on blender.
280
+ if (NumLoads > 10 )
281
+ Bonus += NumLoads * 50 ;
282
+ if (NumStores > 10 )
283
+ Bonus += NumStores * 50 ;
284
+ Bonus = std::min (Bonus, unsigned (1000 ));
285
+ }
286
+ }
287
+
68
288
LLVM_DEBUG (if (Bonus)
69
289
dbgs () << " ++ SZTTI Adding inlining bonus: " << Bonus << " \n " ;);
290
+
70
291
return Bonus;
71
292
}
72
293
0 commit comments