Skip to content

Commit def3693

Browse files
committed
Experiments
1 parent 00def06 commit def3693

File tree

2 files changed

+223
-2
lines changed

2 files changed

+223
-2
lines changed

llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp

+222-1
Original file line numberDiff line numberDiff line change
@@ -53,20 +53,241 @@ static bool isUsedAsMemCpySource(const Value *V, bool &OtherUse) {
5353
return UsedAsMemCpySource;
5454
}
5555

56+
static void countNumMemAccesses(const Value *Ptr, unsigned &NumStores,
57+
unsigned &NumLoads, const Function *F = nullptr) {
58+
if (!isa<PointerType>(Ptr->getType()))
59+
return;
60+
for (const User *U : Ptr->users())
61+
if (const Instruction *User = dyn_cast<Instruction>(U)) {
62+
if (User->getParent()->getParent() == F || !F) {
63+
if (const auto *SI = dyn_cast<StoreInst>(User)) {
64+
if (SI->getPointerOperand() == Ptr && !SI->isVolatile())
65+
NumStores++;
66+
}
67+
else if (const auto *LI = dyn_cast<LoadInst>(User)) {
68+
if (LI->getPointerOperand() == Ptr && !LI->isVolatile())
69+
NumLoads++;
70+
}
71+
else if (const auto *GEP = dyn_cast<GetElementPtrInst>(User)) {
72+
if (GEP->getPointerOperand() == Ptr)
73+
countNumMemAccesses(GEP, NumStores, NumLoads);
74+
}
75+
}
76+
}
77+
}
78+
79+
static unsigned usesAroundCall(const CallBase *CB, const GlobalVariable *GV) {
80+
unsigned Uses = 0;
81+
std::set<const Value *> Ptrs;
82+
Ptrs.insert(GV);
83+
84+
const BasicBlock *BB = CB->getParent();
85+
const unsigned CutOff = 20;
86+
BasicBlock::const_iterator II = CB->getIterator();
87+
for (unsigned N = 0; N < CutOff && II != BB->begin(); N++)
88+
II--;
89+
BasicBlock::const_iterator EE = CB->getIterator();
90+
for (unsigned N = 0; N < CutOff && EE != BB->end(); N++)
91+
EE++;
92+
93+
for (; II != EE; ++II) {
94+
if (const auto *SI = dyn_cast<StoreInst>(II)) {
95+
if (Ptrs.count(SI->getPointerOperand()) && !SI->isVolatile())
96+
Uses++;
97+
}
98+
else if (const auto *LI = dyn_cast<LoadInst>(II)) {
99+
if (Ptrs.count(LI->getPointerOperand()) && !LI->isVolatile())
100+
Uses++;
101+
}
102+
else if (const auto *GEP = dyn_cast<GetElementPtrInst>(II)) {
103+
if (Ptrs.count(GEP->getPointerOperand()))
104+
Ptrs.insert(GEP);
105+
}
106+
}
107+
return Uses;
108+
}
109+
110+
static unsigned usesEntryExit(const Function *F, const GlobalVariable *GV) {
111+
unsigned Uses = 0;
112+
std::set<const Value *> Ptrs;
113+
Ptrs.insert(GV);
114+
115+
const unsigned CutOff = 100;
116+
const BasicBlock *BB = &F->getEntryBlock();
117+
unsigned N = 0;
118+
for (BasicBlock::const_iterator II = BB->begin();
119+
II != BB->end() && N < CutOff; ++II, N++) {
120+
if (const auto *SI = dyn_cast<StoreInst>(II)) {
121+
if (Ptrs.count(SI->getPointerOperand()) && !SI->isVolatile())
122+
Uses++;
123+
}
124+
else if (const auto *LI = dyn_cast<LoadInst>(II)) {
125+
if (Ptrs.count(LI->getPointerOperand()) && !LI->isVolatile())
126+
Uses++;
127+
}
128+
else if (const auto *GEP = dyn_cast<GetElementPtrInst>(II)) {
129+
if (Ptrs.count(GEP->getPointerOperand()))
130+
Ptrs.insert(GEP);
131+
}
132+
}
133+
134+
Ptrs.clear();
135+
Ptrs.insert(GV);
136+
unsigned ReturnBlockUses = 0;
137+
unsigned NumReturnBlocks = 0;
138+
for (auto &BBII : *F) {
139+
if (isa<ReturnInst>(BBII.getTerminator())) {
140+
if (NumReturnBlocks++ > 0) {
141+
ReturnBlockUses = 0;
142+
break;
143+
}
144+
BasicBlock::const_iterator EE = BBII.getTerminator()->getIterator();
145+
BasicBlock::const_iterator II = EE;
146+
for (unsigned N = 0; N < CutOff && II != BBII.begin(); N++)
147+
II--;
148+
for (; II != EE; ++II) {
149+
if (const auto *SI = dyn_cast<StoreInst>(II)) {
150+
if (Ptrs.count(SI->getPointerOperand()) && !SI->isVolatile())
151+
ReturnBlockUses++;
152+
}
153+
else if (const auto *LI = dyn_cast<LoadInst>(II)) {
154+
if (Ptrs.count(LI->getPointerOperand()) && !LI->isVolatile())
155+
ReturnBlockUses++;
156+
}
157+
else if (const auto *GEP = dyn_cast<GetElementPtrInst>(II)) {
158+
if (Ptrs.count(GEP->getPointerOperand()))
159+
Ptrs.insert(GEP);
160+
}
161+
}
162+
}
163+
}
164+
165+
return Uses + ReturnBlockUses;
166+
}
167+
56168
unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
57169
unsigned Bonus = 0;
58170

171+
172+
// dbgs() << "INSTRCOUNT: " << CB->getCalledFunction()->getInstructionCount()
173+
// << CB->getCalledFunction()->getName() << "\n";
174+
// if (CB->getCalledFunction()->getInstructionCount() == 216)
175+
// Bonus = 300;
176+
177+
// if (Function *Callee = CB->getCalledFunction()) {
178+
// const char *CallerFunName = CB->getParent()->getParent()->getName().data();
179+
// const char *CalleeFunName = Callee->getName().data();
180+
181+
// if (std::strcmp(CallerFunName , "S_regmatch") == 0) {
182+
// if (std::strcmp(CalleeFunName, "S_reghopmaybe3") == 0 || // less important
183+
// std::strcmp(CalleeFunName, "S_regcppop") == 0 ||
184+
// std::strcmp(CalleeFunName, "S_regcppush") == 0)
185+
// return 250;
186+
// }
187+
// }
188+
189+
// Check inlining with memory accesses common to caller and callee
190+
// - Around call in caller? entry/exit blocks in callee?
191+
// - Globals used (much?) in both caller and callee
192+
// - Specific type of pattern: load; inc/dec; store ?
193+
// - non-volatile loads/stores?
194+
// - int/fp loads/stores? ptr?
195+
// - num occurences in caller?
196+
// - or specifically 2+ functions inlined if many common accesses?
197+
// - specifically 2+ functions getting same adress as argument (ptr)?
198+
// - (ptr-args generally?)
199+
if (const Function *Callee = CB->getCalledFunction()) {
200+
const Function *Caller = CB->getParent()->getParent();
201+
const Module *M = Caller->getParent();
202+
std::set<const GlobalVariable *> CalleeGlobals;
203+
std::set<const GlobalVariable *> CallerGlobals;
204+
for (const GlobalVariable &Global : M->globals())
205+
for (const User *U : Global.users())
206+
if (const Instruction *User = dyn_cast<Instruction>(U)) {
207+
if (User->getParent()->getParent() == Callee)
208+
CalleeGlobals.insert(&Global);
209+
if (User->getParent()->getParent() == Caller)
210+
CallerGlobals.insert(&Global);
211+
}
212+
213+
for (auto *GV : CalleeGlobals)
214+
if (CallerGlobals.count(GV)) {
215+
unsigned CalleeStores = 0, CalleeLoads = 0;
216+
unsigned CallerStores = 0, CallerLoads = 0;
217+
countNumMemAccesses(GV, CalleeStores, CalleeLoads, Callee);
218+
countNumMemAccesses(GV, CallerStores, CallerLoads, Caller);
219+
if ((CalleeStores || CalleeLoads) && (CallerStores || CallerLoads)) {
220+
// dbgs() << "GV: @" << GV->getName()
221+
// << " " << *GV->getValueType()
222+
// << " Callee: " << Callee->getName() << " S: " << CalleeStores
223+
// << " L: " << CalleeLoads << " MEE: " << (CalleeStores + CalleeLoads)
224+
// << " Callee-size: " << Callee->getInstructionCount()
225+
// << " Caller: " << Caller->getName() << " S: " << CallerStores
226+
// << " L: " << CallerLoads << " MER: " << (CallerStores + CallerLoads)
227+
// << " Uses-around-call: " << usesAroundCall(CB, GV)
228+
// << " Uses-entry-exit-callee: " << usesEntryExit(Callee, GV)
229+
// << "\n";
230+
231+
// const char *CallerFunName = CB->getParent()->getParent()->getName().data();
232+
// const char *CalleeFunName = Callee->getName().data();
233+
// if (std::strcmp(CallerFunName , "S_regmatch") == 0) {
234+
// if (std::strcmp(CalleeFunName, "S_regcppop") == 0) {
235+
// return 250;
236+
// }
237+
// if (std::strcmp(CalleeFunName, "S_regcppush") == 0) {
238+
// return 250;
239+
// }
240+
if (//usesEntryExit(Callee, GV) >= 5 &&
241+
Callee->getInstructionCount() < 250 &&
242+
243+
// (CalleeStores >= 5 && CalleeLoads >= 5) &&
244+
(CalleeStores + CalleeLoads) > 10 &&
245+
246+
// CallerLoads > 25)
247+
(CallerStores + CallerLoads) > 10)
248+
return 500;
249+
250+
// if
251+
// if ((CallerStores + CallerLoads) > 25)
252+
// if (CallerLoads) > 25)
253+
254+
//}
255+
}
256+
}
257+
}
258+
59259
// Increase the threshold if an incoming argument is used only as a memcpy
60260
// source.
61261
if (Function *Callee = CB->getCalledFunction())
62262
for (Argument &Arg : Callee->args()) {
63263
bool OtherUse = false;
64264
if (isUsedAsMemCpySource(&Arg, OtherUse) && !OtherUse)
65-
Bonus += 150;
265+
Bonus += 1000;
66266
}
67267

268+
if (!Bonus) {
269+
if (Function *Callee = CB->getCalledFunction()) {
270+
unsigned NumStores = 0;
271+
unsigned NumLoads = 0;
272+
for (unsigned OpIdx = 0; OpIdx != Callee->arg_size(); ++OpIdx) {
273+
Value *CallerArg = CB->getArgOperand(OpIdx);
274+
Argument *CalleeArg = Callee->getArg(OpIdx);
275+
if (isa<AllocaInst>(CallerArg))
276+
countNumMemAccesses(CalleeArg, NumStores, NumLoads);
277+
}
278+
// dbgs() << "NUM: " << NumStores << " " << NumLoads << "\n";
279+
// Best on povray, but not doing stores slightly better on blender.
280+
if (NumLoads > 10)
281+
Bonus += NumLoads * 50;
282+
if (NumStores > 10)
283+
Bonus += NumStores * 50;
284+
Bonus = std::min(Bonus, unsigned(1000));
285+
}
286+
}
287+
68288
LLVM_DEBUG(if (Bonus)
69289
dbgs() << "++ SZTTI Adding inlining bonus: " << Bonus << "\n";);
290+
70291
return Bonus;
71292
}
72293

llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
3838
/// \name Scalar TTI Implementations
3939
/// @{
4040

41-
unsigned getInliningThresholdMultiplier() const { return 3; }
41+
unsigned getInliningThresholdMultiplier() const { return 1; }
4242
unsigned adjustInliningThreshold(const CallBase *CB) const;
4343

4444
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,

0 commit comments

Comments
 (0)