Skip to content

Commit 52682fe

Browse files
committed
Rewrite
1 parent def3693 commit 52682fe

File tree

2 files changed

+185
-204
lines changed

2 files changed

+185
-204
lines changed

llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp

+49-201
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,12 @@ static bool isUsedAsMemCpySource(const Value *V, bool &OtherUse) {
5454
}
5555

5656
static void countNumMemAccesses(const Value *Ptr, unsigned &NumStores,
57-
unsigned &NumLoads, const Function *F = nullptr) {
57+
unsigned &NumLoads, const Function *F) {
5858
if (!isa<PointerType>(Ptr->getType()))
5959
return;
6060
for (const User *U : Ptr->users())
6161
if (const Instruction *User = dyn_cast<Instruction>(U)) {
62-
if (User->getParent()->getParent() == F || !F) {
62+
if (User->getParent()->getParent() == F) {
6363
if (const auto *SI = dyn_cast<StoreInst>(User)) {
6464
if (SI->getPointerOperand() == Ptr && !SI->isVolatile())
6565
NumStores++;
@@ -70,220 +70,68 @@ static void countNumMemAccesses(const Value *Ptr, unsigned &NumStores,
7070
}
7171
else if (const auto *GEP = dyn_cast<GetElementPtrInst>(User)) {
7272
if (GEP->getPointerOperand() == Ptr)
73-
countNumMemAccesses(GEP, NumStores, NumLoads);
73+
countNumMemAccesses(GEP, NumStores, NumLoads, F);
7474
}
7575
}
7676
}
7777
}
7878

79-
static unsigned usesAroundCall(const CallBase *CB, const GlobalVariable *GV) {
80-
unsigned Uses = 0;
81-
std::set<const Value *> Ptrs;
82-
Ptrs.insert(GV);
83-
84-
const BasicBlock *BB = CB->getParent();
85-
const unsigned CutOff = 20;
86-
BasicBlock::const_iterator II = CB->getIterator();
87-
for (unsigned N = 0; N < CutOff && II != BB->begin(); N++)
88-
II--;
89-
BasicBlock::const_iterator EE = CB->getIterator();
90-
for (unsigned N = 0; N < CutOff && EE != BB->end(); N++)
91-
EE++;
92-
93-
for (; II != EE; ++II) {
94-
if (const auto *SI = dyn_cast<StoreInst>(II)) {
95-
if (Ptrs.count(SI->getPointerOperand()) && !SI->isVolatile())
96-
Uses++;
97-
}
98-
else if (const auto *LI = dyn_cast<LoadInst>(II)) {
99-
if (Ptrs.count(LI->getPointerOperand()) && !LI->isVolatile())
100-
Uses++;
101-
}
102-
else if (const auto *GEP = dyn_cast<GetElementPtrInst>(II)) {
103-
if (Ptrs.count(GEP->getPointerOperand()))
104-
Ptrs.insert(GEP);
105-
}
106-
}
107-
return Uses;
108-
}
109-
110-
static unsigned usesEntryExit(const Function *F, const GlobalVariable *GV) {
111-
unsigned Uses = 0;
112-
std::set<const Value *> Ptrs;
113-
Ptrs.insert(GV);
114-
115-
const unsigned CutOff = 100;
116-
const BasicBlock *BB = &F->getEntryBlock();
117-
unsigned N = 0;
118-
for (BasicBlock::const_iterator II = BB->begin();
119-
II != BB->end() && N < CutOff; ++II, N++) {
120-
if (const auto *SI = dyn_cast<StoreInst>(II)) {
121-
if (Ptrs.count(SI->getPointerOperand()) && !SI->isVolatile())
122-
Uses++;
123-
}
124-
else if (const auto *LI = dyn_cast<LoadInst>(II)) {
125-
if (Ptrs.count(LI->getPointerOperand()) && !LI->isVolatile())
126-
Uses++;
127-
}
128-
else if (const auto *GEP = dyn_cast<GetElementPtrInst>(II)) {
129-
if (Ptrs.count(GEP->getPointerOperand()))
130-
Ptrs.insert(GEP);
131-
}
132-
}
133-
134-
Ptrs.clear();
135-
Ptrs.insert(GV);
136-
unsigned ReturnBlockUses = 0;
137-
unsigned NumReturnBlocks = 0;
138-
for (auto &BBII : *F) {
139-
if (isa<ReturnInst>(BBII.getTerminator())) {
140-
if (NumReturnBlocks++ > 0) {
141-
ReturnBlockUses = 0;
142-
break;
143-
}
144-
BasicBlock::const_iterator EE = BBII.getTerminator()->getIterator();
145-
BasicBlock::const_iterator II = EE;
146-
for (unsigned N = 0; N < CutOff && II != BBII.begin(); N++)
147-
II--;
148-
for (; II != EE; ++II) {
149-
if (const auto *SI = dyn_cast<StoreInst>(II)) {
150-
if (Ptrs.count(SI->getPointerOperand()) && !SI->isVolatile())
151-
ReturnBlockUses++;
152-
}
153-
else if (const auto *LI = dyn_cast<LoadInst>(II)) {
154-
if (Ptrs.count(LI->getPointerOperand()) && !LI->isVolatile())
155-
ReturnBlockUses++;
156-
}
157-
else if (const auto *GEP = dyn_cast<GetElementPtrInst>(II)) {
158-
if (Ptrs.count(GEP->getPointerOperand()))
159-
Ptrs.insert(GEP);
160-
}
161-
}
162-
}
163-
}
164-
165-
return Uses + ReturnBlockUses;
166-
}
167-
16879
unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
16980
unsigned Bonus = 0;
170-
171-
172-
// dbgs() << "INSTRCOUNT: " << CB->getCalledFunction()->getInstructionCount()
173-
// << CB->getCalledFunction()->getName() << "\n";
174-
// if (CB->getCalledFunction()->getInstructionCount() == 216)
175-
// Bonus = 300;
176-
177-
// if (Function *Callee = CB->getCalledFunction()) {
178-
// const char *CallerFunName = CB->getParent()->getParent()->getName().data();
179-
// const char *CalleeFunName = Callee->getName().data();
180-
181-
// if (std::strcmp(CallerFunName , "S_regmatch") == 0) {
182-
// if (std::strcmp(CalleeFunName, "S_reghopmaybe3") == 0 || // less important
183-
// std::strcmp(CalleeFunName, "S_regcppop") == 0 ||
184-
// std::strcmp(CalleeFunName, "S_regcppush") == 0)
185-
// return 250;
186-
// }
187-
// }
188-
189-
// Check inlining with memory accesses common to caller and callee
190-
// - Around call in caller? entry/exit blocks in callee?
191-
// - Globals used (much?) in both caller and callee
192-
// - Specific type of pattern: load; inc/dec; store ?
193-
// - non-volatile loads/stores?
194-
// - int/fp loads/stores? ptr?
195-
// - num occurrences in caller?
196-
// - or specifically 2+ functions inlined if many common accesses?
197-
// - specifically 2+ functions getting same address as argument (ptr)?
198-
// - (ptr-args generally?)
199-
if (const Function *Callee = CB->getCalledFunction()) {
200-
const Function *Caller = CB->getParent()->getParent();
201-
const Module *M = Caller->getParent();
202-
std::set<const GlobalVariable *> CalleeGlobals;
203-
std::set<const GlobalVariable *> CallerGlobals;
204-
for (const GlobalVariable &Global : M->globals())
205-
for (const User *U : Global.users())
206-
if (const Instruction *User = dyn_cast<Instruction>(U)) {
207-
if (User->getParent()->getParent() == Callee)
208-
CalleeGlobals.insert(&Global);
209-
if (User->getParent()->getParent() == Caller)
210-
CallerGlobals.insert(&Global);
211-
}
212-
213-
for (auto *GV : CalleeGlobals)
214-
if (CallerGlobals.count(GV)) {
215-
unsigned CalleeStores = 0, CalleeLoads = 0;
216-
unsigned CallerStores = 0, CallerLoads = 0;
217-
countNumMemAccesses(GV, CalleeStores, CalleeLoads, Callee);
218-
countNumMemAccesses(GV, CallerStores, CallerLoads, Caller);
219-
if ((CalleeStores || CalleeLoads) && (CallerStores || CallerLoads)) {
220-
// dbgs() << "GV: @" << GV->getName()
221-
// << " " << *GV->getValueType()
222-
// << " Callee: " << Callee->getName() << " S: " << CalleeStores
223-
// << " L: " << CalleeLoads << " MEE: " << (CalleeStores + CalleeLoads)
224-
// << " Callee-size: " << Callee->getInstructionCount()
225-
// << " Caller: " << Caller->getName() << " S: " << CallerStores
226-
// << " L: " << CallerLoads << " MER: " << (CallerStores + CallerLoads)
227-
// << " Uses-around-call: " << usesAroundCall(CB, GV)
228-
// << " Uses-entry-exit-callee: " << usesEntryExit(Callee, GV)
229-
// << "\n";
230-
231-
// const char *CallerFunName = CB->getParent()->getParent()->getName().data();
232-
// const char *CalleeFunName = Callee->getName().data();
233-
// if (std::strcmp(CallerFunName , "S_regmatch") == 0) {
234-
// if (std::strcmp(CalleeFunName, "S_regcppop") == 0) {
235-
// return 250;
236-
// }
237-
// if (std::strcmp(CalleeFunName, "S_regcppush") == 0) {
238-
// return 250;
239-
// }
240-
if (//usesEntryExit(Callee, GV) >= 5 &&
241-
Callee->getInstructionCount() < 250 &&
242-
243-
// (CalleeStores >= 5 && CalleeLoads >= 5) &&
244-
(CalleeStores + CalleeLoads) > 10 &&
245-
246-
// CallerLoads > 25)
247-
(CallerStores + CallerLoads) > 10)
248-
return 500;
249-
250-
// if
251-
// if ((CallerStores + CallerLoads) > 25)
252-
// if (CallerLoads) > 25)
253-
254-
//}
255-
}
256-
}
257-
}
81+
const Function *Caller = CB->getParent()->getParent();
82+
const Function *Callee = CB->getCalledFunction();
83+
if (!Callee)
84+
return 0;
85+
const Module *M = Caller->getParent();
25886

25987
// Increase the threshold if an incoming argument is used only as a memcpy
26088
// source.
261-
if (Function *Callee = CB->getCalledFunction())
262-
for (Argument &Arg : Callee->args()) {
263-
bool OtherUse = false;
264-
if (isUsedAsMemCpySource(&Arg, OtherUse) && !OtherUse)
265-
Bonus += 1000;
89+
for (const Argument &Arg : Callee->args()) {
90+
bool OtherUse = false;
91+
if (isUsedAsMemCpySource(&Arg, OtherUse) && !OtherUse) {
92+
Bonus = 1000;
93+
break;
26694
}
95+
}
26796

268-
if (!Bonus) {
269-
if (Function *Callee = CB->getCalledFunction()) {
270-
unsigned NumStores = 0;
271-
unsigned NumLoads = 0;
272-
for (unsigned OpIdx = 0; OpIdx != Callee->arg_size(); ++OpIdx) {
273-
Value *CallerArg = CB->getArgOperand(OpIdx);
274-
Argument *CalleeArg = Callee->getArg(OpIdx);
275-
if (isa<AllocaInst>(CallerArg))
276-
countNumMemAccesses(CalleeArg, NumStores, NumLoads);
97+
// Give a bonus for globals that are used heavily in both caller and callee.
98+
std::set<const GlobalVariable *> CalleeGlobals;
99+
std::set<const GlobalVariable *> CallerGlobals;
100+
for (const GlobalVariable &Global : M->globals())
101+
for (const User *U : Global.users())
102+
if (const Instruction *User = dyn_cast<Instruction>(U)) {
103+
if (User->getParent()->getParent() == Callee)
104+
CalleeGlobals.insert(&Global);
105+
if (User->getParent()->getParent() == Caller)
106+
CallerGlobals.insert(&Global);
107+
}
108+
for (auto *GV : CalleeGlobals)
109+
if (CallerGlobals.count(GV)) {
110+
unsigned CalleeStores = 0, CalleeLoads = 0;
111+
unsigned CallerStores = 0, CallerLoads = 0;
112+
countNumMemAccesses(GV, CalleeStores, CalleeLoads, Callee);
113+
countNumMemAccesses(GV, CallerStores, CallerLoads, Caller);
114+
if ((CalleeStores + CalleeLoads) > 10 &&
115+
(CallerStores + CallerLoads) > 10) {
116+
Bonus = 1000;
117+
break;
277118
}
278-
// dbgs() << "NUM: " << NumStores << " " << NumLoads << "\n";
279-
// Best on povray, but not doing stores slightly better on blender.
280-
if (NumLoads > 10)
281-
Bonus += NumLoads * 50;
282-
if (NumStores > 10)
283-
Bonus += NumStores * 50;
284-
Bonus = std::min(Bonus, unsigned(1000));
285119
}
120+
121+
// Give bonus when Callee accesses an Alloca of Caller heavily.
122+
unsigned NumStores = 0;
123+
unsigned NumLoads = 0;
124+
for (unsigned OpIdx = 0; OpIdx != Callee->arg_size(); ++OpIdx) {
125+
Value *CallerArg = CB->getArgOperand(OpIdx);
126+
Argument *CalleeArg = Callee->getArg(OpIdx);
127+
if (isa<AllocaInst>(CallerArg))
128+
countNumMemAccesses(CalleeArg, NumStores, NumLoads, Callee);
286129
}
130+
if (NumLoads > 10)
131+
Bonus += NumLoads * 50;
132+
if (NumStores > 10)
133+
Bonus += NumStores * 50;
134+
Bonus = std::min(Bonus, unsigned(1000));
287135

288136
LLVM_DEBUG(if (Bonus)
289137
dbgs() << "++ SZTTI Adding inlining bonus: " << Bonus << "\n";);

0 commit comments

Comments
 (0)