@@ -1395,7 +1395,7 @@ class BoUpSLP {
 
   /// \returns the cost incurred by unwanted spills and fills, caused by
   /// holding live values over call sites.
-  InstructionCost getSpillCost() const;
+  InstructionCost getSpillCost();
 
   /// \returns the vectorization cost of the subtree that starts at \p VL.
   /// A negative number means that this is profitable.
@@ -2958,7 +2958,7 @@ class BoUpSLP {
   }
 
   /// Check if the value is vectorized in the tree.
-  bool isVectorized(Value *V) const {
+  bool isVectorized(const Value *V) const {
     assert(V && "V cannot be nullptr.");
     return ScalarToTreeEntries.contains(V);
   }
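
Both declaration tweaks anticipate the rewrite below: isVectorized only queries ScalarToTreeEntries, so it can accept a const Value *, while getSpillCost loses its const qualifier, most likely because the new body calls getLastInstructionInBundle, which memoizes its result per tree entry. A minimal standalone sketch of that caching pattern (names and types hypothetical, not the actual LLVM helper):

#include <unordered_map>

struct Entry {}; // stand-in for SLP's TreeEntry

class CostModel {
  // Cache filled on first lookup; mutating it is what prevents the
  // enclosing method (and its callers) from being declared const.
  std::unordered_map<const Entry *, int> LastInstCache;

  int computeLast(const Entry *E) { return 0; /* hypothetical slow path */ }

public:
  // Would need `mutable` on the cache to remain a const member function.
  int getLastInstruction(const Entry *E) {
    auto [It, Inserted] = LastInstCache.try_emplace(E, 0);
    if (Inserted)
      It->second = computeLast(E);
    return It->second;
  }
};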
@@ -12160,78 +12160,80 @@ bool BoUpSLP::isTreeNotExtendable() const {
   return Res;
 }
 
-InstructionCost BoUpSLP::getSpillCost() const {
+InstructionCost BoUpSLP::getSpillCost() {
   // Walk from the bottom of the tree to the top, tracking which values are
   // live. When we see a call instruction that is not part of our tree,
   // query TTI to see if there is a cost to keeping values live over it
   // (for example, if spills and fills are required).
-  unsigned BundleWidth = VectorizableTree.front()->Scalars.size();
   InstructionCost Cost = 0;
 
-  SmallPtrSet<Instruction *, 4> LiveValues;
-  Instruction *PrevInst = nullptr;
+  SmallPtrSet<const TreeEntry *, 4> LiveEntries;
+  const TreeEntry *Prev = nullptr;
 
   // The entries in VectorizableTree are not necessarily ordered by their
   // position in basic blocks. Collect them and order them by dominance so later
   // instructions are guaranteed to be visited first. For instructions in
   // different basic blocks, we only scan to the beginning of the block, so
   // their order does not matter, as long as all instructions in a basic block
   // are grouped together. Using dominance ensures a deterministic order.
-  SmallVector<Instruction *, 16> OrderedScalars;
+  SmallVector<TreeEntry *, 16> OrderedEntries;
   for (const auto &TEPtr : VectorizableTree) {
-    if (TEPtr->State != TreeEntry::Vectorize)
+    if (TEPtr->isGather())
       continue;
-    Instruction *Inst = dyn_cast<Instruction>(TEPtr->Scalars[0]);
-    if (!Inst)
-      continue;
-    OrderedScalars.push_back(Inst);
-  }
-  llvm::sort(OrderedScalars, [&](Instruction *A, Instruction *B) {
-    auto *NodeA = DT->getNode(A->getParent());
-    auto *NodeB = DT->getNode(B->getParent());
+    OrderedEntries.push_back(TEPtr.get());
+  }
+  llvm::stable_sort(OrderedEntries, [&](const TreeEntry *TA,
+                                        const TreeEntry *TB) {
+    Instruction &A = getLastInstructionInBundle(TA);
+    Instruction &B = getLastInstructionInBundle(TB);
+    auto *NodeA = DT->getNode(A.getParent());
+    auto *NodeB = DT->getNode(B.getParent());
     assert(NodeA && "Should only process reachable instructions");
     assert(NodeB && "Should only process reachable instructions");
     assert((NodeA == NodeB) == (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
            "Different nodes should have different DFS numbers");
     if (NodeA != NodeB)
       return NodeA->getDFSNumIn() > NodeB->getDFSNumIn();
-    return B->comesBefore(A);
+    return B.comesBefore(&A);
   });
 
-  for (Instruction *Inst : OrderedScalars) {
-    if (!PrevInst) {
-      PrevInst = Inst;
+  for (const TreeEntry *TE : OrderedEntries) {
+    if (!Prev) {
+      Prev = TE;
       continue;
     }
 
-    // Update LiveValues.
-    LiveValues.erase(PrevInst);
-    for (auto &J : PrevInst->operands()) {
-      if (isa<Instruction>(&*J) && isVectorized(&*J))
-        LiveValues.insert(cast<Instruction>(&*J));
+    LiveEntries.erase(Prev);
+    for (unsigned I : seq<unsigned>(Prev->getNumOperands())) {
+      const TreeEntry *Op = getVectorizedOperand(Prev, I);
+      if (!Op)
+        continue;
+      assert(!Op->isGather() && "Expected vectorized operand.");
+      LiveEntries.insert(Op);
     }
 
     LLVM_DEBUG({
-      dbgs() << "SLP: #LV: " << LiveValues.size();
-      for (auto *X : LiveValues)
-        dbgs() << " " << X->getName();
+      dbgs() << "SLP: #LV: " << LiveEntries.size();
+      for (auto *X : LiveEntries)
+        X->dump();
       dbgs() << ", Looking at ";
-      Inst->dump();
+      TE->dump();
     });
 
     // Now find the sequence of instructions between PrevInst and Inst.
     unsigned NumCalls = 0;
-    BasicBlock::reverse_iterator InstIt = ++Inst->getIterator().getReverse(),
-                                 PrevInstIt =
-                                     PrevInst->getIterator().getReverse();
+    const Instruction *PrevInst = &getLastInstructionInBundle(Prev);
+    BasicBlock::const_reverse_iterator
+        InstIt = ++getLastInstructionInBundle(TE).getIterator().getReverse(),
+        PrevInstIt = PrevInst->getIterator().getReverse();
     while (InstIt != PrevInstIt) {
       if (PrevInstIt == PrevInst->getParent()->rend()) {
-        PrevInstIt = Inst->getParent()->rbegin();
+        PrevInstIt = getLastInstructionInBundle(TE).getParent()->rbegin();
         continue;
       }
 
-      auto NoCallIntrinsic = [this](Instruction *I) {
-        auto *II = dyn_cast<IntrinsicInst>(I);
+      auto NoCallIntrinsic = [this](const Instruction *I) {
+        const auto *II = dyn_cast<IntrinsicInst>(I);
         if (!II)
           return false;
         if (II->isAssumeLikeIntrinsic())
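
The comparator above is worth reading on its own: entries are ordered by the dominator tree's DFS-in numbers so that dominated (later) blocks are visited first, with Instruction::comesBefore breaking ties inside a block. A standalone sketch of the same ordering over plain instructions, assuming a DominatorTree whose DFS numbers are up to date:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"

// Order instructions bottom-up: deeper blocks first and, within one block,
// later instructions first (sketch of the comparator used by getSpillCost).
static void orderBottomUp(llvm::SmallVectorImpl<llvm::Instruction *> &Insts,
                          const llvm::DominatorTree &DT) {
  llvm::stable_sort(Insts, [&](llvm::Instruction *A, llvm::Instruction *B) {
    auto *NodeA = DT.getNode(A->getParent());
    auto *NodeB = DT.getNode(B->getParent());
    if (NodeA != NodeB)
      return NodeA->getDFSNumIn() > NodeB->getDFSNumIn();
    return B->comesBefore(A);
  });
}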
@@ -12252,25 +12254,28 @@ InstructionCost BoUpSLP::getSpillCost() const {
       };
 
       // Debug information does not impact spill cost.
-      if (isa<CallBase>(&*PrevInstIt) && !NoCallIntrinsic(&*PrevInstIt) &&
-          &*PrevInstIt != PrevInst)
+      // Vectorized calls, represented as vector intrinsics, do not impact
+      // spill cost.
+      if (const auto *CB = dyn_cast<CallBase>(&*PrevInstIt);
+          CB && !NoCallIntrinsic(CB) && !isVectorized(CB))
         NumCalls++;
 
       ++PrevInstIt;
     }
 
     if (NumCalls) {
-      SmallVector<Type *, 4> V;
-      for (auto *II : LiveValues) {
-        auto *ScalarTy = II->getType();
-        if (auto *VectorTy = dyn_cast<FixedVectorType>(ScalarTy))
-          ScalarTy = VectorTy->getElementType();
-        V.push_back(getWidenedType(ScalarTy, BundleWidth));
+      SmallVector<Type *, 4> EntriesTypes;
+      for (const TreeEntry *TE : LiveEntries) {
+        auto *ScalarTy = TE->getMainOp()->getType();
+        auto It = MinBWs.find(TE);
+        if (It != MinBWs.end())
+          ScalarTy = IntegerType::get(ScalarTy->getContext(), It->second.first);
+        EntriesTypes.push_back(getWidenedType(ScalarTy, TE->getVectorFactor()));
       }
-      Cost += NumCalls * TTI->getCostOfKeepingLiveOverCall(V);
+      Cost += NumCalls * TTI->getCostOfKeepingLiveOverCall(EntriesTypes);
     }
 
-    PrevInst = Inst;
+    Prev = TE;
   }
 
   return Cost;
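
Once the walk knows how many untracked calls separate two bundles and which tree entries are live across them, the cost query itself is a single TTI hook. A hedged sketch of that final step, assuming only the TargetTransformInfo API that the diff itself uses:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"

// Each call crossed while the given vector types are live contributes the
// target's estimate for spilling/filling those values around a call.
static llvm::InstructionCost
spillCostAcrossCalls(const llvm::TargetTransformInfo &TTI, unsigned NumCalls,
                     llvm::ArrayRef<llvm::Type *> LiveVecTys) {
  if (!NumCalls || LiveVecTys.empty())
    return 0;
  return NumCalls * TTI.getCostOfKeepingLiveOverCall(LiveVecTys);
}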