@@ -1134,6 +1134,7 @@ class BoUpSLP {
1134
1134
MustGather.clear();
1135
1135
EntryToLastInstruction.clear();
1136
1136
ExternalUses.clear();
1137
+ ExternalUsesAsGEPs.clear();
1137
1138
for (auto &Iter : BlocksSchedules) {
1138
1139
BlockScheduling *BS = Iter.second.get();
1139
1140
BS->clear();
@@ -3154,6 +3155,10 @@ class BoUpSLP {
3154
3155
/// after vectorization.
3155
3156
UserList ExternalUses;
3156
3157
3158
+ /// A list of GEPs which can be replaced by scalar GEPs instead of
3159
+ /// extractelement instructions.
3160
+ SmallPtrSet<Value *, 4> ExternalUsesAsGEPs;
3161
+
3157
3162
/// Values used only by @llvm.assume calls.
3158
3163
SmallPtrSet<const Value *, 32> EphValues;
3159
3164
@@ -5541,6 +5546,7 @@ void BoUpSLP::buildExternalUses(
5541
5546
<< FoundLane << " from " << *Scalar << ".\n");
5542
5547
ScalarToExtUses.try_emplace(Scalar, ExternalUses.size());
5543
5548
ExternalUses.emplace_back(Scalar, nullptr, FoundLane);
5549
+ continue;
5544
5550
}
5545
5551
for (User *U : Scalar->users()) {
5546
5552
LLVM_DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");
@@ -9925,6 +9931,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
9925
9931
SmallVector<APInt> DemandedElts;
9926
9932
SmallDenseSet<Value *, 4> UsedInserts;
9927
9933
DenseSet<std::pair<const TreeEntry *, Type *>> VectorCasts;
9934
+ std::optional<DenseMap<Value *, unsigned>> ValueToExtUses;
9928
9935
for (ExternalUser &EU : ExternalUses) {
9929
9936
// We only add extract cost once for the same scalar.
9930
9937
if (!isa_and_nonnull<InsertElementInst>(EU.User) &&
@@ -10033,12 +10040,40 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
10033
10040
}
10034
10041
}
10035
10042
}
10043
+ // Leave the GEPs as is, they are free in most cases and better to keep them
10044
+ // as GEPs.
10045
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
10046
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(EU.Scalar)) {
10047
+ if (!ValueToExtUses) {
10048
+ ValueToExtUses.emplace();
10049
+ for_each(enumerate(ExternalUses), [&](const auto &P) {
10050
+ ValueToExtUses->try_emplace(P.value().Scalar, P.index());
10051
+ });
10052
+ }
10053
+ // Can use original GEP, if no operands vectorized or they are marked as
10054
+ // externally used already.
10055
+ bool CanBeUsedAsGEP = all_of(GEP->operands(), [&](Value *V) {
10056
+ if (!getTreeEntry(V))
10057
+ return true;
10058
+ auto It = ValueToExtUses->find(V);
10059
+ if (It != ValueToExtUses->end()) {
10060
+ // Replace all uses to avoid compiler crash.
10061
+ ExternalUses[It->second].User = nullptr;
10062
+ return true;
10063
+ }
10064
+ return false;
10065
+ });
10066
+ if (CanBeUsedAsGEP) {
10067
+ ExtractCost += TTI->getInstructionCost(GEP, CostKind);
10068
+ ExternalUsesAsGEPs.insert(EU.Scalar);
10069
+ continue;
10070
+ }
10071
+ }
10036
10072
10037
10073
// If we plan to rewrite the tree in a smaller type, we will need to sign
10038
10074
// extend the extracted value back to the original type. Here, we account
10039
10075
// for the extract and the added cost of the sign extend if needed.
10040
10076
auto *VecTy = FixedVectorType::get(EU.Scalar->getType(), BundleWidth);
10041
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
10042
10077
auto It = MinBWs.find(getTreeEntry(EU.Scalar));
10043
10078
if (It != MinBWs.end()) {
10044
10079
auto *MinTy = IntegerType::get(F->getContext(), It->second.first);
@@ -13161,6 +13196,8 @@ Value *BoUpSLP::vectorizeTree(
13161
13196
if (Scalar->getType() != Vec->getType()) {
13162
13197
Value *Ex = nullptr;
13163
13198
Value *ExV = nullptr;
13199
+ auto *GEP = dyn_cast<GetElementPtrInst>(Scalar);
13200
+ bool ReplaceGEP = GEP && ExternalUsesAsGEPs.contains(GEP);
13164
13201
auto It = ScalarToEEs.find(Scalar);
13165
13202
if (It != ScalarToEEs.end()) {
13166
13203
// No need to emit many extracts, just move the only one in the
@@ -13186,6 +13223,15 @@ Value *BoUpSLP::vectorizeTree(
13186
13223
if (const TreeEntry *ETE = getTreeEntry(V))
13187
13224
V = ETE->VectorizedValue;
13188
13225
Ex = Builder.CreateExtractElement(V, ES->getIndexOperand());
13226
+ } else if (ReplaceGEP) {
13227
+ // Leave the GEPs as is, they are free in most cases and better to
13228
+ // keep them as GEPs.
13229
+ auto *CloneGEP = GEP->clone();
13230
+ CloneGEP->insertBefore(*Builder.GetInsertBlock(),
13231
+ Builder.GetInsertPoint());
13232
+ if (GEP->hasName())
13233
+ CloneGEP->takeName(GEP);
13234
+ Ex = CloneGEP;
13189
13235
} else {
13190
13236
Ex = Builder.CreateExtractElement(Vec, Lane);
13191
13237
}
@@ -13224,6 +13270,8 @@ Value *BoUpSLP::vectorizeTree(
13224
13270
assert((ExternallyUsedValues.count(Scalar) ||
13225
13271
any_of(Scalar->users(),
13226
13272
[&](llvm::User *U) {
13273
+ if (ExternalUsesAsGEPs.contains(U))
13274
+ return true;
13227
13275
TreeEntry *UseEntry = getTreeEntry(U);
13228
13276
return UseEntry &&
13229
13277
(UseEntry->State == TreeEntry::Vectorize ||
0 commit comments