@@ -1351,6 +1351,16 @@ class LoopVectorizationCostModel {
1351
1351
/// that can be vectorized.
1352
1352
bool stridedAccessCanBeWidened(Instruction *I, ElementCount VF) const;
1353
1353
1354
+ /// Look up the stride recorded for the memory access instruction \p Instr.
+ ///
+ /// \returns the stride stored in StrideInfo for \p Instr, or 0 when no entry
+ /// exists, i.e. \p Instr is not considered for vectorization as a strided
+ /// memory access.
+ int64_t getStride(Instruction *Instr) const {
+   // DenseMap::lookup yields a value-initialized int64_t (0) for a missing
+   // key, which is exactly the "not strided" result.
+   return StrideInfo.lookup(Instr);
+ }
1363
+
1354
1364
/// Returns true if we're required to use a scalar epilogue for at least
1355
1365
/// the final iteration of the original loop.
1356
1366
bool requiresScalarEpilogue(bool IsVectorizing) const {
@@ -1763,6 +1773,9 @@ class LoopVectorizationCostModel {
1763
1773
Ops, [this, VF](Value *V) { return this->needsExtract(V, VF); }));
1764
1774
}
1765
1775
1776
+ /// The mapping of memory access instructions to their stride values.
/// An entry is written by setCostBasedWideningDecision when it picks
/// CM_Strided for an instruction; getStride reads it back and reports 0 for
/// instructions that have no entry.
/// NOTE(review): the key is the instruction alone, with no ElementCount
/// component, while widening decisions are recorded per VF — confirm an entry
/// cannot be observed for a VF whose decision is not CM_Strided.
1777
+ DenseMap<Instruction *, int64_t> StrideInfo;
1778
+
1766
1779
public:
1767
1780
/// The loop that we evaluate.
1768
1781
Loop *TheLoop;
@@ -6183,6 +6196,7 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
6183
6196
if (StridedLoadStoreCost < Cost) {
6184
6197
Decision = CM_Strided;
6185
6198
Cost = StridedLoadStoreCost;
6199
+ StrideInfo[&I] = ConsecutiveStride;
6186
6200
}
6187
6201
}
6188
6202
setWideningDecision(&I, VF, Decision, Cost);
@@ -8427,9 +8441,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
8427
8441
if (Strided) {
8428
8442
const DataLayout &DL = Load->getDataLayout();
8429
8443
auto *StrideTy = DL.getIndexType(Load->getPointerOperand()->getType());
8430
- VPValue *Stride = Plan.getOrAddLiveIn(ConstantInt::get(
8431
- StrideTy, -1 * DL.getTypeAllocSize(getLoadStoreType(Load))));
8432
- return new VPWidenStridedLoadRecipe(*Load, Ptr, Stride, &Plan.getVF(),
8444
+ int64_t Stride = CM.getStride(Load);
8445
+ assert(Stride == -1 &&
8446
+ "Only stride memory access with a stride of -1 is supported.");
8447
+ VPValue *StrideVPV = Plan.getOrAddLiveIn(ConstantInt::get(
8448
+ StrideTy, Stride * DL.getTypeAllocSize(getLoadStoreType(Load))));
8449
+ return new VPWidenStridedLoadRecipe(*Load, Ptr, StrideVPV, &Plan.getVF(),
8433
8450
Mask, I->getDebugLoc());
8434
8451
}
8435
8452
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
0 commit comments