@@ -1143,6 +1143,7 @@ class LoopVectorizationCostModel {
     CM_Widen_Reverse, // For consecutive accesses with stride -1.
     CM_Interleave,
     CM_GatherScatter,
+    CM_Strided,
     CM_Scalarize,
     CM_VectorCall,
     CM_IntrinsicCall
@@ -6160,6 +6161,17 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
                 "Expected consecutive stride.");
         InstWidening Decision =
             ConsecutiveStride == 1 ? CM_Widen : CM_Widen_Reverse;
+        // Consider using strided load/store for consecutive reverse accesses to
+        // achieve more efficient memory operations.
+        if (ConsecutiveStride == -1) {
+          const InstructionCost StridedLoadStoreCost =
+              isLegalStridedLoadStore(&I, VF) ? getStridedLoadStoreCost(&I, VF)
+                                              : InstructionCost::getInvalid();
+          if (StridedLoadStoreCost < Cost) {
+            Decision = CM_Strided;
+            Cost = StridedLoadStoreCost;
+          }
+        }
         setWideningDecision(&I, VF, Decision, Cost);
         continue;
       }
@@ -6805,6 +6817,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
         return TTI::CastContextHint::Normal;

       switch (getWideningDecision(I, VF)) {
+      // TODO: New CastContextHint for strided accesses.
+      case LoopVectorizationCostModel::CM_Strided:
       case LoopVectorizationCostModel::CM_GatherScatter:
         return TTI::CastContextHint::GatherScatter;
       case LoopVectorizationCostModel::CM_Interleave:
@@ -8363,6 +8377,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
   bool Reverse = Decision == LoopVectorizationCostModel::CM_Widen_Reverse;
   bool Consecutive =
       Reverse || Decision == LoopVectorizationCostModel::CM_Widen;
+  bool Strided = Decision == LoopVectorizationCostModel::CM_Strided;

   VPValue *Ptr = isa<LoadInst>(I) ? Operands[0] : Operands[1];
   if (Consecutive) {
@@ -8389,12 +8404,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
     Ptr = VectorPtr;
   }
   if (LoadInst *Load = dyn_cast<LoadInst>(I))
-    return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse, false,
-                                 I->getDebugLoc());
+    return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
+                                 Strided, I->getDebugLoc());

   StoreInst *Store = cast<StoreInst>(I);
   return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
-                                Reverse, false, I->getDebugLoc());
+                                Reverse, Strided, I->getDebugLoc());
 }

 /// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also
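The core of the second hunk is a cost comparison: for a consecutive access with stride -1, the cost model now asks whether a strided load/store is legal for this instruction and VF, and switches the decision from CM_Widen_Reverse to CM_Strided only when the strided cost is strictly lower. Below is a minimal standalone sketch of that selection logic, not the LLVM API: the enum, chooseWideningDecision, and the ReverseCost/StridedCost inputs are hypothetical stand-ins, with std::nullopt playing the role of InstructionCost::getInvalid().

#include <cstdio>
#include <optional>

// Illustrative stand-in for the two widening decisions compared in the patch.
enum class InstWidening { WidenReverse, Strided };

// Hypothetical helper mirroring the patch's logic: start from the
// reverse-widening cost and switch to a strided access only when the target
// reports a legal strided form that is strictly cheaper.
static InstWidening chooseWideningDecision(unsigned ReverseCost,
                                           std::optional<unsigned> StridedCost,
                                           unsigned &ChosenCost) {
  InstWidening Decision = InstWidening::WidenReverse;
  ChosenCost = ReverseCost;
  // std::nullopt models an invalid cost, i.e. no legal strided load/store.
  if (StridedCost && *StridedCost < ChosenCost) {
    Decision = InstWidening::Strided;
    ChosenCost = *StridedCost;
  }
  return Decision;
}

int main() {
  unsigned Cost = 0;
  // Strided form is legal and cheaper: the decision flips to Strided.
  InstWidening D1 = chooseWideningDecision(/*ReverseCost=*/8,
                                           /*StridedCost=*/4, Cost);
  std::printf("decision=%d cost=%u\n", static_cast<int>(D1), Cost);
  // No legal strided form: the reverse-widening decision is kept.
  InstWidening D2 = chooseWideningDecision(8, std::nullopt, Cost);
  std::printf("decision=%d cost=%u\n", static_cast<int>(D2), Cost);
  return 0;
}

The later hunks then only thread the outcome through: the recipe builder derives a Strided flag from the decision and forwards it to VPWidenLoadRecipe/VPWidenStoreRecipe in the slot that was previously hard-coded to false.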