Skip to content

Commit 0bb1b34

Browse files
committed
New Recipe VPWidenStridedLoadRecipe
- Remove strided store - Need CM.getStride
1 parent 6d78ece commit 0bb1b34

12 files changed

+382
-307
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3475,6 +3475,10 @@ bool LoopVectorizationCostModel::stridedAccessCanBeWidened(
34753475
if (!VF.isVector())
34763476
return false;
34773477

3478+
// FIXME: Remove this check for StoreInst after strided store is supported.
3479+
if (isa<StoreInst>(I))
3480+
return false;
3481+
34783482
[[maybe_unused]] auto *Ptr = getLoadStorePointerOperand(I);
34793483
auto *ScalarTy = getLoadStoreType(I);
34803484
// TODO: Support non-unit-reverse strided accesses. Add stride analysis here
@@ -4424,7 +4428,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
44244428
[](const auto *R) { return Instruction::Select; })
44254429
.Case<VPWidenStoreRecipe>(
44264430
[](const auto *R) { return Instruction::Store; })
4427-
.Case<VPWidenLoadRecipe>(
4431+
.Case<VPWidenLoadRecipe, VPWidenStridedLoadRecipe>(
44284432
[](const auto *R) { return Instruction::Load; })
44294433
.Case<VPWidenCallRecipe, VPWidenIntrinsicRecipe>(
44304434
[](const auto *R) { return Instruction::Call; })
@@ -4523,6 +4527,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
45234527
case VPDef::VPWidenPointerInductionSC:
45244528
case VPDef::VPReductionPHISC:
45254529
case VPDef::VPInterleaveSC:
4530+
case VPDef::VPWidenStridedLoadSC:
45264531
case VPDef::VPWidenLoadEVLSC:
45274532
case VPDef::VPWidenLoadSC:
45284533
case VPDef::VPWidenStoreEVLSC:
@@ -8418,13 +8423,22 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
84188423
Builder.insert(VectorPtr);
84198424
Ptr = VectorPtr;
84208425
}
8421-
if (LoadInst *Load = dyn_cast<LoadInst>(I))
8426+
if (LoadInst *Load = dyn_cast<LoadInst>(I)) {
8427+
if (Strided) {
8428+
const DataLayout &DL = Load->getDataLayout();
8429+
auto *StrideTy = DL.getIndexType(Load->getPointerOperand()->getType());
8430+
VPValue *Stride = Plan.getOrAddLiveIn(ConstantInt::get(
8431+
StrideTy, -1 * DL.getTypeAllocSize(getLoadStoreType(Load))));
8432+
return new VPWidenStridedLoadRecipe(*Load, Ptr, Stride, &Plan.getVF(),
8433+
Mask, I->getDebugLoc());
8434+
}
84228435
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
8423-
Strided, I->getDebugLoc());
8436+
I->getDebugLoc());
8437+
}
84248438

84258439
StoreInst *Store = cast<StoreInst>(I);
84268440
return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
8427-
Reverse, Strided, I->getDebugLoc());
8441+
Reverse, I->getDebugLoc());
84288442
}
84298443

84308444
/// Creates a VPWidenIntOrFpInductionRecipe for \p Phi. If needed, it will also

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 70 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
537537
case VPRecipeBase::VPBranchOnMaskSC:
538538
case VPRecipeBase::VPInterleaveSC:
539539
case VPRecipeBase::VPIRInstructionSC:
540+
case VPRecipeBase::VPWidenStridedLoadSC:
540541
case VPRecipeBase::VPWidenLoadEVLSC:
541542
case VPRecipeBase::VPWidenLoadSC:
542543
case VPRecipeBase::VPWidenStoreEVLSC:
@@ -2567,9 +2568,6 @@ class VPWidenMemoryRecipe : public VPRecipeBase {
25672568
/// Whether the consecutive accessed addresses are in reverse order.
25682569
bool Reverse;
25692570

2570-
/// Whether the accessed addresses are evenly spaced apart by a fixed stride.
2571-
bool Strided = false;
2572-
25732571
/// Whether the memory access is masked.
25742572
bool IsMasked = false;
25752573

@@ -2583,9 +2581,9 @@ class VPWidenMemoryRecipe : public VPRecipeBase {
25832581

25842582
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
25852583
std::initializer_list<VPValue *> Operands,
2586-
bool Consecutive, bool Reverse, bool Strided, DebugLoc DL)
2584+
bool Consecutive, bool Reverse, DebugLoc DL)
25872585
: VPRecipeBase(SC, Operands, DL), Ingredient(I), Consecutive(Consecutive),
2588-
Reverse(Reverse), Strided(Strided) {
2586+
Reverse(Reverse) {
25892587
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
25902588
}
25912589

@@ -2598,7 +2596,8 @@ class VPWidenMemoryRecipe : public VPRecipeBase {
25982596
return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
25992597
R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
26002598
R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
2601-
R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
2599+
R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC ||
2600+
R->getVPDefID() == VPRecipeBase::VPWidenStridedLoadSC;
26022601
}
26032602

26042603
static inline bool classof(const VPUser *U) {
@@ -2613,10 +2612,6 @@ class VPWidenMemoryRecipe : public VPRecipeBase {
26132612
/// order.
26142613
bool isReverse() const { return Reverse; }
26152614

2616-
/// Return whether the accessed addresses are evenly spaced apart by a fixed
2617-
/// stride.
2618-
bool isStrided() const { return Strided; }
2619-
26202615
/// Return the address accessed by this recipe.
26212616
VPValue *getAddr() const { return getOperand(0); }
26222617

@@ -2646,16 +2641,16 @@ class VPWidenMemoryRecipe : public VPRecipeBase {
26462641
/// optional mask.
26472642
struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
26482643
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
2649-
bool Consecutive, bool Reverse, bool Strided, DebugLoc DL)
2644+
bool Consecutive, bool Reverse, DebugLoc DL)
26502645
: VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
2651-
Reverse, Strided, DL),
2646+
Reverse, DL),
26522647
VPValue(this, &Load) {
26532648
setMask(Mask);
26542649
}
26552650

26562651
VPWidenLoadRecipe *clone() override {
26572652
return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
2658-
getMask(), Consecutive, Reverse, Strided,
2653+
getMask(), Consecutive, Reverse,
26592654
getDebugLoc());
26602655
}
26612656

@@ -2674,9 +2669,9 @@ struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
26742669
bool onlyFirstLaneUsed(const VPValue *Op) const override {
26752670
assert(is_contained(operands(), Op) &&
26762671
"Op must be an operand of the recipe");
2677-
// Widened, consecutive/strided loads operations only demand the first
2678-
// lane of their address.
2679-
return Op == getAddr() && (isConsecutive() || isStrided());
2672+
// Widened, consecutive load operations only demand the first lane of their
2673+
// address.
2674+
return Op == getAddr() && isConsecutive();
26802675
}
26812676
};
26822677

@@ -2687,7 +2682,7 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
26872682
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask)
26882683
: VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
26892684
{L.getAddr(), &EVL}, L.isConsecutive(),
2690-
L.isReverse(), L.isStrided(), L.getDebugLoc()),
2685+
L.isReverse(), L.getDebugLoc()),
26912686
VPValue(this, &getIngredient()) {
26922687
setMask(Mask);
26932688
}
@@ -2716,26 +2711,72 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
27162711
"Op must be an operand of the recipe");
27172712
// Widened loads only demand the first lane of EVL and consecutive
27182713
// loads only demand the first lane of their address.
2719-
return Op == getEVL() ||
2720-
(Op == getAddr() && (isConsecutive() || isStrided()));
2714+
return Op == getEVL() || (Op == getAddr() && isConsecutive());
2715+
}
2716+
};
2717+
2718+
/// A recipe for strided load operations, using the base address, stride, and an
2719+
/// optional mask.
2720+
struct VPWidenStridedLoadRecipe final : public VPWidenMemoryRecipe,
2721+
public VPValue {
2722+
VPWidenStridedLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Stride,
2723+
VPValue *VF, VPValue *Mask, DebugLoc DL)
2724+
: VPWidenMemoryRecipe(VPDef::VPWidenStridedLoadSC, Load,
2725+
{Addr, Stride, VF},
2726+
/*Consecutive=*/false, /*Reverse=*/false, DL),
2727+
VPValue(this, &Load) {
2728+
setMask(Mask);
2729+
}
2730+
2731+
VPWidenStridedLoadRecipe *clone() override {
2732+
return new VPWidenStridedLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
2733+
getStride(), getVF(), getMask(),
2734+
getDebugLoc());
2735+
}
2736+
2737+
VP_CLASSOF_IMPL(VPDef::VPWidenStridedLoadSC);
2738+
2739+
/// Return the stride operand.
2740+
VPValue *getStride() const { return getOperand(1); }
2741+
2742+
/// Return the VF operand.
2743+
VPValue *getVF() const { return getOperand(2); }
2744+
2745+
/// Generate a strided load.
2746+
void execute(VPTransformState &State) override;
2747+
2748+
/// Return the cost of this VPWidenStridedLoadRecipe.
2749+
InstructionCost computeCost(ElementCount VF,
2750+
VPCostContext &Ctx) const override;
2751+
2752+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2753+
/// Print the recipe.
2754+
void print(raw_ostream &O, const Twine &Indent,
2755+
VPSlotTracker &SlotTracker) const override;
2756+
#endif
2757+
2758+
/// Returns true if the recipe only uses the first lane of operand \p Op.
2759+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
2760+
assert(is_contained(operands(), Op) &&
2761+
"Op must be an operand of the recipe");
2762+
return Op == getAddr() || Op == getStride() || Op == getVF();
27212763
}
27222764
};
27232765

27242766
/// A recipe for widening store operations, using the stored value, the address
27252767
/// to store to and an optional mask.
27262768
struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
27272769
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
2728-
VPValue *Mask, bool Consecutive, bool Reverse,
2729-
bool Strided, DebugLoc DL)
2770+
VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
27302771
: VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
2731-
Consecutive, Reverse, Strided, DL) {
2772+
Consecutive, Reverse, DL) {
27322773
setMask(Mask);
27332774
}
27342775

27352776
VPWidenStoreRecipe *clone() override {
27362777
return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
27372778
getStoredValue(), getMask(), Consecutive,
2738-
Reverse, Strided, getDebugLoc());
2779+
Reverse, getDebugLoc());
27392780
}
27402781

27412782
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
@@ -2756,10 +2797,9 @@ struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
27562797
bool onlyFirstLaneUsed(const VPValue *Op) const override {
27572798
assert(is_contained(operands(), Op) &&
27582799
"Op must be an operand of the recipe");
2759-
// Widened, consecutive/strided stores only demand the first lane of their
2800+
// Widened, consecutive stores only demand the first lane of their
27602801
// address, unless the same operand is also stored.
2761-
return Op == getAddr() && (isConsecutive() || isStrided()) &&
2762-
Op != getStoredValue();
2802+
return Op == getAddr() && isConsecutive() && Op != getStoredValue();
27632803
}
27642804
};
27652805

@@ -2770,8 +2810,7 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
27702810
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask)
27712811
: VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
27722812
{S.getAddr(), S.getStoredValue(), &EVL},
2773-
S.isConsecutive(), S.isReverse(), S.isStrided(),
2774-
S.getDebugLoc()) {
2813+
S.isConsecutive(), S.isReverse(), S.getDebugLoc()) {
27752814
setMask(Mask);
27762815
}
27772816

@@ -2804,11 +2843,10 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
28042843
assert(getStoredValue() != Op && "unexpected store of EVL");
28052844
return true;
28062845
}
2807-
// Widened, consecutive/strided memory operations only demand the first lane
2808-
// of their address, unless the same operand is also stored. That latter can
2846+
// Widened, consecutive memory operations only demand the first lane of
2847+
// their address, unless the same operand is also stored. The latter can
28092848
// happen with opaque pointers.
2810-
return Op == getAddr() && (isConsecutive() || isStrided()) &&
2811-
Op != getStoredValue();
2849+
return Op == getAddr() && isConsecutive() && Op != getStoredValue();
28122850
}
28132851
};
28142852

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,10 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
159159
}
160160

161161
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) {
162-
assert((isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R)) &&
163-
"Store recipes should not define any values");
162+
assert(
163+
(isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe, VPWidenStridedLoadRecipe>(
164+
R)) &&
165+
"Store recipes should not define any values");
164166
return cast<LoadInst>(&R->getIngredient())->getType();
165167
}
166168

0 commit comments

Comments
 (0)