@@ -980,6 +980,14 @@ class BoUpSLP {
   class ShuffleInstructionBuilder;

 public:
+  /// Tracks how the loads in the given sequence can be represented.
+  enum class LoadsState {
+    Gather,
+    Vectorize,
+    ScatterVectorize,
+    StridedVectorize
+  };
+
   using ValueList = SmallVector<Value *, 8>;
   using InstrList = SmallVector<Instruction *, 16>;
   using ValueSet = SmallPtrSet<Value *, 16>;
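The four enumerators line up with the strategies the SLP vectorizer can pick for a bundle of loads. As a rough illustration only (the helper below is hypothetical and not part of the patch), a consumer is expected to switch over the value:

// Hypothetical helper, shown only to spell out the intent of each state.
static const char *describeLoadsState(BoUpSLP::LoadsState S) {
  switch (S) {
  case BoUpSLP::LoadsState::Gather:
    return "no profitable vector form; build a gather of scalar loads";
  case BoUpSLP::LoadsState::Vectorize:
    return "consecutive pointers; emit one wide vector load";
  case BoUpSLP::LoadsState::ScatterVectorize:
    return "non-consecutive pointers; emit a masked gather";
  case BoUpSLP::LoadsState::StridedVectorize:
    return "constant-strided pointers; emit a strided load";
  }
  llvm_unreachable("unknown LoadsState");
}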
@@ -1184,6 +1192,19 @@ class BoUpSLP {
   /// may not be necessary.
   bool isLoadCombineCandidate() const;

+  /// Checks if the given array of loads can be represented as a vectorized
+  /// load, a scatter, or just a simple gather.
+  /// \param VL list of loads.
+  /// \param VL0 main load value.
+  /// \param Order returned order of load instructions.
+  /// \param PointerOps returned list of pointer operands.
+  /// \param TryRecursiveCheck used to check if a long masked gather can be
+  /// represented as a series of loads/insert subvector, if profitable.
+  LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
+                               SmallVectorImpl<unsigned> &Order,
+                               SmallVectorImpl<Value *> &PointerOps,
+                               bool TryRecursiveCheck = true) const;
+
   OptimizationRemarkEmitter *getORE() { return ORE; }

   /// This structure holds any data we need about the edges being traversed
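A minimal, hypothetical usage sketch of the new member form (the variable names below are illustrative, not from the patch); the analyses that used to be passed explicitly now come from the BoUpSLP object itself:

// Inside a BoUpSLP member function (sketch only).
// 'Loads' is an ArrayRef<Value *> holding the load bundle being analyzed.
SmallVector<unsigned> Order;      // filled with the sorted position of each load
SmallVector<Value *> PointerOps;  // filled with each load's pointer operand
LoadsState LS = canVectorizeLoads(Loads, Loads.front(), Order, PointerOps);
if (LS == LoadsState::Gather) {
  // No profitable vectorized form was found for this bundle.
}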
@@ -3957,11 +3978,6 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
   return std::move(CurrentOrder);
 }

-namespace {
-/// Tracks the state we can represent the loads in the given sequence.
-enum class LoadsState { Gather, Vectorize, ScatterVectorize, StridedVectorize };
-} // anonymous namespace
-
 static bool arePointersCompatible(Value *Ptr1, Value *Ptr2,
                                   const TargetLibraryInfo &TLI,
                                   bool CompareOpcodes = true) {
@@ -3998,16 +4014,9 @@ static bool isReverseOrder(ArrayRef<unsigned> Order) {
   });
 }

-/// Checks if the given array of loads can be represented as a vectorized,
-/// scatter or just simple gather.
-static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
-                                    const Value *VL0,
-                                    const TargetTransformInfo &TTI,
-                                    const DataLayout &DL, ScalarEvolution &SE,
-                                    LoopInfo &LI, const TargetLibraryInfo &TLI,
-                                    SmallVectorImpl<unsigned> &Order,
-                                    SmallVectorImpl<Value *> &PointerOps,
-                                    bool TryRecursiveCheck = true) {
+BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
+    ArrayRef<Value *> VL, const Value *VL0, SmallVectorImpl<unsigned> &Order,
+    SmallVectorImpl<Value *> &PointerOps, bool TryRecursiveCheck) const {
   // Check that a vectorized load would load the same memory as a scalar
   // load. For example, we don't want to vectorize loads that are smaller
   // than 8-bit. Even though we have a packed struct {<i2, i2, i2, i2>} LLVM
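The body edits in the remaining hunks follow mechanically from this signature change: TTI, DL, SE, LI and TLI are no longer reference parameters but are read from BoUpSLP's own analysis members, which, judging by the '->' and '*' accesses in this diff, are stored as pointers, roughly as sketched below. The exact declarations are an assumption, not copied from the source.

// Assumed shape of the relevant BoUpSLP members implied by this diff (sketch).
class BoUpSLP {
  // ...
  TargetTransformInfo *TTI = nullptr;
  TargetLibraryInfo *TLI = nullptr;
  ScalarEvolution *SE = nullptr;
  LoopInfo *LI = nullptr;
  const DataLayout *DL = nullptr;
};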
@@ -4016,7 +4025,7 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
   // unvectorized version.
   Type *ScalarTy = VL0->getType();

-  if (DL.getTypeSizeInBits(ScalarTy) != DL.getTypeAllocSizeInBits(ScalarTy))
+  if (DL->getTypeSizeInBits(ScalarTy) != DL->getTypeAllocSizeInBits(ScalarTy))
     return LoadsState::Gather;

   // Make sure all loads in the bundle are simple - we can't vectorize
@@ -4036,9 +4045,9 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
   Order.clear();
   auto *VecTy = FixedVectorType::get(ScalarTy, Sz);
   // Check the order of pointer operands or that all pointers are the same.
-  bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, DL, SE, Order);
+  bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, Order);
   if (IsSorted || all_of(PointerOps, [&](Value *P) {
-        return arePointersCompatible(P, PointerOps.front(), TLI);
+        return arePointersCompatible(P, PointerOps.front(), *TLI);
       })) {
     if (IsSorted) {
       Value *Ptr0;
@@ -4051,7 +4060,7 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
         PtrN = PointerOps[Order.back()];
       }
       std::optional<int> Diff =
-          getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, DL, SE);
+          getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
       // Check that the sorted loads are consecutive.
       if (static_cast<unsigned>(*Diff) == Sz - 1)
         return LoadsState::Vectorize;
@@ -4078,7 +4087,7 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
         Align Alignment =
             cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
                 ->getAlign();
-        if (TTI.isLegalStridedLoadStore(VecTy, Alignment)) {
+        if (TTI->isLegalStridedLoadStore(VecTy, Alignment)) {
           // Iterate through all pointers and check if all distances are
           // unique multiple of Dist.
           SmallSet<int, 4> Dists;
@@ -4087,7 +4096,8 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
             if (Ptr == PtrN)
               Dist = *Diff;
             else if (Ptr != Ptr0)
-              Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, DL, SE);
+              Dist =
+                  *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
             // If the strides are not the same or repeated, we can't
             // vectorize.
             if (((Dist / Stride) * Stride) != Dist ||
@@ -4100,11 +4110,11 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
         }
       }
     }
-    auto CheckForShuffledLoads = [&](Align CommonAlignment) {
-      unsigned Sz = DL.getTypeSizeInBits(ScalarTy);
-      unsigned MinVF = R.getMinVF(Sz);
+    auto CheckForShuffledLoads = [&, &TTI = *TTI](Align CommonAlignment) {
+      unsigned Sz = DL->getTypeSizeInBits(ScalarTy);
+      unsigned MinVF = getMinVF(Sz);
       unsigned MaxVF = std::max<unsigned>(bit_floor(VL.size() / 2), MinVF);
-      MaxVF = std::min(R.getMaximumVF(Sz, Instruction::Load), MaxVF);
+      MaxVF = std::min(getMaximumVF(Sz, Instruction::Load), MaxVF);
       for (unsigned VF = MaxVF; VF >= MinVF; VF /= 2) {
         unsigned VectorizedCnt = 0;
         SmallVector<LoadsState> States;
@@ -4114,8 +4124,8 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
           SmallVector<unsigned> Order;
           SmallVector<Value *> PointerOps;
           LoadsState LS =
-              canVectorizeLoads(R, Slice, Slice.front(), TTI, DL, SE, LI, TLI,
-                                Order, PointerOps, /*TryRecursiveCheck=*/false);
+              canVectorizeLoads(Slice, Slice.front(), Order, PointerOps,
+                                /*TryRecursiveCheck=*/false);
           // Check that the sorted loads are consecutive.
           if (LS == LoadsState::Gather)
             break;
@@ -4175,7 +4185,7 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
     // TODO: need to improve analysis of the pointers, if not all of them are
     // GEPs or have > 2 operands, we end up with a gather node, which just
     // increases the cost.
-    Loop *L = LI.getLoopFor(cast<LoadInst>(VL0)->getParent());
+    Loop *L = LI->getLoopFor(cast<LoadInst>(VL0)->getParent());
     bool ProfitableGatherPointers =
         L && Sz > 2 && count_if(PointerOps, [L](Value *V) {
           return L->isLoopInvariant(V);
@@ -4187,8 +4197,8 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
                  isa<Constant, Instruction>(GEP->getOperand(1)));
         })) {
       Align CommonAlignment = computeCommonAlignment<LoadInst>(VL);
-      if (TTI.isLegalMaskedGather(VecTy, CommonAlignment) &&
-          !TTI.forceScalarizeMaskedGather(VecTy, CommonAlignment)) {
+      if (TTI->isLegalMaskedGather(VecTy, CommonAlignment) &&
+          !TTI->forceScalarizeMaskedGather(VecTy, CommonAlignment)) {
         // Check if potential masked gather can be represented as series
         // of loads + insertsubvectors.
         if (TryRecursiveCheck && CheckForShuffledLoads(CommonAlignment)) {
@@ -5635,8 +5645,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
     // treats loading/storing it as an i8 struct. If we vectorize loads/stores
     // from such a struct, we read/write packed bits disagreeing with the
     // unvectorized version.
-    switch (canVectorizeLoads(*this, VL, VL0, *TTI, *DL, *SE, *LI, *TLI,
-                              CurrentOrder, PointerOps)) {
+    switch (canVectorizeLoads(VL, VL0, CurrentOrder, PointerOps)) {
     case LoadsState::Vectorize:
       return TreeEntry::Vectorize;
     case LoadsState::ScatterVectorize:
@@ -7416,9 +7425,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
             !VectorizedLoads.count(Slice.back()) && allSameBlock(Slice)) {
           SmallVector<Value *> PointerOps;
           OrdersType CurrentOrder;
-          LoadsState LS =
-              canVectorizeLoads(R, Slice, Slice.front(), TTI, *R.DL, *R.SE,
-                                *R.LI, *R.TLI, CurrentOrder, PointerOps);
+          LoadsState LS = R.canVectorizeLoads(Slice, Slice.front(),
+                                              CurrentOrder, PointerOps);
           switch (LS) {
           case LoadsState::Vectorize:
           case LoadsState::ScatterVectorize: