diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0e6f7e8435e3a..06c1840529eb0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5597,6 +5597,71 @@ static bool isMaskedLoadCompress(
   return TotalVecCost < GatherCost;
 }
 
+/// Checks if strided loads can be generated out of \p VL loads with pointers \p
+/// PointerOps:
+/// 1. Target with strided load support is detected.
+/// 2. The number of loads is greater than MinProfitableStridedLoads, or the
+/// potential stride <= MaxProfitableLoadStride and the potential stride is
+/// power-of-2 (to avoid perf regressions for the very small number of loads)
+/// and max distance > number of loads, or potential stride is -1.
+/// 3. The loads are ordered, or number of unordered loads <=
+/// MaxProfitableUnorderedLoads, or loads are in reversed order. (this check is
+/// to avoid extra costs for very expensive shuffles).
+/// 4. Any pointer operand is an instruction with the users outside of the
+/// current graph (for masked gathers extra extractelement instructions
+/// might be required).
+static bool isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
+                          ArrayRef<unsigned> Order,
+                          const TargetTransformInfo &TTI, const DataLayout &DL,
+                          ScalarEvolution &SE,
+                          const bool IsAnyPointerUsedOutGraph, const int Diff) {
+  const unsigned Sz = VL.size();
+  const unsigned AbsoluteDiff = std::abs(Diff);
+  Type *ScalarTy = VL.front()->getType();
+  auto *VecTy = getWidenedType(ScalarTy, Sz);
+  if (IsAnyPointerUsedOutGraph ||
+      (AbsoluteDiff > Sz &&
+       (Sz > MinProfitableStridedLoads ||
+        (AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
+         AbsoluteDiff % Sz == 0 && has_single_bit(AbsoluteDiff / Sz)))) ||
+      Diff == -(static_cast<int>(Sz) - 1)) {
+    int Stride = Diff / static_cast<int>(Sz - 1);
+    if (Diff != Stride * static_cast<int>(Sz - 1))
+      return false;
+    Align Alignment =
+        cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
+            ->getAlign();
+    if (!TTI.isLegalStridedLoadStore(VecTy, Alignment))
+      return false;
+    Value *Ptr0;
+    Value *PtrN;
+    if (Order.empty()) {
+      Ptr0 = PointerOps.front();
+      PtrN = PointerOps.back();
+    } else {
+      Ptr0 = PointerOps[Order.front()];
+      PtrN = PointerOps[Order.back()];
+    }
+    // Iterate through all pointers and check if all distances are
+    // unique multiple of Dist.
+    SmallSet<int, 4> Dists;
+    for (Value *Ptr : PointerOps) {
+      int Dist = 0;
+      if (Ptr == PtrN)
+        Dist = Diff;
+      else if (Ptr != Ptr0)
+        Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, DL, SE);
+      // If the strides are not the same or repeated, we can't
+      // vectorize.
+      if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second)
+        break;
+    }
+    if (Dists.size() == Sz)
+      return true;
+  }
+  return false;
+}
+
 BoUpSLP::LoadsState
 BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
                            SmallVectorImpl<unsigned> &Order,
@@ -5670,59 +5735,17 @@ BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
         return LoadsState::Vectorize;
       // Simple check if not a strided access - clear order.
       bool IsPossibleStrided = *Diff % (Sz - 1) == 0;
-      // Try to generate strided load node if:
-      // 1. Target with strided load support is detected.
-      // 2. The number of loads is greater than MinProfitableStridedLoads,
-      // or the potential stride <= MaxProfitableLoadStride and the
-      // potential stride is power-of-2 (to avoid perf regressions for the very
-      // small number of loads) and max distance > number of loads, or potential
-      // stride is -1.
-      // 3. The loads are ordered, or number of unordered loads <=
-      // MaxProfitableUnorderedLoads, or loads are in reversed order.
-      // (this check is to avoid extra costs for very expensive shuffles).
-      // 4. Any pointer operand is an instruction with the users outside of the
-      // current graph (for masked gathers extra extractelement instructions
-      // might be required).
+      // Try to generate strided load node.
       auto IsAnyPointerUsedOutGraph =
           IsPossibleStrided && any_of(PointerOps, [&](Value *V) {
             return isa<Instruction>(V) && any_of(V->users(), [&](User *U) {
                      return !isVectorized(U) && !MustGather.contains(U);
                    });
           });
-      const unsigned AbsoluteDiff = std::abs(*Diff);
       if (IsPossibleStrided &&
-          (IsAnyPointerUsedOutGraph ||
-           (AbsoluteDiff > Sz &&
-            (Sz > MinProfitableStridedLoads ||
-             (AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
-              AbsoluteDiff % Sz == 0 && has_single_bit(AbsoluteDiff / Sz)))) ||
-           *Diff == -(static_cast<int>(Sz) - 1))) {
-        int Stride = *Diff / static_cast<int>(Sz - 1);
-        if (*Diff == Stride * static_cast<int>(Sz - 1)) {
-          Align Alignment =
-              cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
-                  ->getAlign();
-          if (TTI->isLegalStridedLoadStore(VecTy, Alignment)) {
-            // Iterate through all pointers and check if all distances are
-            // unique multiple of Dist.
-            SmallSet<int, 4> Dists;
-            for (Value *Ptr : PointerOps) {
-              int Dist = 0;
-              if (Ptr == PtrN)
-                Dist = *Diff;
-              else if (Ptr != Ptr0)
-                Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
-              // If the strides are not the same or repeated, we can't
-              // vectorize.
-              if (((Dist / Stride) * Stride) != Dist ||
-                  !Dists.insert(Dist).second)
-                break;
-            }
-            if (Dists.size() == Sz)
-              return LoadsState::StridedVectorize;
-          }
-        }
-      }
+          isStridedLoad(VL, PointerOps, Order, *TTI, *DL, *SE,
+                        IsAnyPointerUsedOutGraph, *Diff))
+        return LoadsState::StridedVectorize;
       bool IsMasked;
       unsigned InterleaveFactor;
      SmallVector<int> CompressMask;
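
Note (illustration, not part of the patch): the heart of the new isStridedLoad helper is the check that every pointer distance is a unique multiple of a single candidate stride Diff / (Sz - 1). The standalone C++ sketch below reproduces just that check with plain integer offsets in place of getPointersDiff results; checkUniformStride and the sample offsets are hypothetical names and values, and the real code additionally applies the profitability heuristics and the TTI.isLegalStridedLoadStore legality query before vectorizing.

// Standalone sketch: Offsets holds the element distance of each load from the
// first one, listed in pointer order, so Offsets.front() == 0 and
// Offsets.back() is the overall span Diff.
#include <iostream>
#include <set>
#include <vector>

static bool checkUniformStride(const std::vector<int> &Offsets) {
  const int Sz = static_cast<int>(Offsets.size());
  if (Sz < 2)
    return false;
  const int Diff = Offsets.back();
  // Candidate stride, as in the patch; reject it unless it divides Diff
  // evenly across the Sz - 1 gaps.
  const int Stride = Diff / (Sz - 1);
  if (Stride == 0 || Diff != Stride * (Sz - 1))
    return false;
  // Every distance must be a unique multiple of Stride; a duplicate or a
  // non-multiple means the accesses do not form one strided load.
  std::set<int> Dists;
  for (int Dist : Offsets) {
    if ((Dist / Stride) * Stride != Dist || !Dists.insert(Dist).second)
      return false;
  }
  return static_cast<int>(Dists.size()) == Sz;
}

int main() {
  std::cout << checkUniformStride({0, 4, 8, 12}) << '\n';    // 1: stride 4
  std::cout << checkUniformStride({0, -4, -8, -12}) << '\n'; // 1: reversed, stride -4
  std::cout << checkUniformStride({0, 4, 4, 12}) << '\n';    // 0: repeated distance
  std::cout << checkUniformStride({0, 3, 8, 12}) << '\n';    // 0: 3 is not a multiple of 4
}

As in the patch, the set of distances serves double duty: inserting a repeated distance fails immediately, and the final size comparison confirms that exactly Sz distinct lanes were seen.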