diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 8d2ce6bad6af7..1e8d34fde6cc0 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -253,6 +253,21 @@ static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) { VF * getNumElements(ScalarTy)); } +static void transformScalarShuffleIndiciesToVector(unsigned VecTyNumElements, + SmallVectorImpl &Mask) { + // The ShuffleBuilder implementation uses shufflevector to splat an "element". + // But the element has a different meaning for SLP (scalar) and REVEC + // (vector). We need to expand Mask into masks which shufflevector can use + // directly. + SmallVector NewMask(Mask.size() * VecTyNumElements); + for (unsigned I : seq(Mask.size())) + for (auto [J, MaskV] : enumerate(MutableArrayRef(NewMask).slice( + I * VecTyNumElements, VecTyNumElements))) + MaskV = Mask[I] == PoisonMaskElem ? PoisonMaskElem + : Mask[I] * VecTyNumElements + J; + Mask.swap(NewMask); +} + /// \returns True if the value is a constant (but not globals/constant /// expressions). static bool isConstant(Value *V) { @@ -7762,6 +7777,31 @@ namespace { /// The base class for shuffle instruction emission and shuffle cost estimation. class BaseShuffleAnalysis { protected: + Type *ScalarTy = nullptr; + + BaseShuffleAnalysis(Type *ScalarTy) : ScalarTy(ScalarTy) {} + + /// V is expected to be a vectorized value. + /// When REVEC is disabled, there is no difference between VF and + /// VNumElements. + /// When REVEC is enabled, VF is VNumElements / ScalarTyNumElements. + /// e.g., if ScalarTy is <4 x Ty> and V is <8 x Ty>, 2 is returned instead + /// of 8. 
+ unsigned getVF(Value *V) const { + assert(V && "V cannot be nullptr"); + assert(isa(V->getType()) && + "V does not have FixedVectorType"); + assert(ScalarTy && "ScalarTy cannot be nullptr"); + unsigned ScalarTyNumElements = getNumElements(ScalarTy); + unsigned VNumElements = + cast(V->getType())->getNumElements(); + assert(VNumElements > ScalarTyNumElements && + "the number of elements of V is not large enough"); + assert(VNumElements % ScalarTyNumElements == 0 && + "the number of elements of V is not a vectorized value"); + return VNumElements / ScalarTyNumElements; + } + /// Checks if the mask is an identity mask. /// \param IsStrict if is true the function returns false if mask size does /// not match vector size. @@ -8258,7 +8298,6 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { bool IsFinalized = false; SmallVector CommonMask; SmallVector, 2> InVectors; - Type *ScalarTy = nullptr; const TargetTransformInfo &TTI; InstructionCost Cost = 0; SmallDenseSet VectorizedVals; @@ -8840,14 +8879,14 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { } else if (V1 && P2.isNull()) { // Shuffle single vector. ExtraCost += GetValueMinBWAffectedCost(V1); - CommonVF = cast(V1->getType())->getNumElements(); + CommonVF = getVF(V1); assert( all_of(Mask, [=](int Idx) { return Idx < static_cast(CommonVF); }) && "All elements in mask must be less than CommonVF."); } else if (V1 && !V2) { // Shuffle vector and tree node. - unsigned VF = cast(V1->getType())->getNumElements(); + unsigned VF = getVF(V1); const TreeEntry *E2 = P2.get(); CommonVF = std::max(VF, E2->getVectorFactor()); assert(all_of(Mask, @@ -8873,7 +8912,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { V2 = getAllOnesValue(*R.DL, getWidenedType(ScalarTy, CommonVF)); } else if (!V1 && V2) { // Shuffle vector and tree node. 
- unsigned VF = cast(V2->getType())->getNumElements(); + unsigned VF = getVF(V2); const TreeEntry *E1 = P1.get(); CommonVF = std::max(VF, E1->getVectorFactor()); assert(all_of(Mask, @@ -8901,9 +8940,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { V2 = getAllOnesValue(*R.DL, getWidenedType(ScalarTy, CommonVF)); } else { assert(V1 && V2 && "Expected both vectors."); - unsigned VF = cast(V1->getType())->getNumElements(); - CommonVF = - std::max(VF, cast(V2->getType())->getNumElements()); + unsigned VF = getVF(V1); + CommonVF = std::max(VF, getVF(V2)); assert(all_of(Mask, [=](int Idx) { return Idx < 2 * static_cast(CommonVF); @@ -8921,6 +8959,9 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { V2 = getAllOnesValue(*R.DL, getWidenedType(ScalarTy, CommonVF)); } } + if (auto *VecTy = dyn_cast(ScalarTy)) + transformScalarShuffleIndiciesToVector(VecTy->getNumElements(), + CommonMask); InVectors.front() = Constant::getNullValue(getWidenedType(ScalarTy, CommonMask.size())); if (InVectors.size() == 2) @@ -8933,7 +8974,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { ShuffleCostEstimator(Type *ScalarTy, TargetTransformInfo &TTI, ArrayRef VectorizedVals, BoUpSLP &R, SmallPtrSetImpl &CheckedExtracts) - : ScalarTy(ScalarTy), TTI(TTI), + : BaseShuffleAnalysis(ScalarTy), TTI(TTI), VectorizedVals(VectorizedVals.begin(), VectorizedVals.end()), R(R), CheckedExtracts(CheckedExtracts) {} Value *adjustExtracts(const TreeEntry *E, MutableArrayRef Mask, @@ -9138,7 +9179,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { } assert(!InVectors.empty() && !CommonMask.empty() && "Expected only tree entries from extracts/reused buildvectors."); - unsigned VF = cast(V1->getType())->getNumElements(); + unsigned VF = getVF(V1); if (InVectors.size() == 2) { Cost += createShuffle(InVectors.front(), InVectors.back(), CommonMask); transformMaskAfterShuffle(CommonMask, CommonMask); @@ -11795,7 +11836,6 @@ class 
BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { /// resulting shuffle and the second operand sets to be the newly added /// operand. The \p CommonMask is transformed in the proper way after that. SmallVector InVectors; - Type *ScalarTy = nullptr; IRBuilderBase &Builder; BoUpSLP &R; @@ -11921,7 +11961,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { public: ShuffleInstructionBuilder(Type *ScalarTy, IRBuilderBase &Builder, BoUpSLP &R) - : ScalarTy(ScalarTy), Builder(Builder), R(R) {} + : BaseShuffleAnalysis(ScalarTy), Builder(Builder), R(R) {} /// Adjusts extractelements after reusing them. Value *adjustExtracts(const TreeEntry *E, MutableArrayRef Mask, @@ -12178,7 +12218,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { break; } } - int VF = cast(V1->getType())->getNumElements(); + int VF = getVF(V1); for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx) if (Mask[Idx] != PoisonMaskElem && CommonMask[Idx] == PoisonMaskElem) CommonMask[Idx] = Mask[Idx] + (It == InVectors.begin() ? 0 : VF); @@ -12201,6 +12241,14 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { finalize(ArrayRef ExtMask, unsigned VF = 0, function_ref &)> Action = {}) { IsFinalized = true; + SmallVector NewExtMask(ExtMask); + if (auto *VecTy = dyn_cast(ScalarTy)) { + transformScalarShuffleIndiciesToVector(VecTy->getNumElements(), + CommonMask); + transformScalarShuffleIndiciesToVector(VecTy->getNumElements(), + NewExtMask); + ExtMask = NewExtMask; + } if (Action) { Value *Vec = InVectors.front(); if (InVectors.size() == 2) {