-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[SLP][REVEC] Make ShuffleCostEstimator and ShuffleInstructionBuilder can vectorize vector instructions. #99606
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Han-Kuan Chen (HanKuanChen) ChangesSince the mask indices expect the source is scalar type, we need to transform the mask indices into a form which can be used when REVEC is enabled. The transform is only called before the CreateShuffleVector. This is a following patch for #99499. Full diff: https://github.com/llvm/llvm-project/pull/99606.diff 1 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d8c3bae06e932..0f67ba7e56032 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -253,6 +253,21 @@ static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) {
VF * getNumElements(ScalarTy));
}
+static void transformScalarShuffleIndiciesToVector(unsigned VecTyNumElements,
+ SmallVectorImpl<int> &Mask) {
+ // The ShuffleBuilder implementation use shufflevector to splat an "element".
+ // But the element have different meaning for SLP (scalar) and REVEC
+ // (vector). We need to expand Mask into masks which shufflevector can use
+ // directly.
+ SmallVector<int> NewMask(Mask.size() * VecTyNumElements);
+ for (size_t I = 0, E = Mask.size(); I != E; ++I)
+ for (unsigned J = 0; J != VecTyNumElements; ++J)
+ NewMask[I * VecTyNumElements + J] = Mask[I] == PoisonMaskElem
+ ? PoisonMaskElem
+ : Mask[I] * VecTyNumElements + J;
+ Mask.swap(NewMask);
+}
+
/// \returns True if the value is a constant (but not globals/constant
/// expressions).
static bool isConstant(Value *V) {
@@ -8800,6 +8815,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
// Shuffle single vector.
ExtraCost += GetValueMinBWAffectedCost(V1);
CommonVF = cast<FixedVectorType>(V1->getType())->getNumElements();
+ if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy))
+ CommonVF /= VecTy->getNumElements();
assert(
all_of(Mask,
[=](int Idx) { return Idx < static_cast<int>(CommonVF); }) &&
@@ -8807,6 +8824,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
} else if (V1 && !V2) {
// Shuffle vector and tree node.
unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
+ if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy))
+ VF /= VecTy->getNumElements();
const TreeEntry *E2 = P2.get<const TreeEntry *>();
CommonVF = std::max(VF, E2->getVectorFactor());
assert(all_of(Mask,
@@ -8833,6 +8852,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
} else if (!V1 && V2) {
// Shuffle vector and tree node.
unsigned VF = cast<FixedVectorType>(V2->getType())->getNumElements();
+ if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy))
+ VF /= VecTy->getNumElements();
const TreeEntry *E1 = P1.get<const TreeEntry *>();
CommonVF = std::max(VF, E1->getVectorFactor());
assert(all_of(Mask,
@@ -8863,6 +8884,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
CommonVF =
std::max(VF, cast<FixedVectorType>(V2->getType())->getNumElements());
+ if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy))
+ CommonVF /= VecTy->getNumElements();
assert(all_of(Mask,
[=](int Idx) {
return Idx < 2 * static_cast<int>(CommonVF);
@@ -8880,6 +8903,9 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
V2 = getAllOnesValue(*R.DL, getWidenedType(ScalarTy, CommonVF));
}
}
+ if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy))
+ transformScalarShuffleIndiciesToVector(VecTy->getNumElements(),
+ CommonMask);
InVectors.front() =
Constant::getNullValue(getWidenedType(ScalarTy, CommonMask.size()));
if (InVectors.size() == 2)
@@ -9098,6 +9124,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
assert(!InVectors.empty() && !CommonMask.empty() &&
"Expected only tree entries from extracts/reused buildvectors.");
unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
+ if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy))
+ VF /= VecTy->getNumElements();
if (InVectors.size() == 2) {
Cost += createShuffle(InVectors.front(), InVectors.back(), CommonMask);
transformMaskAfterShuffle(CommonMask, CommonMask);
@@ -12123,6 +12151,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
int VF = CommonMask.size();
if (auto *FTy = dyn_cast<FixedVectorType>(V1->getType()))
VF = FTy->getNumElements();
+ if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy))
+ VF /= VecTy->getNumElements();
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
if (Mask[Idx] != PoisonMaskElem && CommonMask[Idx] == PoisonMaskElem)
CommonMask[Idx] = Mask[Idx] + (It == InVectors.begin() ? 0 : VF);
@@ -12145,6 +12175,14 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
finalize(ArrayRef<int> ExtMask, unsigned VF = 0,
function_ref<void(Value *&, SmallVectorImpl<int> &)> Action = {}) {
IsFinalized = true;
+ SmallVector<int> NewExtMask(ExtMask);
+ if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy)) {
+ transformScalarShuffleIndiciesToVector(VecTy->getNumElements(),
+ CommonMask);
+ transformScalarShuffleIndiciesToVector(VecTy->getNumElements(),
+ NewExtMask);
+ ExtMask = NewExtMask;
+ }
if (Action) {
Value *Vec = InVectors.front();
if (InVectors.size() == 2) {
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Need the tests
b1cae31
to
cf11555
Compare
cf11555
to
0f10cc5
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Some previous comments not addressed yet
0f10cc5
to
c9bf976
Compare
cast<FixedVectorType>(V->getType())->getNumElements(); | ||
assert(VNumElements > ScalarTyNumElements && | ||
"the number of elements of V is not large enough"); | ||
assert(VNumElements % ScalarTyNumElements == 0 && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What if ScalarTyNumElements is non-power-of-2, can it happen?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ScalarTyNumElements
can be non-power-of-2. But the rule VNumElements > ScalarTyNumElements
and VNumElements % ScalarTyNumElements
are still applied.
@@ -8728,6 +8744,15 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { | |||
} | |||
return TTI::TCC_Free; | |||
}; | |||
auto GetVF = [&](Value *V) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Better make it a function and use here and in other places
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a getVF
member function. (see c40998e)
ce27ff7
to
06458ab
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@@ -12179,6 +12217,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { | |||
} | |||
} | |||
int VF = cast<FixedVectorType>(V1->getType())->getNumElements(); | |||
if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy)) | |||
VF /= VecTy->getNumElements(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be processed by something similar like getVF or getVF itself?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can make getVF
into a protected member function of BaseShuffleAnalysis
. However, BaseShuffleAnalysis
would need to store ScalarTy
. See ae865fc.
ShuffleCostEstimator.
06458ab
to
2b77a2f
Compare
ShuffleInstructionBuilder::add support vector instructions.
instructions. The VF is relative to the number of elements in ScalarTy instead of the size of mask.
2b77a2f
to
2e951cf
Compare
The changes looks good but need the test |
Close the PR and move into #99499. |
Since the mask indices expect the source is scalar type, we need to transform the mask indices into a form which can be used when REVEC is enabled. The transform is only called before the CreateShuffleVector.
This is a following patch for #99499.