Skip to content

[AArch64] Add costs for LD3/LD4 shuffles. #89268

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1376,7 +1376,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {

return TargetTTI->getShuffleCost(
IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
AdjustMask, CostKind, 0, nullptr, {}, Shuffle);
AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
}

// Narrowing shuffle - perform shuffle at original wider width and
Expand All @@ -1385,7 +1385,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {

InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
VecSrcTy, AdjustMask, CostKind, 0, nullptr, {}, Shuffle);
VecSrcTy, AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);

SmallVector<int, 16> ExtractMask(Mask.size());
std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
Expand Down
10 changes: 10 additions & 0 deletions llvm/include/llvm/IR/Instructions.h
Original file line number Diff line number Diff line change
Expand Up @@ -2631,6 +2631,16 @@ class ShuffleVectorInst : public Instruction {
return isInterleaveMask(Mask, Factor, NumInputElts, StartIndexes);
}

/// Check whether \p Mask is a DE-interleave mask with stride \p Factor, i.e.
/// has the form:
/// <Index, Index+Factor, ..., Index+(NumElts-1)*Factor>
/// On a match, the start index is returned through \p Index.
static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor,
                                       unsigned &Index);
/// Convenience overload for callers that only need the yes/no answer and do
/// not care which start index matched.
static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor) {
  unsigned IgnoredIndex;
  return isDeInterleaveMaskOfFactor(Mask, Factor, IgnoredIndex);
}

/// Checks if the shuffle is a bit rotation of the first operand across
/// multiple subelements, e.g:
///
Expand Down
32 changes: 5 additions & 27 deletions llvm/lib/CodeGen/InterleavedAccessPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,28 +200,6 @@ FunctionPass *llvm::createInterleavedAccessPass() {
return new InterleavedAccess();
}

/// Check if the mask is a DE-interleave mask of the given factor
/// \p Factor like:
/// <Index, Index+Factor, ..., Index+(NumElts-1)*Factor>
///
/// Undef (negative) mask elements are ignored when matching. \p Index is
/// written only when a match is found, in which case it holds the lowest
/// matching start index in [0, Factor); on failure it is left untouched so the
/// caller never observes an out-of-range start index.
/// \returns true if the mask matches for some start index, false otherwise.
static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor,
                                       unsigned &Index) {
  // Try each candidate start index from 0 to (Factor - 1); the first match
  // wins. A local counter is used so the out-parameter is not clobbered while
  // searching.
  for (unsigned Start = 0; Start < Factor; ++Start) {
    unsigned Pos = 0;

    // Every defined element must sit exactly at Start + Pos * Factor; undef
    // elements are accepted at any position.
    while (Pos < Mask.size() &&
           (Mask[Pos] < 0 ||
            static_cast<unsigned>(Mask[Pos]) == Start + Pos * Factor))
      ++Pos;

    // The scan reached the end without a mismatch.
    if (Pos == Mask.size()) {
      Index = Start;
      return true;
    }
  }

  return false;
}

/// Check if the mask is a DE-interleave mask for an interleaved load.
///
/// E.g. DE-interleave masks (Factor = 2) could be:
Expand All @@ -238,7 +216,7 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
// Make sure we don't produce a load wider than the input load.
if (Mask.size() * Factor > NumLoadElements)
return false;
if (isDeInterleaveMaskOfFactor(Mask, Factor, Index))
if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index))
return true;
}

Expand Down Expand Up @@ -333,8 +311,8 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
for (auto *Shuffle : Shuffles) {
if (Shuffle->getType() != VecTy)
return false;
if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor,
Index))
if (!ShuffleVectorInst::isDeInterleaveMaskOfFactor(
Shuffle->getShuffleMask(), Factor, Index))
return false;

assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
Expand All @@ -343,8 +321,8 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
for (auto *Shuffle : BinOpShuffles) {
if (Shuffle->getType() != VecTy)
return false;
if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor,
Index))
if (!ShuffleVectorInst::isDeInterleaveMaskOfFactor(
Shuffle->getShuffleMask(), Factor, Index))
return false;

assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
Expand Down
25 changes: 25 additions & 0 deletions llvm/lib/IR/Instructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2978,6 +2978,31 @@ bool ShuffleVectorInst::isInterleaveMask(
return true;
}

/// Check if the mask is a DE-interleave mask of the given factor
/// \p Factor like:
/// <Index, Index+Factor, ..., Index+(NumElts-1)*Factor>
///
/// Undef (negative) mask elements are ignored when matching. On success
/// \p Index receives the lowest matching start index in [0, Factor); on
/// failure \p Index is not written.
/// \returns true if the mask matches for some start index, false otherwise.
bool ShuffleVectorInst::isDeInterleaveMaskOfFactor(ArrayRef<int> Mask,
unsigned Factor,
unsigned &Index) {
// Check all potential start indices from 0 to (Factor - 1).
for (unsigned Idx = 0; Idx < Factor; Idx++) {
unsigned I = 0;

// Check that elements are in ascending order by Factor. Ignore undef
// elements.
for (; I < Mask.size(); I++)
if (Mask[I] >= 0 && static_cast<unsigned>(Mask[I]) != Idx + I * Factor)
break;

// The inner loop ran to completion, so every defined element matched this
// start index. Only now is the output parameter written.
if (I == Mask.size()) {
Index = Idx;
return true;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe only set the output Index on success?

}
}

return false;
}

/// Try to lower a vector shuffle as a bit rotation.
///
/// Look for a repeated rotation pattern in each sub group.
Expand Down
11 changes: 10 additions & 1 deletion llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3827,9 +3827,18 @@ InstructionCost AArch64TTIImpl::getShuffleCost(
Tp->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {

// Check for LD3/LD4 instructions, which are represented in llvm IR as
// deinterleaving-shuffle(load). The shuffle cost could potentially be free,
// but we model it with a cost of max(1, LT.first / 4) so that LD3/LD4 have a
// higher cost than just the load.
if (Args.size() >= 1 && isa<LoadInst>(Args[0]) &&
(ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 3) ||
ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 4)))
return std::max<InstructionCost>(1, LT.first / 4);

// Check for ST3/ST4 instructions, which are represented in llvm IR as
// store(interleaving-shuffle). The shuffle cost could potentially be free,
// but we model it with a cost of LT.first so that LD3/LD3 have a higher
// but we model it with a cost of LT.first so that ST3/ST4 have a higher
// cost than just the store.
if (CxtI && CxtI->hasOneUse() && isa<StoreInst>(*CxtI->user_begin()) &&
(ShuffleVectorInst::isInterleaveMask(
Expand Down
Loading
Loading