Skip to content

[AArch64] Add costs for ST3 and ST4 instructions, modelled as store(shuffle). #87934

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1291,12 +1291,11 @@ class TargetTransformInfo {
/// passed through \p Args, which helps improve the cost estimation in some
/// cases, like in broadcast loads.
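/// NOTE: For subvector extractions Tp represents the source type.
/// \p CxtI is the optional context instruction (the shuffle being costed,
/// when one exists), which lets targets inspect how the result is used.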
InstructionCost
getShuffleCost(ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask = std::nullopt,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
int Index = 0, VectorType *SubTp = nullptr,
ArrayRef<const Value *> Args = std::nullopt) const;
InstructionCost getShuffleCost(
ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask = std::nullopt,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, int Index = 0,
VectorType *SubTp = nullptr, ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr) const;

/// Represents a hint about the context in which a cast is used.
///
Expand Down Expand Up @@ -2008,11 +2007,10 @@ class TargetTransformInfo::Concept {
const SmallBitVector &OpcodeMask,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const = 0;

virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) = 0;
virtual InstructionCost
getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
ArrayRef<const Value *> Args, const Instruction *CxtI) = 0;
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
Type *Src, CastContextHint CCH,
TTI::TargetCostKind CostKind,
Expand Down Expand Up @@ -2647,8 +2645,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args) override {
return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
ArrayRef<const Value *> Args,
const Instruction *CxtI) override {
return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
CxtI);
}
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
CastContextHint CCH,
Expand Down
44 changes: 25 additions & 19 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -579,10 +579,12 @@ class TargetTransformInfoImplBase {
return InstructionCost::getInvalid();
}

InstructionCost
getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
ArrayRef<const Value *> Args = std::nullopt) const {
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty,
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr) const {
return 1;
}

Expand Down Expand Up @@ -1341,13 +1343,13 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
if (Shuffle->isExtractSubvectorMask(SubIndex))
return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
Mask, CostKind, SubIndex, VecTy,
Operands);
Operands, Shuffle);

if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
return TargetTTI->getShuffleCost(
TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
Operands);
Operands, Shuffle);

int ReplicationFactor, VF;
if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
Expand All @@ -1374,7 +1376,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {

return TargetTTI->getShuffleCost(
IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
AdjustMask, CostKind, 0, nullptr);
AdjustMask, CostKind, 0, nullptr, {}, Shuffle);
}

// Narrowing shuffle - perform shuffle at original wider width and
Expand All @@ -1383,49 +1385,53 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {

InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
VecSrcTy, AdjustMask, CostKind, 0, nullptr);
VecSrcTy, AdjustMask, CostKind, 0, nullptr, {}, Shuffle);

SmallVector<int, 16> ExtractMask(Mask.size());
std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
return ShuffleCost + TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector,
VecSrcTy, ExtractMask,
CostKind, 0, VecTy);
return ShuffleCost + TargetTTI->getShuffleCost(
TTI::SK_ExtractSubvector, VecSrcTy,
ExtractMask, CostKind, 0, VecTy, {}, Shuffle);
}

if (Shuffle->isIdentity())
return 0;

if (Shuffle->isReverse())
return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, Mask, CostKind,
0, nullptr, Operands);
0, nullptr, Operands, Shuffle);

if (Shuffle->isSelect())
return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, Mask, CostKind,
0, nullptr, Operands);
0, nullptr, Operands, Shuffle);

if (Shuffle->isTranspose())
return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, Mask,
CostKind, 0, nullptr, Operands);
CostKind, 0, nullptr, Operands,
Shuffle);

if (Shuffle->isZeroEltSplat())
return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, Mask,
CostKind, 0, nullptr, Operands);
CostKind, 0, nullptr, Operands,
Shuffle);

if (Shuffle->isSingleSource())
return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, Mask,
CostKind, 0, nullptr, Operands);
CostKind, 0, nullptr, Operands,
Shuffle);

if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
return TargetTTI->getShuffleCost(
TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands);
FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands,
Shuffle);

if (Shuffle->isSplice(SubIndex))
return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, Mask, CostKind,
SubIndex, nullptr, Operands);
SubIndex, nullptr, Operands, Shuffle);

return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, Mask,
CostKind, 0, nullptr, Operands);
CostKind, 0, nullptr, Operands, Shuffle);
}
case Instruction::ExtractElement: {
auto *EEI = dyn_cast<ExtractElementInst>(U);
Expand Down
3 changes: 2 additions & 1 deletion llvm/include/llvm/CodeGen/BasicTTIImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1020,7 +1020,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = std::nullopt) {
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr) {
switch (improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp)) {
case TTI::SK_Broadcast:
if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -916,9 +916,9 @@ InstructionCost TargetTransformInfo::getAltInstrCost(
InstructionCost TargetTransformInfo::getShuffleCost(
ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) const {
InstructionCost Cost =
TTIImpl->getShuffleCost(Kind, Ty, Mask, CostKind, Index, SubTp, Args);
ArrayRef<const Value *> Args, const Instruction *CxtI) const {
InstructionCost Cost = TTIImpl->getShuffleCost(Kind, Ty, Mask, CostKind,
Index, SubTp, Args, CxtI);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
Expand Down
28 changes: 20 additions & 8 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3815,18 +3815,29 @@ InstructionCost AArch64TTIImpl::getSpliceCost(VectorType *Tp, int Index) {
return LegalizationCost * LT.first;
}

InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *Tp,
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
InstructionCost AArch64TTIImpl::getShuffleCost(
TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
ArrayRef<const Value *> Args, const Instruction *CxtI) {
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);

// If we have a Mask, and the LT is being legalized somehow, split the Mask
// into smaller vectors and sum the cost of each shuffle.
if (!Mask.empty() && isa<FixedVectorType>(Tp) && LT.second.isVector() &&
Tp->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {

// Check for ST3/ST4 instructions, which are represented in LLVM IR as
// store(interleaving-shuffle). The shuffle cost could potentially be free,
// but we model it with a cost of LT.first so that ST3/ST4 have a higher
// cost than just the store.
if (CxtI && CxtI->hasOneUse() && isa<StoreInst>(*CxtI->user_begin()) &&
(ShuffleVectorInst::isInterleaveMask(
Mask, 4, Tp->getElementCount().getKnownMinValue() * 2) ||
ShuffleVectorInst::isInterleaveMask(
Mask, 3, Tp->getElementCount().getKnownMinValue() * 2)))
return LT.first;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where do you use CxtI to detect a store?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, yeah! I rewrote the code when moving it into this outer if statement, and must have missed that part of the original.


unsigned TpNumElts = Mask.size();
unsigned LTNumElts = LT.second.getVectorNumElements();
unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
Expand Down Expand Up @@ -3874,7 +3885,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
if (NumSources <= 2)
Cost += getShuffleCost(NumSources <= 1 ? TTI::SK_PermuteSingleSrc
: TTI::SK_PermuteTwoSrc,
NTp, NMask, CostKind, 0, nullptr, Args);
NTp, NMask, CostKind, 0, nullptr, Args, CxtI);
else if (any_of(enumerate(NMask), [&](const auto &ME) {
return ME.value() % LTNumElts == ME.index();
}))
Expand Down Expand Up @@ -4055,7 +4066,8 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// Restore optimal kind.
if (IsExtractSubvector)
Kind = TTI::SK_ExtractSubvector;
return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
CxtI);
}

static bool containsDecreasingPointers(Loop *TheLoop,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = std::nullopt);
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr);

InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1127,7 +1127,8 @@ InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *VT, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
ArrayRef<const Value *> Args,
const Instruction *CxtI) {
Kind = improveShuffleKindFromMask(Kind, Mask, VT, Index, SubTp);
// Treat extractsubvector as single op permutation.
bool IsExtractSubvector = Kind == TTI::SK_ExtractSubvector;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = std::nullopt);
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr);

bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1212,7 +1212,8 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *Tp, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
ArrayRef<const Value *> Args,
const Instruction *CxtI) {
Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp);
// Treat extractsubvector as single op permutation.
bool IsExtractSubvector = Kind == TTI::SK_ExtractSubvector;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/ARM/ARMTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = std::nullopt);
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr);

bool preferInLoopReduction(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,8 @@ InstructionCost HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, Type *SubTp,
ArrayRef<const Value *> Args) {
ArrayRef<const Value *> Args,
const Instruction *CxtI) {
return 1;
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
Type *SubTp,
ArrayRef<const Value *> Args = std::nullopt);
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr);
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, bool VariableMask,
Align Alignment,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -607,7 +607,8 @@ InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, Type *SubTp,
ArrayRef<const Value *> Args) {
ArrayRef<const Value *> Args,
const Instruction *CxtI) {

InstructionCost CostFactor =
vectorCostAdjustmentFactor(Instruction::ShuffleVector, Tp, nullptr);
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
Type *SubTp,
ArrayRef<const Value *> Args = std::nullopt);
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr);
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *Tp, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
ArrayRef<const Value *> Args,
const Instruction *CxtI) {
Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp);

std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = std::nullopt);
ArrayRef<const Value *> Args = std::nullopt,
const Instruction *CxtI = nullptr);

InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
Expand Down
Loading
Loading