Skip to content

Commit a536743

Browse files
authored
[AArch64] Add ZIP and UZP shuffle costs. (#88150)
This adds some costs for the shuffle instructions that should be lowered to zip1/zip2/uzp1/uzp2 instructions.
1 parent 3f7f446 commit a536743

File tree

6 files changed

+84
-70
lines changed

6 files changed

+84
-70
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -11851,35 +11851,6 @@ static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
1185111851
return true;
1185211852
}
1185311853

11854-
/// Check whether shuffle mask \p M interleaves the lanes of two \p VT-sized
/// inputs: lane 2*k of the result must be Idx+k and lane 2*k+1 must be
/// Idx+k+NumElts, where Idx starts at 0 (\p WhichResult == 0) or at NumElts/2
/// (\p WhichResult == 1). Presumably these correspond to ZIP1 and ZIP2
/// respectively -- confirm against the lowering code that consumes
/// \p WhichResult.
///
/// Negative mask entries are treated as undef and match anything.
///
/// \param M           Shuffle mask; must hold at least NumElts entries.
/// \param VT          Vector type that supplies the element count.
/// \param WhichResult Set to 0 or 1 as described above. Left untouched when
///                    the element count is odd.
/// \returns true if \p M matches the interleave pattern.
static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
  unsigned NumElts = VT.getVectorNumElements();
  if (NumElts % 2 != 0)
    return false;

  // Decide which half is being interleaved from the first *defined* lane
  // rather than from M[0] alone; otherwise an undef leading lane would force
  // WhichResult to 1 and reject valid low-half masks. An all-undef mask keeps
  // the historical answer (WhichResult == 1, match).
  WhichResult = 1;
  for (unsigned i = 0; i != NumElts; ++i) {
    if (M[i] < 0)
      continue;
    // Value lane i would hold if the low halves were being interleaved.
    unsigned LowHalfIdx = i / 2 + (i % 2 != 0 ? NumElts : 0);
    WhichResult = (static_cast<unsigned>(M[i]) == LowHalfIdx ? 0 : 1);
    break;
  }

  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned i = 0; i != NumElts; i += 2) {
    if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) ||
        (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts))
      return false;
    Idx += 1;
  }

  return true;
}
11869-
11870-
/// Check whether shuffle mask \p M picks every other lane of the two
/// concatenated inputs: result lane i must be 2*i + WhichResult, i.e. the
/// even lanes (\p WhichResult == 0) or the odd lanes (\p WhichResult == 1).
/// Presumably these correspond to UZP1 and UZP2 respectively -- confirm
/// against the lowering code that consumes \p WhichResult.
///
/// Negative mask entries are treated as undef and match anything.
///
/// \param M           Shuffle mask; must hold at least NumElts entries.
/// \param VT          Vector type that supplies the element count.
/// \param WhichResult Set to 0 or 1 as described above.
/// \returns true if \p M matches the de-interleave pattern.
static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
  unsigned NumElts = VT.getVectorNumElements();

  // Decide between even and odd lanes from the first *defined* lane rather
  // than from M[0] alone; otherwise an undef leading lane would force
  // WhichResult to 1 and reject valid even-lane masks such as <-1, 2, 4, 6>.
  // An all-undef mask keeps the historical answer (WhichResult == 1, match).
  WhichResult = 1;
  for (unsigned i = 0; i != NumElts; ++i) {
    if (M[i] < 0)
      continue;
    WhichResult = (static_cast<unsigned>(M[i]) == 2 * i ? 0 : 1);
    break;
  }

  for (unsigned i = 0; i != NumElts; ++i) {
    if (M[i] < 0)
      continue; // ignore UNDEF indices
    if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult)
      return false;
  }

  return true;
}
11882-
1188311854
static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
1188411855
unsigned NumElts = VT.getVectorNumElements();
1188511856
if (NumElts % 2 != 0)

llvm/lib/Target/AArch64/AArch64PerfectShuffle.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616

1717
#include "llvm/ADT/ArrayRef.h"
1818

19+
namespace llvm {
20+
1921
// 31 entries have cost 0
2022
// 756 entries have cost 1
2123
// 3690 entries have cost 2
@@ -6618,4 +6620,35 @@ static unsigned getPerfectShuffleCost(llvm::ArrayRef<int> M) {
66186620
return (PFEntry >> 30) + 1;
66196621
}
66206622

6623+
/// Check whether shuffle mask \p M interleaves the lanes of two \p VT-sized
/// inputs: lane 2*k of the result must be Idx+k and lane 2*k+1 must be
/// Idx+k+NumElts, where Idx starts at 0 (\p WhichResult == 0) or at NumElts/2
/// (\p WhichResult == 1). Presumably these correspond to ZIP1 and ZIP2
/// respectively -- confirm against the lowering code that consumes
/// \p WhichResult.
///
/// Negative mask entries are treated as undef and match anything.
///
/// \param M           Shuffle mask; must hold at least NumElts entries.
/// \param VT          Vector type that supplies the element count.
/// \param WhichResult Set to 0 or 1 as described above. Left untouched when
///                    the element count is odd.
/// \returns true if \p M matches the interleave pattern.
inline bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
  unsigned NumElts = VT.getVectorNumElements();
  if (NumElts % 2 != 0)
    return false;

  // Decide which half is being interleaved from the first *defined* lane
  // rather than from M[0] alone; otherwise an undef leading lane would force
  // WhichResult to 1 and reject valid low-half masks. An all-undef mask keeps
  // the historical answer (WhichResult == 1, match).
  WhichResult = 1;
  for (unsigned i = 0; i != NumElts; ++i) {
    if (M[i] < 0)
      continue;
    // Value lane i would hold if the low halves were being interleaved.
    unsigned LowHalfIdx = i / 2 + (i % 2 != 0 ? NumElts : 0);
    WhichResult = (static_cast<unsigned>(M[i]) == LowHalfIdx ? 0 : 1);
    break;
  }

  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned i = 0; i != NumElts; i += 2) {
    if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) ||
        (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts))
      return false;
    Idx += 1;
  }

  return true;
}
6638+
6639+
/// Check whether shuffle mask \p M picks every other lane of the two
/// concatenated inputs: result lane i must be 2*i + WhichResult, i.e. the
/// even lanes (\p WhichResult == 0) or the odd lanes (\p WhichResult == 1).
/// Presumably these correspond to UZP1 and UZP2 respectively -- confirm
/// against the lowering code that consumes \p WhichResult.
///
/// Negative mask entries are treated as undef and match anything.
///
/// \param M           Shuffle mask; must hold at least NumElts entries.
/// \param VT          Vector type that supplies the element count.
/// \param WhichResult Set to 0 or 1 as described above.
/// \returns true if \p M matches the de-interleave pattern.
inline bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
  unsigned NumElts = VT.getVectorNumElements();

  // Decide between even and odd lanes from the first *defined* lane rather
  // than from M[0] alone; otherwise an undef leading lane would force
  // WhichResult to 1 and reject valid even-lane masks such as <-1, 2, 4, 6>.
  // An all-undef mask keeps the historical answer (WhichResult == 1, match).
  WhichResult = 1;
  for (unsigned i = 0; i != NumElts; ++i) {
    if (M[i] < 0)
      continue;
    WhichResult = (static_cast<unsigned>(M[i]) == 2 * i ? 0 : 1);
    break;
  }

  for (unsigned i = 0; i != NumElts; ++i) {
    if (M[i] < 0)
      continue; // ignore UNDEF indices
    if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult)
      return false;
  }

  return true;
}
6651+
6652+
} // namespace llvm
6653+
66216654
#endif

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3932,6 +3932,16 @@ InstructionCost AArch64TTIImpl::getShuffleCost(
39323932
}))
39333933
return 0;
39343934

3935+
// Check for other shuffles that are not SK_ kinds but we have native
3936+
// instructions for, for example ZIP and UZP.
3937+
unsigned Unused;
3938+
if (LT.second.isFixedLengthVector() &&
3939+
LT.second.getVectorNumElements() == Mask.size() &&
3940+
(Kind == TTI::SK_PermuteTwoSrc || Kind == TTI::SK_PermuteSingleSrc) &&
3941+
(isZIPMask(Mask, LT.second, Unused) ||
3942+
isUZPMask(Mask, LT.second, Unused)))
3943+
return 1;
3944+
39353945
if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
39363946
Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc ||
39373947
Kind == TTI::SK_Reverse || Kind == TTI::SK_Splice) {

0 commit comments

Comments
 (0)