Skip to content

Commit 178f471

Browse files
authored
[CostModel][X86] getShuffleCost - shuffles with only one defined element are always cheap (#124412)
If we're just moving a single element around inside a 128-bit lane (probably as an alternative to extracting it), we can assume this is cheap, i.e. the cost of a single PSRLDQ/PSHUFD/SHUFPS. I've got the horrid feeling that we're moving towards matching all SSE shuffle patterns inside the cost model, but I'm going to do my best to avoid this for now :|
1 parent cb6f021 commit 178f471

File tree

7 files changed

+76
-126
lines changed

7 files changed

+76
-126
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1565,19 +1565,25 @@ InstructionCost X86TTIImpl::getShuffleCost(
15651565

15661566
// Attempt to detect a cheaper inlane shuffle, avoiding 128-bit subvector
15671567
// permutation.
1568+
// Attempt to detect a shuffle mask with a single defined element.
15681569
bool IsInLaneShuffle = false;
1570+
bool IsSingleElementMask = false;
15691571
if (BaseTp->getPrimitiveSizeInBits() > 0 &&
15701572
(BaseTp->getPrimitiveSizeInBits() % 128) == 0 &&
15711573
BaseTp->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
15721574
Mask.size() == BaseTp->getElementCount().getKnownMinValue()) {
15731575
unsigned NumLanes = BaseTp->getPrimitiveSizeInBits() / 128;
15741576
unsigned NumEltsPerLane = Mask.size() / NumLanes;
1575-
if ((Mask.size() % NumLanes) == 0)
1577+
if ((Mask.size() % NumLanes) == 0) {
15761578
IsInLaneShuffle = all_of(enumerate(Mask), [&](const auto &P) {
15771579
return P.value() == PoisonMaskElem ||
15781580
((P.value() % Mask.size()) / NumEltsPerLane) ==
15791581
(P.index() / NumEltsPerLane);
15801582
});
1583+
IsSingleElementMask = (Mask.size() - 1) == count_if(Mask, [](int M) {
1584+
return M == PoisonMaskElem;
1585+
});
1586+
}
15811587
}
15821588

15831589
// Treat <X x bfloat> shuffles as <X x half>.
@@ -1791,6 +1797,11 @@ InstructionCost X86TTIImpl::getShuffleCost(
17911797
return BaseT::getShuffleCost(Kind, BaseTp, Mask, CostKind, Index, SubTp);
17921798
}
17931799

1800+
// If we're just moving a single element around (probably as an alternative to
1801+
// extracting it), we can assume this is cheap.
1802+
if (LT.first == 1 && IsInLaneShuffle && IsSingleElementMask)
1803+
return TTI::TCC_Basic;
1804+
17941805
static const CostTblEntry AVX512VBMIShuffleTbl[] = {
17951806
{TTI::SK_Reverse, MVT::v64i8, 1}, // vpermb
17961807
{TTI::SK_Reverse, MVT::v32i8, 1}, // vpermb

llvm/test/Analysis/CostModel/X86/reduction.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -638,7 +638,7 @@ define fastcc i16 @no_pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) {
638638
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx4 = add <8 x i16> %rdx, %rdx.shuf3
639639
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
640640
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %bin.rdx4, %rdx.shuf
641-
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
641+
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
642642
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7
643643
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i16> %bin.rdx8, i32 0
644644
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
@@ -1133,7 +1133,7 @@ define fastcc i16 @pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) {
11331133
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
11341134
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %rdx.shuf.1.0, %rdx.shuf.1.1
11351135
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1136-
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1136+
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
11371137
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1
11381138
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0
11391139
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r

llvm/test/Transforms/PhaseOrdering/X86/hadd.ll

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -59,18 +59,15 @@ define <8 x i16> @add_v8i16_01234567(<8 x i16> %a, <8 x i16> %b) {
5959

6060
define <8 x i16> @add_v8i16_u1234567(<8 x i16> %a, <8 x i16> %b) {
6161
; SSE2-LABEL: @add_v8i16_u1234567(
62-
; SSE2-NEXT: [[A2:%.*]] = extractelement <8 x i16> [[A:%.*]], i64 2
63-
; SSE2-NEXT: [[A3:%.*]] = extractelement <8 x i16> [[A]], i64 3
64-
; SSE2-NEXT: [[A4:%.*]] = extractelement <8 x i16> [[A]], i64 4
65-
; SSE2-NEXT: [[A5:%.*]] = extractelement <8 x i16> [[A]], i64 5
66-
; SSE2-NEXT: [[A6:%.*]] = extractelement <8 x i16> [[A]], i64 6
67-
; SSE2-NEXT: [[A7:%.*]] = extractelement <8 x i16> [[A]], i64 7
68-
; SSE2-NEXT: [[A23:%.*]] = add i16 [[A2]], [[A3]]
69-
; SSE2-NEXT: [[A45:%.*]] = add i16 [[A4]], [[A5]]
70-
; SSE2-NEXT: [[A67:%.*]] = add i16 [[A6]], [[A7]]
71-
; SSE2-NEXT: [[HADD1:%.*]] = insertelement <8 x i16> poison, i16 [[A23]], i64 1
72-
; SSE2-NEXT: [[HADD2:%.*]] = insertelement <8 x i16> [[HADD1]], i16 [[A45]], i64 2
73-
; SSE2-NEXT: [[HADD3:%.*]] = insertelement <8 x i16> [[HADD2]], i16 [[A67]], i64 3
62+
; SSE2-NEXT: [[SHIFT2:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 5, i32 poison, i32 poison, i32 poison>
63+
; SSE2-NEXT: [[TMP5:%.*]] = add <8 x i16> [[A]], [[SHIFT2]]
64+
; SSE2-NEXT: [[SHIFT3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 7, i32 poison>
65+
; SSE2-NEXT: [[TMP6:%.*]] = add <8 x i16> [[A]], [[SHIFT3]]
66+
; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
67+
; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
68+
; SSE2-NEXT: [[HADD1:%.*]] = add <8 x i16> [[TMP7]], [[TMP4]]
69+
; SSE2-NEXT: [[HADD2:%.*]] = shufflevector <8 x i16> [[HADD1]], <8 x i16> [[TMP5]], <8 x i32> <i32 poison, i32 1, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
70+
; SSE2-NEXT: [[HADD3:%.*]] = shufflevector <8 x i16> [[HADD2]], <8 x i16> [[TMP6]], <8 x i32> <i32 poison, i32 1, i32 2, i32 14, i32 poison, i32 poison, i32 poison, i32 poison>
7471
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
7572
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
7673
; SSE2-NEXT: [[TMP3:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]]

llvm/test/Transforms/PhaseOrdering/X86/hsub.ll

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -59,18 +59,15 @@ define <8 x i16> @sub_v8i16_01234567(<8 x i16> %a, <8 x i16> %b) {
5959

6060
define <8 x i16> @sub_v8i16_u1234567(<8 x i16> %a, <8 x i16> %b) {
6161
; SSE2-LABEL: @sub_v8i16_u1234567(
62-
; SSE2-NEXT: [[A2:%.*]] = extractelement <8 x i16> [[A:%.*]], i64 2
63-
; SSE2-NEXT: [[A3:%.*]] = extractelement <8 x i16> [[A]], i64 3
64-
; SSE2-NEXT: [[A4:%.*]] = extractelement <8 x i16> [[A]], i64 4
65-
; SSE2-NEXT: [[A5:%.*]] = extractelement <8 x i16> [[A]], i64 5
66-
; SSE2-NEXT: [[A6:%.*]] = extractelement <8 x i16> [[A]], i64 6
67-
; SSE2-NEXT: [[A7:%.*]] = extractelement <8 x i16> [[A]], i64 7
68-
; SSE2-NEXT: [[A23:%.*]] = sub i16 [[A2]], [[A3]]
69-
; SSE2-NEXT: [[A45:%.*]] = sub i16 [[A4]], [[A5]]
70-
; SSE2-NEXT: [[A67:%.*]] = sub i16 [[A6]], [[A7]]
71-
; SSE2-NEXT: [[HSUB1:%.*]] = insertelement <8 x i16> poison, i16 [[A23]], i64 1
72-
; SSE2-NEXT: [[HSUB2:%.*]] = insertelement <8 x i16> [[HSUB1]], i16 [[A45]], i64 2
73-
; SSE2-NEXT: [[HSUB3:%.*]] = insertelement <8 x i16> [[HSUB2]], i16 [[A67]], i64 3
62+
; SSE2-NEXT: [[SHIFT2:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 5, i32 poison, i32 poison, i32 poison>
63+
; SSE2-NEXT: [[TMP5:%.*]] = sub <8 x i16> [[A]], [[SHIFT2]]
64+
; SSE2-NEXT: [[SHIFT3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 7, i32 poison>
65+
; SSE2-NEXT: [[TMP6:%.*]] = sub <8 x i16> [[A]], [[SHIFT3]]
66+
; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
67+
; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
68+
; SSE2-NEXT: [[HSUB1:%.*]] = sub <8 x i16> [[TMP7]], [[TMP4]]
69+
; SSE2-NEXT: [[HSUB2:%.*]] = shufflevector <8 x i16> [[HSUB1]], <8 x i16> [[TMP5]], <8 x i32> <i32 poison, i32 1, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
70+
; SSE2-NEXT: [[HSUB3:%.*]] = shufflevector <8 x i16> [[HSUB2]], <8 x i16> [[TMP6]], <8 x i32> <i32 poison, i32 1, i32 2, i32 14, i32 poison, i32 poison, i32 poison, i32 poison>
7471
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
7572
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
7673
; SSE2-NEXT: [[TMP3:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]]

llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll

Lines changed: 15 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -252,17 +252,11 @@ define i8 @ext1_ext1_add_uses2(<16 x i8> %x, <16 x i8> %y) {
252252
}
253253

254254
define i8 @ext0_ext1_add(<16 x i8> %x, <16 x i8> %y) {
255-
; SSE-LABEL: @ext0_ext1_add(
256-
; SSE-NEXT: [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
257-
; SSE-NEXT: [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 1
258-
; SSE-NEXT: [[R:%.*]] = add nuw i8 [[E0]], [[E1]]
259-
; SSE-NEXT: ret i8 [[R]]
260-
;
261-
; AVX-LABEL: @ext0_ext1_add(
262-
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
263-
; AVX-NEXT: [[TMP1:%.*]] = add nuw <16 x i8> [[X:%.*]], [[SHIFT]]
264-
; AVX-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
265-
; AVX-NEXT: ret i8 [[R]]
255+
; CHECK-LABEL: @ext0_ext1_add(
256+
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
257+
; CHECK-NEXT: [[TMP1:%.*]] = add nuw <16 x i8> [[X:%.*]], [[SHIFT]]
258+
; CHECK-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
259+
; CHECK-NEXT: ret i8 [[R]]
266260
;
267261
%e0 = extractelement <16 x i8> %x, i32 0
268262
%e1 = extractelement <16 x i8> %y, i32 1
@@ -271,17 +265,11 @@ define i8 @ext0_ext1_add(<16 x i8> %x, <16 x i8> %y) {
271265
}
272266

273267
define i8 @ext5_ext0_add(<16 x i8> %x, <16 x i8> %y) {
274-
; SSE-LABEL: @ext5_ext0_add(
275-
; SSE-NEXT: [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 5
276-
; SSE-NEXT: [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 0
277-
; SSE-NEXT: [[R:%.*]] = sub nsw i8 [[E0]], [[E1]]
278-
; SSE-NEXT: ret i8 [[R]]
279-
;
280-
; AVX-LABEL: @ext5_ext0_add(
281-
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> <i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
282-
; AVX-NEXT: [[TMP1:%.*]] = sub nsw <16 x i8> [[SHIFT]], [[Y:%.*]]
283-
; AVX-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i64 0
284-
; AVX-NEXT: ret i8 [[R]]
268+
; CHECK-LABEL: @ext5_ext0_add(
269+
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> <i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
270+
; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <16 x i8> [[SHIFT]], [[Y:%.*]]
271+
; CHECK-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i64 0
272+
; CHECK-NEXT: ret i8 [[R]]
285273
;
286274
%e0 = extractelement <16 x i8> %x, i32 5
287275
%e1 = extractelement <16 x i8> %y, i32 0
@@ -290,17 +278,11 @@ define i8 @ext5_ext0_add(<16 x i8> %x, <16 x i8> %y) {
290278
}
291279

292280
define i8 @ext1_ext6_add(<16 x i8> %x, <16 x i8> %y) {
293-
; SSE-LABEL: @ext1_ext6_add(
294-
; SSE-NEXT: [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 1
295-
; SSE-NEXT: [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 6
296-
; SSE-NEXT: [[R:%.*]] = and i8 [[E0]], [[E1]]
297-
; SSE-NEXT: ret i8 [[R]]
298-
;
299-
; AVX-LABEL: @ext1_ext6_add(
300-
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> <i32 poison, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
301-
; AVX-NEXT: [[TMP1:%.*]] = and <16 x i8> [[X:%.*]], [[SHIFT]]
302-
; AVX-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 1
303-
; AVX-NEXT: ret i8 [[R]]
281+
; CHECK-LABEL: @ext1_ext6_add(
282+
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> <i32 poison, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
283+
; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i8> [[X:%.*]], [[SHIFT]]
284+
; CHECK-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 1
285+
; CHECK-NEXT: ret i8 [[R]]
304286
;
305287
%e0 = extractelement <16 x i8> %x, i32 1
306288
%e1 = extractelement <16 x i8> %y, i32 6

llvm/test/Transforms/VectorCombine/X86/extract-binop.ll

Lines changed: 15 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -252,17 +252,11 @@ define i8 @ext1_ext1_add_uses2(<16 x i8> %x, <16 x i8> %y) {
252252
}
253253

254254
define i8 @ext0_ext1_add(<16 x i8> %x, <16 x i8> %y) {
255-
; SSE-LABEL: @ext0_ext1_add(
256-
; SSE-NEXT: [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
257-
; SSE-NEXT: [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 1
258-
; SSE-NEXT: [[R:%.*]] = add nuw i8 [[E0]], [[E1]]
259-
; SSE-NEXT: ret i8 [[R]]
260-
;
261-
; AVX-LABEL: @ext0_ext1_add(
262-
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
263-
; AVX-NEXT: [[TMP1:%.*]] = add nuw <16 x i8> [[X:%.*]], [[SHIFT]]
264-
; AVX-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
265-
; AVX-NEXT: ret i8 [[R]]
255+
; CHECK-LABEL: @ext0_ext1_add(
256+
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
257+
; CHECK-NEXT: [[TMP1:%.*]] = add nuw <16 x i8> [[X:%.*]], [[SHIFT]]
258+
; CHECK-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
259+
; CHECK-NEXT: ret i8 [[R]]
266260
;
267261
%e0 = extractelement <16 x i8> %x, i32 0
268262
%e1 = extractelement <16 x i8> %y, i32 1
@@ -271,17 +265,11 @@ define i8 @ext0_ext1_add(<16 x i8> %x, <16 x i8> %y) {
271265
}
272266

273267
define i8 @ext5_ext0_add(<16 x i8> %x, <16 x i8> %y) {
274-
; SSE-LABEL: @ext5_ext0_add(
275-
; SSE-NEXT: [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 5
276-
; SSE-NEXT: [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 0
277-
; SSE-NEXT: [[R:%.*]] = sub nsw i8 [[E0]], [[E1]]
278-
; SSE-NEXT: ret i8 [[R]]
279-
;
280-
; AVX-LABEL: @ext5_ext0_add(
281-
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> <i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
282-
; AVX-NEXT: [[TMP1:%.*]] = sub nsw <16 x i8> [[SHIFT]], [[Y:%.*]]
283-
; AVX-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i64 0
284-
; AVX-NEXT: ret i8 [[R]]
268+
; CHECK-LABEL: @ext5_ext0_add(
269+
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> <i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
270+
; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <16 x i8> [[SHIFT]], [[Y:%.*]]
271+
; CHECK-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i64 0
272+
; CHECK-NEXT: ret i8 [[R]]
285273
;
286274
%e0 = extractelement <16 x i8> %x, i32 5
287275
%e1 = extractelement <16 x i8> %y, i32 0
@@ -290,17 +278,11 @@ define i8 @ext5_ext0_add(<16 x i8> %x, <16 x i8> %y) {
290278
}
291279

292280
define i8 @ext1_ext6_add(<16 x i8> %x, <16 x i8> %y) {
293-
; SSE-LABEL: @ext1_ext6_add(
294-
; SSE-NEXT: [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 1
295-
; SSE-NEXT: [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 6
296-
; SSE-NEXT: [[R:%.*]] = and i8 [[E0]], [[E1]]
297-
; SSE-NEXT: ret i8 [[R]]
298-
;
299-
; AVX-LABEL: @ext1_ext6_add(
300-
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> <i32 poison, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
301-
; AVX-NEXT: [[TMP1:%.*]] = and <16 x i8> [[X:%.*]], [[SHIFT]]
302-
; AVX-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 1
303-
; AVX-NEXT: ret i8 [[R]]
281+
; CHECK-LABEL: @ext1_ext6_add(
282+
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> <i32 poison, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
283+
; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i8> [[X:%.*]], [[SHIFT]]
284+
; CHECK-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 1
285+
; CHECK-NEXT: ret i8 [[R]]
304286
;
305287
%e0 = extractelement <16 x i8> %x, i32 1
306288
%e1 = extractelement <16 x i8> %y, i32 6

0 commit comments

Comments
 (0)