Skip to content

Commit 364e915

Browse files
committed
[VectorCombine][X86] foldShuffleOfCastops - fold shuffle(cast(x),cast(y)) -> cast(shuffle(x,y)) iff cost efficient
Based off the existing foldShuffleOfBinops fold Fixes #67803
1 parent d53b829 commit 364e915

File tree

3 files changed

+110
-46
lines changed

3 files changed

+110
-46
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ class VectorCombine {
112112
bool foldSingleElementStore(Instruction &I);
113113
bool scalarizeLoadExtract(Instruction &I);
114114
bool foldShuffleOfBinops(Instruction &I);
115+
bool foldShuffleOfCastops(Instruction &I);
115116
bool foldShuffleFromReductions(Instruction &I);
116117
bool foldTruncFromReductions(Instruction &I);
117118
bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
@@ -1432,6 +1433,75 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
14321433
return true;
14331434
}
14341435

1436+
/// Try to convert "shuffle (castop), (castop)" with a shared castop operand
1437+
/// into "castop (shuffle)".
1438+
bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
1439+
Value *V0, *V1;
1440+
ArrayRef<int> Mask;
1441+
if (!match(&I, m_Shuffle(m_OneUse(m_Value(V0)), m_OneUse(m_Value(V1)),
1442+
m_Mask(Mask))))
1443+
return false;
1444+
1445+
auto *C0 = dyn_cast<CastInst>(V0);
1446+
auto *C1 = dyn_cast<CastInst>(V1);
1447+
if (!C0 || !C1)
1448+
return false;
1449+
1450+
Instruction::CastOps Opcode = C0->getOpcode();
1451+
if (Opcode == Instruction::BitCast || C0->getSrcTy() != C1->getSrcTy())
1452+
return false;
1453+
1454+
// Handle shuffle(zext_nneg(x), sext(y)) -> sext(shuffle(x,y)) folds.
1455+
if (Opcode != C1->getOpcode()) {
1456+
if (match(C0, m_SExtLike(m_Value())) && match(C1, m_SExtLike(m_Value())))
1457+
Opcode = Instruction::SExt;
1458+
else
1459+
return false;
1460+
}
1461+
1462+
auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
1463+
auto *CastDstTy = dyn_cast<FixedVectorType>(C0->getDestTy());
1464+
auto *CastSrcTy = dyn_cast<FixedVectorType>(C0->getSrcTy());
1465+
if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
1466+
return false;
1467+
assert(CastDstTy->getElementCount() == CastSrcTy->getElementCount() &&
1468+
"Unexpected src/dst element counts");
1469+
1470+
auto *NewShuffleDstTy =
1471+
FixedVectorType::get(CastSrcTy->getScalarType(), Mask.size());
1472+
1473+
// Try to replace a castop with a shuffle if the shuffle is not costly.
1474+
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1475+
1476+
InstructionCost OldCost =
1477+
TTI.getCastInstrCost(C0->getOpcode(), CastDstTy, CastSrcTy,
1478+
TTI::CastContextHint::None, CostKind) +
1479+
TTI.getCastInstrCost(C1->getOpcode(), CastDstTy, CastSrcTy,
1480+
TTI::CastContextHint::None, CostKind);
1481+
OldCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
1482+
CastDstTy, Mask, CostKind);
1483+
1484+
InstructionCost NewCost = TTI.getShuffleCost(
1485+
TargetTransformInfo::SK_PermuteTwoSrc, CastSrcTy, Mask, CostKind);
1486+
NewCost += TTI.getCastInstrCost(Opcode, ShuffleDstTy, NewShuffleDstTy,
1487+
TTI::CastContextHint::None, CostKind);
1488+
if (NewCost > OldCost)
1489+
return false;
1490+
1491+
Value *Shuf =
1492+
Builder.CreateShuffleVector(C0->getOperand(0), C1->getOperand(0), Mask);
1493+
Value *Cast = Builder.CreateCast(Opcode, Shuf, ShuffleDstTy);
1494+
1495+
// Intersect flags from the old casts.
1496+
if (auto *NewInst = dyn_cast<Instruction>(Cast)) {
1497+
NewInst->copyIRFlags(C0);
1498+
NewInst->andIRFlags(C1);
1499+
}
1500+
1501+
replaceValue(I, *Cast);
1502+
return true;
1503+
}
1504+
14351505
/// Given a commutative reduction, the order of the input lanes does not alter
14361506
/// the results. We can use this to remove certain shuffles feeding the
14371507
/// reduction, removing the need to shuffle at all.
@@ -1986,6 +2056,7 @@ bool VectorCombine::run() {
19862056
break;
19872057
case Instruction::ShuffleVector:
19882058
MadeChange |= foldShuffleOfBinops(I);
2059+
MadeChange |= foldShuffleOfCastops(I);
19892060
MadeChange |= foldSelectShuffle(I);
19902061
break;
19912062
case Instruction::BitCast:

llvm/test/Transforms/PhaseOrdering/X86/pr67803.ll

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,7 @@ define <4 x i64> @PR67803(<4 x i64> %x, <4 x i64> %y, <4 x i64> %a, <4 x i64> %b
99
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[X:%.*]] to <8 x i32>
1010
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[Y:%.*]] to <8 x i32>
1111
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP0]], [[TMP1]]
12-
; CHECK-NEXT: [[CMP_I21:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13-
; CHECK-NEXT: [[SEXT_I22:%.*]] = sext <4 x i1> [[CMP_I21]] to <4 x i32>
14-
; CHECK-NEXT: [[CMP_I:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
15-
; CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
16-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[SEXT_I22]], <4 x i32> [[SEXT_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
12+
; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i32>
1713
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8>
1814
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <32 x i8> [[TMP5]], <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1915
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8>

llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll

Lines changed: 38 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@
66

77
define <16 x i32> @concat_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
88
; CHECK-LABEL: @concat_zext_v8i16_v16i32(
9-
; CHECK-NEXT: [[X0:%.*]] = zext <8 x i16> [[A0:%.*]] to <8 x i32>
10-
; CHECK-NEXT: [[X1:%.*]] = zext <8 x i16> [[A1:%.*]] to <8 x i32>
11-
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10+
; CHECK-NEXT: [[R:%.*]] = zext <16 x i16> [[TMP1]] to <16 x i32>
1211
; CHECK-NEXT: ret <16 x i32> [[R]]
1312
;
1413
%x0 = zext <8 x i16> %a0 to <8 x i32>
@@ -19,9 +18,8 @@ define <16 x i32> @concat_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
1918

2019
define <16 x i32> @concat_zext_nneg_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
2120
; CHECK-LABEL: @concat_zext_nneg_v8i16_v16i32(
22-
; CHECK-NEXT: [[X0:%.*]] = zext nneg <8 x i16> [[A0:%.*]] to <8 x i32>
23-
; CHECK-NEXT: [[X1:%.*]] = zext nneg <8 x i16> [[A1:%.*]] to <8 x i32>
24-
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
21+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22+
; CHECK-NEXT: [[R:%.*]] = zext nneg <16 x i16> [[TMP1]] to <16 x i32>
2523
; CHECK-NEXT: ret <16 x i32> [[R]]
2624
;
2725
%x0 = zext nneg <8 x i16> %a0 to <8 x i32>
@@ -30,13 +28,17 @@ define <16 x i32> @concat_zext_nneg_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
3028
ret <16 x i32> %r
3129
}
3230

33-
; TODO - sext + zext nneg -> sext
3431
define <16 x i32> @concat_sext_zext_nneg_v8i16_v8i32(<8 x i16> %a0, <8 x i16> %a1) {
35-
; CHECK-LABEL: @concat_sext_zext_nneg_v8i16_v8i32(
36-
; CHECK-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0:%.*]] to <8 x i32>
37-
; CHECK-NEXT: [[X1:%.*]] = zext nneg <8 x i16> [[A1:%.*]] to <8 x i32>
38-
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
39-
; CHECK-NEXT: ret <16 x i32> [[R]]
32+
; SSE-LABEL: @concat_sext_zext_nneg_v8i16_v8i32(
33+
; SSE-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0:%.*]] to <8 x i32>
34+
; SSE-NEXT: [[X1:%.*]] = zext nneg <8 x i16> [[A1:%.*]] to <8 x i32>
35+
; SSE-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
36+
; SSE-NEXT: ret <16 x i32> [[R]]
37+
;
38+
; AVX-LABEL: @concat_sext_zext_nneg_v8i16_v8i32(
39+
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
40+
; AVX-NEXT: [[R:%.*]] = sext <16 x i16> [[TMP1]] to <16 x i32>
41+
; AVX-NEXT: ret <16 x i32> [[R]]
4042
;
4143
%x0 = sext <8 x i16> %a0 to <8 x i32>
4244
%x1 = zext nneg <8 x i16> %a1 to <8 x i32>
@@ -46,9 +48,8 @@ define <16 x i32> @concat_sext_zext_nneg_v8i16_v8i32(<8 x i16> %a0, <8 x i16> %a
4648

4749
define <16 x i32> @concat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
4850
; CHECK-LABEL: @concat_sext_v8i16_v16i32(
49-
; CHECK-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0:%.*]] to <8 x i32>
50-
; CHECK-NEXT: [[X1:%.*]] = sext <8 x i16> [[A1:%.*]] to <8 x i32>
51-
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
51+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
52+
; CHECK-NEXT: [[R:%.*]] = sext <16 x i16> [[TMP1]] to <16 x i32>
5253
; CHECK-NEXT: ret <16 x i32> [[R]]
5354
;
5455
%x0 = sext <8 x i16> %a0 to <8 x i32>
@@ -59,9 +60,8 @@ define <16 x i32> @concat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
5960

6061
define <8 x i32> @concat_sext_v4i1_v8i32(<4 x i1> %a0, <4 x i1> %a1) {
6162
; CHECK-LABEL: @concat_sext_v4i1_v8i32(
62-
; CHECK-NEXT: [[X0:%.*]] = sext <4 x i1> [[A0:%.*]] to <4 x i32>
63-
; CHECK-NEXT: [[X1:%.*]] = sext <4 x i1> [[A1:%.*]] to <4 x i32>
64-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[X0]], <4 x i32> [[X1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
63+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A0:%.*]], <4 x i1> [[A1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
64+
; CHECK-NEXT: [[R:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i32>
6565
; CHECK-NEXT: ret <8 x i32> [[R]]
6666
;
6767
%x0 = sext <4 x i1> %a0 to <4 x i32>
@@ -72,9 +72,8 @@ define <8 x i32> @concat_sext_v4i1_v8i32(<4 x i1> %a0, <4 x i1> %a1) {
7272

7373
define <8 x i16> @concat_trunc_v4i32_v8i16(<4 x i32> %a0, <4 x i32> %a1) {
7474
; CHECK-LABEL: @concat_trunc_v4i32_v8i16(
75-
; CHECK-NEXT: [[X0:%.*]] = trunc <4 x i32> [[A0:%.*]] to <4 x i16>
76-
; CHECK-NEXT: [[X1:%.*]] = trunc <4 x i32> [[A1:%.*]] to <4 x i16>
77-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[X0]], <4 x i16> [[X1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
75+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
76+
; CHECK-NEXT: [[R:%.*]] = trunc <8 x i32> [[TMP1]] to <8 x i16>
7877
; CHECK-NEXT: ret <8 x i16> [[R]]
7978
;
8079
%x0 = trunc <4 x i32> %a0 to <4 x i16>
@@ -85,9 +84,8 @@ define <8 x i16> @concat_trunc_v4i32_v8i16(<4 x i32> %a0, <4 x i32> %a1) {
8584

8685
define <8 x ptr> @concat_inttoptr_v4i32_v8iptr(<4 x i32> %a0, <4 x i32> %a1) {
8786
; CHECK-LABEL: @concat_inttoptr_v4i32_v8iptr(
88-
; CHECK-NEXT: [[X0:%.*]] = inttoptr <4 x i32> [[A0:%.*]] to <4 x ptr>
89-
; CHECK-NEXT: [[X1:%.*]] = inttoptr <4 x i32> [[A1:%.*]] to <4 x ptr>
90-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x ptr> [[X0]], <4 x ptr> [[X1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
87+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
88+
; CHECK-NEXT: [[R:%.*]] = inttoptr <8 x i32> [[TMP1]] to <8 x ptr>
9189
; CHECK-NEXT: ret <8 x ptr> [[R]]
9290
;
9391
%x0 = inttoptr <4 x i32> %a0 to <4 x ptr>
@@ -98,9 +96,8 @@ define <8 x ptr> @concat_inttoptr_v4i32_v8iptr(<4 x i32> %a0, <4 x i32> %a1) {
9896

9997
define <16 x i64> @concat_ptrtoint_v8i16_v16i32(<8 x ptr> %a0, <8 x ptr> %a1) {
10098
; CHECK-LABEL: @concat_ptrtoint_v8i16_v16i32(
101-
; CHECK-NEXT: [[X0:%.*]] = ptrtoint <8 x ptr> [[A0:%.*]] to <8 x i64>
102-
; CHECK-NEXT: [[X1:%.*]] = ptrtoint <8 x ptr> [[A1:%.*]] to <8 x i64>
103-
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i64> [[X0]], <8 x i64> [[X1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
99+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[A0:%.*]], <8 x ptr> [[A1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
100+
; CHECK-NEXT: [[R:%.*]] = ptrtoint <16 x ptr> [[TMP1]] to <16 x i64>
104101
; CHECK-NEXT: ret <16 x i64> [[R]]
105102
;
106103
%x0 = ptrtoint <8 x ptr> %a0 to <8 x i64>
@@ -110,11 +107,16 @@ define <16 x i64> @concat_ptrtoint_v8i16_v16i32(<8 x ptr> %a0, <8 x ptr> %a1) {
110107
}
111108

112109
define <8 x double> @concat_fpext_v4f32_v8f64(<4 x float> %a0, <4 x float> %a1) {
113-
; CHECK-LABEL: @concat_fpext_v4f32_v8f64(
114-
; CHECK-NEXT: [[X0:%.*]] = fpext <4 x float> [[A0:%.*]] to <4 x double>
115-
; CHECK-NEXT: [[X1:%.*]] = fpext <4 x float> [[A1:%.*]] to <4 x double>
116-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x double> [[X0]], <4 x double> [[X1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
117-
; CHECK-NEXT: ret <8 x double> [[R]]
110+
; SSE-LABEL: @concat_fpext_v4f32_v8f64(
111+
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
112+
; SSE-NEXT: [[R:%.*]] = fpext <8 x float> [[TMP1]] to <8 x double>
113+
; SSE-NEXT: ret <8 x double> [[R]]
114+
;
115+
; AVX-LABEL: @concat_fpext_v4f32_v8f64(
116+
; AVX-NEXT: [[X0:%.*]] = fpext <4 x float> [[A0:%.*]] to <4 x double>
117+
; AVX-NEXT: [[X1:%.*]] = fpext <4 x float> [[A1:%.*]] to <4 x double>
118+
; AVX-NEXT: [[R:%.*]] = shufflevector <4 x double> [[X0]], <4 x double> [[X1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
119+
; AVX-NEXT: ret <8 x double> [[R]]
118120
;
119121
%x0 = fpext <4 x float> %a0 to <4 x double>
120122
%x1 = fpext <4 x float> %a1 to <4 x double>
@@ -139,9 +141,8 @@ define <16 x float> @concat_fptrunc_v8f64_v16f32(<8 x double> %a0, <8 x double>
139141

140142
define <16 x i32> @rconcat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
141143
; CHECK-LABEL: @rconcat_sext_v8i16_v16i32(
142-
; CHECK-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0:%.*]] to <8 x i32>
143-
; CHECK-NEXT: [[X1:%.*]] = sext <8 x i16> [[A1:%.*]] to <8 x i32>
144-
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
144+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
145+
; CHECK-NEXT: [[R:%.*]] = sext <16 x i16> [[TMP1]] to <16 x i32>
145146
; CHECK-NEXT: ret <16 x i32> [[R]]
146147
;
147148
%x0 = sext <8 x i16> %a0 to <8 x i32>
@@ -154,9 +155,8 @@ define <16 x i32> @rconcat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
154155

155156
define <8 x double> @interleave_fpext_v4f32_v8f64(<4 x float> %a0, <4 x float> %a1) {
156157
; CHECK-LABEL: @interleave_fpext_v4f32_v8f64(
157-
; CHECK-NEXT: [[X0:%.*]] = fpext <4 x float> [[A0:%.*]] to <4 x double>
158-
; CHECK-NEXT: [[X1:%.*]] = fpext <4 x float> [[A1:%.*]] to <4 x double>
159-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x double> [[X0]], <4 x double> [[X1]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
158+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
159+
; CHECK-NEXT: [[R:%.*]] = fpext <8 x float> [[TMP1]] to <8 x double>
160160
; CHECK-NEXT: ret <8 x double> [[R]]
161161
;
162162
%x0 = fpext <4 x float> %a0 to <4 x double>
@@ -226,6 +226,3 @@ define <16 x i32> @concat_sext_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
226226
%r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
227227
ret <16 x i32> %r
228228
}
229-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
230-
; AVX: {{.*}}
231-
; SSE: {{.*}}

0 commit comments

Comments
 (0)