Skip to content

Commit 4a4987b

Browse files
committed
[SystemZ] Optimize vector zero/sign extensions
Generate more efficient code for zero or sign extensions where the source is a subvector generated via VECTOR_SHUFFLE. Specifically, recognize patterns corresponding to a (series of) VECTOR UNPACK instruction(s), or to the VECTOR SIGN EXTEND TO DOUBLEWORD instruction. As a special case, also handle zero or sign extensions of a vector element to i128. Fixes: #129576. Fixes: #129899.
1 parent cdc7864 commit 4a4987b

10 files changed

+664
-55
lines changed

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

+75-11
Original file line numberDiff line numberDiff line change
@@ -5800,7 +5800,8 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
58005800
namespace {
58015801
// Describes a general N-operand vector shuffle.
58025802
struct GeneralShuffle {
5803-
GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
5803+
GeneralShuffle(EVT vt)
5804+
: VT(vt), UnpackFromEltSize(UINT_MAX), UnpackLow(false) {}
58045805
void addUndef();
58055806
bool add(SDValue, unsigned);
58065807
SDValue getNode(SelectionDAG &, const SDLoc &);
@@ -5821,8 +5822,10 @@ struct GeneralShuffle {
58215822

58225823
// Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
58235824
unsigned UnpackFromEltSize;
5825+
// True if the final unpack uses the low half.
5826+
bool UnpackLow;
58245827
};
5825-
}
5828+
} // namespace
58265829

58275830
// Add an extra undefined element to the shuffle.
58285831
void GeneralShuffle::addUndef() {
@@ -6027,11 +6030,21 @@ void GeneralShuffle::tryPrepareForUnpack() {
60276030
if (MatchUnpack) {
60286031
if (Ops.size() == 2) {
60296032
// Don't use unpack if a single source operand needs rearrangement.
6030-
for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
6031-
if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
6033+
bool CanUseUnpackLow = true, CanUseUnpackHigh = true;
6034+
for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) {
6035+
if (SrcBytes[i] == -1)
6036+
continue;
6037+
if (SrcBytes[i] % 16 != int(i))
6038+
CanUseUnpackHigh = false;
6039+
if (SrcBytes[i] % 16 != int(i + SystemZ::VectorBytes / 2))
6040+
CanUseUnpackLow = false;
6041+
if (!CanUseUnpackLow && !CanUseUnpackHigh) {
60326042
UnpackFromEltSize = UINT_MAX;
60336043
return;
60346044
}
6045+
}
6046+
if (!CanUseUnpackHigh)
6047+
UnpackLow = true;
60356048
}
60366049
break;
60376050
}
@@ -6046,13 +6059,19 @@ void GeneralShuffle::tryPrepareForUnpack() {
60466059

60476060
// Apply the unpack in reverse to the Bytes array.
60486061
unsigned B = 0;
6062+
if (UnpackLow) {
6063+
while (B < SystemZ::VectorBytes / 2)
6064+
Bytes[B++] = -1;
6065+
}
60496066
for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
60506067
Elt += UnpackFromEltSize;
60516068
for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
60526069
Bytes[B] = Bytes[Elt];
60536070
}
6054-
while (B < SystemZ::VectorBytes)
6055-
Bytes[B++] = -1;
6071+
if (!UnpackLow) {
6072+
while (B < SystemZ::VectorBytes)
6073+
Bytes[B++] = -1;
6074+
}
60566075

60576076
// Remove the zero vector from Ops
60586077
Ops.erase(&Ops[ZeroVecOpNo]);
@@ -6079,7 +6098,9 @@ SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
60796098
unsigned OutBits = InBits * 2;
60806099
EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
60816100
SystemZ::VectorBits / OutBits);
6082-
return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
6101+
return DAG.getNode(UnpackLow ? SystemZISD::UNPACKL_LOW
6102+
: SystemZISD::UNPACKL_HIGH,
6103+
DL, OutVT, PackedOp);
60836104
}
60846105

60856106
// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
@@ -6486,12 +6507,55 @@ lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
64866507
EVT InVT = PackedOp.getValueType();
64876508
unsigned ToBits = OutVT.getScalarSizeInBits();
64886509
unsigned FromBits = InVT.getScalarSizeInBits();
6510+
unsigned StartOffset = 0;
6511+
6512+
// If the input is a VECTOR_SHUFFLE, there are a number of important
6513+
// cases where we can directly implement the sign-extension of the
6514+
// original input lanes of the shuffle.
6515+
if (PackedOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
6516+
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(PackedOp.getNode());
6517+
ArrayRef<int> ShuffleMask = SVN->getMask();
6518+
int OutNumElts = OutVT.getVectorNumElements();
6519+
6520+
// Recognize the special case where the sign-extension can be done
6521+
// by the VSEG instruction. Handled via the default expander.
6522+
if (ToBits == 64 && OutNumElts == 2) {
6523+
int NumElem = ToBits / FromBits;
6524+
if (ShuffleMask[0] == NumElem - 1 && ShuffleMask[1] == 2 * NumElem - 1)
6525+
return SDValue();
6526+
}
6527+
6528+
// Recognize the special case where we can fold the shuffle by
6529+
// replacing some of the UNPACK_HIGH with UNPACK_LOW.
6530+
int StartOffsetCandidate = -1;
6531+
for (int Elt = 0; Elt < OutNumElts; Elt++) {
6532+
if (ShuffleMask[Elt] == -1)
6533+
continue;
6534+
if (ShuffleMask[Elt] % OutNumElts == Elt) {
6535+
if (StartOffsetCandidate == -1)
6536+
StartOffsetCandidate = ShuffleMask[Elt] - Elt;
6537+
if (StartOffsetCandidate == ShuffleMask[Elt] - Elt)
6538+
continue;
6539+
}
6540+
StartOffsetCandidate = -1;
6541+
break;
6542+
}
6543+
if (StartOffsetCandidate != -1) {
6544+
StartOffset = StartOffsetCandidate;
6545+
PackedOp = PackedOp.getOperand(0);
6546+
}
6547+
}
6548+
64896549
do {
64906550
FromBits *= 2;
6491-
EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
6492-
SystemZ::VectorBits / FromBits);
6493-
PackedOp =
6494-
DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
6551+
unsigned OutNumElts = SystemZ::VectorBits / FromBits;
6552+
EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), OutNumElts);
6553+
unsigned Opcode = SystemZISD::UNPACK_HIGH;
6554+
if (StartOffset >= OutNumElts) {
6555+
Opcode = SystemZISD::UNPACK_LOW;
6556+
StartOffset -= OutNumElts;
6557+
}
6558+
PackedOp = DAG.getNode(Opcode, SDLoc(PackedOp), OutVT, PackedOp);
64956559
} while (FromBits != ToBits);
64966560
return PackedOp;
64976561
}

llvm/lib/Target/SystemZ/SystemZInstrVector.td

+32
Original file line numberDiff line numberDiff line change
@@ -1970,6 +1970,22 @@ let Predicates = [FeatureVector] in {
19701970
(VLEG (VGBM 0), bdxaddr12only:$addr, 1)>;
19711971
}
19721972

1973+
// Zero-extensions from VR element to i128 on arch15.
1974+
let Predicates = [FeatureVectorEnhancements3] in {
1975+
def : Pat<(i128 (zext (i64 (z_vector_extract (v2i64 VR128:$src), 0)))),
1976+
(VUPLHG VR128:$src)>;
1977+
def : Pat<(i128 (zext (i64 (z_vector_extract (v2i64 VR128:$src), 1)))),
1978+
(VUPLLG VR128:$src)>;
1979+
def : Pat<(i128 (zext (i32 (z_vector_extract (v4i32 VR128:$src), 0)))),
1980+
(VUPLHG (VUPLHF VR128:$src))>;
1981+
def : Pat<(i128 (zext (i32 (z_vector_extract (v4i32 VR128:$src), 1)))),
1982+
(VUPLHG (VUPLLF VR128:$src))>;
1983+
def : Pat<(i128 (zext (i32 (z_vector_extract (v4i32 VR128:$src), 2)))),
1984+
(VUPLLG (VUPLHF VR128:$src))>;
1985+
def : Pat<(i128 (zext (i32 (z_vector_extract (v4i32 VR128:$src), 3)))),
1986+
(VUPLLG (VUPLLF VR128:$src))>;
1987+
}
1988+
19731989
// In-register i128 sign-extensions on arch15.
19741990
let Predicates = [FeatureVectorEnhancements3] in {
19751991
def : Pat<(i128 (sext_inreg VR128:$x, i8)), (VUPLG (VSEGB VR128:$x))>;
@@ -2034,6 +2050,22 @@ let Predicates = [FeatureVector] in {
20342050
(VSRAB (VLREPG bdxaddr12only:$addr), (VREPIB 64))>;
20352051
}
20362052

2053+
// Sign-extensions from VR element to i128 on arch15.
2054+
let Predicates = [FeatureVectorEnhancements3] in {
2055+
def : Pat<(i128 (sext (i64 (z_vector_extract (v2i64 VR128:$src), 0)))),
2056+
(VUPHG VR128:$src)>;
2057+
def : Pat<(i128 (sext (i64 (z_vector_extract (v2i64 VR128:$src), 1)))),
2058+
(VUPLG VR128:$src)>;
2059+
def : Pat<(i128 (sext (i32 (z_vector_extract (v4i32 VR128:$src), 0)))),
2060+
(VUPHG (VUPHF VR128:$src))>;
2061+
def : Pat<(i128 (sext (i32 (z_vector_extract (v4i32 VR128:$src), 1)))),
2062+
(VUPHG (VUPLF VR128:$src))>;
2063+
def : Pat<(i128 (sext (i32 (z_vector_extract (v4i32 VR128:$src), 2)))),
2064+
(VUPLG (VUPHF VR128:$src))>;
2065+
def : Pat<(i128 (sext (i32 (z_vector_extract (v4i32 VR128:$src), 3)))),
2066+
(VUPLG (VUPLF VR128:$src))>;
2067+
}
2068+
20372069
// i128 comparison pseudo-instructions.
20382070
let Predicates = [FeatureVector], Defs = [CC],
20392071
usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {

llvm/test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll

+19-27
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,7 @@ define <16 x i16> @fun3(<16 x i8> %val1, <16 x i8> %val2, <16 x i16> %val3, <16
5858
; CHECK: # %bb.0:
5959
; CHECK-DAG: vceqb [[REG0:%v[0-9]+]], %v24, %v26
6060
; CHECK-DAG: vuphb [[REG2:%v[0-9]+]], [[REG0]]
61-
; CHECK-DAG: vmrlg [[REG1:%v[0-9]+]], [[REG0]], [[REG0]]
62-
; CHECK-DAG: vuphb [[REG1]], [[REG1]]
61+
; CHECK-DAG: vuplb [[REG1:%v[0-9]+]], [[REG0]]
6362
; CHECK-DAG: vceqh [[REG3:%v[0-9]+]], %v28, %v25
6463
; CHECK-DAG: vceqh [[REG4:%v[0-9]+]], %v30, %v27
6564
; CHECK-DAG: vl [[REG5:%v[0-9]+]], 176(%r15)
@@ -186,10 +185,9 @@ define <8 x i32> @fun10(<8 x i16> %val1, <8 x i16> %val2, <8 x i16> %val3, <8 x
186185
; CHECK-DAG: vceqh [[REG1:%v[0-9]+]], %v28, %v30
187186
; CHECK-NEXT: vx [[REG2:%v[0-9]+]], [[REG0]], [[REG1]]
188187
; CHECK-DAG: vuphh [[REG3:%v[0-9]+]], [[REG2]]
189-
; CHECK-DAG: vmrlg [[REG4:%v[0-9]+]], [[REG2]], [[REG2]]
190-
; CHECK-DAG: vuphh [[REG5:%v[0-9]+]], [[REG4]]
188+
; CHECK-DAG: vuplhw [[REG4:%v[0-9]+]], [[REG2]]
191189
; CHECK-NEXT: vsel %v24, %v25, %v29, [[REG3]]
192-
; CHECK-NEXT: vsel %v26, %v27, %v31, [[REG5]]
190+
; CHECK-NEXT: vsel %v26, %v27, %v31, [[REG4]]
193191
; CHECK-NEXT: br %r14
194192
%cmp0 = icmp eq <8 x i16> %val1, %val2
195193
%cmp1 = icmp eq <8 x i16> %val3, %val4
@@ -347,10 +345,9 @@ define <4 x i64> @fun18(<4 x i32> %val1, <4 x i32> %val2, <4 x i16> %val3, <4 x
347345
; CHECK-NEXT: vuphh %v1, %v1
348346
; CHECK-NEXT: vn %v0, %v0, %v1
349347
; CHECK-DAG: vuphf [[REG0:%v[0-9]+]], %v0
350-
; CHECK-DAG: vmrlg [[REG1:%v[0-9]+]], %v0, %v0
351-
; CHECK-DAG: vuphf [[REG2:%v[0-9]+]], [[REG1]]
348+
; CHECK-DAG: vuplf [[REG1:%v[0-9]+]], %v0
352349
; CHECK-NEXT: vsel %v24, %v25, %v29, [[REG0]]
353-
; CHECK-NEXT: vsel %v26, %v27, %v31, [[REG2]]
350+
; CHECK-NEXT: vsel %v26, %v27, %v31, [[REG1]]
354351
; CHECK-NEXT: br %r14
355352
%cmp0 = icmp eq <4 x i32> %val1, %val2
356353
%cmp1 = icmp eq <4 x i16> %val3, %val4
@@ -455,14 +452,13 @@ define <4 x i64> @fun24(<4 x i64> %val1, <4 x i64> %val2, <4 x i32> %val3, <4 x
455452
; CHECK-LABEL: fun24:
456453
; CHECK: # %bb.0:
457454
; CHECK-NEXT: vceqf [[REG0:%v[0-9]+]], %v25, %v27
458-
; CHECK-NEXT: vuphf [[REG1:%v[0-9]+]], [[REG0]]
459-
; CHECK-NEXT: vmrlg [[REG2:%v[0-9]+]], [[REG0]], [[REG0]]
455+
; CHECK-DAG: vuphf [[REG1:%v[0-9]+]], [[REG0]]
456+
; CHECK-DAG: vuplf [[REG2:%v[0-9]+]], [[REG0]]
460457
; CHECK-DAG: vceqg [[REG3:%v[0-9]+]], %v24, %v28
461458
; CHECK-DAG: vceqg [[REG4:%v[0-9]+]], %v26, %v30
462-
; CHECK-DAG: vuphf [[REG5:%v[0-9]+]], [[REG2]]
463459
; CHECK-DAG: vl [[REG6:%v[0-9]+]], 176(%r15)
464460
; CHECK-DAG: vl [[REG7:%v[0-9]+]], 160(%r15)
465-
; CHECK-DAG: vx [[REG8:%v[0-9]+]], [[REG4]], [[REG5]]
461+
; CHECK-DAG: vx [[REG8:%v[0-9]+]], [[REG4]], [[REG2]]
466462
; CHECK-DAG: vx [[REG9:%v[0-9]+]], [[REG3]], [[REG1]]
467463
; CHECK-DAG: vsel %v24, %v29, [[REG7]], [[REG9]]
468464
; CHECK-DAG: vsel %v26, %v31, [[REG6]], [[REG8]]
@@ -631,8 +627,7 @@ define <4 x double> @fun29(<4 x float> %val1, <4 x float> %val2, <4 x float> %va
631627
; CHECK-NEXT: vfchdb %v2, %v3, %v2
632628
; CHECK-NEXT: vpkg %v1, %v2, %v1
633629
; CHECK-NEXT: vx %v0, %v0, %v1
634-
; CHECK-NEXT: vmrlg %v1, %v0, %v0
635-
; CHECK-NEXT: vuphf %v1, %v1
630+
; CHECK-NEXT: vuplf %v1, %v0
636631
; CHECK-NEXT: vuphf %v0, %v0
637632
; CHECK-NEXT: vsel %v24, %v25, %v29, %v0
638633
; CHECK-NEXT: vsel %v26, %v27, %v31, %v1
@@ -643,8 +638,7 @@ define <4 x double> @fun29(<4 x float> %val1, <4 x float> %val2, <4 x float> %va
643638
; CHECK-Z14-NEXT: vfchsb %v0, %v24, %v26
644639
; CHECK-Z14-NEXT: vfchsb %v1, %v28, %v30
645640
; CHECK-Z14-NEXT: vx %v0, %v0, %v1
646-
; CHECK-Z14-NEXT: vmrlg %v1, %v0, %v0
647-
; CHECK-Z14-NEXT: vuphf %v1, %v1
641+
; CHECK-Z14-NEXT: vuplf %v1, %v0
648642
; CHECK-Z14-NEXT: vuphf %v0, %v0
649643
; CHECK-Z14-NEXT: vsel %v24, %v25, %v29, %v0
650644
; CHECK-Z14-NEXT: vsel %v26, %v27, %v31, %v1
@@ -816,11 +810,10 @@ define <4 x double> @fun34(<4 x double> %val1, <4 x double> %val2, <4 x float> %
816810
; CHECK-DAG: vfchdb [[REG11:%v[0-9]+]], [[REG9]], [[REG7]]
817811
; CHECK-DAG: vpkg [[REG12:%v[0-9]+]], [[REG11]], [[REG4]]
818812
; CHECK-DAG: vuphf [[REG13:%v[0-9]+]], [[REG12]]
819-
; CHECK-DAG: vmrlg [[REG14:%v[0-9]+]], [[REG12]], [[REG12]]
820-
; CHECK-NEXT: vfchdb [[REG15:%v[0-9]+]], %v24, %v28
821-
; CHECK-NEXT: vfchdb [[REG16:%v[0-9]+]], %v26, %v30
822-
; CHECK-NEXT: vuphf [[REG17:%v[0-9]+]], [[REG14]]
823-
; CHECK-NEXT: vn [[REG18:%v[0-9]+]], [[REG16]], [[REG17]]
813+
; CHECK-DAG: vuplf [[REG14:%v[0-9]+]], [[REG12]]
814+
; CHECK-DAG: vfchdb [[REG15:%v[0-9]+]], %v24, %v28
815+
; CHECK-DAG: vfchdb [[REG16:%v[0-9]+]], %v26, %v30
816+
; CHECK-NEXT: vn [[REG18:%v[0-9]+]], [[REG16]], [[REG14]]
824817
; CHECK-NEXT: vn [[REG19:%v[0-9]+]], [[REG15]], [[REG13]]
825818
; CHECK-NEXT: vsel %v24, %v29, [[REG10]], [[REG19]]
826819
; CHECK-NEXT: vsel %v26, %v31, [[REG8]], [[REG18]]
@@ -829,13 +822,12 @@ define <4 x double> @fun34(<4 x double> %val1, <4 x double> %val2, <4 x float> %
829822
; CHECK-Z14-LABEL: fun34:
830823
; CHECK-Z14: # %bb.0:
831824
; CHECK-Z14-NEXT: vfchsb %v4, %v25, %v27
825+
; CHECK-Z14-NEXT: vl %v0, 176(%r15)
826+
; CHECK-Z14-NEXT: vl %v1, 160(%r15)
827+
; CHECK-Z14-NEXT: vfchdb %v2, %v24, %v28
828+
; CHECK-Z14-NEXT: vfchdb %v3, %v26, %v30
832829
; CHECK-Z14-NEXT: vuphf %v5, %v4
833-
; CHECK-Z14-NEXT: vmrlg %v4, %v4, %v4
834-
; CHECK-Z14-DAG: vfchdb %v2, %v24, %v28
835-
; CHECK-Z14-DAG: vfchdb %v3, %v26, %v30
836-
; CHECK-Z14-DAG: vuphf %v4, %v4
837-
; CHECK-Z14-DAG: vl %v0, 176(%r15)
838-
; CHECK-Z14-DAG: vl %v1, 160(%r15)
830+
; CHECK-Z14-NEXT: vuplf %v4, %v4
839831
; CHECK-Z14-NEXT: vn %v3, %v3, %v4
840832
; CHECK-Z14-NEXT: vn %v2, %v2, %v5
841833
; CHECK-Z14-NEXT: vsel %v24, %v29, %v1, %v2

llvm/test/CodeGen/SystemZ/vec-cmpsel.ll

+5-10
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,7 @@ define <16 x i16> @fun3(<16 x i8> %val1, <16 x i8> %val2, <16 x i16> %val3, <16
4343
; CHECK: # %bb.0:
4444
; CHECK-NEXT: vceqb %v0, %v24, %v26
4545
; CHECK-DAG: vuphb [[REG0:%v[0-9]+]], %v0
46-
; CHECK-DAG: vmrlg [[REG1:%v[0-9]+]], %v0, %v0
47-
; CHECK-DAG: vuphb [[REG1]], [[REG1]]
46+
; CHECK-DAG: vuplb [[REG1:%v[0-9]+]], %v0
4847
; CHECK-NEXT: vsel %v24, %v28, %v25, [[REG0]]
4948
; CHECK-NEXT: vsel %v26, %v30, %v27, [[REG1]]
5049
; CHECK-NEXT: br %r14
@@ -129,8 +128,7 @@ define <8 x i32> @fun10(<8 x i16> %val1, <8 x i16> %val2, <8 x i32> %val3, <8 x
129128
; CHECK: # %bb.0:
130129
; CHECK-NEXT: vceqh %v0, %v24, %v26
131130
; CHECK-DAG: vuphh [[REG0:%v[0-9]+]], %v0
132-
; CHECK-DAG: vmrlg [[REG1:%v[0-9]+]], %v0, %v0
133-
; CHECK-DAG: vuphh [[REG1]], [[REG1]]
131+
; CHECK-DAG: vuplhw [[REG1:%v[0-9]+]], %v0
134132
; CHECK-NEXT: vsel %v24, %v28, %v25, [[REG0]]
135133
; CHECK-NEXT: vsel %v26, %v30, %v27, [[REG1]]
136134
; CHECK-NEXT: br %r14
@@ -228,8 +226,7 @@ define <4 x i64> @fun18(<4 x i32> %val1, <4 x i32> %val2, <4 x i64> %val3, <4 x
228226
; CHECK: # %bb.0:
229227
; CHECK-NEXT: vceqf %v0, %v24, %v26
230228
; CHECK-DAG: vuphf [[REG0:%v[0-9]+]], %v0
231-
; CHECK-DAG: vmrlg [[REG1:%v[0-9]+]], %v0, %v0
232-
; CHECK-DAG: vuphf [[REG1]], [[REG1]]
229+
; CHECK-DAG: vuplf [[REG1:%v[0-9]+]], %v0
233230
; CHECK-NEXT: vsel %v24, %v28, %v25, [[REG0]]
234231
; CHECK-NEXT: vsel %v26, %v30, %v27, [[REG1]]
235232
; CHECK-NEXT: br %r14
@@ -428,8 +425,7 @@ define <4 x double> @fun29(<4 x float> %val1, <4 x float> %val2, <4 x double> %v
428425
; CHECK-NEXT: vldeb %v2, %v2
429426
; CHECK-NEXT: vfchdb %v1, %v2, %v1
430427
; CHECK-NEXT: vpkg [[REG0:%v[0-9]+]], %v1, %v0
431-
; CHECK-DAG: vmrlg [[REG1:%v[0-9]+]], [[REG0]], [[REG0]]
432-
; CHECK-DAG: vuphf [[REG1]], [[REG1]]
428+
; CHECK-DAG: vuplf [[REG1:%v[0-9]+]], [[REG0]]
433429
; CHECK-DAG: vuphf [[REG2:%v[0-9]+]], [[REG0]]
434430
; CHECK-NEXT: vsel %v24, %v28, %v25, [[REG2]]
435431
; CHECK-NEXT: vsel %v26, %v30, %v27, [[REG1]]
@@ -439,8 +435,7 @@ define <4 x double> @fun29(<4 x float> %val1, <4 x float> %val2, <4 x double> %v
439435
; CHECK-Z14: # %bb.0:
440436
; CHECK-Z14-NEXT: vfchsb %v0, %v24, %v26
441437
; CHECK-Z14-DAG: vuphf [[REG0:%v[0-9]+]], %v0
442-
; CHECK-Z14-DAG: vmrlg [[REG1:%v[0-9]+]], %v0, %v0
443-
; CHECK-Z14-DAG: vuphf [[REG1]], [[REG1]]
438+
; CHECK-Z14-DAG: vuplf [[REG1:%v[0-9]+]], %v0
444439
; CHECK-Z14-NEXT: vsel %v24, %v28, %v25, [[REG0]]
445440
; CHECK-Z14-NEXT: vsel %v26, %v30, %v27, [[REG1]]
446441
; CHECK-Z14-NEXT: br %r14

llvm/test/CodeGen/SystemZ/vec-move-23.ll

+5-7
Original file line numberDiff line numberDiff line change
@@ -143,19 +143,17 @@ define void @fun8(<2 x i64> %dwords, ptr %ptr) {
143143
; Test that this results in vectorized conversions.
144144
define void @fun9(ptr %Src, ptr %ptr) {
145145
; CHECK-LABEL: fun9
146-
; Z15: larl %r1, .LCPI9_0
147-
; Z15-NEXT: vl %v0, 16(%r2), 4
146+
; Z15: vl %v0, 16(%r2), 4
148147
; Z15-NEXT: vl %v1, 0(%r2), 4
149-
; Z15-NEXT: vl %v2, 0(%r1), 3
150-
; Z15-NEXT: vperm %v2, %v2, %v1, %v2
151-
; Z15-NEXT: vuplhh %v1, %v1
148+
; Z15-NEXT: vuplhh %v2, %v1
149+
; Z15-NEXT: vupllh %v1, %v1
152150
; Z15-NEXT: vuplhh %v0, %v0
153151
; Z15-NEXT: vcelfb %v2, %v2, 0, 0
154152
; Z15-NEXT: vcelfb %v1, %v1, 0, 0
155153
; Z15-NEXT: vcelfb %v0, %v0, 0, 0
156154
; Z15-NEXT: vsteg %v0, 32(%r3), 0
157-
; Z15-NEXT: vst %v2, 16(%r3), 4
158-
; Z15-NEXT: vst %v1, 0(%r3), 4
155+
; Z15-NEXT: vst %v1, 16(%r3), 4
156+
; Z15-NEXT: vst %v2, 0(%r3), 4
159157
; Z15-NEXT: br %r14
160158

161159
%Val = load <10 x i16>, ptr %Src

0 commit comments

Comments
 (0)