Skip to content

Commit 70893b6

Browse files
committed
[X86] matchUnaryShuffle - match SIGN_EXTEND_VECTOR_INREG patterns for 'all-signbits' sources
Adapt the existing ANY/ZERO_EXTEND_VECTOR_INREG shuffle matching to also recognise SIGN_EXTEND_VECTOR_INREG patterns to handle cases where we're effectively "splatting" all-signbits sources.
1 parent cee7e7b commit 70893b6

7 files changed

+134
-158
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6248,13 +6248,21 @@ static bool isUndefOrEqual(int Val, int CmpVal) {
62486248
}
62496249

62506250
/// Return true if every element in Mask is the undef sentinel value or equal to
6251-
/// the specified value..
6251+
/// the specified value.
62526252
static bool isUndefOrEqual(ArrayRef<int> Mask, int CmpVal) {
62536253
return llvm::all_of(Mask, [CmpVal](int M) {
62546254
return (M == SM_SentinelUndef) || (M == CmpVal);
62556255
});
62566256
}
62576257

6258+
/// Return true if every element in Mask, beginning from position Pos and ending
6259+
/// before Pos+Size, is the undef sentinel value or equal to the specified value.
6260+
static bool isUndefOrEqualInRange(ArrayRef<int> Mask, int CmpVal, unsigned Pos,
6261+
unsigned Size) {
6262+
return llvm::all_of(Mask.slice(Pos, Size),
6263+
[CmpVal](int M) { return isUndefOrEqual(M, CmpVal); });
6264+
}
6265+
62586266
/// Val is either the undef or zero sentinel value.
62596267
static bool isUndefOrZero(int Val) {
62606268
return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
@@ -39566,31 +39574,41 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
3956639574
}
3956739575
}
3956839576

39569-
// Match against a ANY/ZERO_EXTEND_VECTOR_INREG instruction.
39577+
// Match against an ANY/SIGN/ZERO_EXTEND_VECTOR_INREG instruction.
3957039578
// TODO: Add 512-bit vector support (split AVX512F and AVX512BW).
3957139579
if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
3957239580
(MaskVT.is256BitVector() && Subtarget.hasInt256()))) {
3957339581
unsigned MaxScale = 64 / MaskEltSize;
39582+
bool UseSign = V1.getScalarValueSizeInBits() == MaskEltSize &&
39583+
DAG.ComputeNumSignBits(V1) == MaskEltSize;
3957439584
for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
3957539585
bool MatchAny = true;
3957639586
bool MatchZero = true;
39587+
bool MatchSign = UseSign;
3957739588
unsigned NumDstElts = NumMaskElts / Scale;
39578-
for (unsigned i = 0; i != NumDstElts && (MatchAny || MatchZero); ++i) {
39589+
for (unsigned i = 0;
39590+
i != NumDstElts && (MatchAny || MatchSign || MatchZero); ++i) {
3957939591
if (!isUndefOrEqual(Mask[i * Scale], (int)i)) {
39580-
MatchAny = MatchZero = false;
39592+
MatchAny = MatchSign = MatchZero = false;
3958139593
break;
3958239594
}
39583-
MatchAny &= isUndefInRange(Mask, (i * Scale) + 1, Scale - 1);
39584-
MatchZero &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
39585-
}
39586-
if (MatchAny || MatchZero) {
39587-
assert(MatchZero && "Failed to match zext but matched aext?");
39595+
unsigned Pos = (i * Scale) + 1;
39596+
unsigned Len = Scale - 1;
39597+
MatchAny &= isUndefInRange(Mask, Pos, Len);
39598+
MatchZero &= isUndefOrZeroInRange(Mask, Pos, Len);
39599+
MatchSign &= isUndefOrEqualInRange(Mask, (int)i, Pos, Len);
39600+
}
39601+
if (MatchAny || MatchSign || MatchZero) {
39602+
assert((MatchSign || MatchZero) &&
39603+
"Failed to match sext/zext but matched aext?");
3958839604
unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);
39589-
MVT ScalarTy = MaskVT.isInteger() ? MaskVT.getScalarType() :
39590-
MVT::getIntegerVT(MaskEltSize);
39605+
MVT ScalarTy = MaskVT.isInteger() ? MaskVT.getScalarType()
39606+
: MVT::getIntegerVT(MaskEltSize);
3959139607
SrcVT = MVT::getVectorVT(ScalarTy, SrcSize / MaskEltSize);
3959239608

39593-
Shuffle = unsigned(MatchAny ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND);
39609+
Shuffle = unsigned(
39610+
MatchAny ? ISD::ANY_EXTEND
39611+
: (MatchSign ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND));
3959439612
if (SrcVT.getVectorNumElements() != NumDstElts)
3959539613
Shuffle = DAG.getOpcode_EXTEND_VECTOR_INREG(Shuffle);
3959639614

llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll

Lines changed: 49 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -194,30 +194,55 @@ define <8 x i1> @t2_narrow(<8 x i16> %X) nounwind {
194194
}
195195

196196
define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
197-
; CHECK-SSE-LABEL: t3_wide:
198-
; CHECK-SSE: # %bb.0:
199-
; CHECK-SSE-NEXT: movdqa {{.*#+}} xmm1 = [12297829382473034411,12297829382473034411]
200-
; CHECK-SSE-NEXT: movdqa %xmm0, %xmm2
201-
; CHECK-SSE-NEXT: pmuludq %xmm1, %xmm2
202-
; CHECK-SSE-NEXT: movdqa %xmm0, %xmm3
203-
; CHECK-SSE-NEXT: psrlq $32, %xmm3
204-
; CHECK-SSE-NEXT: pmuludq %xmm1, %xmm3
205-
; CHECK-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
206-
; CHECK-SSE-NEXT: paddq %xmm3, %xmm0
207-
; CHECK-SSE-NEXT: psllq $32, %xmm0
208-
; CHECK-SSE-NEXT: paddq %xmm2, %xmm0
209-
; CHECK-SSE-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
210-
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
211-
; CHECK-SSE-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
212-
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
213-
; CHECK-SSE-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
214-
; CHECK-SSE-NEXT: pand %xmm2, %xmm1
215-
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
216-
; CHECK-SSE-NEXT: por %xmm1, %xmm0
217-
; CHECK-SSE-NEXT: pcmpeqd %xmm1, %xmm1
218-
; CHECK-SSE-NEXT: pxor %xmm0, %xmm1
219-
; CHECK-SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
220-
; CHECK-SSE-NEXT: retq
197+
; CHECK-SSE2-LABEL: t3_wide:
198+
; CHECK-SSE2: # %bb.0:
199+
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [12297829382473034411,12297829382473034411]
200+
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
201+
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
202+
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
203+
; CHECK-SSE2-NEXT: psrlq $32, %xmm3
204+
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
205+
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
206+
; CHECK-SSE2-NEXT: paddq %xmm3, %xmm0
207+
; CHECK-SSE2-NEXT: psllq $32, %xmm0
208+
; CHECK-SSE2-NEXT: paddq %xmm2, %xmm0
209+
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
210+
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
211+
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
212+
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
213+
; CHECK-SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
214+
; CHECK-SSE2-NEXT: pand %xmm2, %xmm1
215+
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
216+
; CHECK-SSE2-NEXT: por %xmm1, %xmm0
217+
; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
218+
; CHECK-SSE2-NEXT: pxor %xmm0, %xmm1
219+
; CHECK-SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
220+
; CHECK-SSE2-NEXT: retq
221+
;
222+
; CHECK-SSE41-LABEL: t3_wide:
223+
; CHECK-SSE41: # %bb.0:
224+
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [12297829382473034411,12297829382473034411]
225+
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm2
226+
; CHECK-SSE41-NEXT: pmuludq %xmm1, %xmm2
227+
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm3
228+
; CHECK-SSE41-NEXT: psrlq $32, %xmm3
229+
; CHECK-SSE41-NEXT: pmuludq %xmm1, %xmm3
230+
; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
231+
; CHECK-SSE41-NEXT: paddq %xmm3, %xmm0
232+
; CHECK-SSE41-NEXT: psllq $32, %xmm0
233+
; CHECK-SSE41-NEXT: paddq %xmm2, %xmm0
234+
; CHECK-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
235+
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
236+
; CHECK-SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
237+
; CHECK-SSE41-NEXT: pmovsxdq %xmm0, %xmm2
238+
; CHECK-SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
239+
; CHECK-SSE41-NEXT: pand %xmm2, %xmm1
240+
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
241+
; CHECK-SSE41-NEXT: por %xmm1, %xmm0
242+
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm1
243+
; CHECK-SSE41-NEXT: pxor %xmm0, %xmm1
244+
; CHECK-SSE41-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
245+
; CHECK-SSE41-NEXT: retq
221246
;
222247
; CHECK-AVX1-LABEL: t3_wide:
223248
; CHECK-AVX1: # %bb.0:

llvm/test/CodeGen/X86/vector-reduce-smax.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
4444
; SSE41-NEXT: movdqa %xmm3, %xmm4
4545
; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
4646
; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
47-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
47+
; SSE41-NEXT: pmovsxdq %xmm3, %xmm0
4848
; SSE41-NEXT: pand %xmm4, %xmm0
4949
; SSE41-NEXT: por %xmm3, %xmm0
5050
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
@@ -146,7 +146,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
146146
; SSE41-NEXT: movdqa %xmm4, %xmm5
147147
; SSE41-NEXT: pcmpeqd %xmm3, %xmm5
148148
; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
149-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
149+
; SSE41-NEXT: pmovsxdq %xmm4, %xmm0
150150
; SSE41-NEXT: pand %xmm5, %xmm0
151151
; SSE41-NEXT: por %xmm4, %xmm0
152152
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
@@ -324,7 +324,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
324324
; SSE41-NEXT: movdqa %xmm2, %xmm4
325325
; SSE41-NEXT: pcmpeqd %xmm5, %xmm4
326326
; SSE41-NEXT: pcmpgtd %xmm5, %xmm2
327-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
327+
; SSE41-NEXT: pmovsxdq %xmm2, %xmm0
328328
; SSE41-NEXT: pand %xmm4, %xmm0
329329
; SSE41-NEXT: por %xmm2, %xmm0
330330
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1
@@ -623,7 +623,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
623623
; SSE41-NEXT: movdqa %xmm2, %xmm3
624624
; SSE41-NEXT: pcmpeqd %xmm9, %xmm3
625625
; SSE41-NEXT: pcmpgtd %xmm9, %xmm2
626-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
626+
; SSE41-NEXT: pmovsxdq %xmm2, %xmm0
627627
; SSE41-NEXT: pand %xmm3, %xmm0
628628
; SSE41-NEXT: por %xmm2, %xmm0
629629
; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm1

llvm/test/CodeGen/X86/vector-reduce-smin.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
4343
; SSE41-NEXT: movdqa %xmm3, %xmm4
4444
; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
4545
; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
46-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
46+
; SSE41-NEXT: pmovsxdq %xmm3, %xmm0
4747
; SSE41-NEXT: pand %xmm4, %xmm0
4848
; SSE41-NEXT: por %xmm3, %xmm0
4949
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
@@ -145,7 +145,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
145145
; SSE41-NEXT: movdqa %xmm3, %xmm4
146146
; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
147147
; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
148-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
148+
; SSE41-NEXT: pmovsxdq %xmm3, %xmm0
149149
; SSE41-NEXT: pand %xmm4, %xmm0
150150
; SSE41-NEXT: por %xmm3, %xmm0
151151
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
@@ -324,7 +324,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
324324
; SSE41-NEXT: movdqa %xmm5, %xmm2
325325
; SSE41-NEXT: pcmpeqd %xmm0, %xmm2
326326
; SSE41-NEXT: pcmpgtd %xmm0, %xmm5
327-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
327+
; SSE41-NEXT: pmovsxdq %xmm5, %xmm0
328328
; SSE41-NEXT: pand %xmm2, %xmm0
329329
; SSE41-NEXT: por %xmm5, %xmm0
330330
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1
@@ -623,7 +623,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
623623
; SSE41-NEXT: movdqa %xmm9, %xmm2
624624
; SSE41-NEXT: pcmpeqd %xmm0, %xmm2
625625
; SSE41-NEXT: pcmpgtd %xmm0, %xmm9
626-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2]
626+
; SSE41-NEXT: pmovsxdq %xmm9, %xmm0
627627
; SSE41-NEXT: pand %xmm2, %xmm0
628628
; SSE41-NEXT: por %xmm9, %xmm0
629629
; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm1

llvm/test/CodeGen/X86/vector-reduce-umax.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
4444
; SSE41-NEXT: movdqa %xmm3, %xmm4
4545
; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
4646
; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
47-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
47+
; SSE41-NEXT: pmovsxdq %xmm3, %xmm0
4848
; SSE41-NEXT: pand %xmm4, %xmm0
4949
; SSE41-NEXT: por %xmm3, %xmm0
5050
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
@@ -164,7 +164,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
164164
; SSE41-NEXT: movdqa %xmm4, %xmm5
165165
; SSE41-NEXT: pcmpeqd %xmm3, %xmm5
166166
; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
167-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
167+
; SSE41-NEXT: pmovsxdq %xmm4, %xmm0
168168
; SSE41-NEXT: pand %xmm5, %xmm0
169169
; SSE41-NEXT: por %xmm4, %xmm0
170170
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
@@ -359,7 +359,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
359359
; SSE41-NEXT: movdqa %xmm2, %xmm4
360360
; SSE41-NEXT: pcmpeqd %xmm5, %xmm4
361361
; SSE41-NEXT: pcmpgtd %xmm5, %xmm2
362-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
362+
; SSE41-NEXT: pmovsxdq %xmm2, %xmm0
363363
; SSE41-NEXT: pand %xmm4, %xmm0
364364
; SSE41-NEXT: por %xmm2, %xmm0
365365
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1
@@ -686,7 +686,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
686686
; SSE41-NEXT: movdqa %xmm2, %xmm3
687687
; SSE41-NEXT: pcmpeqd %xmm9, %xmm3
688688
; SSE41-NEXT: pcmpgtd %xmm9, %xmm2
689-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
689+
; SSE41-NEXT: pmovsxdq %xmm2, %xmm0
690690
; SSE41-NEXT: pand %xmm3, %xmm0
691691
; SSE41-NEXT: por %xmm2, %xmm0
692692
; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm1

llvm/test/CodeGen/X86/vector-reduce-umin.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
4343
; SSE41-NEXT: movdqa %xmm3, %xmm4
4444
; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
4545
; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
46-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
46+
; SSE41-NEXT: pmovsxdq %xmm3, %xmm0
4747
; SSE41-NEXT: pand %xmm4, %xmm0
4848
; SSE41-NEXT: por %xmm3, %xmm0
4949
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
@@ -163,7 +163,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
163163
; SSE41-NEXT: movdqa %xmm3, %xmm4
164164
; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
165165
; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
166-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
166+
; SSE41-NEXT: pmovsxdq %xmm3, %xmm0
167167
; SSE41-NEXT: pand %xmm4, %xmm0
168168
; SSE41-NEXT: por %xmm3, %xmm0
169169
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
@@ -360,7 +360,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
360360
; SSE41-NEXT: movdqa %xmm5, %xmm2
361361
; SSE41-NEXT: pcmpeqd %xmm0, %xmm2
362362
; SSE41-NEXT: pcmpgtd %xmm0, %xmm5
363-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
363+
; SSE41-NEXT: pmovsxdq %xmm5, %xmm0
364364
; SSE41-NEXT: pand %xmm2, %xmm0
365365
; SSE41-NEXT: por %xmm5, %xmm0
366366
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1
@@ -689,7 +689,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
689689
; SSE41-NEXT: movdqa %xmm9, %xmm2
690690
; SSE41-NEXT: pcmpeqd %xmm0, %xmm2
691691
; SSE41-NEXT: pcmpgtd %xmm0, %xmm9
692-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2]
692+
; SSE41-NEXT: pmovsxdq %xmm9, %xmm0
693693
; SSE41-NEXT: pand %xmm2, %xmm0
694694
; SSE41-NEXT: por %xmm9, %xmm0
695695
; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm1

0 commit comments

Comments
 (0)