Commit bfbfd1c
[X86] combineLoad - try to reuse existing constant pool entries for smaller vector constant data
If we already have a YMM/ZMM constant whose lower bits match a smaller XMM/YMM constant, ensure the load reuses the same constant pool entry. This extends the similar combines we already have for reusing VBROADCAST_LOAD/SUBV_BROADCAST_LOAD constant loads. This is mainly a canonicalization, but it should make it easier for us to merge constant loads in a future commit (related to both #70947 and better X86FixupVectorConstantsPass usage for #71078).
1 parent a405193 commit bfbfd1c

9 files changed: +1751 -1781 lines changed
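In DAG terms, the combine rewrites the narrow constant load as an extract of the lowest subvector of the wider load already on the chain, reusing its constant pool entry. Below is a minimal sketch of that rewrite step, using the extractSubVector/DCI.CombineTo pattern visible in the X86ISelLowering.cpp hunk; the wrapper function itself is illustrative, not the committed code.

// Illustrative only: assumes it lives in X86ISelLowering.cpp, where the
// static extractSubVector helper is available. N is the narrow constant
// load being combined; User is a wider load/broadcast on the same chain
// whose low RegVT-sized bits match N's constant data.
static SDValue reuseWiderConstantPoolLoad(SDNode *N, SDNode *User, EVT RegVT,
                                          SelectionDAG &DAG,
                                          TargetLowering::DAGCombinerInfo &DCI) {
  // Extract the lowest RegVT-sized subvector of the wider loaded value ...
  SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, SDLoc(N),
                                     RegVT.getSizeInBits());
  Extract = DAG.getBitcast(RegVT, Extract);
  // ... and replace both N's value and its output chain, so the narrow
  // load (and its constant pool entry) becomes dead.
  return DCI.CombineTo(N, Extract, SDValue(User, 1));
}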

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 11 additions & 6 deletions
@@ -49796,17 +49796,18 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
       }
     }
 
-  // If we also broadcast this to a wider type, then just extract the lowest
-  // subvector.
+  // If we also load/broadcast this to a wider type, then just extract the
+  // lowest subvector.
   if (Ext == ISD::NON_EXTLOAD && Subtarget.hasAVX() && Ld->isSimple() &&
       (RegVT.is128BitVector() || RegVT.is256BitVector())) {
     SDValue Ptr = Ld->getBasePtr();
     SDValue Chain = Ld->getChain();
     for (SDNode *User : Chain->uses()) {
       if (User != N &&
           (User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD ||
-           User->getOpcode() == X86ISD::VBROADCAST_LOAD) &&
-          cast<MemIntrinsicSDNode>(User)->getChain() == Chain &&
+           User->getOpcode() == X86ISD::VBROADCAST_LOAD ||
+           ISD::isNormalLoad(User)) &&
+          cast<MemSDNode>(User)->getChain() == Chain &&
           !User->hasAnyUseOfValue(1) &&
           User->getValueSizeInBits(0).getFixedValue() >
               RegVT.getFixedSizeInBits()) {
@@ -49819,9 +49820,13 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
           Extract = DAG.getBitcast(RegVT, Extract);
           return DCI.CombineTo(N, Extract, SDValue(User, 1));
         }
-        if (User->getOpcode() == X86ISD::VBROADCAST_LOAD &&
+        if ((User->getOpcode() == X86ISD::VBROADCAST_LOAD ||
+             (ISD::isNormalLoad(User) &&
+              cast<LoadSDNode>(User)->getBasePtr() != Ptr)) &&
             getTargetConstantFromBasePtr(Ptr)) {
-          // See if we are loading a constant that has also been broadcast.
+          // See if we are loading a constant that has also been broadcast or
+          // we are loading a constant that also matches in the lower
+          // bits of a longer constant (but from a different constant pool ptr).
           APInt Undefs, UserUndefs;
           SmallVector<APInt> Bits, UserBits;
           if (getTargetConstantBitsFromNode(SDValue(N, 0), 8, Undefs, Bits) &&
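The hunk is truncated at the constant-bits check; only the first getTargetConstantBitsFromNode call is visible. A hedged sketch of how the lower-bits match could be completed follows; the second gather, the truncation, the comparison, and the elided undef handling are assumptions, not shown in the diff.

// Hypothetical continuation: gather both constants as byte-sized elements,
// keep only the low Bits.size() bytes of the wider user constant (User is
// known to be wider than RegVT here), and require an exact match before
// reusing the user's constant pool entry.
APInt Undefs, UserUndefs;
SmallVector<APInt> Bits, UserBits;
if (getTargetConstantBitsFromNode(SDValue(N, 0), 8, Undefs, Bits) &&
    getTargetConstantBitsFromNode(SDValue(User, 0), 8, UserUndefs, UserBits)) {
  UserBits.truncate(Bits.size()); // compare only the overlapping low bytes
  if (Bits == UserBits) {
    // Safe to extract the lowest RegVT-sized subvector of the wider load.
  }
}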

llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll

Lines changed: 2 additions & 2 deletions
@@ -1400,7 +1400,7 @@ define <4 x i64> @f4xi64_i128(<4 x i64> %a) {
 ; AVX-64-LABEL: f4xi64_i128:
 ; AVX-64:       # %bb.0:
 ; AVX-64-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX-64-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
+; AVX-64-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1]
 ; AVX-64-NEXT:    vpaddq %xmm2, %xmm1, %xmm1
 ; AVX-64-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
 ; AVX-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -1535,7 +1535,7 @@ define <8 x i64> @f8xi64_i256(<8 x i64> %a) {
 ; AVX-64-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX-64-NEXT:    vmovdqa {{.*#+}} xmm3 = [2,3]
 ; AVX-64-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
-; AVX-64-NEXT:    vmovdqa {{.*#+}} xmm4 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
+; AVX-64-NEXT:    vmovdqa {{.*#+}} xmm4 = [0,1]
 ; AVX-64-NEXT:    vpaddq %xmm4, %xmm1, %xmm1
 ; AVX-64-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
 ; AVX-64-NEXT:    vextractf128 $1, %ymm0, %xmm2
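Both spellings of the xmm2/xmm4 constant denote the same 128 bits: as 2 x i64, [0,1] is eight zero bytes followed by 0x01 and seven zero bytes in little-endian memory, exactly the old 16 x i8 form. The test change only reflects which (wider) pool entry the load now reuses. A standalone check of that byte equivalence, not part of the test suite:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // 2 x i64 spelling of the constant: [0,1]
  const uint64_t AsI64[2] = {0, 1};
  // 16 x i8 spelling: [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
  const uint8_t AsI8[16] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0};
  // Same 128-bit pattern on a little-endian target such as x86.
  assert(std::memcmp(AsI64, AsI8, sizeof(AsI8)) == 0);
  return 0;
}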

llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-7.ll

Lines changed: 3 additions & 3 deletions
@@ -2157,7 +2157,7 @@ define void @load_i16_stride7_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm11 = ymm10[2,3,0,1]
 ; AVX2-SLOW-NEXT:    vpblendw {{.*#+}} ymm10 = ymm10[0,1,2],ymm11[3],ymm10[4,5,6,7,8,9,10],ymm11[11],ymm10[12,13,14,15]
 ; AVX2-SLOW-NEXT:    vpshufb {{.*#+}} ymm11 = ymm10[2,3,2,3,2,3,2,3,8,9,8,9,6,7,4,5,18,19,18,19,18,19,18,19,24,25,24,25,22,23,20,21]
-; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} xmm10 = [255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0]
+; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} xmm10 = <255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0>
 ; AVX2-SLOW-NEXT:    vpblendvb %ymm10, %ymm8, %ymm11, %ymm8
 ; AVX2-SLOW-NEXT:    vpblendd {{.*#+}} ymm11 = ymm5[0,1],ymm6[2],ymm5[3,4,5],ymm6[6],ymm5[7]
 ; AVX2-SLOW-NEXT:    vextracti128 $1, %ymm11, %xmm12
@@ -2329,7 +2329,7 @@ define void @load_i16_stride7_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm12 = <2,5,1,u,4,u,u,u>
 ; AVX2-FAST-NEXT:    vpermd %ymm11, %ymm12, %ymm11
 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm12 = ymm11[2,3,2,3,2,3,2,3,8,9,0,1,6,7,8,9,18,19,18,19,18,19,18,19,24,25,16,17,22,23,24,25]
-; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} xmm11 = [255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0]
+; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} xmm11 = <255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0>
 ; AVX2-FAST-NEXT:    vpblendvb %ymm11, %ymm10, %ymm12, %ymm10
 ; AVX2-FAST-NEXT:    vpblendd {{.*#+}} ymm12 = ymm4[0,1],ymm6[2],ymm4[3,4,5],ymm6[6],ymm4[7]
 ; AVX2-FAST-NEXT:    vextracti128 $1, %ymm12, %xmm13
@@ -2496,7 +2496,7 @@ define void @load_i16_stride7_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
 ; AVX2-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm12 = ymm11[2,3,0,1]
 ; AVX2-FAST-PERLANE-NEXT:    vpblendw {{.*#+}} ymm11 = ymm11[0,1,2],ymm12[3],ymm11[4,5,6,7,8,9,10],ymm12[11],ymm11[12,13,14,15]
 ; AVX2-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} ymm12 = ymm11[2,3,2,3,2,3,2,3,8,9,8,9,6,7,4,5,18,19,18,19,18,19,18,19,24,25,24,25,22,23,20,21]
-; AVX2-FAST-PERLANE-NEXT:    vmovdqa {{.*#+}} xmm11 = [255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0]
+; AVX2-FAST-PERLANE-NEXT:    vmovdqa {{.*#+}} xmm11 = <255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0>
 ; AVX2-FAST-PERLANE-NEXT:    vpblendvb %ymm11, %ymm8, %ymm12, %ymm8
 ; AVX2-FAST-PERLANE-NEXT:    vpblendd {{.*#+}} ymm12 = ymm5[0,1],ymm6[2],ymm5[3,4,5],ymm6[6],ymm5[7]
 ; AVX2-FAST-PERLANE-NEXT:    vextracti128 $1, %ymm12, %xmm13

llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-5.ll

Lines changed: 1 addition & 1 deletion
@@ -1685,7 +1685,7 @@ define void @load_i8_stride5_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
 ; AVX2-ONLY-NEXT:    # ymm10 = mem[0,1,0,1]
 ; AVX2-ONLY-NEXT:    vpblendvb %ymm10, %ymm7, %ymm8, %ymm7
 ; AVX2-ONLY-NEXT:    vpshufb {{.*#+}} ymm7 = ymm7[u,u,u,u,u,u,u,u,u,u,u,u,u,1,6,11,16,21,26,31,20,25,30,19,24,29,u,u,u,u,u,u]
-; AVX2-ONLY-NEXT:    vmovdqa {{.*#+}} xmm10 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0]
+; AVX2-ONLY-NEXT:    vmovdqa {{.*#+}} xmm10 = <255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0>
 ; AVX2-ONLY-NEXT:    vpblendvb %ymm10, %ymm6, %ymm7, %ymm6
 ; AVX2-ONLY-NEXT:    vmovdqa 144(%rdi), %xmm7
 ; AVX2-ONLY-NEXT:    vpshufb {{.*#+}} xmm11 = xmm7[u,u,u,u,u,u,u,u,u,u],zero,zero,zero,xmm7[1,6,11]

llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-3.ll

Lines changed: 7 additions & 8 deletions
@@ -1238,13 +1238,12 @@ define void @store_i16_stride3_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
 ; AVX512F-NEXT:    vshufi64x2 {{.*#+}} zmm3 = zmm3[0,1,2,3],zmm6[4,5,6,7]
 ; AVX512F-NEXT:    vmovdqa (%rdx), %ymm6
 ; AVX512F-NEXT:    vmovdqa 32(%rdx), %ymm7
-; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm9 = <5,5,u,6,6,u,7,7>
-; AVX512F-NEXT:    vpermd %ymm7, %ymm9, %ymm9
-; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm10 = [0,65535,65535,0,65535,65535,0,65535,65535,0,65535,65535,0,65535,65535,0]
-; AVX512F-NEXT:    vpandn %ymm9, %ymm10, %ymm9
-; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm10 = [128,128,10,11,128,128,128,128,12,13,128,128,128,128,14,15,128,128,128,128,16,17,128,128,128,128,18,19,128,128,128,128]
-; AVX512F-NEXT:    vpshufb %ymm10, %ymm7, %ymm7
-; AVX512F-NEXT:    vinserti64x4 $1, %ymm9, %zmm7, %zmm7
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm9 = [128,128,10,11,128,128,128,128,12,13,128,128,128,128,14,15,128,128,128,128,16,17,128,128,128,128,18,19,128,128,128,128]
+; AVX512F-NEXT:    vpshufb %ymm9, %ymm7, %ymm10
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm11 = <5,5,u,6,6,u,7,7>
+; AVX512F-NEXT:    vpermd %ymm7, %ymm11, %ymm7
+; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm7, %ymm7
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm7, %zmm10, %zmm7
 ; AVX512F-NEXT:    vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm7
 ; AVX512F-NEXT:    vmovdqa (%rdi), %ymm3
 ; AVX512F-NEXT:    vpshufb %ymm5, %ymm3, %ymm3
@@ -1259,7 +1258,7 @@ define void @store_i16_stride3_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
 ; AVX512F-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
 ; AVX512F-NEXT:    vinserti128 $1, %xmm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-NEXT:    vpshufb %ymm10, %ymm6, %ymm1
+; AVX512F-NEXT:    vpshufb %ymm9, %ymm6, %ymm1
 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,0,0,u,1,1,u,2>
 ; AVX512F-NEXT:    vpermd %ymm6, %ymm2, %ymm2
 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm3 = [65535,65535,0,65535,65535,0,65535,65535,0,65535,65535,0,65535,65535,0,65535]
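A recurring codegen change in the store tests: a vmovdqa of a mask plus vpandn becomes a single vpand with a constant-pool memory operand, which suggests the inverted mask now lives in (and is shared from) the constant pool. A scalar illustration of the underlying identity, with illustrative values only:

#include <cassert>
#include <cstdint>

int main() {
  // In AT&T syntax, vpandn %b, %a, %c computes c = (~a) & b per element.
  // If the pool entry already holds the inverted mask, a plain load-folded
  // vpand gives the same result and needs no separate vmovdqa.
  const uint16_t Mask = 0x00FF;     // illustrative element of the old mask
  const uint16_t Inverted = ~Mask;  // element the pool entry would hold
  const uint16_t Src = 0x1234;
  assert(uint16_t(~Mask & Src) == uint16_t(Inverted & Src));
  return 0;
}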

llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-5.ll

Lines changed: 16 additions & 18 deletions
@@ -2831,15 +2831,15 @@ define void @store_i16_stride5_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
 ; AVX512F-SLOW-NEXT:    vpermq {{.*#+}} ymm9 = ymm9[0,1,0,0]
 ; AVX512F-SLOW-NEXT:    vinserti64x4 $1, %ymm2, %zmm9, %zmm2
 ; AVX512F-SLOW-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm11, %zmm2
-; AVX512F-SLOW-NEXT:    vmovdqa (%r8), %ymm9
-; AVX512F-SLOW-NEXT:    vmovdqa 32(%r8), %ymm10
+; AVX512F-SLOW-NEXT:    vmovdqa (%r8), %ymm10
+; AVX512F-SLOW-NEXT:    vmovdqa 32(%r8), %ymm9
 ; AVX512F-SLOW-NEXT:    vmovdqa {{.*#+}} ymm11 = [128,128,128,128,12,13,128,128,128,128,128,128,128,128,14,15,128,128,128,128,128,128,128,128,16,17,128,128,128,128,128,128]
-; AVX512F-SLOW-NEXT:    vpshufb %ymm11, %ymm10, %ymm4
-; AVX512F-SLOW-NEXT:    vpermq {{.*#+}} ymm10 = ymm10[0,1,1,1]
+; AVX512F-SLOW-NEXT:    vpshufb %ymm11, %ymm9, %ymm4
+; AVX512F-SLOW-NEXT:    vpermq {{.*#+}} ymm9 = ymm9[0,1,1,1]
 ; AVX512F-SLOW-NEXT:    vmovdqa64 {{.*#+}} ymm21 = [65535,65535,65535,0,65535,65535,65535,65535,0,65535,65535,65535,65535,0,65535,65535]
-; AVX512F-SLOW-NEXT:    vpandnq %ymm10, %ymm21, %ymm10
-; AVX512F-SLOW-NEXT:    vinserti64x4 $1, %ymm4, %zmm10, %zmm10
-; AVX512F-SLOW-NEXT:    vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm10
+; AVX512F-SLOW-NEXT:    vpandnq %ymm9, %ymm21, %ymm9
+; AVX512F-SLOW-NEXT:    vinserti64x4 $1, %ymm4, %zmm9, %zmm9
+; AVX512F-SLOW-NEXT:    vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm9
 ; AVX512F-SLOW-NEXT:    vmovdqa (%rdx), %xmm2
 ; AVX512F-SLOW-NEXT:    vpunpcklwd {{.*#+}} xmm4 = xmm12[0],xmm2[0],xmm12[1],xmm2[1],xmm12[2],xmm2[2],xmm12[3],xmm2[3]
 ; AVX512F-SLOW-NEXT:    vpshufb %xmm13, %xmm4, %xmm4
@@ -2860,7 +2860,7 @@ define void @store_i16_stride5_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
 ; AVX512F-SLOW-NEXT:    vmovdqa64 {{.*#+}} zmm7 = [65535,65535,0,0,65535,65535,65535,0,0,65535,65535,65535,0,0,65535,65535,65535,0,0,65535,65535,65535,0,0,65535,65535,65535,0,0,65535,65535,65535]
 ; AVX512F-SLOW-NEXT:    vpternlogq $226, %zmm2, %zmm7, %zmm4
 ; AVX512F-SLOW-NEXT:    vpbroadcastq (%r8), %ymm2
-; AVX512F-SLOW-NEXT:    vpermq {{.*#+}} ymm8 = ymm9[0,1,1,1]
+; AVX512F-SLOW-NEXT:    vpermq {{.*#+}} ymm8 = ymm10[0,1,1,1]
 ; AVX512F-SLOW-NEXT:    vinserti64x4 $1, %ymm8, %zmm2, %zmm2
 ; AVX512F-SLOW-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm2
 ; AVX512F-SLOW-NEXT:    vpshufd {{.*#+}} ymm4 = ymm18[0,1,2,1,4,5,6,5]
@@ -2909,16 +2909,15 @@ define void @store_i16_stride5_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
 ; AVX512F-SLOW-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
 ; AVX512F-SLOW-NEXT:    vinserti64x4 $1, %ymm1, %zmm5, %zmm1
 ; AVX512F-SLOW-NEXT:    vpternlogq $226, %zmm0, %zmm16, %zmm1
-; AVX512F-SLOW-NEXT:    vpbroadcastq 16(%r8), %ymm0
-; AVX512F-SLOW-NEXT:    vmovdqa {{.*#+}} ymm3 = [65535,0,65535,65535,65535,65535,0,65535,65535,65535,65535,0,65535,65535,65535,65535]
-; AVX512F-SLOW-NEXT:    vpandn %ymm0, %ymm3, %ymm0
-; AVX512F-SLOW-NEXT:    vpshufb %ymm11, %ymm9, %ymm3
-; AVX512F-SLOW-NEXT:    vinserti64x4 $1, %ymm0, %zmm3, %zmm0
+; AVX512F-SLOW-NEXT:    vpshufb %ymm11, %ymm10, %ymm0
+; AVX512F-SLOW-NEXT:    vpbroadcastq 16(%r8), %ymm3
+; AVX512F-SLOW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
+; AVX512F-SLOW-NEXT:    vinserti64x4 $1, %ymm3, %zmm0, %zmm0
 ; AVX512F-SLOW-NEXT:    vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
 ; AVX512F-SLOW-NEXT:    vmovdqa64 %zmm0, 64(%r9)
 ; AVX512F-SLOW-NEXT:    vmovdqa64 %zmm4, 256(%r9)
 ; AVX512F-SLOW-NEXT:    vmovdqa64 %zmm2, (%r9)
-; AVX512F-SLOW-NEXT:    vmovdqa64 %zmm10, 192(%r9)
+; AVX512F-SLOW-NEXT:    vmovdqa64 %zmm9, 192(%r9)
 ; AVX512F-SLOW-NEXT:    vmovdqa64 %zmm19, 128(%r9)
 ; AVX512F-SLOW-NEXT:    vzeroupper
 ; AVX512F-SLOW-NEXT:    retq
@@ -3019,11 +3018,10 @@ define void @store_i16_stride5_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
 ; AVX512F-FAST-NEXT:    vinserti64x4 $1, %ymm7, %zmm13, %zmm7
 ; AVX512F-FAST-NEXT:    vmovdqa64 {{.*#+}} zmm20 = [65535,65535,65535,0,0,65535,65535,65535,0,0,65535,65535,65535,0,0,65535,65535,65535,0,0,65535,65535,65535,0,0,65535,65535,65535,0,0,65535,65535]
 ; AVX512F-FAST-NEXT:    vpternlogq $226, %zmm3, %zmm20, %zmm7
+; AVX512F-FAST-NEXT:    vmovdqa64 %ymm24, %ymm3
+; AVX512F-FAST-NEXT:    vpshufb %ymm3, %ymm0, %ymm0
 ; AVX512F-FAST-NEXT:    vpbroadcastq 16(%r8), %ymm3
-; AVX512F-FAST-NEXT:    vmovdqa {{.*#+}} ymm13 = [65535,0,65535,65535,65535,65535,0,65535,65535,65535,65535,0,65535,65535,65535,65535]
-; AVX512F-FAST-NEXT:    vpandn %ymm3, %ymm13, %ymm3
-; AVX512F-FAST-NEXT:    vmovdqa64 %ymm24, %ymm11
-; AVX512F-FAST-NEXT:    vpshufb %ymm11, %ymm0, %ymm0
+; AVX512F-FAST-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
 ; AVX512F-FAST-NEXT:    vinserti64x4 $1, %ymm3, %zmm0, %zmm0
 ; AVX512F-FAST-NEXT:    vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm7, %zmm0
 ; AVX512F-FAST-NEXT:    vbroadcasti128 {{.*#+}} ymm13 = [30,31,28,29,26,27,30,31,30,31,28,29,30,31,28,29,30,31,28,29,26,27,30,31,30,31,28,29,30,31,28,29]
