@@ -314,8 +314,8 @@ define void @vec64_i16_widen_to_i32_factor2_broadcast_to_v2i32_factor2(ptr %in.v
314
314
;
315
315
; AVX512F-LABEL: vec64_i16_widen_to_i32_factor2_broadcast_to_v2i32_factor2:
316
316
; AVX512F: # %bb.0:
317
- ; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
318
- ; AVX512F-NEXT: vpaddb (%rsi), %xmm0 , %xmm0
317
+ ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
318
+ ; AVX512F-NEXT: vpaddb (%rsi), %ymm0 , %ymm0
319
319
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,10,11,0,1,14,15,u,u,u,u,u,u,u,u]
320
320
; AVX512F-NEXT: vpaddb (%rdx), %ymm0, %ymm0
321
321
; AVX512F-NEXT: vmovdqa %ymm0, (%rcx)
@@ -324,8 +324,8 @@ define void @vec64_i16_widen_to_i32_factor2_broadcast_to_v2i32_factor2(ptr %in.v
324
324
;
325
325
; AVX512DQ-LABEL: vec64_i16_widen_to_i32_factor2_broadcast_to_v2i32_factor2:
326
326
; AVX512DQ: # %bb.0:
327
- ; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
328
- ; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0 , %xmm0
327
+ ; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
328
+ ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0 , %ymm0
329
329
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,10,11,0,1,14,15,u,u,u,u,u,u,u,u]
330
330
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm0, %ymm0
331
331
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
@@ -981,7 +981,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in.
981
981
; AVX512F-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,5,0,7]
982
982
; AVX512F-NEXT: vmovdqa (%rdi), %ymm1
983
983
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
984
- ; AVX512F-NEXT: vpermd %ymm1 , %ymm0 , %ymm0
984
+ ; AVX512F-NEXT: vpermd %zmm1 , %zmm0 , %zmm0
985
985
; AVX512F-NEXT: vpaddb (%rdx), %ymm0, %ymm0
986
986
; AVX512F-NEXT: vmovdqa %ymm0, (%rcx)
987
987
; AVX512F-NEXT: vzeroupper
@@ -992,7 +992,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in.
992
992
; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,5,0,7]
993
993
; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm1
994
994
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
995
- ; AVX512DQ-NEXT: vpermd %ymm1 , %ymm0 , %ymm0
995
+ ; AVX512DQ-NEXT: vpermd %zmm1 , %zmm0 , %zmm0
996
996
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm0, %ymm0
997
997
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
998
998
; AVX512DQ-NEXT: vzeroupper
@@ -3507,12 +3507,13 @@ define void @vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12(ptr %i
3507
3507
;
3508
3508
; AVX512F-LABEL: vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12:
3509
3509
; AVX512F: # %bb.0:
3510
- ; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
3510
+ ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
3511
+ ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3511
3512
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
3512
3513
; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3513
- ; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
3514
+ ; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm2
3515
+ ; AVX512F-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4],ymm1[5],ymm2[6],ymm1[7],ymm2[8],ymm1[9],ymm2[10],ymm1[11],ymm2[12],ymm1[13],ymm2[14],ymm1[15]
3514
3516
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
3515
- ; AVX512F-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
3516
3517
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
3517
3518
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
3518
3519
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3522,12 +3523,13 @@ define void @vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12(ptr %i
3522
3523
;
3523
3524
; AVX512DQ-LABEL: vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12:
3524
3525
; AVX512DQ: # %bb.0:
3525
- ; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
3526
+ ; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
3527
+ ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3526
3528
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
3527
3529
; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3528
- ; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
3530
+ ; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm2
3531
+ ; AVX512DQ-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4],ymm1[5],ymm2[6],ymm1[7],ymm2[8],ymm1[9],ymm2[10],ymm1[11],ymm2[12],ymm1[13],ymm2[14],ymm1[15]
3529
3532
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
3530
- ; AVX512DQ-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
3531
3533
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
3532
3534
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
3533
3535
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3766,10 +3768,10 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
3766
3768
;
3767
3769
; AVX512F-LABEL: vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6:
3768
3770
; AVX512F: # %bb.0:
3769
- ; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
3771
+ ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
3772
+ ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3770
3773
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
3771
3774
; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3772
- ; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
3773
3775
; AVX512F-NEXT: vpbroadcastq %xmm0, %ymm2
3774
3776
; AVX512F-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1,2,3],ymm2[4],ymm1[5,6,7],ymm2[8],ymm1[9,10,11],ymm2[12],ymm1[13,14,15]
3775
3777
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
@@ -3782,10 +3784,10 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
3782
3784
;
3783
3785
; AVX512DQ-LABEL: vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6:
3784
3786
; AVX512DQ: # %bb.0:
3785
- ; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
3787
+ ; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
3788
+ ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3786
3789
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
3787
3790
; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3788
- ; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
3789
3791
; AVX512DQ-NEXT: vpbroadcastq %xmm0, %ymm2
3790
3792
; AVX512DQ-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1,2,3],ymm2[4],ymm1[5,6,7],ymm2[8],ymm1[9,10,11],ymm2[12],ymm1[13,14,15]
3791
3793
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
@@ -4145,9 +4147,9 @@ define void @vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2(ptr %
4145
4147
;
4146
4148
; AVX512F-LABEL: vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2:
4147
4149
; AVX512F: # %bb.0:
4148
- ; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
4150
+ ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
4151
+ ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
4149
4152
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
4150
- ; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
4151
4153
; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
4152
4154
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
4153
4155
; AVX512F-NEXT: vpbroadcastw %xmm0, %xmm0
@@ -4159,9 +4161,9 @@ define void @vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2(ptr %
4159
4161
;
4160
4162
; AVX512DQ-LABEL: vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2:
4161
4163
; AVX512DQ: # %bb.0:
4162
- ; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
4164
+ ; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
4165
+ ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
4163
4166
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
4164
- ; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
4165
4167
; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
4166
4168
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
4167
4169
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %xmm0
0 commit comments