@@ -314,8 +314,8 @@ define void @vec64_i16_widen_to_i32_factor2_broadcast_to_v2i32_factor2(ptr %in.v
314
314
;
315
315
; AVX512F-LABEL: vec64_i16_widen_to_i32_factor2_broadcast_to_v2i32_factor2:
316
316
; AVX512F: # %bb.0:
317
- ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
318
- ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
317
+ ; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
318
+ ; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
319
319
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,10,11,0,1,14,15,u,u,u,u,u,u,u,u]
320
320
; AVX512F-NEXT: vpaddb (%rdx), %ymm0, %ymm0
321
321
; AVX512F-NEXT: vmovdqa %ymm0, (%rcx)
@@ -324,8 +324,8 @@ define void @vec64_i16_widen_to_i32_factor2_broadcast_to_v2i32_factor2(ptr %in.v
324
324
;
325
325
; AVX512DQ-LABEL: vec64_i16_widen_to_i32_factor2_broadcast_to_v2i32_factor2:
326
326
; AVX512DQ: # %bb.0:
327
- ; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
328
- ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
327
+ ; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
328
+ ; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
329
329
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,10,11,0,1,14,15,u,u,u,u,u,u,u,u]
330
330
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm0, %ymm0
331
331
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
@@ -981,7 +981,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in.
981
981
; AVX512F-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,5,0,7]
982
982
; AVX512F-NEXT: vmovdqa (%rdi), %ymm1
983
983
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
984
- ; AVX512F-NEXT: vpermd %zmm1, %zmm0, %zmm0
984
+ ; AVX512F-NEXT: vpermd %ymm1, %ymm0, %ymm0
985
985
; AVX512F-NEXT: vpaddb (%rdx), %ymm0, %ymm0
986
986
; AVX512F-NEXT: vmovdqa %ymm0, (%rcx)
987
987
; AVX512F-NEXT: vzeroupper
@@ -992,7 +992,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in.
992
992
; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,5,0,7]
993
993
; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm1
994
994
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
995
- ; AVX512DQ-NEXT: vpermd %zmm1, %zmm0, %zmm0
995
+ ; AVX512DQ-NEXT: vpermd %ymm1, %ymm0, %ymm0
996
996
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm0, %ymm0
997
997
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
998
998
; AVX512DQ-NEXT: vzeroupper
@@ -3507,13 +3507,12 @@ define void @vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12(ptr %i
3507
3507
;
3508
3508
; AVX512F-LABEL: vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12:
3509
3509
; AVX512F: # %bb.0:
3510
- ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
3511
- ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3510
+ ; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
3512
3511
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
3513
3512
; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3514
- ; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm2
3515
- ; AVX512F-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4],ymm1[5],ymm2[6],ymm1[7],ymm2[8],ymm1[9],ymm2[10],ymm1[11],ymm2[12],ymm1[13],ymm2[14],ymm1[15]
3513
+ ; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
3516
3514
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
3515
+ ; AVX512F-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
3517
3516
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
3518
3517
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
3519
3518
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3523,13 +3522,12 @@ define void @vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12(ptr %i
3523
3522
;
3524
3523
; AVX512DQ-LABEL: vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12:
3525
3524
; AVX512DQ: # %bb.0:
3526
- ; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
3527
- ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3525
+ ; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
3528
3526
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
3529
3527
; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3530
- ; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm2
3531
- ; AVX512DQ-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4],ymm1[5],ymm2[6],ymm1[7],ymm2[8],ymm1[9],ymm2[10],ymm1[11],ymm2[12],ymm1[13],ymm2[14],ymm1[15]
3528
+ ; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
3532
3529
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
3530
+ ; AVX512DQ-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
3533
3531
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
3534
3532
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
3535
3533
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3768,10 +3766,10 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
3768
3766
;
3769
3767
; AVX512F-LABEL: vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6:
3770
3768
; AVX512F: # %bb.0:
3771
- ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
3772
- ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3769
+ ; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
3773
3770
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
3774
3771
; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3772
+ ; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
3775
3773
; AVX512F-NEXT: vpbroadcastq %xmm0, %ymm2
3776
3774
; AVX512F-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1,2,3],ymm2[4],ymm1[5,6,7],ymm2[8],ymm1[9,10,11],ymm2[12],ymm1[13,14,15]
3777
3775
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
@@ -3784,10 +3782,10 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
3784
3782
;
3785
3783
; AVX512DQ-LABEL: vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6:
3786
3784
; AVX512DQ: # %bb.0:
3787
- ; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
3788
- ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3785
+ ; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
3789
3786
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
3790
3787
; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3788
+ ; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
3791
3789
; AVX512DQ-NEXT: vpbroadcastq %xmm0, %ymm2
3792
3790
; AVX512DQ-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1,2,3],ymm2[4],ymm1[5,6,7],ymm2[8],ymm1[9,10,11],ymm2[12],ymm1[13,14,15]
3793
3791
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
@@ -4147,9 +4145,9 @@ define void @vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2(ptr %
4147
4145
;
4148
4146
; AVX512F-LABEL: vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2:
4149
4147
; AVX512F: # %bb.0:
4150
- ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
4151
- ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
4148
+ ; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
4152
4149
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
4150
+ ; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
4153
4151
; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
4154
4152
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
4155
4153
; AVX512F-NEXT: vpbroadcastw %xmm0, %xmm0
@@ -4161,9 +4159,9 @@ define void @vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2(ptr %
4161
4159
;
4162
4160
; AVX512DQ-LABEL: vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2:
4163
4161
; AVX512DQ: # %bb.0:
4164
- ; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
4165
- ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
4162
+ ; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
4166
4163
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
4164
+ ; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
4167
4165
; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
4168
4166
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
4169
4167
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %xmm0
0 commit comments