@@ -63,6 +63,37 @@ define <8 x i32> @combine_pmaddwd_concat(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>
63
63
ret <8 x i32 > %3
64
64
}
65
65
66
+ ; Concatenates two 128-bit pmaddwd results, each passed through its own
+ ; freeze, into a single <8 x i32> value. Both calls multiply by a splat of
+ ; i16 1. The assertions below expect two separate 128-bit (v)pmaddwd
+ ; instructions for every check prefix shown; the AVX1 and AVX2 variants then
+ ; join the halves with vinsertf128 / vinserti128, while the SSE variant leaves
+ ; them in xmm0 and xmm1.
+ ; NOTE(review): presumably a baseline for folding the concat through freeze
+ ; into one 256-bit vpmaddwd - confirm against the accompanying change.
+ define <8 x i32 > @combine_pmaddwd_concat_freeze (<8 x i16 > %a0 , <8 x i16 > %a1 ) {
67
+ ; SSE-LABEL: combine_pmaddwd_concat_freeze:
68
+ ; SSE: # %bb.0:
69
+ ; SSE-NEXT: pmovsxbw {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
70
+ ; SSE-NEXT: pmaddwd %xmm2, %xmm0
71
+ ; SSE-NEXT: pmaddwd %xmm2, %xmm1
72
+ ; SSE-NEXT: retq
73
+ ;
74
+ ; AVX1-LABEL: combine_pmaddwd_concat_freeze:
75
+ ; AVX1: # %bb.0:
76
+ ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
77
+ ; AVX1-NEXT: vpmaddwd %xmm2, %xmm0, %xmm0
78
+ ; AVX1-NEXT: vpmaddwd %xmm2, %xmm1, %xmm1
79
+ ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
80
+ ; AVX1-NEXT: retq
81
+ ;
82
+ ; AVX2-LABEL: combine_pmaddwd_concat_freeze:
83
+ ; AVX2: # %bb.0:
84
+ ; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
85
+ ; AVX2-NEXT: vpmaddwd %xmm2, %xmm0, %xmm0
86
+ ; AVX2-NEXT: vpmaddwd %xmm2, %xmm1, %xmm1
87
+ ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
88
+ ; AVX2-NEXT: retq
89
+ %lo = call <4 x i32 > @llvm.x86.sse2.pmadd.wd (<8 x i16 > %a0 , <8 x i16 > <i16 1 , i16 1 , i16 1 , i16 1 , i16 1 , i16 1 , i16 1 , i16 1 >)
90
+ %hi = call <4 x i32 > @llvm.x86.sse2.pmadd.wd (<8 x i16 > %a1 , <8 x i16 > <i16 1 , i16 1 , i16 1 , i16 1 , i16 1 , i16 1 , i16 1 , i16 1 >)
+ ; Each half is frozen on its own, so a freeze sits between every pmaddwd
+ ; call and the concatenating shuffle.
91
+ %flo = freeze <4 x i32 > %lo
92
+ %fhi = freeze <4 x i32 > %hi
+ ; Identity mask 0..7: all four lanes of %flo followed by all four of %fhi.
93
+ %res = shufflevector <4 x i32 > %flo , <4 x i32 > %fhi , <8 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 >
94
+ ret <8 x i32 > %res
95
+ }
96
+
66
97
define <4 x i32 > @combine_pmaddwd_demandedelts (<8 x i16 > %a0 , <8 x i16 > %a1 ) {
67
98
; SSE-LABEL: combine_pmaddwd_demandedelts:
68
99
; SSE: # %bb.0:
@@ -178,6 +209,37 @@ define <16 x i16> @combine_pmaddubsw_concat(<16 x i8> %a0, <16 x i8> %a1, <16 x
178
209
ret <16 x i16 > %3
179
210
}
180
211
212
+ ; Concatenates two 128-bit pmaddubsw results, each passed through its own
+ ; freeze, into a single <16 x i16> value. Both calls multiply by a splat of
+ ; i8 1. The assertions below expect two separate 128-bit (v)pmaddubsw
+ ; instructions for every check prefix shown; the AVX1 and AVX2 variants then
+ ; join the halves with vinsertf128 / vinserti128, while the SSE variant leaves
+ ; them in xmm0 and xmm1.
+ ; NOTE(review): presumably a baseline for folding the concat through freeze
+ ; into one 256-bit vpmaddubsw - confirm against the accompanying change.
+ define <16 x i16 > @combine_pmaddubsw_concat_freeze (<16 x i8 > %a0 , <16 x i8 > %a1 ) {
213
+ ; SSE-LABEL: combine_pmaddubsw_concat_freeze:
214
+ ; SSE: # %bb.0:
215
+ ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
216
+ ; SSE-NEXT: pmaddubsw %xmm2, %xmm0
217
+ ; SSE-NEXT: pmaddubsw %xmm2, %xmm1
218
+ ; SSE-NEXT: retq
219
+ ;
220
+ ; AVX1-LABEL: combine_pmaddubsw_concat_freeze:
221
+ ; AVX1: # %bb.0:
222
+ ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
223
+ ; AVX1-NEXT: vpmaddubsw %xmm2, %xmm0, %xmm0
224
+ ; AVX1-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
225
+ ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
226
+ ; AVX1-NEXT: retq
227
+ ;
228
+ ; AVX2-LABEL: combine_pmaddubsw_concat_freeze:
229
+ ; AVX2: # %bb.0:
230
+ ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
231
+ ; AVX2-NEXT: vpmaddubsw %xmm2, %xmm0, %xmm0
232
+ ; AVX2-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
233
+ ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
234
+ ; AVX2-NEXT: retq
235
+ %lo = call <8 x i16 > @llvm.x86.ssse3.pmadd.ub.sw.128 (<16 x i8 > %a0 , <16 x i8 > <i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 >)
236
+ %hi = call <8 x i16 > @llvm.x86.ssse3.pmadd.ub.sw.128 (<16 x i8 > %a1 , <16 x i8 > <i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 >)
+ ; Each half is frozen on its own, so a freeze sits between every pmaddubsw
+ ; call and the concatenating shuffle.
237
+ %flo = freeze <8 x i16 > %lo
238
+ %fhi = freeze <8 x i16 > %hi
+ ; Identity mask 0..15: all eight lanes of %flo followed by all eight of %fhi.
239
+ %res = shufflevector <8 x i16 > %flo , <8 x i16 > %fhi , <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 >
240
+ ret <16 x i16 > %res
241
+ }
242
+
181
243
define <8 x i16 > @combine_pmaddubsw_demandedelts (<16 x i8 > %a0 , <16 x i8 > %a1 ) {
182
244
; SSE-LABEL: combine_pmaddubsw_demandedelts:
183
245
; SSE: # %bb.0:
0 commit comments