@@ -201,6 +201,81 @@ define <8 x i32> @combine_v8i32_abs_pos(<8 x i32> %a) {
   ret <8 x i32> %2
 }
 
+; TODO: (abs x) upper bits are known zero if x has extra sign bits
+define i32 @combine_i32_abs_zerosign(i32 %a) {
+; CHECK-LABEL: combine_i32_abs_zerosign:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    sarl $15, %edi
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    negl %eax
+; CHECK-NEXT:    cmovsl %edi, %eax
+; CHECK-NEXT:    andl $-524288, %eax # imm = 0xFFF80000
+; CHECK-NEXT:    retq
+  %1 = ashr i32 %a, 15
+  %2 = call i32 @llvm.abs.i32(i32 %1, i1 false)
+  %3 = and i32 %2, -524288 ; 0xFFF80000
+  ret i32 %3
+}
+
+define <8 x i16> @combine_v8i16_abs_zerosign(<8 x i16> %a) {
+; SSE2-LABEL: combine_v8i16_abs_zerosign:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    psubw %xmm0, %xmm1
+; SSE2-NEXT:    pand %xmm1, %xmm0
+; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: combine_v8i16_abs_zerosign:
+; SSE42:       # %bb.0:
+; SSE42-NEXT:    pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE42-NEXT:    pabsw %xmm0, %xmm0
+; SSE42-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE42-NEXT:    retq
+;
+; AVX2-LABEL: combine_v8i16_abs_zerosign:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpabsw %xmm0, %xmm0
+; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: combine_v8i16_abs_zerosign:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT:    vpabsw %xmm0, %xmm0
+; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: combine_v8i16_abs_zerosign:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT:    vpabsw %xmm0, %xmm0
+; AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT:    retq
+  %1 = ashr <8 x i16> %a, <i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14>
+  %2 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %1, i1 false)
+  %3 = and <8 x i16> %2, <i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
+  ret <8 x i16> %3
+}
+
+; negative test - mask extends beyond known zero bits
+define i32 @combine_i32_abs_zerosign_negative(i32 %a) {
+; CHECK-LABEL: combine_i32_abs_zerosign_negative:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    sarl $3, %edi
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    negl %eax
+; CHECK-NEXT:    cmovsl %edi, %eax
+; CHECK-NEXT:    andl $-524288, %eax # imm = 0xFFF80000
+; CHECK-NEXT:    retq
+  %1 = ashr i32 %a, 3
+  %2 = call i32 @llvm.abs.i32(i32 %1, i1 false)
+  %3 = and i32 %2, -524288 ; 0xFFF80000
+  ret i32 %3
+}
+
 declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1) nounwind readnone
 declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1) nounwind readnone
 declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1) nounwind readnone
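
Note (not part of the committed diff): the TODO rests on a known-bits argument, sketched below in IR form under the assumption that the combine is eventually implemented. The functions with an `_expected` suffix are hypothetical and exist only to illustrate the folds the new tests are waiting for; the current codegen shown in the CHECK lines above does not yet perform them.

; Sketch only, assuming the TODO combine lands.
;
; Scalar case: ashr by 15 leaves at least 16 sign bits, so abs(%1) <= 2^16
; and bits 17..31 are known zero. The -524288 (0xFFF80000) mask covers only
; bits 19..31, all known zero, so the whole expression could fold to 0.
define i32 @combine_i32_abs_zerosign_expected(i32 %a) {
  ret i32 0
}

; Vector case: the smallest per-lane shift is 7, leaving at least 8 sign bits,
; so every lane of the abs is <= 2^8 and bit 15 is known zero; the 32768
; (0x8000) mask could then fold to an all-zeros vector.
define <8 x i16> @combine_v8i16_abs_zerosign_expected(<8 x i16> %a) {
  ret <8 x i16> zeroinitializer
}

; Negative case: ashr by 3 leaves only 4 sign bits, so abs only has bits
; 29..31 known zero; the 0xFFF80000 mask (bits 19..31) reaches below that,
; so the 'and' cannot be folded away and the test must keep its current form.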