@@ -322,5 +322,139 @@ define void @g(i32 %a) nounwind {
322
322
ret void
323
323
}
324
324
325
; shift_zext_shl: scalar case where the shift happens in an intermediate width.
; The i8 argument is masked with 64, zero-extended to i16, shifted left by 9
; as an i16, and the i16 result is then zero-extended to the i32 return value.
; The expected code for both check prefixes masks in an 8-bit register, widens
; with movzbl, shifts in a 32-bit register and re-extends the low 16 bits with
; movzwl before returning.
; NOTE(review): the assertion lines look like update_llc_test_checks.py output;
; if so, regenerate them with the script instead of editing by hand - confirm
; against the file header, which is not visible in this chunk.
+ define i32 @shift_zext_shl (i8 zeroext %x ) {
326
+ ; X86-LABEL: shift_zext_shl:
327
+ ; X86: # %bb.0:
328
+ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
329
+ ; X86-NEXT: andb $64, %al
330
+ ; X86-NEXT: movzbl %al, %eax
331
+ ; X86-NEXT: shll $9, %eax
332
+ ; X86-NEXT: movzwl %ax, %eax
333
+ ; X86-NEXT: retl
334
+ ;
335
+ ; X64-LABEL: shift_zext_shl:
336
+ ; X64: # %bb.0:
337
+ ; X64-NEXT: andb $64, %dil
338
+ ; X64-NEXT: movzbl %dil, %eax
339
+ ; X64-NEXT: shll $9, %eax
340
+ ; X64-NEXT: movzwl %ax, %eax
341
+ ; X64-NEXT: retq
342
+ %a = and i8 %x , 64 ; keep only bit 6 of %x, so %a is 0 or 64
343
+ %b = zext i8 %a to i16 ; first widening: i8 -> i16
344
+ %c = shl i16 %b , 9 ; 64 << 9 = 0x8000, so %c is 0 or the top bit of the i16
345
+ %d = zext i16 %c to i32 ; second widening: i16 -> i32
346
+ ret i32 %d
347
+ }
348
+
349
; shift_zext_shl2: scalar case with a single widening step.
; The i8 argument is masked with 64, zero-extended straight to i32 and shifted
; left by 9 in i32; there is no intermediate i16 value.
; The expected code for both check prefixes is a 32-bit andl followed by shll,
; with no trailing movzwl.
+ define i32 @shift_zext_shl2 (i8 zeroext %x ) {
350
+ ; X86-LABEL: shift_zext_shl2:
351
+ ; X86: # %bb.0:
352
+ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
353
+ ; X86-NEXT: andl $64, %eax
354
+ ; X86-NEXT: shll $9, %eax
355
+ ; X86-NEXT: retl
356
+ ;
357
+ ; X64-LABEL: shift_zext_shl2:
358
+ ; X64: # %bb.0:
359
+ ; X64-NEXT: movl %edi, %eax
360
+ ; X64-NEXT: andl $64, %eax
361
+ ; X64-NEXT: shll $9, %eax
362
+ ; X64-NEXT: retq
363
+ %a = and i8 %x , 64 ; keep only bit 6 of %x, so %a is 0 or 64
364
+ %b = zext i8 %a to i32 ; widen i8 -> i32 in one step
365
+ %c = shl i32 %b , 9 ; 64 << 9 = 32768, so %c is 0 or 0x8000
366
+ ret i32 %c
367
+ }
368
+
369
; shift_zext_shl_vec: 4-lane vector form of the two-step widening pattern.
; Each i8 lane is masked with its own constant (64, 63, 31, 23), zero-extended
; to i16, shifted left by a per-lane amount (9, 8, 7, 6) in i16, and the i16
; result is zero-extended to i32.
; The largest shifted values per lane are 0x8000, 0x3F00, 0x0F80 and 0x05C0, so
; every result fits in 16 bits and only lane 0 can set bit 15.
; In the X86 checks the work is done lane by lane in general-purpose registers
; and the four 32-bit results are stored through the pointer loaded into %eax;
; only the lane masked with 64 uses the andb/movzbl/shll/movzwl sequence, the
; other three use andl/shll.
; In the X64 checks the work stays in xmm registers: pand with a constant-pool
; operand, byte->word unpack against zero, pmullw with a constant-pool operand,
; then word->dword unpack against zero.
; NOTE(review): the pmullw is presumably the per-lane shift expressed as a
; multiply by powers of two - the constant-pool contents are not visible here.
+ define <4 x i32 > @shift_zext_shl_vec (<4 x i8 > %x ) nounwind {
370
+ ; X86-LABEL: shift_zext_shl_vec:
371
+ ; X86: # %bb.0:
372
+ ; X86-NEXT: pushl %ebx
373
+ ; X86-NEXT: pushl %edi
374
+ ; X86-NEXT: pushl %esi
375
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
376
+ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %esi
377
+ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
378
+ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
379
+ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
380
+ ; X86-NEXT: andb $64, %bl
381
+ ; X86-NEXT: movzbl %bl, %edi
382
+ ; X86-NEXT: shll $9, %edi
383
+ ; X86-NEXT: movzwl %di, %edi
384
+ ; X86-NEXT: andl $63, %ecx
385
+ ; X86-NEXT: shll $8, %ecx
386
+ ; X86-NEXT: andl $31, %edx
387
+ ; X86-NEXT: shll $7, %edx
388
+ ; X86-NEXT: andl $23, %esi
389
+ ; X86-NEXT: shll $6, %esi
390
+ ; X86-NEXT: movl %esi, 12(%eax)
391
+ ; X86-NEXT: movl %edx, 8(%eax)
392
+ ; X86-NEXT: movl %ecx, 4(%eax)
393
+ ; X86-NEXT: movl %edi, (%eax)
394
+ ; X86-NEXT: popl %esi
395
+ ; X86-NEXT: popl %edi
396
+ ; X86-NEXT: popl %ebx
397
+ ; X86-NEXT: retl $4
398
+ ;
399
+ ; X64-LABEL: shift_zext_shl_vec:
400
+ ; X64: # %bb.0:
401
+ ; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
402
+ ; X64-NEXT: pxor %xmm1, %xmm1
403
+ ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
404
+ ; X64-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
405
+ ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
406
+ ; X64-NEXT: retq
407
+ %a = and <4 x i8 > %x , <i8 64 , i8 63 , i8 31 , i8 23 > ; per-lane masks
408
+ %b = zext <4 x i8 > %a to <4 x i16 > ; first widening: i8 -> i16 lanes
409
+ %c = shl <4 x i16 > %b , <i16 9 , i16 8 , i16 7 , i16 6 > ; per-lane shift done in i16
410
+ %d = zext <4 x i16 > %c to <4 x i32 > ; second widening: i16 -> i32 lanes
411
+ ret <4 x i32 > %d
412
+ }
413
+
414
; shift_zext_shl2_vec: 4-lane vector form of the single-step widening pattern.
; Each i8 lane is masked with its own constant (64, 63, 31, 23), zero-extended
; straight to i32, and shifted left by a per-lane amount (9, 8, 7, 6) in i32.
; In the X86 checks every lane is handled in a general-purpose register with a
; 32-bit andl followed by shll, and the four results are stored through the
; pointer loaded into %eax.
; In the X64 checks the lanes are masked with pand, widened byte->word->dword
; by unpacking against a zeroed register, and then combined from two pmuludq
; results (even and odd lanes) using pshufd and punpckldq.
; NOTE(review): the pmuludq pair is presumably the per-lane i32 shift expressed
; as a multiply by powers of two - the constant-pool contents are not visible
; here.
+ define <4 x i32 > @shift_zext_shl2_vec (<4 x i8 > %x ) nounwind {
415
+ ; X86-LABEL: shift_zext_shl2_vec:
416
+ ; X86: # %bb.0:
417
+ ; X86-NEXT: pushl %edi
418
+ ; X86-NEXT: pushl %esi
419
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
420
+ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
421
+ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
422
+ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %esi
423
+ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edi
424
+ ; X86-NEXT: andl $23, %edi
425
+ ; X86-NEXT: andl $31, %esi
426
+ ; X86-NEXT: andl $63, %edx
427
+ ; X86-NEXT: andl $64, %ecx
428
+ ; X86-NEXT: shll $9, %ecx
429
+ ; X86-NEXT: shll $8, %edx
430
+ ; X86-NEXT: shll $7, %esi
431
+ ; X86-NEXT: shll $6, %edi
432
+ ; X86-NEXT: movl %edi, 12(%eax)
433
+ ; X86-NEXT: movl %esi, 8(%eax)
434
+ ; X86-NEXT: movl %edx, 4(%eax)
435
+ ; X86-NEXT: movl %ecx, (%eax)
436
+ ; X86-NEXT: popl %esi
437
+ ; X86-NEXT: popl %edi
438
+ ; X86-NEXT: retl $4
439
+ ;
440
+ ; X64-LABEL: shift_zext_shl2_vec:
441
+ ; X64: # %bb.0:
442
+ ; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
443
+ ; X64-NEXT: pxor %xmm1, %xmm1
444
+ ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
445
+ ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
446
+ ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
447
+ ; X64-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
448
+ ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
449
+ ; X64-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
450
+ ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
451
+ ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
452
+ ; X64-NEXT: retq
453
+ %a = and <4 x i8 > %x , <i8 64 , i8 63 , i8 31 , i8 23 > ; per-lane masks
454
+ %b = zext <4 x i8 > %a to <4 x i32 > ; widen i8 -> i32 lanes in one step
455
+ %c = shl <4 x i32 > %b , <i32 9 , i32 8 , i32 7 , i32 6 > ; per-lane shift done in i32
456
+ ret <4 x i32 > %c
457
+ }
458
+
325
459
declare dso_local void @f (i64 )
326
460
0 commit comments