@@ -322,5 +322,154 @@ define void @g(i32 %a) nounwind {
ret void
}

+ define i32 @shift_zext_shl(i8 zeroext %x) {
+ ; X86-LABEL: shift_zext_shl:
+ ; X86: # %bb.0:
+ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+ ; X86-NEXT: andb $64, %al
+ ; X86-NEXT: movzbl %al, %eax
+ ; X86-NEXT: shll $9, %eax
+ ; X86-NEXT: movzwl %ax, %eax
+ ; X86-NEXT: retl
+ ;
+ ; X64-LABEL: shift_zext_shl:
+ ; X64: # %bb.0:
+ ; X64-NEXT: andb $64, %dil
+ ; X64-NEXT: movzbl %dil, %eax
+ ; X64-NEXT: shll $9, %eax
+ ; X64-NEXT: movzwl %ax, %eax
+ ; X64-NEXT: retq
+ %a = and i8 %x, 64
+ %b = zext i8 %a to i16
+ %c = shl i16 %b, 9
+ %d = zext i16 %c to i32
+ ret i32 %d
+ }
+
+ define i32 @shift_zext_shl2(i8 zeroext %x) {
+ ; X86-LABEL: shift_zext_shl2:
+ ; X86: # %bb.0:
+ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+ ; X86-NEXT: andl $64, %eax
+ ; X86-NEXT: shll $9, %eax
+ ; X86-NEXT: retl
+ ;
+ ; X64-LABEL: shift_zext_shl2:
+ ; X64: # %bb.0:
+ ; X64-NEXT: movl %edi, %eax
+ ; X64-NEXT: andl $64, %eax
+ ; X64-NEXT: shll $9, %eax
+ ; X64-NEXT: retq
+ %a = and i8 %x, 64
+ %b = zext i8 %a to i32
+ %c = shl i32 %b, 9
+ ret i32 %c
+ }
+
+ define <4 x i32> @shift_zext_shl_vec(<4 x i8> %x) {
+ ; X86-LABEL: shift_zext_shl_vec:
+ ; X86: # %bb.0:
+ ; X86-NEXT: pushl %ebx
+ ; X86-NEXT: .cfi_def_cfa_offset 8
+ ; X86-NEXT: pushl %edi
+ ; X86-NEXT: .cfi_def_cfa_offset 12
+ ; X86-NEXT: pushl %esi
+ ; X86-NEXT: .cfi_def_cfa_offset 16
+ ; X86-NEXT: .cfi_offset %esi, -16
+ ; X86-NEXT: .cfi_offset %edi, -12
+ ; X86-NEXT: .cfi_offset %ebx, -8
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %esi
+ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+ ; X86-NEXT: andb $64, %bl
+ ; X86-NEXT: movzbl %bl, %edi
+ ; X86-NEXT: shll $9, %edi
+ ; X86-NEXT: movzwl %di, %edi
+ ; X86-NEXT: andl $63, %ecx
+ ; X86-NEXT: shll $8, %ecx
+ ; X86-NEXT: andl $31, %edx
+ ; X86-NEXT: shll $7, %edx
+ ; X86-NEXT: andl $23, %esi
+ ; X86-NEXT: shll $6, %esi
+ ; X86-NEXT: movl %esi, 12(%eax)
+ ; X86-NEXT: movl %edx, 8(%eax)
+ ; X86-NEXT: movl %ecx, 4(%eax)
+ ; X86-NEXT: movl %edi, (%eax)
+ ; X86-NEXT: popl %esi
+ ; X86-NEXT: .cfi_def_cfa_offset 12
+ ; X86-NEXT: popl %edi
+ ; X86-NEXT: .cfi_def_cfa_offset 8
+ ; X86-NEXT: popl %ebx
+ ; X86-NEXT: .cfi_def_cfa_offset 4
+ ; X86-NEXT: retl $4
+ ;
+ ; X64-LABEL: shift_zext_shl_vec:
+ ; X64: # %bb.0:
+ ; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+ ; X64-NEXT: pxor %xmm1, %xmm1
+ ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+ ; X64-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+ ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+ ; X64-NEXT: retq
+ %a = and <4 x i8> %x, <i8 64, i8 63, i8 31, i8 23>
+ %b = zext <4 x i8> %a to <4 x i16>
+ %c = shl <4 x i16> %b, <i16 9, i16 8, i16 7, i16 6>
+ %d = zext <4 x i16> %c to <4 x i32>
+ ret <4 x i32> %d
+ }
+
+ define <4 x i32> @shift_zext_shl2_vec(<4 x i8> %x) {
+ ; X86-LABEL: shift_zext_shl2_vec:
+ ; X86: # %bb.0:
+ ; X86-NEXT: pushl %edi
+ ; X86-NEXT: .cfi_def_cfa_offset 8
+ ; X86-NEXT: pushl %esi
+ ; X86-NEXT: .cfi_def_cfa_offset 12
+ ; X86-NEXT: .cfi_offset %esi, -12
+ ; X86-NEXT: .cfi_offset %edi, -8
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %esi
+ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edi
+ ; X86-NEXT: andl $23, %edi
+ ; X86-NEXT: andl $31, %esi
+ ; X86-NEXT: andl $63, %edx
+ ; X86-NEXT: andl $64, %ecx
+ ; X86-NEXT: shll $9, %ecx
+ ; X86-NEXT: shll $8, %edx
+ ; X86-NEXT: shll $7, %esi
+ ; X86-NEXT: shll $6, %edi
+ ; X86-NEXT: movl %edi, 12(%eax)
+ ; X86-NEXT: movl %esi, 8(%eax)
+ ; X86-NEXT: movl %edx, 4(%eax)
+ ; X86-NEXT: movl %ecx, (%eax)
+ ; X86-NEXT: popl %esi
+ ; X86-NEXT: .cfi_def_cfa_offset 8
+ ; X86-NEXT: popl %edi
+ ; X86-NEXT: .cfi_def_cfa_offset 4
+ ; X86-NEXT: retl $4
+ ;
+ ; X64-LABEL: shift_zext_shl2_vec:
+ ; X64: # %bb.0:
+ ; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+ ; X64-NEXT: pxor %xmm1, %xmm1
+ ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+ ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+ ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+ ; X64-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+ ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+ ; X64-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+ ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+ ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+ ; X64-NEXT: retq
+ %a = and <4 x i8> %x, <i8 64, i8 63, i8 31, i8 23>
+ %b = zext <4 x i8> %a to <4 x i32>
+ %c = shl <4 x i32> %b, <i32 9, i32 8, i32 7, i32 6>
+ ret <4 x i32> %c
+ }
+
declare dso_local void @f(i64)