@@ -543,10 +543,10 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
543
543
; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi
544
544
; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
545
545
; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi
546
+ ; X64-NO-BMI2-NO-SHLD-NEXT: leal (%rax,%rax), %r8d
546
547
; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
547
- ; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8
548
548
; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8
549
- ; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdi , %r8
549
+ ; X64-NO-BMI2-NO-SHLD-NEXT: orl %edi , %r8d
550
550
; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
551
551
; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax
552
552
; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil
@@ -573,19 +573,19 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
573
573
; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
574
574
; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
575
575
; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
576
- ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
577
- ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
576
+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq % xmm0, %rax
577
+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
578
578
; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
579
- ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx
580
579
; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi
581
580
; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil
582
- ; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax ), %r8
581
+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: leal (%rcx,%rcx ), %r8d
583
582
; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi
584
- ; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rcx, %rdi
585
583
; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax
584
+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax
585
+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx
586
586
; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil
587
- ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rdi , %rax
588
- ; X64-HAVE-BMI2-NO-SHLD-NEXT: movb %al , (%rdx)
587
+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax , %rcx
588
+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: movb %cl , (%rdx)
589
589
; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
590
590
;
591
591
; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca:
@@ -651,10 +651,10 @@ define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
651
651
; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi
652
652
; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
653
653
; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi
654
+ ; X64-NO-BMI2-NO-SHLD-NEXT: leal (%rax,%rax), %r8d
654
655
; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
655
- ; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8
656
656
; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8
657
- ; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdi , %r8
657
+ ; X64-NO-BMI2-NO-SHLD-NEXT: orl %edi , %r8d
658
658
; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
659
659
; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax
660
660
; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil
@@ -681,19 +681,19 @@ define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
681
681
; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
682
682
; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
683
683
; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
684
- ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
685
- ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
684
+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq % xmm0, %rax
685
+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
686
686
; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
687
- ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx
688
687
; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi
689
688
; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil
690
- ; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax ), %r8
689
+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: leal (%rcx,%rcx ), %r8d
691
690
; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi
692
- ; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rcx, %rdi
693
691
; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax
692
+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax
693
+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx
694
694
; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil
695
- ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rdi , %rax
696
- ; X64-HAVE-BMI2-NO-SHLD-NEXT: movw %ax , (%rdx)
695
+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax , %rcx
696
+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: movw %cx , (%rdx)
697
697
; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
698
698
;
699
699
; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca:
@@ -758,10 +758,10 @@ define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
758
758
; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi
759
759
; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
760
760
; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi
761
+ ; X64-NO-BMI2-NO-SHLD-NEXT: leal (%rax,%rax), %r8d
761
762
; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
762
- ; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8
763
763
; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8
764
- ; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdi , %r8
764
+ ; X64-NO-BMI2-NO-SHLD-NEXT: orl %edi , %r8d
765
765
; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
766
766
; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax
767
767
; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil
@@ -788,19 +788,19 @@ define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
788
788
; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
789
789
; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
790
790
; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
791
- ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
792
- ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
791
+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq % xmm0, %rax
792
+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
793
793
; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
794
- ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx
795
794
; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi
796
795
; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil
797
- ; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax ), %r8
796
+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: leal (%rcx,%rcx ), %r8d
798
797
; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi
799
- ; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rcx, %rdi
800
798
; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax
799
+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax
800
+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx
801
801
; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil
802
- ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rdi , %rax
803
- ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %eax , (%rdx)
802
+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax , %rcx
803
+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx , (%rdx)
804
804
; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
805
805
;
806
806
; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca:
0 commit comments