|
5 | 5 | define <16 x i8> @load_v3i8(ptr %src) { |
6 | 6 | ; CHECK-LABEL: load_v3i8: |
7 | 7 | ; CHECK: ; %bb.0: |
8 | | -; CHECK-NEXT: sub sp, sp, #16 |
9 | | -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
10 | | -; CHECK-NEXT: ldrh w8, [x0] |
11 | | -; CHECK-NEXT: strh w8, [sp, #12] |
12 | | -; CHECK-NEXT: ldr s0, [sp, #12] |
13 | | -; CHECK-NEXT: ushll.8h v0, v0, #0 |
14 | | -; CHECK-NEXT: umov.h w8, v0[0] |
15 | | -; CHECK-NEXT: umov.h w9, v0[1] |
| 8 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 9 | +; CHECK-NEXT: ldrh w9, [x0] |
| 10 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
16 | 11 | ; CHECK-NEXT: fmov s0, w8 |
17 | | -; CHECK-NEXT: add x8, x0, #2 |
18 | | -; CHECK-NEXT: mov.b v0[1], w9 |
19 | | -; CHECK-NEXT: ld1.b { v0 }[2], [x8] |
20 | | -; CHECK-NEXT: add sp, sp, #16 |
21 | 12 | ; CHECK-NEXT: ret |
22 | 13 | ; |
23 | 14 | ; BE-LABEL: load_v3i8: |
@@ -47,19 +38,14 @@ define <16 x i8> @load_v3i8(ptr %src) { |
47 | 38 | define <4 x i32> @load_v3i8_to_4xi32(ptr %src) { |
48 | 39 | ; CHECK-LABEL: load_v3i8_to_4xi32: |
49 | 40 | ; CHECK: ; %bb.0: |
50 | | -; CHECK-NEXT: sub sp, sp, #16 |
51 | | -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
52 | | -; CHECK-NEXT: ldrh w8, [x0] |
| 41 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 42 | +; CHECK-NEXT: ldrh w9, [x0] |
53 | 43 | ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff |
54 | | -; CHECK-NEXT: strh w8, [sp, #12] |
55 | | -; CHECK-NEXT: ldr s0, [sp, #12] |
56 | | -; CHECK-NEXT: ldrsb w8, [x0, #2] |
57 | | -; CHECK-NEXT: ushll.8h v0, v0, #0 |
58 | | -; CHECK-NEXT: mov.h v0[1], v0[1] |
59 | | -; CHECK-NEXT: mov.h v0[2], w8 |
| 44 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 45 | +; CHECK-NEXT: fmov s0, w8 |
| 46 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
60 | 47 | ; CHECK-NEXT: ushll.4s v0, v0, #0 |
61 | 48 | ; CHECK-NEXT: and.16b v0, v0, v1 |
62 | | -; CHECK-NEXT: add sp, sp, #16 |
63 | 49 | ; CHECK-NEXT: ret |
64 | 50 | ; |
65 | 51 | ; BE-LABEL: load_v3i8_to_4xi32: |
@@ -90,19 +76,14 @@ define <4 x i32> @load_v3i8_to_4xi32(ptr %src) { |
90 | 76 | define <4 x i32> @load_v3i8_to_4xi32_align_2(ptr %src) { |
91 | 77 | ; CHECK-LABEL: load_v3i8_to_4xi32_align_2: |
92 | 78 | ; CHECK: ; %bb.0: |
93 | | -; CHECK-NEXT: sub sp, sp, #16 |
94 | | -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
95 | | -; CHECK-NEXT: ldrh w8, [x0] |
| 79 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 80 | +; CHECK-NEXT: ldrh w9, [x0] |
96 | 81 | ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff |
97 | | -; CHECK-NEXT: strh w8, [sp, #12] |
98 | | -; CHECK-NEXT: ldr s0, [sp, #12] |
99 | | -; CHECK-NEXT: ldrsb w8, [x0, #2] |
100 | | -; CHECK-NEXT: ushll.8h v0, v0, #0 |
101 | | -; CHECK-NEXT: mov.h v0[1], v0[1] |
102 | | -; CHECK-NEXT: mov.h v0[2], w8 |
| 82 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 83 | +; CHECK-NEXT: fmov s0, w8 |
| 84 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
103 | 85 | ; CHECK-NEXT: ushll.4s v0, v0, #0 |
104 | 86 | ; CHECK-NEXT: and.16b v0, v0, v1 |
105 | | -; CHECK-NEXT: add sp, sp, #16 |
106 | 87 | ; CHECK-NEXT: ret |
107 | 88 | ; |
108 | 89 | ; BE-LABEL: load_v3i8_to_4xi32_align_2: |
@@ -161,19 +142,14 @@ define <4 x i32> @load_v3i8_to_4xi32_align_4(ptr %src) { |
161 | 142 | define <4 x i32> @load_v3i8_to_4xi32_const_offset_1(ptr %src) { |
162 | 143 | ; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_1: |
163 | 144 | ; CHECK: ; %bb.0: |
164 | | -; CHECK-NEXT: sub sp, sp, #16 |
165 | | -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
166 | | -; CHECK-NEXT: ldurh w8, [x0, #1] |
| 145 | +; CHECK-NEXT: ldrb w8, [x0, #3] |
| 146 | +; CHECK-NEXT: ldurh w9, [x0, #1] |
167 | 147 | ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff |
168 | | -; CHECK-NEXT: strh w8, [sp, #12] |
169 | | -; CHECK-NEXT: ldr s0, [sp, #12] |
170 | | -; CHECK-NEXT: ldrsb w8, [x0, #3] |
171 | | -; CHECK-NEXT: ushll.8h v0, v0, #0 |
172 | | -; CHECK-NEXT: mov.h v0[1], v0[1] |
173 | | -; CHECK-NEXT: mov.h v0[2], w8 |
| 148 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 149 | +; CHECK-NEXT: fmov s0, w8 |
| 150 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
174 | 151 | ; CHECK-NEXT: ushll.4s v0, v0, #0 |
175 | 152 | ; CHECK-NEXT: and.16b v0, v0, v1 |
176 | | -; CHECK-NEXT: add sp, sp, #16 |
177 | 153 | ; CHECK-NEXT: ret |
178 | 154 | ; |
179 | 155 | ; BE-LABEL: load_v3i8_to_4xi32_const_offset_1: |
@@ -205,19 +181,14 @@ define <4 x i32> @load_v3i8_to_4xi32_const_offset_1(ptr %src) { |
205 | 181 | define <4 x i32> @load_v3i8_to_4xi32_const_offset_3(ptr %src) { |
206 | 182 | ; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_3: |
207 | 183 | ; CHECK: ; %bb.0: |
208 | | -; CHECK-NEXT: sub sp, sp, #16 |
209 | | -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
210 | | -; CHECK-NEXT: ldurh w8, [x0, #3] |
| 184 | +; CHECK-NEXT: ldrb w8, [x0, #5] |
| 185 | +; CHECK-NEXT: ldurh w9, [x0, #3] |
211 | 186 | ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff |
212 | | -; CHECK-NEXT: strh w8, [sp, #12] |
213 | | -; CHECK-NEXT: ldr s0, [sp, #12] |
214 | | -; CHECK-NEXT: ldrsb w8, [x0, #5] |
215 | | -; CHECK-NEXT: ushll.8h v0, v0, #0 |
216 | | -; CHECK-NEXT: mov.h v0[1], v0[1] |
217 | | -; CHECK-NEXT: mov.h v0[2], w8 |
| 187 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 188 | +; CHECK-NEXT: fmov s0, w8 |
| 189 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
218 | 190 | ; CHECK-NEXT: ushll.4s v0, v0, #0 |
219 | 191 | ; CHECK-NEXT: and.16b v0, v0, v1 |
220 | | -; CHECK-NEXT: add sp, sp, #16 |
221 | 192 | ; CHECK-NEXT: ret |
222 | 193 | ; |
223 | 194 | ; BE-LABEL: load_v3i8_to_4xi32_const_offset_3: |
@@ -349,18 +320,14 @@ define <3 x i32> @load_v3i32(ptr %src) { |
349 | 320 | define <3 x i32> @load_v3i8_zext_to_3xi32(ptr %src) { |
350 | 321 | ; CHECK-LABEL: load_v3i8_zext_to_3xi32: |
351 | 322 | ; CHECK: ; %bb.0: |
352 | | -; CHECK-NEXT: sub sp, sp, #16 |
353 | | -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
354 | | -; CHECK-NEXT: ldrh w8, [x0] |
| 323 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 324 | +; CHECK-NEXT: ldrh w9, [x0] |
355 | 325 | ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff |
356 | | -; CHECK-NEXT: strh w8, [sp, #12] |
357 | | -; CHECK-NEXT: add x8, x0, #2 |
358 | | -; CHECK-NEXT: ldr s0, [sp, #12] |
359 | | -; CHECK-NEXT: ushll.8h v0, v0, #0 |
360 | | -; CHECK-NEXT: ld1.b { v0 }[4], [x8] |
| 326 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 327 | +; CHECK-NEXT: fmov s0, w8 |
| 328 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
361 | 329 | ; CHECK-NEXT: ushll.4s v0, v0, #0 |
362 | 330 | ; CHECK-NEXT: and.16b v0, v0, v1 |
363 | | -; CHECK-NEXT: add sp, sp, #16 |
364 | 331 | ; CHECK-NEXT: ret |
365 | 332 | ; |
366 | 333 | ; BE-LABEL: load_v3i8_zext_to_3xi32: |
@@ -389,18 +356,14 @@ define <3 x i32> @load_v3i8_zext_to_3xi32(ptr %src) { |
389 | 356 | define <3 x i32> @load_v3i8_sext_to_3xi32(ptr %src) { |
390 | 357 | ; CHECK-LABEL: load_v3i8_sext_to_3xi32: |
391 | 358 | ; CHECK: ; %bb.0: |
392 | | -; CHECK-NEXT: sub sp, sp, #16 |
393 | | -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
394 | | -; CHECK-NEXT: ldrh w8, [x0] |
395 | | -; CHECK-NEXT: strh w8, [sp, #12] |
396 | | -; CHECK-NEXT: add x8, x0, #2 |
397 | | -; CHECK-NEXT: ldr s0, [sp, #12] |
398 | | -; CHECK-NEXT: ushll.8h v0, v0, #0 |
399 | | -; CHECK-NEXT: ld1.b { v0 }[4], [x8] |
| 359 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 360 | +; CHECK-NEXT: ldrh w9, [x0] |
| 361 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 362 | +; CHECK-NEXT: fmov s0, w8 |
| 363 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
400 | 364 | ; CHECK-NEXT: ushll.4s v0, v0, #0 |
401 | 365 | ; CHECK-NEXT: shl.4s v0, v0, #24 |
402 | 366 | ; CHECK-NEXT: sshr.4s v0, v0, #24 |
403 | | -; CHECK-NEXT: add sp, sp, #16 |
404 | 367 | ; CHECK-NEXT: ret |
405 | 368 | ; |
406 | 369 | ; BE-LABEL: load_v3i8_sext_to_3xi32: |
@@ -514,19 +477,15 @@ entry: |
514 | 477 | define void @load_ext_to_64bits(ptr %src, ptr %dst) { |
515 | 478 | ; CHECK-LABEL: load_ext_to_64bits: |
516 | 479 | ; CHECK: ; %bb.0: ; %entry |
517 | | -; CHECK-NEXT: sub sp, sp, #16 |
518 | | -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
519 | | -; CHECK-NEXT: ldrh w8, [x0] |
520 | | -; CHECK-NEXT: strh w8, [sp, #12] |
521 | | -; CHECK-NEXT: add x8, x0, #2 |
522 | | -; CHECK-NEXT: ldr s0, [sp, #12] |
523 | | -; CHECK-NEXT: ushll.8h v0, v0, #0 |
524 | | -; CHECK-NEXT: ld1.b { v0 }[4], [x8] |
| 480 | +; CHECK-NEXT: ldrb w9, [x0, #2] |
525 | 481 | ; CHECK-NEXT: add x8, x1, #4 |
| 482 | +; CHECK-NEXT: ldrh w10, [x0] |
| 483 | +; CHECK-NEXT: orr w9, w10, w9, lsl #16 |
| 484 | +; CHECK-NEXT: fmov s0, w9 |
| 485 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
526 | 486 | ; CHECK-NEXT: bic.4h v0, #255, lsl #8 |
527 | 487 | ; CHECK-NEXT: st1.h { v0 }[2], [x8] |
528 | 488 | ; CHECK-NEXT: str s0, [x1] |
529 | | -; CHECK-NEXT: add sp, sp, #16 |
530 | 489 | ; CHECK-NEXT: ret |
531 | 490 | ; |
532 | 491 | ; BE-LABEL: load_ext_to_64bits: |
@@ -617,24 +576,20 @@ entry: |
617 | 576 | define void @load_ext_add_to_64bits(ptr %src, ptr %dst) { |
618 | 577 | ; CHECK-LABEL: load_ext_add_to_64bits: |
619 | 578 | ; CHECK: ; %bb.0: ; %entry |
620 | | -; CHECK-NEXT: sub sp, sp, #16 |
621 | | -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
622 | | -; CHECK-NEXT: ldrh w8, [x0] |
| 579 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 580 | +; CHECK-NEXT: ldrh w9, [x0] |
| 581 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 582 | +; CHECK-NEXT: fmov s0, w8 |
623 | 583 | ; CHECK-NEXT: Lloh2: |
624 | | -; CHECK-NEXT: adrp x9, lCPI15_0@PAGE |
625 | | -; CHECK-NEXT: strh w8, [sp, #12] |
626 | | -; CHECK-NEXT: add x8, x0, #2 |
627 | | -; CHECK-NEXT: ldr s0, [sp, #12] |
| 584 | +; CHECK-NEXT: adrp x8, lCPI15_0@PAGE |
| 585 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
628 | 586 | ; CHECK-NEXT: Lloh3: |
629 | | -; CHECK-NEXT: ldr d1, [x9, lCPI15_0@PAGEOFF] |
630 | | -; CHECK-NEXT: ushll.8h v0, v0, #0 |
631 | | -; CHECK-NEXT: ld1.b { v0 }[4], [x8] |
| 587 | +; CHECK-NEXT: ldr d1, [x8, lCPI15_0@PAGEOFF] |
632 | 588 | ; CHECK-NEXT: add x8, x1, #4 |
633 | 589 | ; CHECK-NEXT: bic.4h v0, #255, lsl #8 |
634 | 590 | ; CHECK-NEXT: add.4h v0, v0, v1 |
635 | 591 | ; CHECK-NEXT: st1.h { v0 }[2], [x8] |
636 | 592 | ; CHECK-NEXT: str s0, [x1] |
637 | | -; CHECK-NEXT: add sp, sp, #16 |
638 | 593 | ; CHECK-NEXT: ret |
639 | 594 | ; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh3 |
640 | 595 | ; |
@@ -883,24 +838,21 @@ define void @shift_trunc_volatile_store(ptr %src, ptr %dst) { |
883 | 838 | define void @load_v3i8_zext_to_3xi32_add_trunc_store(ptr %src) { |
884 | 839 | ; CHECK-LABEL: load_v3i8_zext_to_3xi32_add_trunc_store: |
885 | 840 | ; CHECK: ; %bb.0: |
886 | | -; CHECK-NEXT: sub sp, sp, #16 |
887 | | -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
888 | | -; CHECK-NEXT: ldrh w9, [x0] |
| 841 | +; CHECK-NEXT: ldrb w10, [x0, #2] |
889 | 842 | ; CHECK-NEXT: Lloh4: |
890 | 843 | ; CHECK-NEXT: adrp x8, lCPI22_0@PAGE |
891 | | -; CHECK-NEXT: strh w9, [sp, #12] |
| 844 | +; CHECK-NEXT: ldrh w11, [x0] |
892 | 845 | ; CHECK-NEXT: add x9, x0, #2 |
893 | | -; CHECK-NEXT: ldr s0, [sp, #12] |
894 | 846 | ; CHECK-NEXT: Lloh5: |
895 | 847 | ; CHECK-NEXT: ldr q1, [x8, lCPI22_0@PAGEOFF] |
896 | 848 | ; CHECK-NEXT: add x8, x0, #1 |
897 | | -; CHECK-NEXT: ushll.8h v0, v0, #0 |
898 | | -; CHECK-NEXT: ld1.b { v0 }[4], [x9] |
| 849 | +; CHECK-NEXT: orr w10, w11, w10, lsl #16 |
| 850 | +; CHECK-NEXT: fmov s0, w10 |
| 851 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
899 | 852 | ; CHECK-NEXT: uaddw.4s v0, v1, v0 |
900 | | -; CHECK-NEXT: st1.b { v0 }[4], [x8] |
901 | 853 | ; CHECK-NEXT: st1.b { v0 }[8], [x9] |
902 | 854 | ; CHECK-NEXT: st1.b { v0 }[0], [x0] |
903 | | -; CHECK-NEXT: add sp, sp, #16 |
| 855 | +; CHECK-NEXT: st1.b { v0 }[4], [x8] |
904 | 856 | ; CHECK-NEXT: ret |
905 | 857 | ; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh5 |
906 | 858 | ; |
@@ -939,24 +891,21 @@ define void @load_v3i8_zext_to_3xi32_add_trunc_store(ptr %src) { |
939 | 891 | define void @load_v3i8_sext_to_3xi32_add_trunc_store(ptr %src) { |
940 | 892 | ; CHECK-LABEL: load_v3i8_sext_to_3xi32_add_trunc_store: |
941 | 893 | ; CHECK: ; %bb.0: |
942 | | -; CHECK-NEXT: sub sp, sp, #16 |
943 | | -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
944 | | -; CHECK-NEXT: ldrh w9, [x0] |
| 894 | +; CHECK-NEXT: ldrb w10, [x0, #2] |
945 | 895 | ; CHECK-NEXT: Lloh6: |
946 | 896 | ; CHECK-NEXT: adrp x8, lCPI23_0@PAGE |
947 | | -; CHECK-NEXT: strh w9, [sp, #12] |
| 897 | +; CHECK-NEXT: ldrh w11, [x0] |
948 | 898 | ; CHECK-NEXT: add x9, x0, #2 |
949 | | -; CHECK-NEXT: ldr s0, [sp, #12] |
950 | 899 | ; CHECK-NEXT: Lloh7: |
951 | 900 | ; CHECK-NEXT: ldr q1, [x8, lCPI23_0@PAGEOFF] |
952 | 901 | ; CHECK-NEXT: add x8, x0, #1 |
953 | | -; CHECK-NEXT: ushll.8h v0, v0, #0 |
954 | | -; CHECK-NEXT: ld1.b { v0 }[4], [x9] |
| 902 | +; CHECK-NEXT: orr w10, w11, w10, lsl #16 |
| 903 | +; CHECK-NEXT: fmov s0, w10 |
| 904 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
955 | 905 | ; CHECK-NEXT: uaddw.4s v0, v1, v0 |
956 | | -; CHECK-NEXT: st1.b { v0 }[4], [x8] |
957 | 906 | ; CHECK-NEXT: st1.b { v0 }[8], [x9] |
958 | 907 | ; CHECK-NEXT: st1.b { v0 }[0], [x0] |
959 | | -; CHECK-NEXT: add sp, sp, #16 |
| 908 | +; CHECK-NEXT: st1.b { v0 }[4], [x8] |
960 | 909 | ; CHECK-NEXT: ret |
961 | 910 | ; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh7 |
962 | 911 | ; |
|
0 commit comments