1
- ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1
+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2
2
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK
3
3
4
4
; Should codegen to a nop, since idx is zero.
@@ -84,14 +84,15 @@ define <4 x i32> @extract_v4i32_nxv2i32_idx4(<vscale x 2 x i32> %vec) nounwind #
84
84
; CHECK: // %bb.0:
85
85
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
86
86
; CHECK-NEXT: addvl sp, sp, #-1
87
- ; CHECK-NEXT: ptrue p0.d
87
+ ; CHECK-NEXT: ptrue p0.d, vl4
88
88
; CHECK-NEXT: mov x8, #4 // =0x4
89
89
; CHECK-NEXT: mov x9, sp
90
- ; CHECK-NEXT: ptrue p1.d, vl4
91
- ; CHECK-NEXT: st1d { z0.d }, p0, [sp]
92
- ; CHECK-NEXT: ld1d { z0.d }, p1/z, [x9, x8, lsl #3]
93
- ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
94
- ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
90
+ ; CHECK-NEXT: mov z2.d, z0.d
91
+ ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x9, x8, lsl #3]
92
+ ; CHECK-NEXT: ptrue p0.d
93
+ ; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
94
+ ; CHECK-NEXT: st1d { z2.d }, p0, [sp]
95
+ ; CHECK-NEXT: mov v0.16b, v1.16b
95
96
; CHECK-NEXT: addvl sp, sp, #1
96
97
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
97
98
; CHECK-NEXT: ret
@@ -149,14 +150,15 @@ define <8 x i16> @extract_v8i16_nxv4i16_idx8(<vscale x 4 x i16> %vec) nounwind #
149
150
; CHECK: // %bb.0:
150
151
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
151
152
; CHECK-NEXT: addvl sp, sp, #-1
152
- ; CHECK-NEXT: ptrue p0.s
153
+ ; CHECK-NEXT: ptrue p0.s, vl8
153
154
; CHECK-NEXT: mov x8, #8 // =0x8
154
155
; CHECK-NEXT: mov x9, sp
155
- ; CHECK-NEXT: ptrue p1.s, vl8
156
- ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
157
- ; CHECK-NEXT: ld1w { z0.s }, p1/z, [x9, x8, lsl #2]
158
- ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
159
- ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
156
+ ; CHECK-NEXT: mov z2.d, z0.d
157
+ ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x9, x8, lsl #2]
158
+ ; CHECK-NEXT: ptrue p0.s
159
+ ; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
160
+ ; CHECK-NEXT: st1w { z2.s }, p0, [sp]
161
+ ; CHECK-NEXT: mov v0.16b, v1.16b
160
162
; CHECK-NEXT: addvl sp, sp, #1
161
163
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
162
164
; CHECK-NEXT: ret
@@ -182,15 +184,16 @@ define <8 x i16> @extract_v8i16_nxv2i16_idx8(<vscale x 2 x i16> %vec) nounwind #
182
184
; CHECK: // %bb.0:
183
185
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
184
186
; CHECK-NEXT: addvl sp, sp, #-1
185
- ; CHECK-NEXT: ptrue p0.d
187
+ ; CHECK-NEXT: ptrue p0.d, vl8
186
188
; CHECK-NEXT: mov x8, #8 // =0x8
187
189
; CHECK-NEXT: mov x9, sp
188
- ; CHECK-NEXT: ptrue p1.d, vl8
189
- ; CHECK-NEXT: st1d { z0.d }, p0, [sp]
190
- ; CHECK-NEXT: ld1d { z0.d }, p1/z, [x9, x8, lsl #3]
191
- ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
192
- ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
193
- ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
190
+ ; CHECK-NEXT: mov z2.d, z0.d
191
+ ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x9, x8, lsl #3]
192
+ ; CHECK-NEXT: ptrue p0.d
193
+ ; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
194
+ ; CHECK-NEXT: st1d { z2.d }, p0, [sp]
195
+ ; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
196
+ ; CHECK-NEXT: mov v0.16b, v1.16b
194
197
; CHECK-NEXT: addvl sp, sp, #1
195
198
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
196
199
; CHECK-NEXT: ret
@@ -247,14 +250,15 @@ define <16 x i8> @extract_v16i8_nxv8i8_idx16(<vscale x 8 x i8> %vec) nounwind #1
247
250
; CHECK: // %bb.0:
248
251
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
249
252
; CHECK-NEXT: addvl sp, sp, #-1
250
- ; CHECK-NEXT: ptrue p0.h
253
+ ; CHECK-NEXT: ptrue p0.h, vl16
251
254
; CHECK-NEXT: mov x8, #16 // =0x10
252
255
; CHECK-NEXT: mov x9, sp
253
- ; CHECK-NEXT: ptrue p1.h, vl16
254
- ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
255
- ; CHECK-NEXT: ld1h { z0.h }, p1/z, [x9, x8, lsl #1]
256
- ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
257
- ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
256
+ ; CHECK-NEXT: mov z2.d, z0.d
257
+ ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x9, x8, lsl #1]
258
+ ; CHECK-NEXT: ptrue p0.h
259
+ ; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b
260
+ ; CHECK-NEXT: st1h { z2.h }, p0, [sp]
261
+ ; CHECK-NEXT: mov v0.16b, v1.16b
258
262
; CHECK-NEXT: addvl sp, sp, #1
259
263
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
260
264
; CHECK-NEXT: ret
@@ -280,15 +284,16 @@ define <16 x i8> @extract_v16i8_nxv4i8_idx16(<vscale x 4 x i8> %vec) nounwind #1
280
284
; CHECK: // %bb.0:
281
285
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
282
286
; CHECK-NEXT: addvl sp, sp, #-1
283
- ; CHECK-NEXT: ptrue p0.s
287
+ ; CHECK-NEXT: ptrue p0.s, vl16
284
288
; CHECK-NEXT: mov x8, #16 // =0x10
285
289
; CHECK-NEXT: mov x9, sp
286
- ; CHECK-NEXT: ptrue p1.s, vl16
287
- ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
288
- ; CHECK-NEXT: ld1w { z0.s }, p1/z, [x9, x8, lsl #2]
289
- ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
290
- ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
291
- ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
290
+ ; CHECK-NEXT: mov z2.d, z0.d
291
+ ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x9, x8, lsl #2]
292
+ ; CHECK-NEXT: ptrue p0.s
293
+ ; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
294
+ ; CHECK-NEXT: st1w { z2.s }, p0, [sp]
295
+ ; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b
296
+ ; CHECK-NEXT: mov v0.16b, v1.16b
292
297
; CHECK-NEXT: addvl sp, sp, #1
293
298
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
294
299
; CHECK-NEXT: ret
@@ -437,8 +442,10 @@ define <2 x i64> @extract_fixed_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind
437
442
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
438
443
; CHECK-NEXT: addvl sp, sp, #-1
439
444
; CHECK-NEXT: ptrue p0.d
440
- ; CHECK-NEXT: st1d { z0.d }, p0, [sp]
441
- ; CHECK-NEXT: ldr q0, [sp, #16]
445
+ ; CHECK-NEXT: mov z2.d, z0.d
446
+ ; CHECK-NEXT: ldr q1, [sp, #16]
447
+ ; CHECK-NEXT: mov v0.16b, v1.16b
448
+ ; CHECK-NEXT: st1d { z2.d }, p0, [sp]
442
449
; CHECK-NEXT: addvl sp, sp, #1
443
450
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
444
451
; CHECK-NEXT: ret
0 commit comments