@@ -71,15 +71,16 @@ ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %retval
 define {<vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv4i64(<vscale x 4 x i64> %vec) {
 ; CHECK-LABEL: vector_deinterleave_nxv2i64_nxv4i64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 85
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    li a0, 170
+; CHECK-NEXT:    vmv.v.x v17, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vid.v v12
-; CHECK-NEXT:    vand.vi v12, v12, 1
-; CHECK-NEXT:    vmseq.vi v16, v12, 0
 ; CHECK-NEXT:    vcompress.vm v12, v8, v16
-; CHECK-NEXT:    vmnot.m v14, v16
-; CHECK-NEXT:    vcompress.vm v16, v8, v14
+; CHECK-NEXT:    vcompress.vm v20, v8, v17
 ; CHECK-NEXT:    vmv2r.v v8, v12
-; CHECK-NEXT:    vmv2r.v v10, v16
+; CHECK-NEXT:    vmv2r.v v10, v20
 ; CHECK-NEXT:    ret
   %retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %vec)
   ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %retval
@@ -88,15 +89,16 @@ ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %retval
 define {<vscale x 4 x i64>, <vscale x 4 x i64>} @vector_deinterleave_nxv4i64_nxv8i64(<vscale x 8 x i64> %vec) {
 ; CHECK-LABEL: vector_deinterleave_nxv4i64_nxv8i64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 85
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v24, a0
+; CHECK-NEXT:    li a0, 170
+; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vid.v v16
-; CHECK-NEXT:    vand.vi v16, v16, 1
-; CHECK-NEXT:    vmseq.vi v24, v16, 0
 ; CHECK-NEXT:    vcompress.vm v16, v8, v24
-; CHECK-NEXT:    vmnot.m v20, v24
-; CHECK-NEXT:    vcompress.vm v24, v8, v20
+; CHECK-NEXT:    vcompress.vm v0, v8, v25
 ; CHECK-NEXT:    vmv4r.v v8, v16
-; CHECK-NEXT:    vmv4r.v v12, v24
+; CHECK-NEXT:    vmv4r.v v12, v0
 ; CHECK-NEXT:    ret
   %retval = call {<vscale x 4 x i64>, <vscale x 4 x i64>} @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %vec)
   ret {<vscale x 4 x i64>, <vscale x 4 x i64>} %retval
@@ -182,50 +184,41 @@ define {<vscale x 8 x i64>, <vscale x 8 x i64>} @vector_deinterleave_nxv8i64_nxv
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 24
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT:    li a0, 85
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v7, a0
+; CHECK-NEXT:    li a0, 170
+; CHECK-NEXT:    vmv.v.x v6, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vid.v v16
-; CHECK-NEXT:    vand.vi v24, v16, 1
-; CHECK-NEXT:    vmseq.vi v16, v24, 0
-; CHECK-NEXT:    vcompress.vm v24, v8, v16
+; CHECK-NEXT:    vcompress.vm v24, v8, v7
+; CHECK-NEXT:    vmv1r.v v28, v7
+; CHECK-NEXT:    vmv1r.v v29, v6
+; CHECK-NEXT:    vcompress.vm v0, v8, v29
+; CHECK-NEXT:    vcompress.vm v8, v16, v28
 ; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmnot.m v17, v16
-; CHECK-NEXT:    vcompress.vm v0, v8, v17
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vcompress.vm v24, v8, v16
+; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vcompress.vm v8, v16, v29
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vcompress.vm v24, v8, v17
+; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmv4r.v v28, v8
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vmv4r.v v4, v24
-; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    vmv4r.v v4, v8
+; CHECK-NEXT:    vmv8r.v v8, v24
 ; CHECK-NEXT:    vmv8r.v v16, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 24
-; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    .cfi_def_cfa sp, 16
 ; CHECK-NEXT:    addi sp, sp, 16
@@ -350,15 +343,16 @@ ret {<vscale x 4 x float>, <vscale x 4 x float>} %retval
 define {<vscale x 2 x double>, <vscale x 2 x double>} @vector_deinterleave_nxv2f64_nxv4f64(<vscale x 4 x double> %vec) {
 ; CHECK-LABEL: vector_deinterleave_nxv2f64_nxv4f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 85
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    li a0, 170
+; CHECK-NEXT:    vmv.v.x v17, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vid.v v12
-; CHECK-NEXT:    vand.vi v12, v12, 1
-; CHECK-NEXT:    vmseq.vi v16, v12, 0
 ; CHECK-NEXT:    vcompress.vm v12, v8, v16
-; CHECK-NEXT:    vmnot.m v14, v16
-; CHECK-NEXT:    vcompress.vm v16, v8, v14
+; CHECK-NEXT:    vcompress.vm v20, v8, v17
 ; CHECK-NEXT:    vmv2r.v v8, v12
-; CHECK-NEXT:    vmv2r.v v10, v16
+; CHECK-NEXT:    vmv2r.v v10, v20
 ; CHECK-NEXT:    ret
   %retval = call {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %vec)
   ret {<vscale x 2 x double>, <vscale x 2 x double>} %retval
@@ -423,50 +417,41 @@ define {<vscale x 8 x double>, <vscale x 8 x double>} @vector_deinterleave_nxv8f
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 24
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT:    li a0, 85
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v7, a0
+; CHECK-NEXT:    li a0, 170
+; CHECK-NEXT:    vmv.v.x v6, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vid.v v16
-; CHECK-NEXT:    vand.vi v24, v16, 1
-; CHECK-NEXT:    vmseq.vi v16, v24, 0
-; CHECK-NEXT:    vcompress.vm v24, v8, v16
+; CHECK-NEXT:    vcompress.vm v24, v8, v7
+; CHECK-NEXT:    vmv1r.v v28, v7
+; CHECK-NEXT:    vmv1r.v v29, v6
+; CHECK-NEXT:    vcompress.vm v0, v8, v29
+; CHECK-NEXT:    vcompress.vm v8, v16, v28
 ; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmnot.m v17, v16
-; CHECK-NEXT:    vcompress.vm v0, v8, v17
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vcompress.vm v24, v8, v16
+; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vcompress.vm v8, v16, v29
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vcompress.vm v24, v8, v17
+; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmv4r.v v28, v8
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vmv4r.v v4, v24
-; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    vmv4r.v v4, v8
+; CHECK-NEXT:    vmv8r.v v8, v24
 ; CHECK-NEXT:    vmv8r.v v16, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 24
-; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    .cfi_def_cfa sp, 16
 ; CHECK-NEXT:    addi sp, sp, 16
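
For context, the updated CHECK lines in these hunks materialize the even/odd lane masks as the splatted byte patterns 85 (0b01010101, even-indexed lanes) and 170 (0b10101010, odd-indexed lanes) and feed each directly to vcompress.vm, instead of recomputing the masks with vid.v/vand.vi/vmseq.vi and a vmnot.m for the complement; in the m8 cases this also shrinks the spill area from 24*vlenb to 16*vlenb. A minimal sketch of the IR these tests exercise is shown below; the function name and element type are illustrative only and do not appear in the patch.

; Illustrative sketch (not from the patch): llvm.vector.deinterleave2 splits a
; scalable vector into its even-indexed and odd-indexed lanes, which the new
; lowering implements with vcompress.vm under the 0b01010101 (even) and
; 0b10101010 (odd) masks.
define {<vscale x 2 x i32>, <vscale x 2 x i32>} @deinterleave_sketch(<vscale x 4 x i32> %vec) {
  %res = call {<vscale x 2 x i32>, <vscale x 2 x i32>} @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %vec)
  ret {<vscale x 2 x i32>, <vscale x 2 x i32>} %res
}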