Skip to content

Commit 984cb79

Browse files
authored
[RISCV] Use vmv.v.x to materialize masks in deinterleave2 lowering (#118500)
This is a follow-up to 2af2634 to use vmv.v.x of i8 constants instead of the prior vid/vand/vmsne sequence. The advantage of the vmv.v.x sequence is that it is always m1 (so it is cheaper at high LMUL), and it can be rematerialized by the register allocator if needed to locally reduce register pressure.
1 parent 4ad0fdd commit 984cb79

File tree

3 files changed

+102
-129
lines changed

3 files changed

+102
-129
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10901,23 +10901,23 @@ SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
1090110901
return DAG.getMergeValues({Even, Odd}, DL);
1090210902
}
1090310903

10904-
// For the indices, use the same SEW to avoid an extra vsetvli
10905-
// TODO: If container type is larger than m1, we can consider using a splat
10906-
// of a constant instead of the following sequence
10907-
10908-
// Create a vector of even indices {0, 1, 2, ...}
10909-
MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
10910-
SDValue StepVec = DAG.getStepVector(DL, IdxVT);
10911-
// 0, 1, 0, 1, 0, 1
10912-
SDValue ZeroOnes =
10913-
DAG.getNode(ISD::AND, DL, IdxVT, StepVec, DAG.getConstant(1, DL, IdxVT));
10904+
// For the indices, use the vmv.v.x of an i8 constant to fill the largest
10905+
// possibly mask vector, then extract the required subvector. Doing this
10906+
// (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
10907+
// creation to be rematerialized during register allocation to reduce
10908+
// register pressure if needed.
10909+
1091410910
MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
10915-
SDValue EvenMask =
10916-
DAG.getSetCC(DL, MaskVT, ZeroOnes, DAG.getConstant(0, DL, IdxVT),
10917-
ISD::CondCode::SETEQ);
10918-
// Have the latter be the not of the former to minimize the live range of
10919-
// the index vector since that might be large.
10920-
SDValue OddMask = DAG.getLogicalNOT(DL, EvenMask, MaskVT);
10911+
10912+
SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
10913+
EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
10914+
SDValue EvenMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, EvenSplat,
10915+
DAG.getVectorIdxConstant(0, DL));
10916+
10917+
SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
10918+
OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
10919+
SDValue OddMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, OddSplat,
10920+
DAG.getVectorIdxConstant(0, DL));
1092110921

1092210922
// vcompress the even and odd elements into two separate vectors
1092310923
SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,

llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll

Lines changed: 23 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -106,56 +106,44 @@ define {<vscale x 8 x i64>, <vscale x 8 x i64>} @vector_deinterleave_load_nxv8i6
106106
; CHECK-NEXT: addi sp, sp, -16
107107
; CHECK-NEXT: .cfi_def_cfa_offset 16
108108
; CHECK-NEXT: csrr a1, vlenb
109-
; CHECK-NEXT: li a2, 24
110-
; CHECK-NEXT: mul a1, a1, a2
109+
; CHECK-NEXT: slli a1, a1, 4
111110
; CHECK-NEXT: sub sp, sp, a1
112-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
111+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
112+
; CHECK-NEXT: li a1, 85
113+
; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma
114+
; CHECK-NEXT: vmv.v.x v16, a1
113115
; CHECK-NEXT: csrr a1, vlenb
114-
; CHECK-NEXT: vl8re64.v v16, (a0)
115-
; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma
116-
; CHECK-NEXT: vid.v v8
116+
; CHECK-NEXT: vl8re64.v v24, (a0)
117117
; CHECK-NEXT: slli a1, a1, 3
118-
; CHECK-NEXT: vand.vi v8, v8, 1
119118
; CHECK-NEXT: add a0, a0, a1
120-
; CHECK-NEXT: vmseq.vi v24, v8, 0
121-
; CHECK-NEXT: vl8re64.v v8, (a0)
122-
; CHECK-NEXT: csrr a0, vlenb
123-
; CHECK-NEXT: slli a0, a0, 4
124-
; CHECK-NEXT: add a0, sp, a0
125-
; CHECK-NEXT: addi a0, a0, 16
126-
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
127-
; CHECK-NEXT: vmnot.m v6, v24
128-
; CHECK-NEXT: vcompress.vm v8, v16, v24
129-
; CHECK-NEXT: vmv1r.v v13, v24
130-
; CHECK-NEXT: vcompress.vm v24, v16, v6
131-
; CHECK-NEXT: vmv1r.v v12, v6
132-
; CHECK-NEXT: csrr a0, vlenb
133-
; CHECK-NEXT: slli a0, a0, 4
134-
; CHECK-NEXT: add a0, sp, a0
135-
; CHECK-NEXT: addi a0, a0, 16
136-
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
137-
; CHECK-NEXT: vcompress.vm v0, v16, v13
119+
; CHECK-NEXT: li a1, 170
120+
; CHECK-NEXT: vl8re64.v v0, (a0)
121+
; CHECK-NEXT: vmv.v.x v17, a1
122+
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
123+
; CHECK-NEXT: vcompress.vm v8, v24, v16
124+
; CHECK-NEXT: vmv1r.v v12, v16
125+
; CHECK-NEXT: vmv1r.v v13, v17
126+
; CHECK-NEXT: vcompress.vm v16, v24, v13
127+
; CHECK-NEXT: vcompress.vm v24, v0, v12
138128
; CHECK-NEXT: addi a0, sp, 16
139-
; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
140-
; CHECK-NEXT: vcompress.vm v0, v16, v12
129+
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
130+
; CHECK-NEXT: vcompress.vm v24, v0, v13
141131
; CHECK-NEXT: csrr a0, vlenb
142132
; CHECK-NEXT: slli a0, a0, 3
143133
; CHECK-NEXT: add a0, sp, a0
144134
; CHECK-NEXT: addi a0, a0, 16
145-
; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
135+
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
146136
; CHECK-NEXT: addi a0, sp, 16
147-
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
148-
; CHECK-NEXT: vmv4r.v v12, v16
137+
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
138+
; CHECK-NEXT: vmv4r.v v12, v24
149139
; CHECK-NEXT: csrr a0, vlenb
150140
; CHECK-NEXT: slli a0, a0, 3
151141
; CHECK-NEXT: add a0, sp, a0
152142
; CHECK-NEXT: addi a0, a0, 16
153-
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
154-
; CHECK-NEXT: vmv4r.v v28, v16
155-
; CHECK-NEXT: vmv8r.v v16, v24
143+
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
144+
; CHECK-NEXT: vmv4r.v v20, v24
156145
; CHECK-NEXT: csrr a0, vlenb
157-
; CHECK-NEXT: li a1, 24
158-
; CHECK-NEXT: mul a0, a0, a1
146+
; CHECK-NEXT: slli a0, a0, 4
159147
; CHECK-NEXT: add sp, sp, a0
160148
; CHECK-NEXT: .cfi_def_cfa sp, 16
161149
; CHECK-NEXT: addi sp, sp, 16

llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll

Lines changed: 63 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -71,15 +71,16 @@ ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %retval
7171
define {<vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv4i64(<vscale x 4 x i64> %vec) {
7272
; CHECK-LABEL: vector_deinterleave_nxv2i64_nxv4i64:
7373
; CHECK: # %bb.0:
74+
; CHECK-NEXT: li a0, 85
75+
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
76+
; CHECK-NEXT: vmv.v.x v16, a0
77+
; CHECK-NEXT: li a0, 170
78+
; CHECK-NEXT: vmv.v.x v17, a0
7479
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
75-
; CHECK-NEXT: vid.v v12
76-
; CHECK-NEXT: vand.vi v12, v12, 1
77-
; CHECK-NEXT: vmseq.vi v16, v12, 0
7880
; CHECK-NEXT: vcompress.vm v12, v8, v16
79-
; CHECK-NEXT: vmnot.m v14, v16
80-
; CHECK-NEXT: vcompress.vm v16, v8, v14
81+
; CHECK-NEXT: vcompress.vm v20, v8, v17
8182
; CHECK-NEXT: vmv2r.v v8, v12
82-
; CHECK-NEXT: vmv2r.v v10, v16
83+
; CHECK-NEXT: vmv2r.v v10, v20
8384
; CHECK-NEXT: ret
8485
%retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %vec)
8586
ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %retval
@@ -88,15 +89,16 @@ ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %retval
8889
define {<vscale x 4 x i64>, <vscale x 4 x i64>} @vector_deinterleave_nxv4i64_nxv8i64(<vscale x 8 x i64> %vec) {
8990
; CHECK-LABEL: vector_deinterleave_nxv4i64_nxv8i64:
9091
; CHECK: # %bb.0:
92+
; CHECK-NEXT: li a0, 85
93+
; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
94+
; CHECK-NEXT: vmv.v.x v24, a0
95+
; CHECK-NEXT: li a0, 170
96+
; CHECK-NEXT: vmv.v.x v25, a0
9197
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
92-
; CHECK-NEXT: vid.v v16
93-
; CHECK-NEXT: vand.vi v16, v16, 1
94-
; CHECK-NEXT: vmseq.vi v24, v16, 0
9598
; CHECK-NEXT: vcompress.vm v16, v8, v24
96-
; CHECK-NEXT: vmnot.m v20, v24
97-
; CHECK-NEXT: vcompress.vm v24, v8, v20
99+
; CHECK-NEXT: vcompress.vm v0, v8, v25
98100
; CHECK-NEXT: vmv4r.v v8, v16
99-
; CHECK-NEXT: vmv4r.v v12, v24
101+
; CHECK-NEXT: vmv4r.v v12, v0
100102
; CHECK-NEXT: ret
101103
%retval = call {<vscale x 4 x i64>, <vscale x 4 x i64>} @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %vec)
102104
ret {<vscale x 4 x i64>, <vscale x 4 x i64>} %retval
@@ -182,50 +184,41 @@ define {<vscale x 8 x i64>, <vscale x 8 x i64>} @vector_deinterleave_nxv8i64_nxv
182184
; CHECK-NEXT: addi sp, sp, -16
183185
; CHECK-NEXT: .cfi_def_cfa_offset 16
184186
; CHECK-NEXT: csrr a0, vlenb
185-
; CHECK-NEXT: li a1, 24
186-
; CHECK-NEXT: mul a0, a0, a1
187-
; CHECK-NEXT: sub sp, sp, a0
188-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
189-
; CHECK-NEXT: csrr a0, vlenb
190187
; CHECK-NEXT: slli a0, a0, 4
191-
; CHECK-NEXT: add a0, sp, a0
192-
; CHECK-NEXT: addi a0, a0, 16
193-
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
188+
; CHECK-NEXT: sub sp, sp, a0
189+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
190+
; CHECK-NEXT: li a0, 85
191+
; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
192+
; CHECK-NEXT: vmv.v.x v7, a0
193+
; CHECK-NEXT: li a0, 170
194+
; CHECK-NEXT: vmv.v.x v6, a0
194195
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
195-
; CHECK-NEXT: vid.v v16
196-
; CHECK-NEXT: vand.vi v24, v16, 1
197-
; CHECK-NEXT: vmseq.vi v16, v24, 0
198-
; CHECK-NEXT: vcompress.vm v24, v8, v16
196+
; CHECK-NEXT: vcompress.vm v24, v8, v7
197+
; CHECK-NEXT: vmv1r.v v28, v7
198+
; CHECK-NEXT: vmv1r.v v29, v6
199+
; CHECK-NEXT: vcompress.vm v0, v8, v29
200+
; CHECK-NEXT: vcompress.vm v8, v16, v28
199201
; CHECK-NEXT: addi a0, sp, 16
200-
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
201-
; CHECK-NEXT: vmnot.m v17, v16
202-
; CHECK-NEXT: vcompress.vm v0, v8, v17
203-
; CHECK-NEXT: csrr a0, vlenb
204-
; CHECK-NEXT: slli a0, a0, 4
205-
; CHECK-NEXT: add a0, sp, a0
206-
; CHECK-NEXT: addi a0, a0, 16
207-
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
208-
; CHECK-NEXT: vcompress.vm v24, v8, v16
202+
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
203+
; CHECK-NEXT: vcompress.vm v8, v16, v29
209204
; CHECK-NEXT: csrr a0, vlenb
210205
; CHECK-NEXT: slli a0, a0, 3
211206
; CHECK-NEXT: add a0, sp, a0
212207
; CHECK-NEXT: addi a0, a0, 16
213-
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
214-
; CHECK-NEXT: vcompress.vm v24, v8, v17
208+
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
209+
; CHECK-NEXT: addi a0, sp, 16
210+
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
211+
; CHECK-NEXT: vmv4r.v v28, v8
215212
; CHECK-NEXT: csrr a0, vlenb
216213
; CHECK-NEXT: slli a0, a0, 3
217214
; CHECK-NEXT: add a0, sp, a0
218215
; CHECK-NEXT: addi a0, a0, 16
219216
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
220-
; CHECK-NEXT: addi a0, sp, 16
221-
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
222-
; CHECK-NEXT: vmv4r.v v20, v8
223-
; CHECK-NEXT: vmv4r.v v4, v24
224-
; CHECK-NEXT: vmv8r.v v8, v16
217+
; CHECK-NEXT: vmv4r.v v4, v8
218+
; CHECK-NEXT: vmv8r.v v8, v24
225219
; CHECK-NEXT: vmv8r.v v16, v0
226220
; CHECK-NEXT: csrr a0, vlenb
227-
; CHECK-NEXT: li a1, 24
228-
; CHECK-NEXT: mul a0, a0, a1
221+
; CHECK-NEXT: slli a0, a0, 4
229222
; CHECK-NEXT: add sp, sp, a0
230223
; CHECK-NEXT: .cfi_def_cfa sp, 16
231224
; CHECK-NEXT: addi sp, sp, 16
@@ -350,15 +343,16 @@ ret {<vscale x 4 x float>, <vscale x 4 x float>} %retval
350343
define {<vscale x 2 x double>, <vscale x 2 x double>} @vector_deinterleave_nxv2f64_nxv4f64(<vscale x 4 x double> %vec) {
351344
; CHECK-LABEL: vector_deinterleave_nxv2f64_nxv4f64:
352345
; CHECK: # %bb.0:
346+
; CHECK-NEXT: li a0, 85
347+
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
348+
; CHECK-NEXT: vmv.v.x v16, a0
349+
; CHECK-NEXT: li a0, 170
350+
; CHECK-NEXT: vmv.v.x v17, a0
353351
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
354-
; CHECK-NEXT: vid.v v12
355-
; CHECK-NEXT: vand.vi v12, v12, 1
356-
; CHECK-NEXT: vmseq.vi v16, v12, 0
357352
; CHECK-NEXT: vcompress.vm v12, v8, v16
358-
; CHECK-NEXT: vmnot.m v14, v16
359-
; CHECK-NEXT: vcompress.vm v16, v8, v14
353+
; CHECK-NEXT: vcompress.vm v20, v8, v17
360354
; CHECK-NEXT: vmv2r.v v8, v12
361-
; CHECK-NEXT: vmv2r.v v10, v16
355+
; CHECK-NEXT: vmv2r.v v10, v20
362356
; CHECK-NEXT: ret
363357
%retval = call {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %vec)
364358
ret {<vscale x 2 x double>, <vscale x 2 x double>} %retval
@@ -423,50 +417,41 @@ define {<vscale x 8 x double>, <vscale x 8 x double>} @vector_deinterleave_nxv8f
423417
; CHECK-NEXT: addi sp, sp, -16
424418
; CHECK-NEXT: .cfi_def_cfa_offset 16
425419
; CHECK-NEXT: csrr a0, vlenb
426-
; CHECK-NEXT: li a1, 24
427-
; CHECK-NEXT: mul a0, a0, a1
428-
; CHECK-NEXT: sub sp, sp, a0
429-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
430-
; CHECK-NEXT: csrr a0, vlenb
431420
; CHECK-NEXT: slli a0, a0, 4
432-
; CHECK-NEXT: add a0, sp, a0
433-
; CHECK-NEXT: addi a0, a0, 16
434-
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
421+
; CHECK-NEXT: sub sp, sp, a0
422+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
423+
; CHECK-NEXT: li a0, 85
424+
; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
425+
; CHECK-NEXT: vmv.v.x v7, a0
426+
; CHECK-NEXT: li a0, 170
427+
; CHECK-NEXT: vmv.v.x v6, a0
435428
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
436-
; CHECK-NEXT: vid.v v16
437-
; CHECK-NEXT: vand.vi v24, v16, 1
438-
; CHECK-NEXT: vmseq.vi v16, v24, 0
439-
; CHECK-NEXT: vcompress.vm v24, v8, v16
429+
; CHECK-NEXT: vcompress.vm v24, v8, v7
430+
; CHECK-NEXT: vmv1r.v v28, v7
431+
; CHECK-NEXT: vmv1r.v v29, v6
432+
; CHECK-NEXT: vcompress.vm v0, v8, v29
433+
; CHECK-NEXT: vcompress.vm v8, v16, v28
440434
; CHECK-NEXT: addi a0, sp, 16
441-
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
442-
; CHECK-NEXT: vmnot.m v17, v16
443-
; CHECK-NEXT: vcompress.vm v0, v8, v17
444-
; CHECK-NEXT: csrr a0, vlenb
445-
; CHECK-NEXT: slli a0, a0, 4
446-
; CHECK-NEXT: add a0, sp, a0
447-
; CHECK-NEXT: addi a0, a0, 16
448-
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
449-
; CHECK-NEXT: vcompress.vm v24, v8, v16
435+
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
436+
; CHECK-NEXT: vcompress.vm v8, v16, v29
450437
; CHECK-NEXT: csrr a0, vlenb
451438
; CHECK-NEXT: slli a0, a0, 3
452439
; CHECK-NEXT: add a0, sp, a0
453440
; CHECK-NEXT: addi a0, a0, 16
454-
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
455-
; CHECK-NEXT: vcompress.vm v24, v8, v17
441+
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
442+
; CHECK-NEXT: addi a0, sp, 16
443+
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
444+
; CHECK-NEXT: vmv4r.v v28, v8
456445
; CHECK-NEXT: csrr a0, vlenb
457446
; CHECK-NEXT: slli a0, a0, 3
458447
; CHECK-NEXT: add a0, sp, a0
459448
; CHECK-NEXT: addi a0, a0, 16
460449
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
461-
; CHECK-NEXT: addi a0, sp, 16
462-
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
463-
; CHECK-NEXT: vmv4r.v v20, v8
464-
; CHECK-NEXT: vmv4r.v v4, v24
465-
; CHECK-NEXT: vmv8r.v v8, v16
450+
; CHECK-NEXT: vmv4r.v v4, v8
451+
; CHECK-NEXT: vmv8r.v v8, v24
466452
; CHECK-NEXT: vmv8r.v v16, v0
467453
; CHECK-NEXT: csrr a0, vlenb
468-
; CHECK-NEXT: li a1, 24
469-
; CHECK-NEXT: mul a0, a0, a1
454+
; CHECK-NEXT: slli a0, a0, 4
470455
; CHECK-NEXT: add sp, sp, a0
471456
; CHECK-NEXT: .cfi_def_cfa sp, 16
472457
; CHECK-NEXT: addi sp, sp, 16

0 commit comments

Comments
 (0)