Skip to content

Commit 3f0404a

Browse files
[AArch64] Restore Z-registers before P-registers (#79623)
This is needed by PR #77665 [1], which uses a P-register while restoring Z-registers. The reverse-order restore of SVE registers in the epilogue was originally added to guarantee performance, but further work has since been done to improve SVE frame restore; besides that, the scheduler may also change the order of the restores, undoing the reverse-order restore. [1] #77665
1 parent d022f32 commit 3f0404a

12 files changed

+177
-176
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3189,11 +3189,6 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
31893189
return MIB->getIterator();
31903190
};
31913191

3192-
// SVE objects are always restored in reverse order.
3193-
for (const RegPairInfo &RPI : reverse(RegPairs))
3194-
if (RPI.isScalable())
3195-
EmitMI(RPI);
3196-
31973192
if (homogeneousPrologEpilog(MF, &MBB)) {
31983193
auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog))
31993194
.setMIFlag(MachineInstr::FrameDestroy);
@@ -3204,11 +3199,19 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
32043199
return true;
32053200
}
32063201

3202+
// For performance reasons restore SVE register in increasing order
3203+
auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; };
3204+
auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
3205+
auto PPREnd = std::find_if(RegPairs.rbegin(), RegPairs.rend(), IsPPR);
3206+
std::reverse(PPRBegin, PPREnd.base());
3207+
auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; };
3208+
auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
3209+
auto ZPREnd = std::find_if(RegPairs.rbegin(), RegPairs.rend(), IsZPR);
3210+
std::reverse(ZPRBegin, ZPREnd.base());
3211+
32073212
if (ReverseCSRRestoreSeq) {
32083213
MachineBasicBlock::iterator First = MBB.end();
32093214
for (const RegPairInfo &RPI : reverse(RegPairs)) {
3210-
if (RPI.isScalable())
3211-
continue;
32123215
MachineBasicBlock::iterator It = EmitMI(RPI);
32133216
if (First == MBB.end())
32143217
First = It;
@@ -3217,8 +3220,6 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
32173220
MBB.splice(MBBI, &MBB, First);
32183221
} else {
32193222
for (const RegPairInfo &RPI : RegPairs) {
3220-
if (RPI.isScalable())
3221-
continue;
32223223
(void)EmitMI(RPI);
32233224
}
32243225
}

llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
; CHECK-NEXT: // implicit-def: $p4
2020
; CHECK-NEXT: addvl sp, sp, #1
2121
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
22-
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
2322
; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
23+
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
2424
; CHECK-NEXT: addvl sp, sp, #2
2525
; CHECK-NEXT: .cfi_def_cfa wsp, 16
2626
; CHECK-NEXT: .cfi_restore z8

llvm/test/CodeGen/AArch64/framelayout-sve.mir

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -772,9 +772,9 @@ body: |
772772

773773
# CHECK: $sp = frame-destroy ADDXri $sp, 32, 0
774774
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22
775-
# CHECK-NEXT: $z10 = frame-destroy LDR_ZXI $sp, 0
775+
# CHECK-NEXT: $z10 = frame-destroy LDR_ZXI $sp, 0
776776
# CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 1
777-
# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2
777+
# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2
778778
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3
779779
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16
780780
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z8
@@ -873,14 +873,14 @@ body: |
873873
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22
874874
# CHECK: $sp = frame-destroy ADDVL_XXI $sp, 1
875875
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22
876-
# CHECK: $p15 = frame-destroy LDR_PXI $sp, 4
877-
# CHECK: $p14 = frame-destroy LDR_PXI $sp, 5
878-
# CHECK: $p5 = frame-destroy LDR_PXI $sp, 14
879-
# CHECK: $p4 = frame-destroy LDR_PXI $sp, 15
880876
# CHECK: $z23 = frame-destroy LDR_ZXI $sp, 2
881877
# CHECK: $z22 = frame-destroy LDR_ZXI $sp, 3
882878
# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16
883879
# CHECK: $z8 = frame-destroy LDR_ZXI $sp, 17
880+
# CHECK: $p15 = frame-destroy LDR_PXI $sp, 4
881+
# CHECK: $p14 = frame-destroy LDR_PXI $sp, 5
882+
# CHECK: $p5 = frame-destroy LDR_PXI $sp, 14
883+
# CHECK: $p4 = frame-destroy LDR_PXI $sp, 15
884884
# CHECK: $sp = frame-destroy ADDVL_XXI $sp, 18
885885
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 32
886886
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z8
@@ -1037,14 +1037,14 @@ body: |
10371037
# CHECK-NEXT: $sp = frame-setup ANDXri killed $[[TMP]]
10381038

10391039
# CHECK: $sp = frame-destroy ADDVL_XXI $fp, -18
1040+
# CHECK: $z23 = frame-destroy LDR_ZXI $sp, 2
1041+
# CHECK-NEXT: $z22 = frame-destroy LDR_ZXI $sp, 3
1042+
# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16
1043+
# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 17
10401044
# CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 4
10411045
# CHECK-NEXT: $p14 = frame-destroy LDR_PXI $sp, 5
10421046
# CHECK: $p5 = frame-destroy LDR_PXI $sp, 14
10431047
# CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 15
1044-
# CHECK-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 2
1045-
# CHECK-NEXT: $z22 = frame-destroy LDR_ZXI $sp, 3
1046-
# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16
1047-
# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 17
10481048
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z8
10491049
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z9
10501050
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z10
@@ -1198,10 +1198,10 @@ body: |
11981198

11991199
# CHECK: $sp = frame-destroy ADDVL_XXI $sp, 7
12001200
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22
1201-
# CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 6
1202-
# CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 7
12031201
# CHECK-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 1
12041202
# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2
1203+
# CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 6
1204+
# CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 7
12051205
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3
12061206
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16
12071207
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z8

llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -226,30 +226,30 @@ define <vscale x 2 x double> @streaming_compatible_with_scalable_vectors(<vscale
226226
; CHECK-NEXT: ldr z1, [sp] // 16-byte Folded Reload
227227
; CHECK-NEXT: fadd z0.d, z1.d, z0.d
228228
; CHECK-NEXT: addvl sp, sp, #2
229-
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
230229
; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
231230
; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
232-
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
233231
; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
234232
; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
235-
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
236233
; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
237234
; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
238-
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
239235
; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
240236
; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
241-
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
242237
; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
243238
; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
244-
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
245239
; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
246240
; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
247-
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
248241
; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
249242
; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
250-
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
251243
; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
252244
; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
245+
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
246+
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
247+
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
248+
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
249+
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
250+
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
251+
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
252+
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
253253
; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
254254
; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
255255
; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
@@ -318,30 +318,30 @@ define <vscale x 2 x i1> @streaming_compatible_with_predicate_vectors(<vscale x
318318
; CHECK-NEXT: ldr p1, [sp, #6, mul vl] // 2-byte Folded Reload
319319
; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
320320
; CHECK-NEXT: addvl sp, sp, #1
321-
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
322321
; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
323322
; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
324-
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
325323
; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
326324
; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
327-
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
328325
; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
329326
; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
330-
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
331327
; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
332328
; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
333-
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
334329
; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
335330
; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
336-
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
337331
; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
338332
; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
339-
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
340333
; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
341334
; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
342-
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
343335
; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
344336
; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
337+
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
338+
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
339+
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
340+
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
341+
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
342+
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
343+
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
344+
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
345345
; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
346346
; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
347347
; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload

llvm/test/CodeGen/AArch64/sme-streaming-interface.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -187,30 +187,30 @@ define <vscale x 4 x i32> @smstart_clobber_sve(<vscale x 4 x i32> %x) nounwind {
187187
; CHECK-NEXT: smstop sm
188188
; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
189189
; CHECK-NEXT: addvl sp, sp, #1
190-
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
191190
; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
192191
; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
193-
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
194192
; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
195193
; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
196-
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
197194
; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
198195
; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
199-
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
200196
; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
201197
; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
202-
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
203198
; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
204199
; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
205-
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
206200
; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
207201
; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
208-
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
209202
; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
210203
; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
211-
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
212204
; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
213205
; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
206+
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
207+
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
208+
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
209+
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
210+
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
211+
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
212+
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
213+
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
214214
; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
215215
; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
216216
; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
@@ -267,30 +267,30 @@ define <vscale x 4 x i32> @smstart_clobber_sve_duplicate(<vscale x 4 x i32> %x)
267267
; CHECK-NEXT: smstop sm
268268
; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
269269
; CHECK-NEXT: addvl sp, sp, #1
270-
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
271270
; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
272271
; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
273-
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
274272
; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
275273
; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
276-
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
277274
; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
278275
; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
279-
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
280276
; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
281277
; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
282-
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
283278
; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
284279
; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
285-
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
286280
; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
287281
; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
288-
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
289282
; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
290283
; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
291-
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
292284
; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
293285
; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
286+
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
287+
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
288+
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
289+
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
290+
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
291+
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
292+
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
293+
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
294294
; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
295295
; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
296296
; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload

0 commit comments

Comments
 (0)