Skip to content

Commit e0919b1

Browse files
committed
[CodeGen] Renumber slot indexes before register allocation (#66334)
RegAllocGreedy uses SlotIndexes::getApproxInstrDistance to approximate the length of a live range for its heuristics. Renumbering all slot indexes with the default instruction distance ensures that this estimate will be as accurate as possible, and will not depend on the history of how instructions have been added to and removed from SlotIndexes's maps. This also means that enabling -early-live-intervals, which runs the SlotIndexes analysis earlier, will not cause large amounts of churn due to different register allocator decisions.
1 parent 1d305f9 commit e0919b1

File tree

334 files changed

+207340
-209013
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

334 files changed

+207340
-209013
lines changed

llvm/include/llvm/CodeGen/SlotIndexes.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,9 @@ class raw_ostream;
640640
renumberIndexes(newItr);
641641
llvm::sort(idx2MBBMap, less_first());
642642
}
643+
644+
/// Renumber all indexes using the default instruction distance.
645+
void packIndexes();
643646
};
644647

645648
// Specialize IntervalMapInfo for half-open slot index intervals.

llvm/lib/CodeGen/RegAllocGreedy.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2692,6 +2692,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
26922692
return false;
26932693

26942694
Indexes = &getAnalysis<SlotIndexes>();
2695+
// Renumber to get accurate and consistent results from
2696+
// SlotIndexes::getApproxInstrDistance.
2697+
Indexes->packIndexes();
26952698
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
26962699
DomTree = &getAnalysis<MachineDominatorTree>();
26972700
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();

llvm/lib/CodeGen/SlotIndexes.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,11 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
237237
}
238238
}
239239

240+
/// Renumber all indexes using the default instruction distance: walk
/// indexList with enumerate() and give each entry the index
/// (its position as reported by enumerate) * SlotIndex::InstrDist.
void SlotIndexes::packIndexes() {
241+
for (auto [Index, Entry] : enumerate(indexList))
242+
Entry.setIndex(Index * SlotIndex::InstrDist);
243+
}
244+
240245
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
241246
LLVM_DUMP_METHOD void SlotIndexes::dump() const {
242247
for (const IndexListEntry &ILE : indexList) {

llvm/test/CodeGen/AArch64/active_lane_mask.ll

Lines changed: 45 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -221,69 +221,69 @@ define <vscale x 32 x i1> @lane_mask_nxv32i1_i64(i64 %index, i64 %TC) {
221221
; CHECK-NEXT: index z1.d, #0, #1
222222
; CHECK-NEXT: mov z0.d, x0
223223
; CHECK-NEXT: ptrue p0.d
224-
; CHECK-NEXT: mov z7.d, x1
224+
; CHECK-NEXT: mov z3.d, x1
225225
; CHECK-NEXT: mov z2.d, z1.d
226-
; CHECK-NEXT: mov z3.d, z1.d
226+
; CHECK-NEXT: mov z4.d, z1.d
227227
; CHECK-NEXT: mov z6.d, z1.d
228-
; CHECK-NEXT: uqadd z5.d, z1.d, z0.d
228+
; CHECK-NEXT: uqadd z25.d, z1.d, z0.d
229229
; CHECK-NEXT: incd z1.d, all, mul #8
230230
; CHECK-NEXT: incd z2.d
231-
; CHECK-NEXT: incd z3.d, all, mul #2
231+
; CHECK-NEXT: incd z4.d, all, mul #2
232232
; CHECK-NEXT: incd z6.d, all, mul #4
233-
; CHECK-NEXT: cmphi p1.d, p0/z, z7.d, z5.d
233+
; CHECK-NEXT: cmphi p1.d, p0/z, z3.d, z25.d
234234
; CHECK-NEXT: uqadd z1.d, z1.d, z0.d
235-
; CHECK-NEXT: mov z4.d, z2.d
236-
; CHECK-NEXT: uqadd z24.d, z2.d, z0.d
237-
; CHECK-NEXT: mov z25.d, z2.d
238-
; CHECK-NEXT: mov z27.d, z3.d
239-
; CHECK-NEXT: uqadd z26.d, z3.d, z0.d
235+
; CHECK-NEXT: mov z5.d, z2.d
236+
; CHECK-NEXT: uqadd z26.d, z2.d, z0.d
237+
; CHECK-NEXT: mov z7.d, z2.d
238+
; CHECK-NEXT: mov z24.d, z4.d
239+
; CHECK-NEXT: uqadd z27.d, z4.d, z0.d
240240
; CHECK-NEXT: uqadd z28.d, z6.d, z0.d
241241
; CHECK-NEXT: incd z2.d, all, mul #8
242-
; CHECK-NEXT: incd z3.d, all, mul #8
243-
; CHECK-NEXT: incd z6.d, all, mul #8
244-
; CHECK-NEXT: incd z4.d, all, mul #2
245-
; CHECK-NEXT: incd z25.d, all, mul #4
246-
; CHECK-NEXT: cmphi p2.d, p0/z, z7.d, z24.d
247-
; CHECK-NEXT: incd z27.d, all, mul #4
248-
; CHECK-NEXT: cmphi p3.d, p0/z, z7.d, z26.d
249-
; CHECK-NEXT: cmphi p5.d, p0/z, z7.d, z28.d
250-
; CHECK-NEXT: uqadd z2.d, z2.d, z0.d
251-
; CHECK-NEXT: uqadd z3.d, z3.d, z0.d
252-
; CHECK-NEXT: mov z24.d, z4.d
253-
; CHECK-NEXT: uqadd z5.d, z4.d, z0.d
254-
; CHECK-NEXT: uqadd z26.d, z25.d, z0.d
255242
; CHECK-NEXT: incd z4.d, all, mul #8
256-
; CHECK-NEXT: incd z25.d, all, mul #8
257-
; CHECK-NEXT: uzp1 p1.s, p1.s, p2.s
243+
; CHECK-NEXT: incd z6.d, all, mul #8
244+
; CHECK-NEXT: incd z5.d, all, mul #2
245+
; CHECK-NEXT: incd z7.d, all, mul #4
246+
; CHECK-NEXT: cmphi p2.d, p0/z, z3.d, z26.d
258247
; CHECK-NEXT: incd z24.d, all, mul #4
259-
; CHECK-NEXT: cmphi p8.d, p0/z, z7.d, z2.d
260-
; CHECK-NEXT: cmphi p4.d, p0/z, z7.d, z5.d
261-
; CHECK-NEXT: uqadd z5.d, z27.d, z0.d
262-
; CHECK-NEXT: incd z27.d, all, mul #8
248+
; CHECK-NEXT: cmphi p3.d, p0/z, z3.d, z27.d
249+
; CHECK-NEXT: cmphi p5.d, p0/z, z3.d, z28.d
250+
; CHECK-NEXT: uqadd z2.d, z2.d, z0.d
263251
; CHECK-NEXT: uqadd z4.d, z4.d, z0.d
264-
; CHECK-NEXT: cmphi p6.d, p0/z, z7.d, z26.d
265-
; CHECK-NEXT: uqadd z28.d, z24.d, z0.d
252+
; CHECK-NEXT: uqadd z6.d, z6.d, z0.d
253+
; CHECK-NEXT: mov z26.d, z5.d
254+
; CHECK-NEXT: uqadd z25.d, z5.d, z0.d
255+
; CHECK-NEXT: uqadd z27.d, z7.d, z0.d
256+
; CHECK-NEXT: incd z5.d, all, mul #8
257+
; CHECK-NEXT: incd z7.d, all, mul #8
258+
; CHECK-NEXT: uzp1 p1.s, p1.s, p2.s
259+
; CHECK-NEXT: incd z26.d, all, mul #4
260+
; CHECK-NEXT: cmphi p8.d, p0/z, z3.d, z2.d
261+
; CHECK-NEXT: cmphi p4.d, p0/z, z3.d, z25.d
262+
; CHECK-NEXT: uqadd z25.d, z24.d, z0.d
266263
; CHECK-NEXT: incd z24.d, all, mul #8
264+
; CHECK-NEXT: uqadd z5.d, z5.d, z0.d
265+
; CHECK-NEXT: uqadd z7.d, z7.d, z0.d
266+
; CHECK-NEXT: cmphi p6.d, p0/z, z3.d, z27.d
267+
; CHECK-NEXT: uqadd z28.d, z26.d, z0.d
268+
; CHECK-NEXT: incd z26.d, all, mul #8
267269
; CHECK-NEXT: uzp1 p3.s, p3.s, p4.s
268-
; CHECK-NEXT: cmphi p7.d, p0/z, z7.d, z5.d
269-
; CHECK-NEXT: uqadd z5.d, z6.d, z0.d
270-
; CHECK-NEXT: uqadd z6.d, z25.d, z0.d
271-
; CHECK-NEXT: uqadd z25.d, z27.d, z0.d
272-
; CHECK-NEXT: cmphi p4.d, p0/z, z7.d, z1.d
270+
; CHECK-NEXT: uqadd z24.d, z24.d, z0.d
271+
; CHECK-NEXT: cmphi p7.d, p0/z, z3.d, z25.d
272+
; CHECK-NEXT: cmphi p4.d, p0/z, z3.d, z1.d
273273
; CHECK-NEXT: uzp1 p5.s, p5.s, p6.s
274-
; CHECK-NEXT: cmphi p6.d, p0/z, z7.d, z3.d
275-
; CHECK-NEXT: cmphi p9.d, p0/z, z7.d, z4.d
276-
; CHECK-NEXT: uqadd z0.d, z24.d, z0.d
277-
; CHECK-NEXT: cmphi p2.d, p0/z, z7.d, z28.d
278-
; CHECK-NEXT: cmphi p10.d, p0/z, z7.d, z6.d
274+
; CHECK-NEXT: cmphi p6.d, p0/z, z3.d, z4.d
275+
; CHECK-NEXT: cmphi p9.d, p0/z, z3.d, z5.d
276+
; CHECK-NEXT: cmphi p10.d, p0/z, z3.d, z7.d
277+
; CHECK-NEXT: uqadd z0.d, z26.d, z0.d
278+
; CHECK-NEXT: cmphi p2.d, p0/z, z3.d, z28.d
279279
; CHECK-NEXT: uzp1 p4.s, p4.s, p8.s
280-
; CHECK-NEXT: cmphi p8.d, p0/z, z7.d, z25.d
280+
; CHECK-NEXT: cmphi p8.d, p0/z, z3.d, z24.d
281281
; CHECK-NEXT: uzp1 p6.s, p6.s, p9.s
282282
; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload
283-
; CHECK-NEXT: uzp1 p2.s, p7.s, p2.s
284-
; CHECK-NEXT: cmphi p7.d, p0/z, z7.d, z5.d
285-
; CHECK-NEXT: cmphi p0.d, p0/z, z7.d, z0.d
286283
; CHECK-NEXT: uzp1 p1.h, p1.h, p3.h
284+
; CHECK-NEXT: uzp1 p2.s, p7.s, p2.s
285+
; CHECK-NEXT: cmphi p7.d, p0/z, z3.d, z6.d
286+
; CHECK-NEXT: cmphi p0.d, p0/z, z3.d, z0.d
287287
; CHECK-NEXT: uzp1 p7.s, p7.s, p10.s
288288
; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload
289289
; CHECK-NEXT: uzp1 p0.s, p8.s, p0.s

llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,28 +10,28 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
1010
define zeroext i8 @fullGtU(i32 %i1, i32 %i2) {
1111
; CHECK-LABEL: fullGtU:
1212
; CHECK: ; %bb.0: ; %entry
13-
; CHECK-NEXT: adrp x8, _block@GOTPAGE
13+
; CHECK-NEXT: adrp x9, _block@GOTPAGE
1414
; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
1515
; CHECK-NEXT: ; kill: def $w0 killed $w0 def $x0
16-
; CHECK-NEXT: sxtw x9, w0
16+
; CHECK-NEXT: sxtw x8, w0
1717
; CHECK-NEXT: sxtw x10, w1
18-
; CHECK-NEXT: ldr x8, [x8, _block@GOTPAGEOFF]
19-
; CHECK-NEXT: ldr x8, [x8]
20-
; CHECK-NEXT: ldrb w11, [x8, x9]
21-
; CHECK-NEXT: ldrb w12, [x8, x10]
18+
; CHECK-NEXT: ldr x9, [x9, _block@GOTPAGEOFF]
19+
; CHECK-NEXT: ldr x9, [x9]
20+
; CHECK-NEXT: ldrb w11, [x9, x8]
21+
; CHECK-NEXT: ldrb w12, [x9, x10]
2222
; CHECK-NEXT: cmp w11, w12
2323
; CHECK-NEXT: b.ne LBB0_3
2424
; CHECK-NEXT: ; %bb.1: ; %if.end
25-
; CHECK-NEXT: add x9, x9, x8
26-
; CHECK-NEXT: add x8, x10, x8
27-
; CHECK-NEXT: ldrb w10, [x9, #1]
28-
; CHECK-NEXT: ldrb w11, [x8, #1]
25+
; CHECK-NEXT: add x8, x8, x9
26+
; CHECK-NEXT: add x9, x10, x9
27+
; CHECK-NEXT: ldrb w10, [x8, #1]
28+
; CHECK-NEXT: ldrb w11, [x9, #1]
2929
; CHECK-NEXT: cmp w10, w11
3030
; CHECK-NEXT: b.ne LBB0_3
3131
; CHECK-NEXT: ; %bb.2: ; %if.end25
32-
; CHECK-NEXT: ldrb w9, [x9, #2]
3332
; CHECK-NEXT: ldrb w8, [x8, #2]
34-
; CHECK-NEXT: cmp w9, w8
33+
; CHECK-NEXT: ldrb w9, [x9, #2]
34+
; CHECK-NEXT: cmp w8, w9
3535
; CHECK-NEXT: cset w8, hi
3636
; CHECK-NEXT: csel w0, wzr, w8, eq
3737
; CHECK-NEXT: ret

llvm/test/CodeGen/AArch64/arm64-cse.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,16 @@ target triple = "arm64-apple-ios"
88
define ptr @t1(ptr %base, ptr nocapture %offset, i32 %size) nounwind {
99
; CHECK-LABEL: t1:
1010
; CHECK: ; %bb.0: ; %entry
11-
; CHECK-NEXT: ldr w9, [x1]
12-
; CHECK-NEXT: subs w8, w9, w2
11+
; CHECK-NEXT: ldr w8, [x1]
12+
; CHECK-NEXT: subs w9, w8, w2
1313
; CHECK-NEXT: b.ge LBB0_2
1414
; CHECK-NEXT: ; %bb.1:
1515
; CHECK-NEXT: mov x0, xzr
1616
; CHECK-NEXT: ret
1717
; CHECK-NEXT: LBB0_2: ; %if.end
18-
; CHECK-NEXT: add x0, x0, w8, sxtw
19-
; CHECK-NEXT: sub w9, w9, w8
20-
; CHECK-NEXT: str w9, [x1]
18+
; CHECK-NEXT: add x0, x0, w9, sxtw
19+
; CHECK-NEXT: sub w8, w8, w9
20+
; CHECK-NEXT: str w8, [x1]
2121
; CHECK-NEXT: ret
2222
entry:
2323
%0 = load i32, ptr %offset, align 4

llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1059,30 +1059,30 @@ define void @stack_realign2(i32 %a, i32 %b, ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr
10591059
; ENABLE-NEXT: .cfi_offset w27, -88
10601060
; ENABLE-NEXT: .cfi_offset w28, -96
10611061
; ENABLE-NEXT: lsl w8, w1, w0
1062-
; ENABLE-NEXT: lsr w10, w0, w1
1063-
; ENABLE-NEXT: lsl w16, w0, w1
1062+
; ENABLE-NEXT: lsr w9, w0, w1
1063+
; ENABLE-NEXT: lsl w14, w0, w1
10641064
; ENABLE-NEXT: lsr w11, w1, w0
1065-
; ENABLE-NEXT: add w14, w1, w0
1066-
; ENABLE-NEXT: sub w9, w8, w10
1065+
; ENABLE-NEXT: add w15, w1, w0
1066+
; ENABLE-NEXT: sub w10, w8, w9
10671067
; ENABLE-NEXT: subs w17, w1, w0
1068-
; ENABLE-NEXT: add w15, w16, w8
1069-
; ENABLE-NEXT: add w12, w10, w11
1070-
; ENABLE-NEXT: add w13, w11, w14
1068+
; ENABLE-NEXT: add w16, w14, w8
1069+
; ENABLE-NEXT: add w12, w9, w11
1070+
; ENABLE-NEXT: add w13, w11, w15
10711071
; ENABLE-NEXT: b.le LBB14_2
10721072
; ENABLE-NEXT: ; %bb.1: ; %true
10731073
; ENABLE-NEXT: str w0, [sp]
10741074
; ENABLE-NEXT: ; InlineAsm Start
10751075
; ENABLE-NEXT: nop
10761076
; ENABLE-NEXT: ; InlineAsm End
10771077
; ENABLE-NEXT: LBB14_2: ; %false
1078-
; ENABLE-NEXT: str w16, [x2]
1078+
; ENABLE-NEXT: str w14, [x2]
10791079
; ENABLE-NEXT: str w8, [x3]
1080-
; ENABLE-NEXT: str w10, [x4]
1080+
; ENABLE-NEXT: str w9, [x4]
10811081
; ENABLE-NEXT: str w11, [x5]
1082-
; ENABLE-NEXT: str w14, [x6]
1082+
; ENABLE-NEXT: str w15, [x6]
10831083
; ENABLE-NEXT: str w17, [x7]
10841084
; ENABLE-NEXT: stp w0, w1, [x2, #4]
1085-
; ENABLE-NEXT: stp w15, w9, [x2, #12]
1085+
; ENABLE-NEXT: stp w16, w10, [x2, #12]
10861086
; ENABLE-NEXT: stp w12, w13, [x2, #20]
10871087
; ENABLE-NEXT: sub sp, x29, #80
10881088
; ENABLE-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
@@ -1118,30 +1118,30 @@ define void @stack_realign2(i32 %a, i32 %b, ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr
11181118
; DISABLE-NEXT: .cfi_offset w27, -88
11191119
; DISABLE-NEXT: .cfi_offset w28, -96
11201120
; DISABLE-NEXT: lsl w8, w1, w0
1121-
; DISABLE-NEXT: lsr w10, w0, w1
1122-
; DISABLE-NEXT: lsl w16, w0, w1
1121+
; DISABLE-NEXT: lsr w9, w0, w1
1122+
; DISABLE-NEXT: lsl w14, w0, w1
11231123
; DISABLE-NEXT: lsr w11, w1, w0
1124-
; DISABLE-NEXT: add w14, w1, w0
1125-
; DISABLE-NEXT: sub w9, w8, w10
1124+
; DISABLE-NEXT: add w15, w1, w0
1125+
; DISABLE-NEXT: sub w10, w8, w9
11261126
; DISABLE-NEXT: subs w17, w1, w0
1127-
; DISABLE-NEXT: add w15, w16, w8
1128-
; DISABLE-NEXT: add w12, w10, w11
1129-
; DISABLE-NEXT: add w13, w11, w14
1127+
; DISABLE-NEXT: add w16, w14, w8
1128+
; DISABLE-NEXT: add w12, w9, w11
1129+
; DISABLE-NEXT: add w13, w11, w15
11301130
; DISABLE-NEXT: b.le LBB14_2
11311131
; DISABLE-NEXT: ; %bb.1: ; %true
11321132
; DISABLE-NEXT: str w0, [sp]
11331133
; DISABLE-NEXT: ; InlineAsm Start
11341134
; DISABLE-NEXT: nop
11351135
; DISABLE-NEXT: ; InlineAsm End
11361136
; DISABLE-NEXT: LBB14_2: ; %false
1137-
; DISABLE-NEXT: str w16, [x2]
1137+
; DISABLE-NEXT: str w14, [x2]
11381138
; DISABLE-NEXT: str w8, [x3]
1139-
; DISABLE-NEXT: str w10, [x4]
1139+
; DISABLE-NEXT: str w9, [x4]
11401140
; DISABLE-NEXT: str w11, [x5]
1141-
; DISABLE-NEXT: str w14, [x6]
1141+
; DISABLE-NEXT: str w15, [x6]
11421142
; DISABLE-NEXT: str w17, [x7]
11431143
; DISABLE-NEXT: stp w0, w1, [x2, #4]
1144-
; DISABLE-NEXT: stp w15, w9, [x2, #12]
1144+
; DISABLE-NEXT: stp w16, w10, [x2, #12]
11451145
; DISABLE-NEXT: stp w12, w13, [x2, #20]
11461146
; DISABLE-NEXT: sub sp, x29, #80
11471147
; DISABLE-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload

llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,25 +26,25 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
2626
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
2727
; CHECK-NEXT: .LBB0_1: // %vector.body
2828
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
29-
; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
29+
; CHECK-NEXT: zip2 p3.d, p1.d, p1.d
3030
; CHECK-NEXT: add x13, x0, x8
3131
; CHECK-NEXT: add x14, x1, x8
32-
; CHECK-NEXT: zip1 p3.d, p1.d, p1.d
32+
; CHECK-NEXT: zip1 p2.d, p1.d, p1.d
3333
; CHECK-NEXT: mov z6.d, z1.d
3434
; CHECK-NEXT: mov z7.d, z0.d
3535
; CHECK-NEXT: whilelo p1.d, x12, x9
3636
; CHECK-NEXT: add x8, x8, x11
3737
; CHECK-NEXT: add x12, x12, x10
38-
; CHECK-NEXT: ld1d { z2.d }, p2/z, [x13, #1, mul vl]
39-
; CHECK-NEXT: ld1d { z3.d }, p3/z, [x13]
40-
; CHECK-NEXT: ld1d { z4.d }, p2/z, [x14, #1, mul vl]
41-
; CHECK-NEXT: ld1d { z5.d }, p3/z, [x14]
38+
; CHECK-NEXT: ld1d { z2.d }, p3/z, [x13, #1, mul vl]
39+
; CHECK-NEXT: ld1d { z3.d }, p2/z, [x13]
40+
; CHECK-NEXT: ld1d { z4.d }, p3/z, [x14, #1, mul vl]
41+
; CHECK-NEXT: ld1d { z5.d }, p2/z, [x14]
4242
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
4343
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
4444
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
4545
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
46-
; CHECK-NEXT: mov z0.d, p2/m, z7.d
47-
; CHECK-NEXT: mov z1.d, p3/m, z6.d
46+
; CHECK-NEXT: mov z0.d, p3/m, z7.d
47+
; CHECK-NEXT: mov z1.d, p2/m, z6.d
4848
; CHECK-NEXT: b.mi .LBB0_1
4949
; CHECK-NEXT: // %bb.2: // %exit.block
5050
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
@@ -237,19 +237,19 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
237237
; CHECK-NEXT: add x9, x9, x11
238238
; CHECK-NEXT: add x8, x8, x12
239239
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
240-
; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
241-
; CHECK-NEXT: zip1 p3.d, p1.d, p1.d
240+
; CHECK-NEXT: zip2 p3.d, p1.d, p1.d
241+
; CHECK-NEXT: zip1 p2.d, p1.d, p1.d
242242
; CHECK-NEXT: whilelo p1.d, x9, x10
243-
; CHECK-NEXT: ld1d { z2.d }, p2/z, [x13, #1, mul vl]
244-
; CHECK-NEXT: ld1d { z3.d }, p3/z, [x13]
245-
; CHECK-NEXT: ld1d { z4.d }, p2/z, [x14, #1, mul vl]
246-
; CHECK-NEXT: ld1d { z5.d }, p3/z, [x14]
243+
; CHECK-NEXT: ld1d { z2.d }, p3/z, [x13, #1, mul vl]
244+
; CHECK-NEXT: ld1d { z3.d }, p2/z, [x13]
245+
; CHECK-NEXT: ld1d { z4.d }, p3/z, [x14, #1, mul vl]
246+
; CHECK-NEXT: ld1d { z5.d }, p2/z, [x14]
247247
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
248248
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
249249
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
250250
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
251-
; CHECK-NEXT: mov z0.d, p2/m, z7.d
252-
; CHECK-NEXT: mov z1.d, p3/m, z6.d
251+
; CHECK-NEXT: mov z0.d, p3/m, z7.d
252+
; CHECK-NEXT: mov z1.d, p2/m, z6.d
253253
; CHECK-NEXT: b.mi .LBB2_1
254254
; CHECK-NEXT: // %bb.2: // %exit.block
255255
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d

0 commit comments

Comments
 (0)