Skip to content

Commit 2d00c6f

Browse files
authored
[RISCV] Add a rematerializable pseudo instruction for LUI+ADDI for global addresses. (#93352)
This allows register allocation to rematerialize these instead of spilling and reloading. We need to make it a single instruction due to limitations in rematerialization. This pseudo is expanded to an LUI+ADDI pair between regalloc and post RA scheduling. This improves the dynamic instruction count on 531.deepsjeng_r from spec2017 by 3.2% for the train dataset. 500.perlbench and 502.gcc see a 1% improvement. There are a couple of regressions, but they are 0.1% or smaller. AArch64 has similar pseudo instructions like MOVaddr.
1 parent 00bd2fa commit 2d00c6f

19 files changed

+358
-311
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.td

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1311,6 +1311,26 @@ def : Pat<(FrameAddrRegImm (iPTR GPR:$rs1), simm12:$imm12),
13111311

13121312
/// HI and ADD_LO address nodes.
13131313

1314+
// Pseudo for a rematerializable LUI+ADDI sequence for loading an address.
1315+
// It will be expanded after register allocation.
1316+
// FIXME: The scheduling information does not reflect the multiple instructions.
1317+
let Size = 8, isReMaterializable = 1 in
1318+
def PseudoMovAddr : Pseudo<(outs GPR:$dst), (ins uimm20_lui:$hi, simm12:$lo), []>,
1319+
Sched<[WriteIALU]>;
1320+
1321+
def riscv_hi_oneuse : unop_oneuse<riscv_hi>;
1322+
def addr_hi_lo : PatFrag<(ops node:$hi, node:$lo),
1323+
(riscv_add_lo (riscv_hi_oneuse node:$hi), node:$lo)>;
1324+
1325+
def : Pat<(addr_hi_lo tglobaladdr:$hi, tglobaladdr:$lo),
1326+
(PseudoMovAddr tglobaladdr:$hi, tglobaladdr:$lo)>;
1327+
def : Pat<(addr_hi_lo tblockaddress:$hi, tblockaddress:$lo),
1328+
(PseudoMovAddr tblockaddress:$hi, tblockaddress:$lo)>;
1329+
def : Pat<(addr_hi_lo tjumptable:$hi, tjumptable:$lo),
1330+
(PseudoMovAddr tjumptable:$hi, tjumptable:$lo)>;
1331+
def : Pat<(addr_hi_lo tconstpool:$hi, tconstpool:$lo),
1332+
(PseudoMovAddr tconstpool:$hi, tconstpool:$lo)>;
1333+
13141334
def : Pat<(riscv_hi tglobaladdr:$in), (LUI tglobaladdr:$in)>;
13151335
def : Pat<(riscv_hi tblockaddress:$in), (LUI tblockaddress:$in)>;
13161336
def : Pat<(riscv_hi tjumptable:$in), (LUI tjumptable:$in)>;

llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,8 @@ INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE,
8484
// 3) The offset value in the Global Address or Constant Pool is 0.
8585
bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
8686
MachineInstr *&Lo) {
87-
if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC)
87+
if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC &&
88+
Hi.getOpcode() != RISCV::PseudoMovAddr)
8889
return false;
8990

9091
const MachineOperand &HiOp1 = Hi.getOperand(1);
@@ -97,16 +98,22 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
9798
HiOp1.getOffset() != 0)
9899
return false;
99100

100-
Register HiDestReg = Hi.getOperand(0).getReg();
101-
if (!MRI->hasOneUse(HiDestReg))
102-
return false;
101+
if (Hi.getOpcode() == RISCV::PseudoMovAddr) {
102+
// Most of the code should handle it correctly without modification by
103+
// setting both Lo and Hi to point to PseudoMovAddr.
104+
Lo = &Hi;
105+
} else {
106+
Register HiDestReg = Hi.getOperand(0).getReg();
107+
if (!MRI->hasOneUse(HiDestReg))
108+
return false;
103109

104-
Lo = &*MRI->use_instr_begin(HiDestReg);
105-
if (Lo->getOpcode() != RISCV::ADDI)
106-
return false;
110+
Lo = &*MRI->use_instr_begin(HiDestReg);
111+
if (Lo->getOpcode() != RISCV::ADDI)
112+
return false;
113+
}
107114

108115
const MachineOperand &LoOp2 = Lo->getOperand(2);
109-
if (Hi.getOpcode() == RISCV::LUI) {
116+
if (Hi.getOpcode() == RISCV::LUI || Hi.getOpcode() == RISCV::PseudoMovAddr) {
110117
if (LoOp2.getTargetFlags() != RISCVII::MO_LO ||
111118
!(LoOp2.isGlobal() || LoOp2.isCPI() || LoOp2.isBlockAddress()) ||
112119
LoOp2.getOffset() != 0)
@@ -466,6 +473,13 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
466473

467474
Hi.getOperand(1).setOffset(NewOffset);
468475
MachineOperand &ImmOp = Lo.getOperand(2);
476+
// Expand PseudoMovAddr into LUI
477+
if (Hi.getOpcode() == RISCV::PseudoMovAddr) {
478+
auto *TII = ST->getInstrInfo();
479+
Hi.setDesc(TII->get(RISCV::LUI));
480+
Hi.removeOperand(2);
481+
}
482+
469483
if (Hi.getOpcode() != RISCV::AUIPC)
470484
ImmOp.setOffset(NewOffset);
471485

@@ -501,6 +515,11 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
501515
}
502516
}
503517

518+
// Prevent Lo (originally PseudoMovAddr, which is also pointed to by Hi) from
519+
// being erased
520+
if (&Lo == &Hi)
521+
return true;
522+
504523
MRI->replaceRegWith(Lo.getOperand(0).getReg(), Hi.getOperand(0).getReg());
505524
Lo.eraseFromParent();
506525
return true;

llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ class RISCVPostRAExpandPseudo : public MachineFunctionPass {
4444
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
4545
MachineBasicBlock::iterator &NextMBBI);
4646
bool expandMovImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
47+
bool expandMovAddr(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
4748
};
4849

4950
char RISCVPostRAExpandPseudo::ID = 0;
@@ -75,6 +76,8 @@ bool RISCVPostRAExpandPseudo::expandMI(MachineBasicBlock &MBB,
7576
switch (MBBI->getOpcode()) {
7677
case RISCV::PseudoMovImm:
7778
return expandMovImm(MBB, MBBI);
79+
case RISCV::PseudoMovAddr:
80+
return expandMovAddr(MBB, MBBI);
7881
default:
7982
return false;
8083
}
@@ -101,6 +104,26 @@ bool RISCVPostRAExpandPseudo::expandMovImm(MachineBasicBlock &MBB,
101104
return true;
102105
}
103106

107+
bool RISCVPostRAExpandPseudo::expandMovAddr(MachineBasicBlock &MBB,
108+
MachineBasicBlock::iterator MBBI) {
109+
DebugLoc DL = MBBI->getDebugLoc();
110+
111+
Register DstReg = MBBI->getOperand(0).getReg();
112+
bool DstIsDead = MBBI->getOperand(0).isDead();
113+
bool Renamable = MBBI->getOperand(0).isRenamable();
114+
115+
BuildMI(MBB, MBBI, DL, TII->get(RISCV::LUI))
116+
.addReg(DstReg, RegState::Define | getRenamableRegState(Renamable))
117+
.add(MBBI->getOperand(1));
118+
BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI))
119+
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead) |
120+
getRenamableRegState(Renamable))
121+
.addReg(DstReg, RegState::Kill | getRenamableRegState(Renamable))
122+
.add(MBBI->getOperand(2));
123+
MBBI->eraseFromParent();
124+
return true;
125+
}
126+
104127
} // end of anonymous namespace
105128

106129
INITIALIZE_PASS(RISCVPostRAExpandPseudo, "riscv-expand-pseudolisimm32",

llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -383,8 +383,8 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
383383
; RV32I-NEXT: mv a1, s3
384384
; RV32I-NEXT: call __mulsi3
385385
; RV32I-NEXT: mv s1, a0
386-
; RV32I-NEXT: lui a0, %hi(.LCPI3_0)
387-
; RV32I-NEXT: addi s4, a0, %lo(.LCPI3_0)
386+
; RV32I-NEXT: lui s4, %hi(.LCPI3_0)
387+
; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0)
388388
; RV32I-NEXT: neg a0, s2
389389
; RV32I-NEXT: and a0, s2, a0
390390
; RV32I-NEXT: mv a1, s3
@@ -442,28 +442,28 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
442442
; RV32M-LABEL: test_cttz_i64:
443443
; RV32M: # %bb.0:
444444
; RV32M-NEXT: lui a2, 30667
445-
; RV32M-NEXT: addi a2, a2, 1329
446-
; RV32M-NEXT: lui a3, %hi(.LCPI3_0)
447-
; RV32M-NEXT: addi a3, a3, %lo(.LCPI3_0)
445+
; RV32M-NEXT: addi a3, a2, 1329
446+
; RV32M-NEXT: lui a2, %hi(.LCPI3_0)
447+
; RV32M-NEXT: addi a2, a2, %lo(.LCPI3_0)
448448
; RV32M-NEXT: bnez a1, .LBB3_3
449449
; RV32M-NEXT: # %bb.1:
450450
; RV32M-NEXT: li a1, 32
451451
; RV32M-NEXT: beqz a0, .LBB3_4
452452
; RV32M-NEXT: .LBB3_2:
453453
; RV32M-NEXT: neg a1, a0
454454
; RV32M-NEXT: and a0, a0, a1
455-
; RV32M-NEXT: mul a0, a0, a2
455+
; RV32M-NEXT: mul a0, a0, a3
456456
; RV32M-NEXT: srli a0, a0, 27
457-
; RV32M-NEXT: add a0, a3, a0
457+
; RV32M-NEXT: add a0, a2, a0
458458
; RV32M-NEXT: lbu a0, 0(a0)
459459
; RV32M-NEXT: li a1, 0
460460
; RV32M-NEXT: ret
461461
; RV32M-NEXT: .LBB3_3:
462462
; RV32M-NEXT: neg a4, a1
463463
; RV32M-NEXT: and a1, a1, a4
464-
; RV32M-NEXT: mul a1, a1, a2
464+
; RV32M-NEXT: mul a1, a1, a3
465465
; RV32M-NEXT: srli a1, a1, 27
466-
; RV32M-NEXT: add a1, a3, a1
466+
; RV32M-NEXT: add a1, a2, a1
467467
; RV32M-NEXT: lbu a1, 0(a1)
468468
; RV32M-NEXT: bnez a0, .LBB3_2
469469
; RV32M-NEXT: .LBB3_4:
@@ -814,8 +814,8 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind {
814814
; RV32I-NEXT: mv a1, s3
815815
; RV32I-NEXT: call __mulsi3
816816
; RV32I-NEXT: mv s0, a0
817-
; RV32I-NEXT: lui a0, %hi(.LCPI7_0)
818-
; RV32I-NEXT: addi s4, a0, %lo(.LCPI7_0)
817+
; RV32I-NEXT: lui s4, %hi(.LCPI7_0)
818+
; RV32I-NEXT: addi s4, s4, %lo(.LCPI7_0)
819819
; RV32I-NEXT: neg a0, s1
820820
; RV32I-NEXT: and a0, s1, a0
821821
; RV32I-NEXT: mv a1, s3

llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@ define signext i32 @ctz_dereferencing_pointer(ptr %b) nounwind {
4848
; RV32I-NEXT: mv a1, s1
4949
; RV32I-NEXT: call __mulsi3
5050
; RV32I-NEXT: mv s0, a0
51-
; RV32I-NEXT: lui a0, %hi(.LCPI0_0)
52-
; RV32I-NEXT: addi s3, a0, %lo(.LCPI0_0)
51+
; RV32I-NEXT: lui s3, %hi(.LCPI0_0)
52+
; RV32I-NEXT: addi s3, s3, %lo(.LCPI0_0)
5353
; RV32I-NEXT: neg a0, s4
5454
; RV32I-NEXT: and a0, s4, a0
5555
; RV32I-NEXT: mv a1, s1
@@ -511,8 +511,8 @@ define signext i32 @ctz4(i64 %b) nounwind {
511511
; RV32I-NEXT: mv a1, s3
512512
; RV32I-NEXT: call __mulsi3
513513
; RV32I-NEXT: mv s1, a0
514-
; RV32I-NEXT: lui a0, %hi(.LCPI6_0)
515-
; RV32I-NEXT: addi s4, a0, %lo(.LCPI6_0)
514+
; RV32I-NEXT: lui s4, %hi(.LCPI6_0)
515+
; RV32I-NEXT: addi s4, s4, %lo(.LCPI6_0)
516516
; RV32I-NEXT: neg a0, s2
517517
; RV32I-NEXT: and a0, s2, a0
518518
; RV32I-NEXT: mv a1, s3

llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,31 +24,31 @@ define void @_Z3foov() {
2424
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_49)
2525
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_49)
2626
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
27-
; CHECK-NEXT: vle16.v v10, (a0)
27+
; CHECK-NEXT: vle16.v v8, (a0)
2828
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_48)
2929
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_48)
30-
; CHECK-NEXT: vle8.v v8, (a0)
30+
; CHECK-NEXT: vle8.v v10, (a0)
3131
; CHECK-NEXT: csrr a0, vlenb
3232
; CHECK-NEXT: slli a0, a0, 3
3333
; CHECK-NEXT: add a0, sp, a0
3434
; CHECK-NEXT: addi a0, a0, 16
35-
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
35+
; CHECK-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill
3636
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_46)
3737
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_46)
38-
; CHECK-NEXT: vle16.v v12, (a0)
38+
; CHECK-NEXT: vle16.v v10, (a0)
3939
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_45)
4040
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_45)
41-
; CHECK-NEXT: vle16.v v14, (a0)
41+
; CHECK-NEXT: vle16.v v12, (a0)
4242
; CHECK-NEXT: addi a0, sp, 16
4343
; CHECK-NEXT: csrr a1, vlenb
4444
; CHECK-NEXT: slli a1, a1, 1
45+
; CHECK-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
46+
; CHECK-NEXT: add a0, a0, a1
4547
; CHECK-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
4648
; CHECK-NEXT: add a0, a0, a1
4749
; CHECK-NEXT: vs2r.v v12, (a0) # Unknown-size Folded Spill
4850
; CHECK-NEXT: add a0, a0, a1
4951
; CHECK-NEXT: vs2r.v v14, (a0) # Unknown-size Folded Spill
50-
; CHECK-NEXT: add a0, a0, a1
51-
; CHECK-NEXT: vs2r.v v16, (a0) # Unknown-size Folded Spill
5252
; CHECK-NEXT: #APP
5353
; CHECK-NEXT: #NO_APP
5454
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_40)

llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -389,8 +389,8 @@ define dso_local i32 @load_ga() local_unnamed_addr #0 {
389389
define dso_local i64 @load_ga_8() nounwind {
390390
; RV32I-LABEL: load_ga_8:
391391
; RV32I: # %bb.0: # %entry
392-
; RV32I-NEXT: lui a0, %hi(ga_8)
393-
; RV32I-NEXT: addi a1, a0, %lo(ga_8)
392+
; RV32I-NEXT: lui a1, %hi(ga_8)
393+
; RV32I-NEXT: addi a1, a1, %lo(ga_8)
394394
; RV32I-NEXT: lw a0, 8(a1)
395395
; RV32I-NEXT: lw a1, 12(a1)
396396
; RV32I-NEXT: ret

llvm/test/CodeGen/RISCV/rv32xtheadbb.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,8 @@ define i64 @cttz_i64(i64 %a) nounwind {
209209
; RV32I-NEXT: mv a1, s3
210210
; RV32I-NEXT: call __mulsi3
211211
; RV32I-NEXT: mv s1, a0
212-
; RV32I-NEXT: lui a0, %hi(.LCPI3_0)
213-
; RV32I-NEXT: addi s4, a0, %lo(.LCPI3_0)
212+
; RV32I-NEXT: lui s4, %hi(.LCPI3_0)
213+
; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0)
214214
; RV32I-NEXT: neg a0, s2
215215
; RV32I-NEXT: and a0, s2, a0
216216
; RV32I-NEXT: mv a1, s3

llvm/test/CodeGen/RISCV/rv32zbb.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,8 +199,8 @@ define i64 @cttz_i64(i64 %a) nounwind {
199199
; RV32I-NEXT: mv a1, s3
200200
; RV32I-NEXT: call __mulsi3
201201
; RV32I-NEXT: mv s1, a0
202-
; RV32I-NEXT: lui a0, %hi(.LCPI3_0)
203-
; RV32I-NEXT: addi s4, a0, %lo(.LCPI3_0)
202+
; RV32I-NEXT: lui s4, %hi(.LCPI3_0)
203+
; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0)
204204
; RV32I-NEXT: neg a0, s2
205205
; RV32I-NEXT: and a0, s2, a0
206206
; RV32I-NEXT: mv a1, s3

llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -126,28 +126,28 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) {
126126
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
127127
; CHECK-NEXT: vid.v v8
128128
; CHECK-NEXT: vsaddu.vx v8, v8, a1
129-
; CHECK-NEXT: vmsltu.vx v0, v8, a2
130129
; CHECK-NEXT: lui a0, %hi(.LCPI9_0)
131130
; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_0)
132-
; CHECK-NEXT: vle8.v v8, (a0)
131+
; CHECK-NEXT: vle8.v v16, (a0)
132+
; CHECK-NEXT: vmsltu.vx v0, v8, a2
133133
; CHECK-NEXT: lui a0, %hi(.LCPI9_1)
134134
; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_1)
135-
; CHECK-NEXT: vle8.v v9, (a0)
135+
; CHECK-NEXT: vle8.v v8, (a0)
136+
; CHECK-NEXT: vsext.vf8 v24, v16
137+
; CHECK-NEXT: vsaddu.vx v16, v24, a1
138+
; CHECK-NEXT: vmsltu.vx v9, v16, a2
136139
; CHECK-NEXT: vsext.vf8 v16, v8
137140
; CHECK-NEXT: vsaddu.vx v16, v16, a1
138-
; CHECK-NEXT: vmsltu.vx v8, v16, a2
139-
; CHECK-NEXT: vsext.vf8 v16, v9
140-
; CHECK-NEXT: vsaddu.vx v16, v16, a1
141141
; CHECK-NEXT: lui a0, %hi(.LCPI9_2)
142142
; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_2)
143-
; CHECK-NEXT: vle8.v v9, (a0)
143+
; CHECK-NEXT: vle8.v v8, (a0)
144144
; CHECK-NEXT: vmsltu.vx v10, v16, a2
145145
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
146-
; CHECK-NEXT: vslideup.vi v0, v8, 2
146+
; CHECK-NEXT: vslideup.vi v0, v9, 2
147147
; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
148148
; CHECK-NEXT: vslideup.vi v0, v10, 4
149149
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
150-
; CHECK-NEXT: vsext.vf8 v16, v9
150+
; CHECK-NEXT: vsext.vf8 v16, v8
151151
; CHECK-NEXT: vsaddu.vx v8, v16, a1
152152
; CHECK-NEXT: vmsltu.vx v16, v8, a2
153153
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
@@ -169,13 +169,13 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
169169
; CHECK-NEXT: vle8.v v9, (a0)
170170
; CHECK-NEXT: vsext.vf8 v16, v8
171171
; CHECK-NEXT: vsaddu.vx v16, v16, a1
172-
; CHECK-NEXT: vmsltu.vx v10, v16, a2
172+
; CHECK-NEXT: vmsltu.vx v8, v16, a2
173173
; CHECK-NEXT: vsext.vf8 v16, v9
174174
; CHECK-NEXT: vsaddu.vx v16, v16, a1
175-
; CHECK-NEXT: vmsltu.vx v8, v16, a2
176175
; CHECK-NEXT: lui a0, %hi(.LCPI10_2)
177176
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_2)
178177
; CHECK-NEXT: vle8.v v9, (a0)
178+
; CHECK-NEXT: vmsltu.vx v10, v16, a2
179179
; CHECK-NEXT: lui a0, %hi(.LCPI10_3)
180180
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_3)
181181
; CHECK-NEXT: vle8.v v11, (a0)
@@ -187,10 +187,10 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
187187
; CHECK-NEXT: vmsltu.vx v11, v16, a2
188188
; CHECK-NEXT: vid.v v16
189189
; CHECK-NEXT: vsaddu.vx v16, v16, a1
190-
; CHECK-NEXT: vmsltu.vx v0, v16, a2
191190
; CHECK-NEXT: lui a0, %hi(.LCPI10_4)
192191
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_4)
193192
; CHECK-NEXT: vle8.v v12, (a0)
193+
; CHECK-NEXT: vmsltu.vx v0, v16, a2
194194
; CHECK-NEXT: lui a0, %hi(.LCPI10_5)
195195
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_5)
196196
; CHECK-NEXT: vle8.v v13, (a0)
@@ -201,27 +201,27 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
201201
; CHECK-NEXT: vsaddu.vx v16, v16, a1
202202
; CHECK-NEXT: vmsltu.vx v13, v16, a2
203203
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
204-
; CHECK-NEXT: vslideup.vi v8, v10, 2
204+
; CHECK-NEXT: vslideup.vi v10, v8, 2
205205
; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
206-
; CHECK-NEXT: vslideup.vi v8, v9, 4
206+
; CHECK-NEXT: vslideup.vi v10, v9, 4
207207
; CHECK-NEXT: lui a0, %hi(.LCPI10_6)
208208
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_6)
209209
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
210-
; CHECK-NEXT: vle8.v v9, (a0)
210+
; CHECK-NEXT: vle8.v v8, (a0)
211211
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
212-
; CHECK-NEXT: vslideup.vi v8, v11, 6
212+
; CHECK-NEXT: vslideup.vi v10, v11, 6
213213
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
214214
; CHECK-NEXT: vslideup.vi v0, v12, 2
215215
; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
216216
; CHECK-NEXT: vslideup.vi v0, v13, 4
217217
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
218-
; CHECK-NEXT: vsext.vf8 v16, v9
218+
; CHECK-NEXT: vsext.vf8 v16, v8
219219
; CHECK-NEXT: vsaddu.vx v16, v16, a1
220-
; CHECK-NEXT: vmsltu.vx v9, v16, a2
220+
; CHECK-NEXT: vmsltu.vx v8, v16, a2
221221
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
222-
; CHECK-NEXT: vslideup.vi v0, v9, 6
222+
; CHECK-NEXT: vslideup.vi v0, v8, 6
223223
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
224-
; CHECK-NEXT: vslideup.vi v0, v8, 8
224+
; CHECK-NEXT: vslideup.vi v0, v10, 8
225225
; CHECK-NEXT: ret
226226
%mask = call <128 x i1> @llvm.get.active.lane.mask.v128i1.i64(i64 %index, i64 %tc)
227227
ret <128 x i1> %mask

0 commit comments

Comments
 (0)