Skip to content

Commit 603ba4c

Browse files
authored
[RISCV] Separate doLocalPostpass into new pass and move to post vector regalloc (#88295)
This patch splits off part of the work to move vsetvli insertion to post regalloc in #70549. The doLocalPostpass operates outside of RISCVInsertVSETVLI's dataflow, so we can move it to its own pass. We can then move it to post vector regalloc, which should be a smaller change. A couple of things that are different from #70549: - This manually fixes up the LiveIntervals rather than recomputing them via createAndComputeVirtRegInterval. I'm not sure if there's much of a difference between the two. - For the postpass it's sufficient to just check isUndef() in hasUndefinedMergeOp, i.e. we don't need to look up the def in VNInfo. Running on llvm-test-suite and SPEC CPU 2017, there aren't any changes in the number of vsetvlis removed. There are some minor scheduling diffs as well as extra spills and fewer spills in some cases (caused by transient vsetvlis existing between RISCVInsertVSETVLI and RISCVCoalesceVSETVLI when vec regalloc happens), but they are minor and should go away once we finish moving the rest of RISCVInsertVSETVLI. We could also potentially turn off this pass for unoptimised builds.
1 parent c071c1d commit 603ba4c

36 files changed

+1280
-1356
lines changed

llvm/lib/Target/RISCV/RISCV.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ void initializeRISCVExpandAtomicPseudoPass(PassRegistry &);
6161
FunctionPass *createRISCVInsertVSETVLIPass();
6262
void initializeRISCVInsertVSETVLIPass(PassRegistry &);
6363

64+
FunctionPass *createRISCVCoalesceVSETVLIPass();
65+
void initializeRISCVCoalesceVSETVLIPass(PassRegistry &);
66+
6467
FunctionPass *createRISCVPostRAExpandPseudoPass();
6568
void initializeRISCVPostRAExpandPseudoPass(PassRegistry &);
6669
FunctionPass *createRISCVInsertReadWriteCSRPass();

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Lines changed: 109 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,19 @@
2727
#include "RISCV.h"
2828
#include "RISCVSubtarget.h"
2929
#include "llvm/ADT/Statistic.h"
30+
#include "llvm/CodeGen/LiveDebugVariables.h"
3031
#include "llvm/CodeGen/LiveIntervals.h"
32+
#include "llvm/CodeGen/LiveStacks.h"
3133
#include "llvm/CodeGen/MachineFunctionPass.h"
3234
#include <queue>
3335
using namespace llvm;
3436

3537
#define DEBUG_TYPE "riscv-insert-vsetvli"
3638
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
39+
#define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"
3740

3841
STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
39-
STATISTIC(NumRemovedVSETVL, "Number of VSETVL inst removed");
42+
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");
4043

4144
static cl::opt<bool> DisableInsertVSETVLPHIOpt(
4245
"riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
@@ -190,6 +193,11 @@ static bool hasUndefinedMergeOp(const MachineInstr &MI,
190193
if (UseMO.getReg() == RISCV::NoRegister)
191194
return true;
192195

196+
if (UseMO.isUndef())
197+
return true;
198+
if (UseMO.getReg().isPhysical())
199+
return false;
200+
193201
if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
194202
if (UseMI->isImplicitDef())
195203
return true;
@@ -778,18 +786,52 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
778786
VSETVLIInfo &Info) const;
779787
void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
780788
void emitVSETVLIs(MachineBasicBlock &MBB);
781-
void doLocalPostpass(MachineBasicBlock &MBB);
782789
void doPRE(MachineBasicBlock &MBB);
783790
void insertReadVL(MachineBasicBlock &MBB);
784791
};
785792

793+
class RISCVCoalesceVSETVLI : public MachineFunctionPass {
794+
public:
795+
static char ID;
796+
const RISCVSubtarget *ST;
797+
const TargetInstrInfo *TII;
798+
MachineRegisterInfo *MRI;
799+
LiveIntervals *LIS;
800+
801+
RISCVCoalesceVSETVLI() : MachineFunctionPass(ID) {}
802+
bool runOnMachineFunction(MachineFunction &MF) override;
803+
804+
void getAnalysisUsage(AnalysisUsage &AU) const override {
805+
AU.setPreservesCFG();
806+
807+
AU.addRequired<LiveIntervals>();
808+
AU.addPreserved<LiveIntervals>();
809+
AU.addRequired<SlotIndexes>();
810+
AU.addPreserved<SlotIndexes>();
811+
AU.addPreserved<LiveDebugVariables>();
812+
AU.addPreserved<LiveStacks>();
813+
814+
MachineFunctionPass::getAnalysisUsage(AU);
815+
}
816+
817+
StringRef getPassName() const override { return RISCV_COALESCE_VSETVLI_NAME; }
818+
819+
private:
820+
bool coalesceVSETVLIs(MachineBasicBlock &MBB);
821+
};
822+
786823
} // end anonymous namespace
787824

788825
char RISCVInsertVSETVLI::ID = 0;
789826

790827
INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
791828
false, false)
792829

830+
char RISCVCoalesceVSETVLI::ID = 0;
831+
832+
INITIALIZE_PASS(RISCVCoalesceVSETVLI, "riscv-coalesce-vsetvli",
833+
RISCV_COALESCE_VSETVLI_NAME, false, false)
834+
793835
// Return a VSETVLIInfo representing the changes made by this VSETVLI or
794836
// VSETIVLI instruction.
795837
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
@@ -1511,12 +1553,12 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
15111553

15121554
auto &AVL = MI.getOperand(1);
15131555
auto &PrevAVL = PrevMI.getOperand(1);
1514-
assert(MRI.isSSA());
15151556

15161557
// If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
15171558
// For now just check that PrevMI uses the same virtual register.
15181559
if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
1519-
(!PrevAVL.isReg() || PrevAVL.getReg() != AVL.getReg()))
1560+
(!MRI.hasOneDef(AVL.getReg()) || !PrevAVL.isReg() ||
1561+
PrevAVL.getReg() != AVL.getReg()))
15201562
return false;
15211563
}
15221564

@@ -1526,7 +1568,7 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
15261568
return areCompatibleVTYPEs(PriorVType, VType, Used);
15271569
}
15281570

1529-
void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
1571+
bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) {
15301572
MachineInstr *NextMI = nullptr;
15311573
// We can have arbitrary code in successors, so VL and VTYPE
15321574
// must be considered demanded.
@@ -1558,20 +1600,49 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
15581600

15591601
if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
15601602
if (!isVLPreservingConfig(*NextMI)) {
1561-
MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
1603+
Register DefReg = NextMI->getOperand(0).getReg();
1604+
1605+
MI.getOperand(0).setReg(DefReg);
15621606
MI.getOperand(0).setIsDead(false);
1607+
1608+
// The def of DefReg moved to MI, so extend the LiveInterval up to
1609+
// it.
1610+
if (DefReg.isVirtual()) {
1611+
LiveInterval &DefLI = LIS->getInterval(DefReg);
1612+
SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
1613+
VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
1614+
LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
1615+
DefLI.addSegment(S);
1616+
DefVNI->def = MISlot;
1617+
// Mark DefLI as spillable if it was previously unspillable
1618+
DefLI.setWeight(0);
1619+
1620+
// DefReg may have had no uses, in which case we need to shrink
1621+
// the LiveInterval up to MI.
1622+
LIS->shrinkToUses(&DefLI);
1623+
}
1624+
15631625
Register OldVLReg;
15641626
if (MI.getOperand(1).isReg())
15651627
OldVLReg = MI.getOperand(1).getReg();
15661628
if (NextMI->getOperand(1).isImm())
15671629
MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
15681630
else
15691631
MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
1632+
1633+
// Clear NextMI's AVL early so we're not counting it as a use.
1634+
if (NextMI->getOperand(1).isReg())
1635+
NextMI->getOperand(1).setReg(RISCV::NoRegister);
1636+
15701637
if (OldVLReg) {
15711638
MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
15721639
if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
15731640
MRI->use_nodbg_empty(OldVLReg))
15741641
VLOpDef->eraseFromParent();
1642+
1643+
// NextMI no longer uses OldVLReg so shrink its LiveInterval.
1644+
if (OldVLReg.isVirtual())
1645+
LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
15751646
}
15761647
MI.setDesc(NextMI->getDesc());
15771648
}
@@ -1584,9 +1655,13 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
15841655
Used = getDemanded(MI, MRI, ST);
15851656
}
15861657

1587-
NumRemovedVSETVL += ToDelete.size();
1588-
for (auto *MI : ToDelete)
1658+
NumCoalescedVSETVL += ToDelete.size();
1659+
for (auto *MI : ToDelete) {
1660+
LIS->RemoveMachineInstrFromMaps(*MI);
15891661
MI->eraseFromParent();
1662+
}
1663+
1664+
return !ToDelete.empty();
15901665
}
15911666

15921667
void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
@@ -1661,15 +1736,6 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
16611736
for (MachineBasicBlock &MBB : MF)
16621737
emitVSETVLIs(MBB);
16631738

1664-
// Now that all vsetvlis are explicit, go through and do block local
1665-
// DSE and peephole based demanded fields based transforms. Note that
1666-
// this *must* be done outside the main dataflow so long as we allow
1667-
// any cross block analysis within the dataflow. We can't have both
1668-
// demanded fields based mutation and non-local analysis in the
1669-
// dataflow at the same time without introducing inconsistencies.
1670-
for (MachineBasicBlock &MBB : MF)
1671-
doLocalPostpass(MBB);
1672-
16731739
// Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
16741740
// of VLEFF/VLSEGFF.
16751741
for (MachineBasicBlock &MBB : MF)
@@ -1683,3 +1749,29 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
16831749
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
16841750
return new RISCVInsertVSETVLI();
16851751
}
1752+
1753+
// Now that all vsetvlis are explicit, go through and do block local
1754+
// DSE and peephole based demanded fields based transforms. Note that
1755+
// this *must* be done outside the main dataflow so long as we allow
1756+
// any cross block analysis within the dataflow. We can't have both
1757+
// demanded fields based mutation and non-local analysis in the
1758+
// dataflow at the same time without introducing inconsistencies.
1759+
bool RISCVCoalesceVSETVLI::runOnMachineFunction(MachineFunction &MF) {
1760+
// Skip if the vector extension is not enabled.
1761+
ST = &MF.getSubtarget<RISCVSubtarget>();
1762+
if (!ST->hasVInstructions())
1763+
return false;
1764+
TII = ST->getInstrInfo();
1765+
MRI = &MF.getRegInfo();
1766+
LIS = &getAnalysis<LiveIntervals>();
1767+
1768+
bool Changed = false;
1769+
for (MachineBasicBlock &MBB : MF)
1770+
Changed |= coalesceVSETVLIs(MBB);
1771+
1772+
return Changed;
1773+
}
1774+
1775+
FunctionPass *llvm::createRISCVCoalesceVSETVLIPass() {
1776+
return new RISCVCoalesceVSETVLI();
1777+
}

llvm/lib/Target/RISCV/RISCVTargetMachine.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
116116
initializeRISCVExpandPseudoPass(*PR);
117117
initializeRISCVFoldMasksPass(*PR);
118118
initializeRISCVInsertVSETVLIPass(*PR);
119+
initializeRISCVCoalesceVSETVLIPass(*PR);
119120
initializeRISCVInsertReadWriteCSRPass(*PR);
120121
initializeRISCVInsertWriteVXRMPass(*PR);
121122
initializeRISCVDAGToDAGISelPass(*PR);
@@ -388,12 +389,14 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {
388389

389390
bool RISCVPassConfig::addRegAssignAndRewriteFast() {
390391
addPass(createRVVRegAllocPass(false));
392+
addPass(createRISCVCoalesceVSETVLIPass());
391393
return TargetPassConfig::addRegAssignAndRewriteFast();
392394
}
393395

394396
bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
395397
addPass(createRVVRegAllocPass(true));
396398
addPass(createVirtRegRewriter(false));
399+
addPass(createRISCVCoalesceVSETVLIPass());
397400
return TargetPassConfig::addRegAssignAndRewriteOptimized();
398401
}
399402

llvm/test/CodeGen/RISCV/O0-pipeline.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@
4747
; CHECK-NEXT: Eliminate PHI nodes for register allocation
4848
; CHECK-NEXT: Two-Address instruction pass
4949
; CHECK-NEXT: Fast Register Allocator
50+
; CHECK-NEXT: MachineDominator Tree Construction
51+
; CHECK-NEXT: Slot index numbering
52+
; CHECK-NEXT: Live Interval Analysis
53+
; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
5054
; CHECK-NEXT: Fast Register Allocator
5155
; CHECK-NEXT: Remove Redundant DEBUG_VALUE analysis
5256
; CHECK-NEXT: Fixup Statepoint Caller Saved

llvm/test/CodeGen/RISCV/O3-pipeline.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@
143143
; CHECK-NEXT: Machine Optimization Remark Emitter
144144
; CHECK-NEXT: Greedy Register Allocator
145145
; CHECK-NEXT: Virtual Register Rewriter
146+
; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
146147
; CHECK-NEXT: Virtual Register Map
147148
; CHECK-NEXT: Live Register Matrix
148149
; CHECK-NEXT: Greedy Register Allocator

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1407,8 +1407,8 @@ define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float
14071407
; CHECK-NEXT: vfmv.v.f v8, fa4
14081408
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
14091409
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
1410-
; CHECK-NEXT: vmv.v.i v0, 15
14111410
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
1411+
; CHECK-NEXT: vmv.v.i v0, 15
14121412
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
14131413
; CHECK-NEXT: ret
14141414
%v0 = insertelement <8 x float> poison, float %e0, i64 0
@@ -1458,8 +1458,8 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d
14581458
; CHECK-NEXT: vfmv.v.f v8, fa4
14591459
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
14601460
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
1461-
; CHECK-NEXT: vmv.v.i v0, 15
14621461
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
1462+
; CHECK-NEXT: vmv.v.i v0, 15
14631463
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
14641464
; CHECK-NEXT: ret
14651465
%v0 = insertelement <8 x double> poison, double %e0, i64 0

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
5757
; RV32-V512-NEXT: vid.v v10
5858
; RV32-V512-NEXT: vsrl.vi v11, v10, 1
5959
; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu
60-
; RV32-V512-NEXT: vmv.v.i v0, 10
6160
; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
61+
; RV32-V512-NEXT: vmv.v.i v0, 10
6262
; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t
6363
; RV32-V512-NEXT: vmv.v.v v8, v10
6464
; RV32-V512-NEXT: ret
@@ -68,8 +68,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
6868
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu
6969
; RV64-V512-NEXT: vid.v v10
7070
; RV64-V512-NEXT: vsrl.vi v11, v10, 1
71-
; RV64-V512-NEXT: vmv.v.i v0, 10
7271
; RV64-V512-NEXT: vrgather.vv v10, v8, v11
72+
; RV64-V512-NEXT: vmv.v.i v0, 10
7373
; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t
7474
; RV64-V512-NEXT: vmv.v.v v8, v10
7575
; RV64-V512-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -395,8 +395,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
395395
; RV32-NEXT: fmin.d fa5, fa5, fa4
396396
; RV32-NEXT: fcvt.w.d a2, fa5, rtz
397397
; RV32-NEXT: and a0, a0, a2
398-
; RV32-NEXT: vmv.v.i v0, 15
399398
; RV32-NEXT: vslide1down.vx v9, v9, a0
399+
; RV32-NEXT: vmv.v.i v0, 15
400400
; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
401401
; RV32-NEXT: vse8.v v9, (a1)
402402
; RV32-NEXT: addi sp, s0, -128
@@ -496,8 +496,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
496496
; RV64-NEXT: fmin.d fa5, fa5, fa4
497497
; RV64-NEXT: fcvt.l.d a2, fa5, rtz
498498
; RV64-NEXT: and a0, a0, a2
499-
; RV64-NEXT: vmv.v.i v0, 15
500499
; RV64-NEXT: vslide1down.vx v9, v9, a0
500+
; RV64-NEXT: vmv.v.i v0, 15
501501
; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
502502
; RV64-NEXT: vse8.v v9, (a1)
503503
; RV64-NEXT: addi sp, s0, -128
@@ -580,8 +580,8 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
580580
; RV32-NEXT: fmax.d fa4, fa4, fa3
581581
; RV32-NEXT: fmin.d fa5, fa4, fa5
582582
; RV32-NEXT: fcvt.wu.d a0, fa5, rtz
583-
; RV32-NEXT: vmv.v.i v0, 15
584583
; RV32-NEXT: vslide1down.vx v9, v9, a0
584+
; RV32-NEXT: vmv.v.i v0, 15
585585
; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
586586
; RV32-NEXT: vse8.v v9, (a1)
587587
; RV32-NEXT: addi sp, s0, -128
@@ -656,8 +656,8 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
656656
; RV64-NEXT: fmax.d fa4, fa4, fa3
657657
; RV64-NEXT: fmin.d fa5, fa4, fa5
658658
; RV64-NEXT: fcvt.lu.d a0, fa5, rtz
659-
; RV64-NEXT: vmv.v.i v0, 15
660659
; RV64-NEXT: vslide1down.vx v9, v9, a0
660+
; RV64-NEXT: vmv.v.i v0, 15
661661
; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
662662
; RV64-NEXT: vse8.v v9, (a1)
663663
; RV64-NEXT: addi sp, s0, -128

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
7070
; RV32-V512-NEXT: vid.v v10
7171
; RV32-V512-NEXT: vsrl.vi v11, v10, 1
7272
; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu
73-
; RV32-V512-NEXT: vmv.v.i v0, 10
7473
; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
74+
; RV32-V512-NEXT: vmv.v.i v0, 10
7575
; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t
7676
; RV32-V512-NEXT: vmv.v.v v8, v10
7777
; RV32-V512-NEXT: ret
@@ -81,8 +81,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
8181
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu
8282
; RV64-V512-NEXT: vid.v v10
8383
; RV64-V512-NEXT: vsrl.vi v11, v10, 1
84-
; RV64-V512-NEXT: vmv.v.i v0, 10
8584
; RV64-V512-NEXT: vrgather.vv v10, v8, v11
85+
; RV64-V512-NEXT: vmv.v.i v0, 10
8686
; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t
8787
; RV64-V512-NEXT: vmv.v.v v8, v10
8888
; RV64-V512-NEXT: ret
@@ -195,8 +195,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
195195
; V128-NEXT: vsetivli zero, 4, e32, m1, ta, mu
196196
; V128-NEXT: vid.v v8
197197
; V128-NEXT: vsrl.vi v8, v8, 1
198-
; V128-NEXT: vmv.v.i v0, 10
199198
; V128-NEXT: vadd.vi v8, v8, 1
199+
; V128-NEXT: vmv.v.i v0, 10
200200
; V128-NEXT: vrgather.vv v10, v9, v8, v0.t
201201
; V128-NEXT: vmv.v.v v8, v10
202202
; V128-NEXT: ret
@@ -210,8 +210,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
210210
; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, mu
211211
; V512-NEXT: vid.v v8
212212
; V512-NEXT: vsrl.vi v8, v8, 1
213-
; V512-NEXT: vmv.v.i v0, 10
214213
; V512-NEXT: vadd.vi v8, v8, 1
214+
; V512-NEXT: vmv.v.i v0, 10
215215
; V512-NEXT: vrgather.vv v10, v9, v8, v0.t
216216
; V512-NEXT: vmv1r.v v8, v10
217217
; V512-NEXT: ret

0 commit comments

Comments
 (0)