Skip to content

Commit 08a3c53

Browse files
authored
[RegAlloc] Scale the spill weight by target factor (#113675)
Currently, the spill weight is determined only by isDef/isUse and block frequency. However, for registers with different register classes, the costs of spilling them differ. For example, for `LMUL>1` registers (in which several physical registers compose one larger logical register), the costs are higher than in the `LMUL=1` case (in which there is only one physical register). To solve this problem, a new target hook `getSpillWeightScaleFactor` is added. Targets can override the default factor (which is `1.0`) according to the register class. For RISC-V, the factors are set to the `RegClassWeight`, which is used to track register pressure. The values of `RegClassWeight` happen to be the number of register units. I believe all targets with compound registers can benefit from this change, but only RISC-V is customized in this patch, since it has been widely agreed to do so. The other targets need more performance data to go further. Partially fixes #113489.
1 parent 3438dfc commit 08a3c53

File tree

76 files changed

+4186
-7952
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

76 files changed

+4186
-7952
lines changed

llvm/include/llvm/CodeGen/TargetRegisterInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -958,6 +958,9 @@ class TargetRegisterInfo : public MCRegisterInfo {
958958
/// Returns a -1 terminated array of pressure set IDs.
959959
virtual const int *getRegUnitPressureSets(unsigned RegUnit) const = 0;
960960

961+
/// Get the scale factor of spill weight for this register class.
962+
virtual float getSpillWeightScaleFactor(const TargetRegisterClass *RC) const;
963+
961964
/// Get a list of 'hint' registers that the register allocator should try
962965
/// first when allocating a physical register for the virtual register
963966
/// VirtReg. These registers are effectively moved to the front of the

llvm/lib/CodeGen/CalcSpillWeights.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,10 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
335335
if (isRematerializable(LI, LIS, VRM, *MF.getSubtarget().getInstrInfo()))
336336
TotalWeight *= 0.5F;
337337

338+
// Finally, we scale the weight by the scale factor of register class.
339+
const TargetRegisterClass *RC = MRI.getRegClass(LI.reg());
340+
TotalWeight *= TRI.getSpillWeightScaleFactor(RC);
341+
338342
if (IsLocalSplitArtifact)
339343
return normalize(TotalWeight, Start->distance(*End), NumInstr);
340344
return normalize(TotalWeight, LI.getSize(), NumInstr);

llvm/lib/CodeGen/TargetRegisterInfo.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,11 @@ bool TargetRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
456456
return shareSameRegisterFile(*this, DefRC, DefSubReg, SrcRC, SrcSubReg);
457457
}
458458

459+
float TargetRegisterInfo::getSpillWeightScaleFactor(
460+
const TargetRegisterClass *RC) const {
461+
return 1.0;
462+
}
463+
459464
// Compute target-independent register allocator hints to help eliminate copies.
460465
bool TargetRegisterInfo::getRegAllocationHints(
461466
Register VirtReg, ArrayRef<MCPhysReg> Order,

llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,11 @@ RISCVRegisterInfo::getRegisterCostTableIndex(const MachineFunction &MF) const {
810810
: 0;
811811
}
812812

813+
float RISCVRegisterInfo::getSpillWeightScaleFactor(
814+
const TargetRegisterClass *RC) const {
815+
return getRegClassWeight(RC).RegWeight;
816+
}
817+
813818
// Add two address hints to improve chances of being able to use a compressed
814819
// instruction.
815820
bool RISCVRegisterInfo::getRegAllocationHints(

llvm/lib/Target/RISCV/RISCVRegisterInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
130130

131131
unsigned getRegisterCostTableIndex(const MachineFunction &MF) const override;
132132

133+
float getSpillWeightScaleFactor(const TargetRegisterClass *RC) const override;
134+
133135
bool getRegAllocationHints(Register VirtReg, ArrayRef<MCPhysReg> Order,
134136
SmallVectorImpl<MCPhysReg> &Hints,
135137
const MachineFunction &MF, const VirtRegMap *VRM,

llvm/test/CodeGen/RISCV/rvv/abs-vp.ll

Lines changed: 6 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -561,19 +561,8 @@ declare <vscale x 16 x i64> @llvm.vp.abs.nxv16i64(<vscale x 16 x i64>, i1 immarg
561561
define <vscale x 16 x i64> @vp_abs_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
562562
; CHECK-LABEL: vp_abs_nxv16i64:
563563
; CHECK: # %bb.0:
564-
; CHECK-NEXT: addi sp, sp, -16
565-
; CHECK-NEXT: .cfi_def_cfa_offset 16
566-
; CHECK-NEXT: csrr a1, vlenb
567-
; CHECK-NEXT: slli a1, a1, 4
568-
; CHECK-NEXT: sub sp, sp, a1
569-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
570564
; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
571-
; CHECK-NEXT: vmv1r.v v24, v0
572-
; CHECK-NEXT: csrr a1, vlenb
573-
; CHECK-NEXT: slli a1, a1, 3
574-
; CHECK-NEXT: add a1, sp, a1
575-
; CHECK-NEXT: addi a1, a1, 16
576-
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
565+
; CHECK-NEXT: vmv1r.v v7, v0
577566
; CHECK-NEXT: csrr a1, vlenb
578567
; CHECK-NEXT: srli a2, a1, 3
579568
; CHECK-NEXT: sub a3, a0, a1
@@ -582,30 +571,16 @@ define <vscale x 16 x i64> @vp_abs_nxv16i64(<vscale x 16 x i64> %va, <vscale x 1
582571
; CHECK-NEXT: addi a2, a2, -1
583572
; CHECK-NEXT: and a2, a2, a3
584573
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
585-
; CHECK-NEXT: vrsub.vi v8, v16, 0, v0.t
586-
; CHECK-NEXT: vmax.vv v8, v16, v8, v0.t
587-
; CHECK-NEXT: addi a2, sp, 16
588-
; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
574+
; CHECK-NEXT: vrsub.vi v24, v16, 0, v0.t
575+
; CHECK-NEXT: vmax.vv v16, v16, v24, v0.t
589576
; CHECK-NEXT: bltu a0, a1, .LBB46_2
590577
; CHECK-NEXT: # %bb.1:
591578
; CHECK-NEXT: mv a0, a1
592579
; CHECK-NEXT: .LBB46_2:
593-
; CHECK-NEXT: vmv1r.v v0, v24
594-
; CHECK-NEXT: slli a1, a1, 3
595-
; CHECK-NEXT: add a1, sp, a1
596-
; CHECK-NEXT: addi a1, a1, 16
597-
; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
580+
; CHECK-NEXT: vmv1r.v v0, v7
598581
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
599-
; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t
600-
; CHECK-NEXT: vmax.vv v8, v8, v16, v0.t
601-
; CHECK-NEXT: addi a0, sp, 16
602-
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
603-
; CHECK-NEXT: csrr a0, vlenb
604-
; CHECK-NEXT: slli a0, a0, 4
605-
; CHECK-NEXT: add sp, sp, a0
606-
; CHECK-NEXT: .cfi_def_cfa sp, 16
607-
; CHECK-NEXT: addi sp, sp, 16
608-
; CHECK-NEXT: .cfi_def_cfa_offset 0
582+
; CHECK-NEXT: vrsub.vi v24, v8, 0, v0.t
583+
; CHECK-NEXT: vmax.vv v8, v8, v24, v0.t
609584
; CHECK-NEXT: ret
610585
%v = call <vscale x 16 x i64> @llvm.vp.abs.nxv16i64(<vscale x 16 x i64> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
611586
ret <vscale x 16 x i64> %v

llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll

Lines changed: 57 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -2302,33 +2302,35 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale
23022302
; RV32-NEXT: vsll.vx v24, v24, a4, v0.t
23032303
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
23042304
; RV32-NEXT: csrr a3, vlenb
2305-
; RV32-NEXT: slli a3, a3, 4
2305+
; RV32-NEXT: slli a3, a3, 3
23062306
; RV32-NEXT: add a3, sp, a3
23072307
; RV32-NEXT: addi a3, a3, 16
23082308
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
23092309
; RV32-NEXT: vlse64.v v16, (a5), zero
23102310
; RV32-NEXT: csrr a3, vlenb
2311-
; RV32-NEXT: slli a3, a3, 3
2311+
; RV32-NEXT: slli a3, a3, 4
23122312
; RV32-NEXT: add a3, sp, a3
23132313
; RV32-NEXT: addi a3, a3, 16
23142314
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
23152315
; RV32-NEXT: lui a3, 4080
2316-
; RV32-NEXT: vand.vx v24, v8, a3, v0.t
2317-
; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
2318-
; RV32-NEXT: addi a5, sp, 16
2319-
; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill
2320-
; RV32-NEXT: vand.vv v24, v8, v16, v0.t
2321-
; RV32-NEXT: vsll.vi v16, v24, 8, v0.t
2322-
; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
2323-
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
2316+
; RV32-NEXT: vand.vx v16, v8, a3, v0.t
2317+
; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
23242318
; RV32-NEXT: csrr a5, vlenb
23252319
; RV32-NEXT: slli a5, a5, 4
23262320
; RV32-NEXT: add a5, sp, a5
23272321
; RV32-NEXT: addi a5, a5, 16
2322+
; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload
2323+
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
2324+
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
2325+
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
2326+
; RV32-NEXT: csrr a5, vlenb
2327+
; RV32-NEXT: slli a5, a5, 3
2328+
; RV32-NEXT: add a5, sp, a5
2329+
; RV32-NEXT: addi a5, a5, 16
23282330
; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
23292331
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
23302332
; RV32-NEXT: csrr a5, vlenb
2331-
; RV32-NEXT: slli a5, a5, 4
2333+
; RV32-NEXT: slli a5, a5, 3
23322334
; RV32-NEXT: add a5, sp, a5
23332335
; RV32-NEXT: addi a5, a5, 16
23342336
; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
@@ -2342,7 +2344,7 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale
23422344
; RV32-NEXT: vand.vx v24, v24, a3, v0.t
23432345
; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
23442346
; RV32-NEXT: csrr a1, vlenb
2345-
; RV32-NEXT: slli a1, a1, 3
2347+
; RV32-NEXT: slli a1, a1, 4
23462348
; RV32-NEXT: add a1, sp, a1
23472349
; RV32-NEXT: addi a1, a1, 16
23482350
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
@@ -2360,7 +2362,7 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale
23602362
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
23612363
; RV32-NEXT: vmv.v.x v24, a1
23622364
; RV32-NEXT: csrr a1, vlenb
2363-
; RV32-NEXT: slli a1, a1, 4
2365+
; RV32-NEXT: slli a1, a1, 3
23642366
; RV32-NEXT: add a1, sp, a1
23652367
; RV32-NEXT: addi a1, a1, 16
23662368
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
@@ -2687,33 +2689,35 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale
26872689
; RV32-NEXT: vsll.vx v24, v24, a4, v0.t
26882690
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
26892691
; RV32-NEXT: csrr a3, vlenb
2690-
; RV32-NEXT: slli a3, a3, 4
2692+
; RV32-NEXT: slli a3, a3, 3
26912693
; RV32-NEXT: add a3, sp, a3
26922694
; RV32-NEXT: addi a3, a3, 16
26932695
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
26942696
; RV32-NEXT: vlse64.v v16, (a5), zero
26952697
; RV32-NEXT: csrr a3, vlenb
2696-
; RV32-NEXT: slli a3, a3, 3
2698+
; RV32-NEXT: slli a3, a3, 4
26972699
; RV32-NEXT: add a3, sp, a3
26982700
; RV32-NEXT: addi a3, a3, 16
26992701
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
27002702
; RV32-NEXT: lui a3, 4080
2701-
; RV32-NEXT: vand.vx v24, v8, a3, v0.t
2702-
; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
2703-
; RV32-NEXT: addi a5, sp, 16
2704-
; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill
2705-
; RV32-NEXT: vand.vv v24, v8, v16, v0.t
2706-
; RV32-NEXT: vsll.vi v16, v24, 8, v0.t
2707-
; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
2708-
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
2703+
; RV32-NEXT: vand.vx v16, v8, a3, v0.t
2704+
; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
27092705
; RV32-NEXT: csrr a5, vlenb
27102706
; RV32-NEXT: slli a5, a5, 4
27112707
; RV32-NEXT: add a5, sp, a5
27122708
; RV32-NEXT: addi a5, a5, 16
2709+
; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload
2710+
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
2711+
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
2712+
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
2713+
; RV32-NEXT: csrr a5, vlenb
2714+
; RV32-NEXT: slli a5, a5, 3
2715+
; RV32-NEXT: add a5, sp, a5
2716+
; RV32-NEXT: addi a5, a5, 16
27132717
; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
27142718
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
27152719
; RV32-NEXT: csrr a5, vlenb
2716-
; RV32-NEXT: slli a5, a5, 4
2720+
; RV32-NEXT: slli a5, a5, 3
27172721
; RV32-NEXT: add a5, sp, a5
27182722
; RV32-NEXT: addi a5, a5, 16
27192723
; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
@@ -2727,7 +2731,7 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale
27272731
; RV32-NEXT: vand.vx v24, v24, a3, v0.t
27282732
; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
27292733
; RV32-NEXT: csrr a1, vlenb
2730-
; RV32-NEXT: slli a1, a1, 3
2734+
; RV32-NEXT: slli a1, a1, 4
27312735
; RV32-NEXT: add a1, sp, a1
27322736
; RV32-NEXT: addi a1, a1, 16
27332737
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
@@ -2745,7 +2749,7 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale
27452749
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
27462750
; RV32-NEXT: vmv.v.x v24, a1
27472751
; RV32-NEXT: csrr a1, vlenb
2748-
; RV32-NEXT: slli a1, a1, 4
2752+
; RV32-NEXT: slli a1, a1, 3
27492753
; RV32-NEXT: add a1, sp, a1
27502754
; RV32-NEXT: addi a1, a1, 16
27512755
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
@@ -3052,19 +3056,8 @@ declare <vscale x 64 x i16> @llvm.vp.bitreverse.nxv64i16(<vscale x 64 x i16>, <v
30523056
define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
30533057
; CHECK-LABEL: vp_bitreverse_nxv64i16:
30543058
; CHECK: # %bb.0:
3055-
; CHECK-NEXT: addi sp, sp, -16
3056-
; CHECK-NEXT: .cfi_def_cfa_offset 16
3057-
; CHECK-NEXT: csrr a1, vlenb
3058-
; CHECK-NEXT: slli a1, a1, 4
3059-
; CHECK-NEXT: sub sp, sp, a1
3060-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
30613059
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
3062-
; CHECK-NEXT: vmv1r.v v24, v0
3063-
; CHECK-NEXT: csrr a1, vlenb
3064-
; CHECK-NEXT: slli a1, a1, 3
3065-
; CHECK-NEXT: add a1, sp, a1
3066-
; CHECK-NEXT: addi a1, a1, 16
3067-
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
3060+
; CHECK-NEXT: vmv1r.v v7, v0
30683061
; CHECK-NEXT: csrr a3, vlenb
30693062
; CHECK-NEXT: lui a1, 1
30703063
; CHECK-NEXT: lui a2, 3
@@ -3080,63 +3073,48 @@ define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vsc
30803073
; CHECK-NEXT: addi a2, a2, 819
30813074
; CHECK-NEXT: addi a1, a6, 1365
30823075
; CHECK-NEXT: vsetvli zero, a5, e16, m8, ta, ma
3083-
; CHECK-NEXT: vsrl.vi v8, v16, 8, v0.t
3076+
; CHECK-NEXT: vsrl.vi v24, v16, 8, v0.t
30843077
; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t
3085-
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
3086-
; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
3078+
; CHECK-NEXT: vor.vv v16, v16, v24, v0.t
3079+
; CHECK-NEXT: vsrl.vi v24, v16, 4, v0.t
3080+
; CHECK-NEXT: vand.vx v24, v24, a4, v0.t
30873081
; CHECK-NEXT: vand.vx v16, v16, a4, v0.t
3088-
; CHECK-NEXT: vand.vx v8, v8, a4, v0.t
3089-
; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
3090-
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
3091-
; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t
3082+
; CHECK-NEXT: vsll.vi v16, v16, 4, v0.t
3083+
; CHECK-NEXT: vor.vv v16, v24, v16, v0.t
3084+
; CHECK-NEXT: vsrl.vi v24, v16, 2, v0.t
3085+
; CHECK-NEXT: vand.vx v24, v24, a2, v0.t
30923086
; CHECK-NEXT: vand.vx v16, v16, a2, v0.t
3093-
; CHECK-NEXT: vand.vx v8, v8, a2, v0.t
3094-
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
3095-
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
3096-
; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
3087+
; CHECK-NEXT: vsll.vi v16, v16, 2, v0.t
3088+
; CHECK-NEXT: vor.vv v16, v24, v16, v0.t
3089+
; CHECK-NEXT: vsrl.vi v24, v16, 1, v0.t
3090+
; CHECK-NEXT: vand.vx v24, v24, a1, v0.t
30973091
; CHECK-NEXT: vand.vx v16, v16, a1, v0.t
3098-
; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
3099-
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
3100-
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
3101-
; CHECK-NEXT: addi a5, sp, 16
3102-
; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
3092+
; CHECK-NEXT: vsll.vi v16, v16, 1, v0.t
3093+
; CHECK-NEXT: vor.vv v16, v24, v16, v0.t
31033094
; CHECK-NEXT: bltu a0, a3, .LBB46_2
31043095
; CHECK-NEXT: # %bb.1:
31053096
; CHECK-NEXT: mv a0, a3
31063097
; CHECK-NEXT: .LBB46_2:
3107-
; CHECK-NEXT: vmv1r.v v0, v24
3108-
; CHECK-NEXT: csrr a3, vlenb
3109-
; CHECK-NEXT: slli a3, a3, 3
3110-
; CHECK-NEXT: add a3, sp, a3
3111-
; CHECK-NEXT: addi a3, a3, 16
3112-
; CHECK-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
3098+
; CHECK-NEXT: vmv1r.v v0, v7
31133099
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
3114-
; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
3100+
; CHECK-NEXT: vsrl.vi v24, v8, 8, v0.t
31153101
; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
3116-
; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
3117-
; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
3118-
; CHECK-NEXT: vand.vx v16, v16, a4, v0.t
3102+
; CHECK-NEXT: vor.vv v8, v8, v24, v0.t
3103+
; CHECK-NEXT: vsrl.vi v24, v8, 4, v0.t
3104+
; CHECK-NEXT: vand.vx v24, v24, a4, v0.t
31193105
; CHECK-NEXT: vand.vx v8, v8, a4, v0.t
31203106
; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
3121-
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
3122-
; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t
3123-
; CHECK-NEXT: vand.vx v16, v16, a2, v0.t
3107+
; CHECK-NEXT: vor.vv v8, v24, v8, v0.t
3108+
; CHECK-NEXT: vsrl.vi v24, v8, 2, v0.t
3109+
; CHECK-NEXT: vand.vx v24, v24, a2, v0.t
31243110
; CHECK-NEXT: vand.vx v8, v8, a2, v0.t
31253111
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
3126-
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
3127-
; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
3128-
; CHECK-NEXT: vand.vx v16, v16, a1, v0.t
3112+
; CHECK-NEXT: vor.vv v8, v24, v8, v0.t
3113+
; CHECK-NEXT: vsrl.vi v24, v8, 1, v0.t
3114+
; CHECK-NEXT: vand.vx v24, v24, a1, v0.t
31293115
; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
31303116
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
3131-
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
3132-
; CHECK-NEXT: addi a0, sp, 16
3133-
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
3134-
; CHECK-NEXT: csrr a0, vlenb
3135-
; CHECK-NEXT: slli a0, a0, 4
3136-
; CHECK-NEXT: add sp, sp, a0
3137-
; CHECK-NEXT: .cfi_def_cfa sp, 16
3138-
; CHECK-NEXT: addi sp, sp, 16
3139-
; CHECK-NEXT: .cfi_def_cfa_offset 0
3117+
; CHECK-NEXT: vor.vv v8, v24, v8, v0.t
31403118
; CHECK-NEXT: ret
31413119
;
31423120
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16:

0 commit comments

Comments
 (0)