
[RISCV] Reduce the VL of both operands in VMERGE_VVM #144759

Merged — 1 commit merged into llvm:main on Jun 18, 2025

Conversation

@mshockwave (Member) commented Jun 18, 2025

The tryToReduceVL function in RISCVVectorPeephole currently only reduces the VL of the instruction that defines the true operand in VMERGE_VVM. We should be able to reduce the VL of both operands. This patch generalizes the function to support multiple operands from a single instruction.


This was motivated by https://github.com/llvm/llvm-project/pull/144170/files#r2146240973
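
In rough terms, the change turns the single SrcIdx into a small list of source operand indices and tries to shrink each defining instruction's VL independently. Below is a condensed sketch of that shape; the real checks (side effects, same EEW, elementsDependOnVL, VL dominance) are inline in the diff further down, and canShrinkSrcVL / shrinkSrcVLTo are hypothetical helpers standing in for them:

  // Condensed sketch of the generalized tryToReduceVL.
  // canShrinkSrcVL and shrinkSrcVLTo are hypothetical stand-ins for the
  // inline legality checks and the VL rewrite in the actual patch.
  SmallVector<unsigned, 2> SrcIndices = {0};
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_V_V:
    SrcIndices[0] = 2;          // single source operand
    break;
  case RISCV::VMERGE_VVM:
    SrcIndices.assign({2, 3});  // false operand and true operand
    break;
  }

  bool Changed = false;
  for (unsigned SrcIdx : SrcIndices) {
    MachineInstr *Src = MRI->getVRegDef(MI.getOperand(SrcIdx).getReg());
    // A failed check now skips this operand instead of bailing out of the
    // whole transform.
    if (!Src || !canShrinkSrcVL(MI, *Src))
      continue;
    shrinkSrcVLTo(*Src, VL); // copy MI's (smaller) VL into Src's VL operand
    Changed = true;
  }
  return Changed;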

@llvmbot (Member) commented Jun 18, 2025

@llvm/pr-subscribers-backend-risc-v

Author: Min-Yih Hsu (mshockwave)

Changes

The tryToReduceVL function in RISCVVectorPeephole currently only reduces the VL of the instruction that defines the true operand in VMERGE_VVM. We should be able to reduce the VL of both operands. This patch generalizes the function to support multiple operands from a single instruction.


This was motivated by https://github.com/llvm/llvm-project/pull/144170/files#r2146240973


Full diff: https://github.com/llvm/llvm-project/pull/144759.diff

3 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp (+40-34)
  • (modified) llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll (+1-2)
  • (modified) llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll (+2-2)
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index c9c2413d009b7..f7acd676461fb 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -112,7 +112,7 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
   //
   // TODO: We can handle a bunch more instructions here, and probably
   // recurse backwards through operands too.
-  unsigned SrcIdx = 0;
+  SmallVector<unsigned, 2> SrcIndices = {0};
   switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
   default:
     return false;
@@ -122,10 +122,10 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
   case RISCV::VSE64_V:
     break;
   case RISCV::VMV_V_V:
-    SrcIdx = 2;
+    SrcIndices[0] = 2;
     break;
   case RISCV::VMERGE_VVM:
-    SrcIdx = 3; // TODO: We can also handle the false operand.
+    SrcIndices.assign({2, 3});
     break;
   case RISCV::VREDSUM_VS:
   case RISCV::VREDMAXU_VS:
@@ -143,7 +143,7 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
   case RISCV::VFREDMIN_VS:
   case RISCV::VFWREDUSUM_VS:
   case RISCV::VFWREDOSUM_VS:
-    SrcIdx = 2;
+    SrcIndices[0] = 2;
     break;
   }
 
@@ -151,42 +151,48 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
   if (VL.isImm() && VL.getImm() == RISCV::VLMaxSentinel)
     return false;
 
-  Register SrcReg = MI.getOperand(SrcIdx).getReg();
-  // Note: one *use*, not one *user*.
-  if (!MRI->hasOneUse(SrcReg))
-    return false;
-
-  MachineInstr *Src = MRI->getVRegDef(SrcReg);
-  if (!Src || Src->hasUnmodeledSideEffects() ||
-      Src->getParent() != MI.getParent() || Src->getNumDefs() != 1 ||
-      !RISCVII::hasVLOp(Src->getDesc().TSFlags) ||
-      !RISCVII::hasSEWOp(Src->getDesc().TSFlags))
-    return false;
-
-  // Src's dest needs to have the same EEW as MI's input.
-  if (!hasSameEEW(MI, *Src))
-    return false;
-
-  bool ElementsDependOnVL = RISCVII::elementsDependOnVL(
-      TII->get(RISCV::getRVVMCOpcode(Src->getOpcode())).TSFlags);
-  if (ElementsDependOnVL || Src->mayRaiseFPException())
-    return false;
+  bool Changed = false;
+  for (unsigned SrcIdx : SrcIndices) {
+    Register SrcReg = MI.getOperand(SrcIdx).getReg();
+    // Note: one *use*, not one *user*.
+    if (!MRI->hasOneUse(SrcReg))
+      continue;
+
+    MachineInstr *Src = MRI->getVRegDef(SrcReg);
+    if (!Src || Src->hasUnmodeledSideEffects() ||
+        Src->getParent() != MI.getParent() || Src->getNumDefs() != 1 ||
+        !RISCVII::hasVLOp(Src->getDesc().TSFlags) ||
+        !RISCVII::hasSEWOp(Src->getDesc().TSFlags))
+      continue;
+
+    // Src's dest needs to have the same EEW as MI's input.
+    if (!hasSameEEW(MI, *Src))
+      continue;
+
+    bool ElementsDependOnVL = RISCVII::elementsDependOnVL(
+        TII->get(RISCV::getRVVMCOpcode(Src->getOpcode())).TSFlags);
+    if (ElementsDependOnVL || Src->mayRaiseFPException())
+      continue;
+
+    MachineOperand &SrcVL =
+        Src->getOperand(RISCVII::getVLOpNum(Src->getDesc()));
+    if (VL.isIdenticalTo(SrcVL) || !RISCV::isVLKnownLE(VL, SrcVL))
+      continue;
 
-  MachineOperand &SrcVL = Src->getOperand(RISCVII::getVLOpNum(Src->getDesc()));
-  if (VL.isIdenticalTo(SrcVL) || !RISCV::isVLKnownLE(VL, SrcVL))
-    return false;
+    if (!ensureDominates(VL, *Src))
+      continue;
 
-  if (!ensureDominates(VL, *Src))
-    return false;
+    if (VL.isImm())
+      SrcVL.ChangeToImmediate(VL.getImm());
+    else if (VL.isReg())
+      SrcVL.ChangeToRegister(VL.getReg(), false);
 
-  if (VL.isImm())
-    SrcVL.ChangeToImmediate(VL.getImm());
-  else if (VL.isReg())
-    SrcVL.ChangeToRegister(VL.getReg(), false);
+    Changed = true;
+  }
 
   // TODO: For instructions with a passthru, we could clear the passthru
   // and tail policy since we've just proven the tail is not demanded.
-  return true;
+  return Changed;
 }
 
 /// Check if an operand is an immediate or a materialized ADDI $x0, imm.
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
index 75537406f3515..372b07e0137b4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
@@ -34,9 +34,8 @@ define <vscale x 1 x i8> @masked_load_passthru_nxv1i8(ptr %a, <vscale x 1 x i1>
 ; ZVE32:       # %bb.0:
 ; ZVE32-NEXT:    csrr a1, vlenb
 ; ZVE32-NEXT:    srli a1, a1, 3
-; ZVE32-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
-; ZVE32-NEXT:    vmv.v.i v8, 0
 ; ZVE32-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu
+; ZVE32-NEXT:    vmv.v.i v8, 0
 ; ZVE32-NEXT:    vle8.v v8, (a0), v0.t
 ; ZVE32-NEXT:    ret
   %load = call <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr %a, i32 1, <vscale x 1 x i1> %mask, <vscale x 1 x i8> zeroinitializer)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 10a92f0188a93..1cbb980aebffc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -3063,9 +3063,9 @@ define <vscale x 4 x i32> @vmv_v_x(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
 define <vscale x 1 x i8> @vmv_v_v(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, iXLen %vl) {
 ; NOVLOPT-LABEL: vmv_v_v:
 ; NOVLOPT:       # %bb.0:
-; NOVLOPT-NEXT:    vsetvli a1, zero, e8, mf8, tu, ma
+; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf8, tu, ma
 ; NOVLOPT-NEXT:    vmv.v.v v8, v9
-; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; NOVLOPT-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; NOVLOPT-NEXT:    vmerge.vvm v8, v8, v10, v0
 ; NOVLOPT-NEXT:    ret
 ;

@@ -112,7 +112,7 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
//
// TODO: We can handle a bunch more instructions here, and probably
// recurse backwards through operands too.
@mshockwave (Member, Author):

I'm not sure whether I need to remove this comment, because at least for this case (VMERGE_VVM) we neither need to process the operands recursively nor need to do it in a backward fashion.

Collaborator:

This TODO describes what the VL optimizer does.

Collaborator:

Yeah, this TODO is definitely stale. Longer term, we should probably eliminate this bit of code in favor of VLOptimizer just to have fewer copies doing roughly the same thing.

@@ -3063,9 +3063,9 @@ define <vscale x 4 x i32> @vmv_v_x(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
define <vscale x 1 x i8> @vmv_v_v(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, iXLen %vl) {
@mshockwave (Member, Author):

Interestingly, with this patch the NOVLOPT output is the same as the VLOPT output, so I think we offload some of the optimization work from the VL optimizer.

@topperc (Collaborator) commented Jun 18, 2025

Why doesn't the VL optimizer handle this?

@mshockwave (Member, Author) replied:

Why doesn't the VL optimizer handle this?

Because the VMERGE_VVM would be eliminated by another optimization in RISCVVectorPeephole, so by the time we reach the VL optimizer we can no longer perform this VL reduction correctly (at that point we cannot tell whether we actually need VLMAX splat elements or not).

@preames (Collaborator) left a comment

LGTM

I suspect we could handle this in VLOptimizer, but I have no problem with this as an incremental improvement.

For the VLOptimizer case, v8 should be the passthru operand, and the load should have a "tail undefined" property. Given that, we should be able to reduce the VL of the vmv.v.i that way as well.

@topperc (Collaborator) left a comment

LGTM

@mshockwave merged commit d10079e into llvm:main on Jun 18, 2025
9 checks passed
@mshockwave deleted the patch/rvv/vector-peephole-vmerge branch on June 18, 2025 at 21:24