[RISCV] Reduce the VL of both operands in VMERGE_VVM #144759
Conversation
@llvm/pr-subscribers-backend-risc-v

Author: Min-Yih Hsu (mshockwave)

Changes

The tryToReduceVL function in RISCVVectorPeephole currently only reduces the VL of the instruction that defines the true operand of VMERGE_VVM. We should be able to reduce the VL of both operands. This patch generalizes the function to support multiple operands from a single instruction.

This was motivated by https://github.com/llvm/llvm-project/pull/144170/files#r2146240973

Full diff: https://github.com/llvm/llvm-project/pull/144759.diff

3 Files Affected:
- llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
- llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
- llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index c9c2413d009b7..f7acd676461fb 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -112,7 +112,7 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
//
// TODO: We can handle a bunch more instructions here, and probably
// recurse backwards through operands too.
- unsigned SrcIdx = 0;
+ SmallVector<unsigned, 2> SrcIndices = {0};
switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
default:
return false;
@@ -122,10 +122,10 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
case RISCV::VSE64_V:
break;
case RISCV::VMV_V_V:
- SrcIdx = 2;
+ SrcIndices[0] = 2;
break;
case RISCV::VMERGE_VVM:
- SrcIdx = 3; // TODO: We can also handle the false operand.
+ SrcIndices.assign({2, 3});
break;
case RISCV::VREDSUM_VS:
case RISCV::VREDMAXU_VS:
@@ -143,7 +143,7 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
case RISCV::VFREDMIN_VS:
case RISCV::VFWREDUSUM_VS:
case RISCV::VFWREDOSUM_VS:
- SrcIdx = 2;
+ SrcIndices[0] = 2;
break;
}
@@ -151,42 +151,48 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
if (VL.isImm() && VL.getImm() == RISCV::VLMaxSentinel)
return false;
- Register SrcReg = MI.getOperand(SrcIdx).getReg();
- // Note: one *use*, not one *user*.
- if (!MRI->hasOneUse(SrcReg))
- return false;
-
- MachineInstr *Src = MRI->getVRegDef(SrcReg);
- if (!Src || Src->hasUnmodeledSideEffects() ||
- Src->getParent() != MI.getParent() || Src->getNumDefs() != 1 ||
- !RISCVII::hasVLOp(Src->getDesc().TSFlags) ||
- !RISCVII::hasSEWOp(Src->getDesc().TSFlags))
- return false;
-
- // Src's dest needs to have the same EEW as MI's input.
- if (!hasSameEEW(MI, *Src))
- return false;
-
- bool ElementsDependOnVL = RISCVII::elementsDependOnVL(
- TII->get(RISCV::getRVVMCOpcode(Src->getOpcode())).TSFlags);
- if (ElementsDependOnVL || Src->mayRaiseFPException())
- return false;
+ bool Changed = false;
+ for (unsigned SrcIdx : SrcIndices) {
+ Register SrcReg = MI.getOperand(SrcIdx).getReg();
+ // Note: one *use*, not one *user*.
+ if (!MRI->hasOneUse(SrcReg))
+ continue;
+
+ MachineInstr *Src = MRI->getVRegDef(SrcReg);
+ if (!Src || Src->hasUnmodeledSideEffects() ||
+ Src->getParent() != MI.getParent() || Src->getNumDefs() != 1 ||
+ !RISCVII::hasVLOp(Src->getDesc().TSFlags) ||
+ !RISCVII::hasSEWOp(Src->getDesc().TSFlags))
+ continue;
+
+ // Src's dest needs to have the same EEW as MI's input.
+ if (!hasSameEEW(MI, *Src))
+ continue;
+
+ bool ElementsDependOnVL = RISCVII::elementsDependOnVL(
+ TII->get(RISCV::getRVVMCOpcode(Src->getOpcode())).TSFlags);
+ if (ElementsDependOnVL || Src->mayRaiseFPException())
+ continue;
+
+ MachineOperand &SrcVL =
+ Src->getOperand(RISCVII::getVLOpNum(Src->getDesc()));
+ if (VL.isIdenticalTo(SrcVL) || !RISCV::isVLKnownLE(VL, SrcVL))
+ continue;
- MachineOperand &SrcVL = Src->getOperand(RISCVII::getVLOpNum(Src->getDesc()));
- if (VL.isIdenticalTo(SrcVL) || !RISCV::isVLKnownLE(VL, SrcVL))
- return false;
+ if (!ensureDominates(VL, *Src))
+ continue;
- if (!ensureDominates(VL, *Src))
- return false;
+ if (VL.isImm())
+ SrcVL.ChangeToImmediate(VL.getImm());
+ else if (VL.isReg())
+ SrcVL.ChangeToRegister(VL.getReg(), false);
- if (VL.isImm())
- SrcVL.ChangeToImmediate(VL.getImm());
- else if (VL.isReg())
- SrcVL.ChangeToRegister(VL.getReg(), false);
+ Changed = true;
+ }
// TODO: For instructions with a passthru, we could clear the passthru
// and tail policy since we've just proven the tail is not demanded.
- return true;
+ return Changed;
}
/// Check if an operand is an immediate or a materialized ADDI $x0, imm.
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
index 75537406f3515..372b07e0137b4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
@@ -34,9 +34,8 @@ define <vscale x 1 x i8> @masked_load_passthru_nxv1i8(ptr %a, <vscale x 1 x i1>
; ZVE32: # %bb.0:
; ZVE32-NEXT: csrr a1, vlenb
; ZVE32-NEXT: srli a1, a1, 3
-; ZVE32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
-; ZVE32-NEXT: vmv.v.i v8, 0
; ZVE32-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
+; ZVE32-NEXT: vmv.v.i v8, 0
; ZVE32-NEXT: vle8.v v8, (a0), v0.t
; ZVE32-NEXT: ret
%load = call <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr %a, i32 1, <vscale x 1 x i1> %mask, <vscale x 1 x i8> zeroinitializer)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 10a92f0188a93..1cbb980aebffc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -3063,9 +3063,9 @@ define <vscale x 4 x i32> @vmv_v_x(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
define <vscale x 1 x i8> @vmv_v_v(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, iXLen %vl) {
; NOVLOPT-LABEL: vmv_v_v:
; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, tu, ma
+; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
; NOVLOPT-NEXT: vmv.v.v v8, v9
-; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; NOVLOPT-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; NOVLOPT-NEXT: vmerge.vvm v8, v8, v10, v0
; NOVLOPT-NEXT: ret
;
@@ -112,7 +112,7 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
//
// TODO: We can handle a bunch more instructions here, and probably
// recurse backwards through operands too.
I'm not sure if I need to remove this comment, because at least for this case (VMERGE_VVM) we neither need to process operands recursively nor in a backward fashion.
This TODO describes what the VL optimizer does.
Yeah, this TODO is definitely stale. Longer term, we should probably eliminate this bit of code in favor of VLOptimizer just to have fewer copies doing roughly the same thing.
@@ -3063,9 +3063,9 @@ define <vscale x 4 x i32> @vmv_v_x(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
define <vscale x 1 x i8> @vmv_v_v(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, iXLen %vl) {
Interestingly, with this patch, NOVLOPT produces the same result as VLOPT. So I think we offload some of the optimization work from the VL optimizer.

Why doesn't the VL optimizer handle this?

Because VMERGE_VVM would be eliminated by another optimization in RISCVVectorPeephole, so by the time we reach the VL optimizer we can no longer perform this VL reduction correctly (at that point, we're not sure whether we actually need VLMAX splat elements or not).
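A minimal sketch of the shape in question (hand-written for illustration, mirroring the masked-load-int.ll test above; not taken from the patch):

# Before RISCVVectorPeephole runs: both operands of vmerge.vvm only
# demand a1 elements, so this patch can shrink the VLMAX splat to VL=a1.
vsetvli a2, zero, e8, mf4, ta, ma     # VLMAX
vmv.v.i v8, 0                         # false operand, defined at VLMAX
vsetvli zero, a1, e8, mf4, ta, ma
vle8.v v9, (a0)                       # true operand
vmerge.vvm v8, v8, v9, v0             # demands only a1 elements of v8/v9

# After the peephole folds the vmerge into a masked load, the explicit
# VL=a1 consumer of v8 is gone, so the VL optimizer has nothing left to
# anchor the reduction to:
vsetvli a2, zero, e8, mf4, ta, ma
vmv.v.i v8, 0
vsetvli zero, a1, e8, mf4, ta, mu
vle8.v v8, (a0), v0.t                 # v8 is now the passthru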
LGTM
I suspect we could handle this in VLOptimizer, but I have no problem with this as an incremental improvement.
For the VLOptimizer case, v8 should be the passthru operand, and the load should have a "tail undefined" property. Given that, we should be able to reduce the VL of the vmv.v.i that way as well.
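Concretely, annotating the ZVE32 output from masked-load-int.ll above (the policy reading here is my interpretation, not something this patch changes):

vsetvli zero, a1, e8, mf4, ta, mu     # ta: tail elements are not demanded
vmv.v.i v8, 0                         # defines the eventual passthru at VL=a1
vle8.v v8, (a0), v0.t                 # mu: inactive elements keep v8's zeros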
The
tryToReduceVL
function in RISCVVectorPeephole currently only reduces the VL of the instruction that defines the true operand in VMERGE_VVM. We should be able to reduce VL of both operands. This patch generalizes this function to support multiple operands from a single instruction.This was motivated by https://github.com/llvm/llvm-project/pull/144170/files#r2146240973