[RISCV][VLOPT] Remove unnecessary passthru restriction #124549
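This patch removes the check in RISCVVLOptimizer::isCandidate() that rejected any instruction with a non-undef passthru and a non-VLMAX VL. The check is redundant: shrinking an instruction's VL only affects elements at or past the new VL, and checkUsers() already refuses to shrink when a user reads those elements, in particular when the result is tied in as a user's passthru.

A minimal IR sketch of a case this enables, adapted from the new different_imm_vl_with_tu test below (iXLen is the test harness's placeholder for the XLEN-sized integer type):

; %v is computed tail-undisturbed over the non-undef passthru %a at VL 5.
; Before this patch, that alone disqualified it from VL optimization.
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
; %v is only used as a normal source operand at VL 4 (not as a passthru),
; so no element of %v past index 3 is demanded and its VL can drop to 4.
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 4)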

Merged 2 commits on Jan 27, 2025
21 changes: 1 addition & 20 deletions llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1143,27 +1143,8 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
if (MI.getNumDefs() != 1)
return false;

// If we're not using VLMAX, then we need to be careful whether we are using
// TA/TU when there is a non-undef Passthru. But when we are using VLMAX, it
// does not matter whether we are using TA/TU with a non-undef Passthru, since
// there are no tail elements to be preserved.
unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
const MachineOperand &VLOp = MI.getOperand(VLOpNum);
if (VLOp.isReg() || VLOp.getImm() != RISCV::VLMaxSentinel) {
// If MI has a non-undef passthru, we will not try to optimize it since
// that requires us to preserve tail elements according to TA/TU.
// Otherwise, the MI has an undef Passthru, so it doesn't matter whether we
// are using TA/TU.
bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc);
unsigned PassthruOpIdx = MI.getNumExplicitDefs();
if (HasPassthru &&
MI.getOperand(PassthruOpIdx).getReg() != RISCV::NoRegister) {
LLVM_DEBUG(
dbgs() << " Not a candidate because it uses non-undef passthru"
" with non-VLMAX VL\n");
return false;
}
}

// If the VL is 1, then there is no need to reduce it. This is an
// optimization, not needed to preserve correctness.
@@ -1247,7 +1228,7 @@ std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
return std::nullopt;
}

// Tied operands might pass through.
// If used as a passthru, elements past VL will be read.
if (UserOp.isTied()) {
LLVM_DEBUG(dbgs() << " Abort because user used as tied operand\n");
return std::nullopt;
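For context, a condensed sketch of the user-side guard that subsumes the deleted candidate check, assuming LLVM's MachineRegisterInfo/MachineOperand APIs (not the verbatim pass code):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

// Sketch: walk the uses of MI's result and refuse to shrink its VL if any
// user ties the result in as its passthru, since a tied (passthru) operand
// demands the elements past the user's VL.
static bool resultIsUsedAsPassthru(const MachineInstr &MI,
                                   const MachineRegisterInfo &MRI) {
  Register DefReg = MI.getOperand(0).getReg();
  for (const MachineOperand &UserOp : MRI.use_operands(DefReg))
    if (UserOp.isTied())
      return true;
  return false;
}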
24 changes: 16 additions & 8 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -3919,11 +3919,12 @@ define void @trunc_v6bf16(ptr %x) {
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v8, v10
; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
@@ -4002,11 +4003,12 @@ define void @trunc_v6f16(ptr %x) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v10
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
@@ -4098,12 +4100,13 @@ define void @ceil_v6bf16(ptr %x) {
; CHECK-NEXT: vfabs.v v8, v10
; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a1, 3
; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
@@ -4189,12 +4192,13 @@ define void @ceil_v6f16(ptr %x) {
; ZVFHMIN-NEXT: vfabs.v v8, v10
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT: fsrmi a1, 3
; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: fsrm a1
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
@@ -4290,12 +4294,13 @@ define void @floor_v6bf16(ptr %x) {
; CHECK-NEXT: vfabs.v v8, v10
; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a1, 2
; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
@@ -4381,12 +4386,13 @@ define void @floor_v6f16(ptr %x) {
; ZVFHMIN-NEXT: vfabs.v v8, v10
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT: fsrmi a1, 2
; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: fsrm a1
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
@@ -4482,12 +4488,13 @@ define void @round_v6bf16(ptr %x) {
; CHECK-NEXT: vfabs.v v8, v10
; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a1, 4
; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
@@ -4573,12 +4580,13 @@ define void @round_v6f16(ptr %x) {
; ZVFHMIN-NEXT: vfabs.v v8, v10
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT: fsrmi a1, 4
; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: fsrm a1
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
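The churn in this test file follows from the removed restriction: the masked vfcvt/vfsgnj chain is now narrowed to VL 6 up front, so the final narrowing convert can keep the current VL with an x0,x0 vsetvli instead of re-establishing it. Roughly, as a hand-written illustration (not generated output):

vsetivli zero, 6, e32, m2, ta, ma     # VL=6 established once for the chain
# ... masked converts and vfsgnj.vv run here at VL=6 ...
vsetvli zero, zero, e16, m1, ta, ma   # rd=x0, rs1=x0: keep VL=6, change only SEW/LMUL
vfncvtbf16.f.f.w v8, v10              # narrowing convert still runs at VL=6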
60 changes: 52 additions & 8 deletions llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
@@ -107,7 +107,8 @@ define <vscale x 4 x i32> @different_vl_with_ta(<vscale x 4 x i32> %a, <vscale x
ret <vscale x 4 x i32> %w
}

; Test case to make sure VL won't propgate if using tail-undisturbed policy.
; We can propagate VL to a tail-undisturbed policy, provided none of its users
; are passthrus (i.e. read past VL).
define <vscale x 4 x i32> @different_vl_with_tu(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_vl_with_tu:
; CHECK: # %bb.0:
@@ -118,22 +119,65 @@ define <vscale x 4 x i32> @different_vl_with_tu(<vscale x 4 x i32> %passthru, <v
; CHECK-NEXT: vadd.vv v8, v14, v10
; CHECK-NEXT: ret
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a,iXLen %vl2)
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl2)
ret <vscale x 4 x i32> %w
}

; Test case to make sure VL won't propgate if using tail-undisturbed policy.
; We can propagate VL to a tail-undisturbed policy, provided none of its users
; are passthrus (i.e. read past VL).
define <vscale x 4 x i32> @different_imm_vl_with_tu(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_imm_vl_with_tu:
; NOVLOPT-LABEL: different_imm_vl_with_tu:
; NOVLOPT: # %bb.0:
; NOVLOPT-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; NOVLOPT-NEXT: vmv2r.v v14, v10
; NOVLOPT-NEXT: vadd.vv v14, v10, v12
; NOVLOPT-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; NOVLOPT-NEXT: vadd.vv v8, v14, v10
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: different_imm_vl_with_tu:
; VLOPT: # %bb.0:
; VLOPT-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; VLOPT-NEXT: vmv2r.v v14, v10
; VLOPT-NEXT: vadd.vv v14, v10, v12
; VLOPT-NEXT: vadd.vv v8, v14, v10
; VLOPT-NEXT: ret
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 4)
ret <vscale x 4 x i32> %w
}

; We can't reduce the VL as %v is used as a passthru, i.e. the elements past VL
; are demanded.
define <vscale x 4 x i32> @different_vl_as_passthru(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_vl_as_passthru:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
; CHECK-NEXT: vmv2r.v v12, v8
; CHECK-NEXT: vadd.vv v12, v8, v10
; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma
; CHECK-NEXT: vadd.vv v12, v8, v10
; CHECK-NEXT: vmv2r.v v8, v12
; CHECK-NEXT: ret
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %v, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl2)
ret <vscale x 4 x i32> %w
}

; We can't reduce the VL as %v is used as a passthru, i.e. the elements past VL
; are demanded.
define <vscale x 4 x i32> @different_imm_vl_as_passthru(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_imm_vl_as_passthru:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; CHECK-NEXT: vmv2r.v v14, v10
; CHECK-NEXT: vadd.vv v14, v10, v12
; CHECK-NEXT: vmv2r.v v12, v8
; CHECK-NEXT: vadd.vv v12, v8, v10
; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; CHECK-NEXT: vadd.vv v8, v14, v10
; CHECK-NEXT: vadd.vv v12, v8, v10
; CHECK-NEXT: vmv2r.v v8, v12
; CHECK-NEXT: ret
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a,iXLen 4)
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %v, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 4)
ret <vscale x 4 x i32> %w
}
