Skip to content

Commit 445bc1b

Browse files
committed
[RISCV][VLOPT] Allow users that are passthrus if tail elements aren't demanded
The motivation for this is to allow reducing the VL when a user is a ternary pseudo, where the third operand is tied and also acts as a passthru. When checking the users of an instruction, we currently bail if the user is used as a passthru because all of its elements past VL will be used for the tail. We can allow passthru users if we know the tail of their result isn't used, and we can reuse checkUsers to check this. It's worth noting that this is all irrespective of the tail policy, because tail agnostic still ends up using the passthru. I've checked that SPEC CPU 2017 + llvm-test-suite pass with this. Fixes llvm#123760
1 parent 06892e0 commit 445bc1b

File tree

5 files changed

+46
-43
lines changed

5 files changed

+46
-43
lines changed

llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ class RISCVVLOptimizer : public MachineFunctionPass {
5353
std::optional<MachineOperand> getMinimumVLForUser(MachineOperand &UserOp);
5454
/// Returns the largest common VL MachineOperand that may be used to optimize
5555
/// MI. Returns std::nullopt if it failed to find a suitable VL.
56-
std::optional<MachineOperand> checkUsers(MachineInstr &MI);
56+
std::optional<MachineOperand> checkUsers(const MachineInstr &MI);
5757
bool tryReduceVL(MachineInstr &MI);
5858
bool isCandidate(const MachineInstr &MI) const;
5959
};
@@ -1221,7 +1221,8 @@ RISCVVLOptimizer::getMinimumVLForUser(MachineOperand &UserOp) {
12211221
return VLOp;
12221222
}
12231223

1224-
std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
1224+
std::optional<MachineOperand>
1225+
RISCVVLOptimizer::checkUsers(const MachineInstr &MI) {
12251226
// FIXME: Avoid visiting each user for each time we visit something on the
12261227
// worklist, combined with an extra visit from the outer loop. Restructure
12271228
// along lines of an instcombine style worklist which integrates the outer
@@ -1235,16 +1236,21 @@ std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
12351236
return std::nullopt;
12361237
}
12371238

1238-
// Tied operands might pass through.
1239-
if (UserOp.isTied()) {
1240-
LLVM_DEBUG(dbgs() << " Abort because user used as tied operand\n");
1241-
return std::nullopt;
1242-
}
1243-
12441239
auto VLOp = getMinimumVLForUser(UserOp);
12451240
if (!VLOp)
12461241
return std::nullopt;
12471242

1243+
// If the user is a passthru, we will need to preserve it if its tail is
1244+
// demanded.
1245+
if (UserOp.isTied()) {
1246+
auto DemandedVL = checkUsers(UserMI);
1247+
if (!DemandedVL || !RISCV::isVLKnownLE(*DemandedVL, *VLOp)) {
1248+
LLVM_DEBUG(dbgs() << " Abort because user is passthru in "
1249+
"instruction with demanded tail\n");
1250+
return std::nullopt;
1251+
}
1252+
}
1253+
12481254
// Use the largest VL among all the users. If we cannot determine this
12491255
// statically, then we cannot optimize the VL.
12501256
if (!CommonVL || RISCV::isVLKnownLE(*CommonVL, *VLOp)) {

llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -894,9 +894,10 @@ define void @test_dag_loop() {
894894
; CHECK: # %bb.0: # %entry
895895
; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
896896
; CHECK-NEXT: vmclr.m v0
897+
; CHECK-NEXT: vsetivli zero, 0, e8, m4, ta, ma
897898
; CHECK-NEXT: vmv.v.i v8, 0
898899
; CHECK-NEXT: vmv.v.i v12, 0
899-
; CHECK-NEXT: vsetivli zero, 0, e8, m4, tu, mu
900+
; CHECK-NEXT: vsetvli zero, zero, e8, m4, tu, mu
900901
; CHECK-NEXT: vssubu.vx v12, v8, zero, v0.t
901902
; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
902903
; CHECK-NEXT: vmseq.vv v0, v12, v8

llvm/test/CodeGen/RISCV/rvv/vl-opt.ll

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -151,14 +151,22 @@ define <vscale x 4 x i32> @dont_optimize_tied_def(<vscale x 4 x i32> %a, <vscale
151151
}
152152

153153
define void @optimize_ternary_use(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, ptr %p, iXLen %vl) {
154-
; CHECK-LABEL: optimize_ternary_use:
155-
; CHECK: # %bb.0:
156-
; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
157-
; CHECK-NEXT: vzext.vf2 v14, v8
158-
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
159-
; CHECK-NEXT: vmadd.vv v14, v10, v12
160-
; CHECK-NEXT: vse32.v v14, (a0)
161-
; CHECK-NEXT: ret
154+
; NOVLOPT-LABEL: optimize_ternary_use:
155+
; NOVLOPT: # %bb.0:
156+
; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
157+
; NOVLOPT-NEXT: vzext.vf2 v14, v8
158+
; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
159+
; NOVLOPT-NEXT: vmadd.vv v14, v10, v12
160+
; NOVLOPT-NEXT: vse32.v v14, (a0)
161+
; NOVLOPT-NEXT: ret
162+
;
163+
; VLOPT-LABEL: optimize_ternary_use:
164+
; VLOPT: # %bb.0:
165+
; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
166+
; VLOPT-NEXT: vzext.vf2 v14, v8
167+
; VLOPT-NEXT: vmadd.vv v14, v10, v12
168+
; VLOPT-NEXT: vse32.v v14, (a0)
169+
; VLOPT-NEXT: ret
162170
%1 = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
163171
%2 = mul <vscale x 4 x i32> %b, %1
164172
%3 = add <vscale x 4 x i32> %2, %c

llvm/test/CodeGen/RISCV/rvv/vl-opt.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ name: passthru_not_demanded
156156
body: |
157157
bb.0:
158158
; CHECK-LABEL: name: passthru_not_demanded
159-
; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
159+
; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
160160
; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
161161
; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
162162
%x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
@@ -182,7 +182,7 @@ name: passthru_not_demanded_passthru_chain
182182
body: |
183183
bb.0:
184184
; CHECK-LABEL: name: passthru_not_demanded_passthru_chain
185-
; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
185+
; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
186186
; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
187187
; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 %y, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
188188
; CHECK-NEXT: %a:vr = PseudoVADD_VV_M1 %z, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */

llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1638,9 +1638,8 @@ define <vscale x 1 x i64> @vmadd_vx_nxv1i64(<vscale x 1 x i64> %a, i64 %b, <vsca
16381638
; RV32-NEXT: sw a0, 8(sp)
16391639
; RV32-NEXT: sw a1, 12(sp)
16401640
; RV32-NEXT: addi a0, sp, 8
1641-
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
1642-
; RV32-NEXT: vlse64.v v10, (a0), zero
16431641
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1642+
; RV32-NEXT: vlse64.v v10, (a0), zero
16441643
; RV32-NEXT: vmadd.vv v10, v8, v9
16451644
; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, ma
16461645
; RV32-NEXT: vmerge.vvm v8, v8, v10, v0
@@ -1669,9 +1668,8 @@ define <vscale x 1 x i64> @vmadd_vx_nxv1i64_unmasked(<vscale x 1 x i64> %a, i64
16691668
; RV32-NEXT: sw a0, 8(sp)
16701669
; RV32-NEXT: sw a1, 12(sp)
16711670
; RV32-NEXT: addi a0, sp, 8
1672-
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
1673-
; RV32-NEXT: vlse64.v v10, (a0), zero
16741671
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1672+
; RV32-NEXT: vlse64.v v10, (a0), zero
16751673
; RV32-NEXT: vmadd.vv v10, v8, v9
16761674
; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, ma
16771675
; RV32-NEXT: vmv.v.v v8, v10
@@ -1713,9 +1711,8 @@ define <vscale x 1 x i64> @vmadd_vx_nxv1i64_ta(<vscale x 1 x i64> %a, i64 %b, <v
17131711
; RV32-NEXT: sw a0, 8(sp)
17141712
; RV32-NEXT: sw a1, 12(sp)
17151713
; RV32-NEXT: addi a0, sp, 8
1716-
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
1717-
; RV32-NEXT: vlse64.v v10, (a0), zero
17181714
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1715+
; RV32-NEXT: vlse64.v v10, (a0), zero
17191716
; RV32-NEXT: vmadd.vv v10, v8, v9
17201717
; RV32-NEXT: vmerge.vvm v8, v8, v10, v0
17211718
; RV32-NEXT: addi sp, sp, 16
@@ -1776,9 +1773,8 @@ define <vscale x 2 x i64> @vmadd_vx_nxv2i64(<vscale x 2 x i64> %a, i64 %b, <vsca
17761773
; RV32-NEXT: sw a0, 8(sp)
17771774
; RV32-NEXT: sw a1, 12(sp)
17781775
; RV32-NEXT: addi a0, sp, 8
1779-
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
1780-
; RV32-NEXT: vlse64.v v12, (a0), zero
17811776
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1777+
; RV32-NEXT: vlse64.v v12, (a0), zero
17821778
; RV32-NEXT: vmadd.vv v12, v8, v10
17831779
; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, ma
17841780
; RV32-NEXT: vmerge.vvm v8, v8, v12, v0
@@ -1807,9 +1803,8 @@ define <vscale x 2 x i64> @vmadd_vx_nxv2i64_unmasked(<vscale x 2 x i64> %a, i64
18071803
; RV32-NEXT: sw a0, 8(sp)
18081804
; RV32-NEXT: sw a1, 12(sp)
18091805
; RV32-NEXT: addi a0, sp, 8
1810-
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
1811-
; RV32-NEXT: vlse64.v v12, (a0), zero
18121806
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1807+
; RV32-NEXT: vlse64.v v12, (a0), zero
18131808
; RV32-NEXT: vmadd.vv v12, v8, v10
18141809
; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, ma
18151810
; RV32-NEXT: vmv.v.v v8, v12
@@ -1851,9 +1846,8 @@ define <vscale x 2 x i64> @vmadd_vx_nxv2i64_ta(<vscale x 2 x i64> %a, i64 %b, <v
18511846
; RV32-NEXT: sw a0, 8(sp)
18521847
; RV32-NEXT: sw a1, 12(sp)
18531848
; RV32-NEXT: addi a0, sp, 8
1854-
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
1855-
; RV32-NEXT: vlse64.v v12, (a0), zero
18561849
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1850+
; RV32-NEXT: vlse64.v v12, (a0), zero
18571851
; RV32-NEXT: vmadd.vv v12, v8, v10
18581852
; RV32-NEXT: vmerge.vvm v8, v8, v12, v0
18591853
; RV32-NEXT: addi sp, sp, 16
@@ -1914,9 +1908,8 @@ define <vscale x 4 x i64> @vmadd_vx_nxv4i64(<vscale x 4 x i64> %a, i64 %b, <vsca
19141908
; RV32-NEXT: sw a0, 8(sp)
19151909
; RV32-NEXT: sw a1, 12(sp)
19161910
; RV32-NEXT: addi a0, sp, 8
1917-
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
1918-
; RV32-NEXT: vlse64.v v16, (a0), zero
19191911
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
1912+
; RV32-NEXT: vlse64.v v16, (a0), zero
19201913
; RV32-NEXT: vmadd.vv v16, v8, v12
19211914
; RV32-NEXT: vsetvli zero, zero, e64, m4, tu, ma
19221915
; RV32-NEXT: vmerge.vvm v8, v8, v16, v0
@@ -1945,9 +1938,8 @@ define <vscale x 4 x i64> @vmadd_vx_nxv4i64_unmasked(<vscale x 4 x i64> %a, i64
19451938
; RV32-NEXT: sw a0, 8(sp)
19461939
; RV32-NEXT: sw a1, 12(sp)
19471940
; RV32-NEXT: addi a0, sp, 8
1948-
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
1949-
; RV32-NEXT: vlse64.v v16, (a0), zero
19501941
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
1942+
; RV32-NEXT: vlse64.v v16, (a0), zero
19511943
; RV32-NEXT: vmadd.vv v16, v8, v12
19521944
; RV32-NEXT: vsetvli zero, zero, e64, m4, tu, ma
19531945
; RV32-NEXT: vmv.v.v v8, v16
@@ -1989,9 +1981,8 @@ define <vscale x 4 x i64> @vmadd_vx_nxv4i64_ta(<vscale x 4 x i64> %a, i64 %b, <v
19891981
; RV32-NEXT: sw a0, 8(sp)
19901982
; RV32-NEXT: sw a1, 12(sp)
19911983
; RV32-NEXT: addi a0, sp, 8
1992-
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
1993-
; RV32-NEXT: vlse64.v v16, (a0), zero
19941984
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
1985+
; RV32-NEXT: vlse64.v v16, (a0), zero
19951986
; RV32-NEXT: vmadd.vv v16, v8, v12
19961987
; RV32-NEXT: vmerge.vvm v8, v8, v16, v0
19971988
; RV32-NEXT: addi sp, sp, 16
@@ -2054,9 +2045,8 @@ define <vscale x 8 x i64> @vmadd_vx_nxv8i64(<vscale x 8 x i64> %a, i64 %b, <vsca
20542045
; RV32-NEXT: sw a0, 8(sp)
20552046
; RV32-NEXT: sw a1, 12(sp)
20562047
; RV32-NEXT: addi a0, sp, 8
2057-
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
2058-
; RV32-NEXT: vlse64.v v24, (a0), zero
20592048
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2049+
; RV32-NEXT: vlse64.v v24, (a0), zero
20602050
; RV32-NEXT: vmadd.vv v24, v8, v16
20612051
; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma
20622052
; RV32-NEXT: vmerge.vvm v8, v8, v24, v0
@@ -2085,9 +2075,8 @@ define <vscale x 8 x i64> @vmadd_vx_nxv8i64_unmasked(<vscale x 8 x i64> %a, i64
20852075
; RV32-NEXT: sw a0, 8(sp)
20862076
; RV32-NEXT: sw a1, 12(sp)
20872077
; RV32-NEXT: addi a0, sp, 8
2088-
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
2089-
; RV32-NEXT: vlse64.v v24, (a0), zero
20902078
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2079+
; RV32-NEXT: vlse64.v v24, (a0), zero
20912080
; RV32-NEXT: vmadd.vv v24, v8, v16
20922081
; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma
20932082
; RV32-NEXT: vmv.v.v v8, v24
@@ -2130,9 +2119,8 @@ define <vscale x 8 x i64> @vmadd_vx_nxv8i64_ta(<vscale x 8 x i64> %a, i64 %b, <v
21302119
; RV32-NEXT: sw a0, 8(sp)
21312120
; RV32-NEXT: sw a1, 12(sp)
21322121
; RV32-NEXT: addi a0, sp, 8
2133-
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
2134-
; RV32-NEXT: vlse64.v v24, (a0), zero
21352122
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2123+
; RV32-NEXT: vlse64.v v24, (a0), zero
21362124
; RV32-NEXT: vmadd.vv v24, v8, v16
21372125
; RV32-NEXT: vmerge.vvm v8, v8, v24, v0
21382126
; RV32-NEXT: addi sp, sp, 16

0 commit comments

Comments
 (0)