From 3f551352dc7db31ad4ce4743d7cf3eb54619f18a Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 23 Jan 2025 12:36:29 +0800 Subject: [PATCH 1/2] Precommit tests --- llvm/test/CodeGen/RISCV/rvv/vl-opt.ll | 15 +++++++ llvm/test/CodeGen/RISCV/rvv/vl-opt.mir | 61 ++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll index 3e49da014d56f..82cb72d5996e2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll @@ -194,3 +194,18 @@ define @dont_optimize_tied_def( %a, %2 } +define void @optimize_ternary_use( %a, %b, %c, ptr %p, iXLen %vl) { +; CHECK-LABEL: optimize_ternary_use: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v14, v8 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmadd.vv v14, v10, v12 +; CHECK-NEXT: vse32.v v14, (a0) +; CHECK-NEXT: ret + %1 = zext %a to + %2 = mul %b, %1 + %3 = add %2, %c + call void @llvm.riscv.vse( %3, ptr %p, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir index e753a0b7ea378..229372bb3e1b2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir @@ -209,3 +209,64 @@ body: | bb.1: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ PseudoRET +... 
+--- +# Can reduce %x even though %y uses it as a passthru, because %y's inactive elements aren't demanded +name: passthru_not_demanded +body: | + bb.0: + ; CHECK-LABEL: name: passthru_not_demanded + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ +... +--- +# Can't reduce %x because %y uses it as a passthru, and %y's inactive elements are demanded by %z +name: passthru_demanded +body: | + bb.0: + ; CHECK-LABEL: name: passthru_demanded + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */ + %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */ +... 
+--- +# Can reduce %x even though it is passed through a chain of passthrus (%y, %z, %a), because the inactive elements aren't ultimately demanded by %b +name: passthru_not_demanded_passthru_chain +body: | + bb.0: + ; CHECK-LABEL: name: passthru_not_demanded_passthru_chain + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 %y, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: %a:vr = PseudoVADD_VV_M1 %z, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: %b:vr = PseudoVADD_VV_M1 $noreg, %a, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + %z:vr = PseudoVADD_VV_M1 %y, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + %a:vr = PseudoVADD_VV_M1 %z, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + %b:vr = PseudoVADD_VV_M1 $noreg, %a, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ +... 
+--- +# Can't reduce %x because %y uses it as a passthru, and %y's inactive elements are ultimately demanded in %b +name: passthru_demanded_passthru_chain +body: | + bb.0: + ; CHECK-LABEL: name: passthru_demanded_passthru_chain + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 %y, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: %a:vr = PseudoVADD_VV_M1 %z, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: %b:vr = PseudoVADD_VV_M1 $noreg, %a, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */ + %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + %z:vr = PseudoVADD_VV_M1 %y, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + %a:vr = PseudoVADD_VV_M1 %z, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + %b:vr = PseudoVADD_VV_M1 $noreg, %a, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */ +... 
From f77d01de8368423659210804f903e2e846ff4ebf Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 27 Jan 2025 22:09:10 +0800 Subject: [PATCH 2/2] [RISCV][VLOPT] Allow users that are passthrus if tail elements aren't demanded --- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 31 +++++++++++-------- llvm/test/CodeGen/RISCV/rvv/vl-opt.ll | 24 ++++++++++----- llvm/test/CodeGen/RISCV/rvv/vl-opt.mir | 4 +-- llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll | 36 ++++++++-------------- 4 files changed, 49 insertions(+), 46 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 6c4d1b48b6ffc..cd866f04af20e 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -1188,6 +1188,25 @@ RISCVVLOptimizer::getMinimumVLForUser(MachineOperand &UserOp) { return std::nullopt; } + unsigned VLOpNum = RISCVII::getVLOpNum(Desc); + const MachineOperand &VLOp = UserMI.getOperand(VLOpNum); + // Looking for an immediate or a register VL that isn't X0. + assert((!VLOp.isReg() || VLOp.getReg() != RISCV::X0) && + "Did not expect X0 VL"); + + // If the user is a passthru it will read the elements past VL, so + // abort if any of the elements past VL are demanded. + if (UserOp.isTied()) { + assert(UserOp.getOperandNo() == UserMI.getNumExplicitDefs() && + RISCVII::isFirstDefTiedToFirstUse(UserMI.getDesc())); + auto DemandedVL = DemandedVLs[&UserMI]; + if (!DemandedVL || !RISCV::isVLKnownLE(*DemandedVL, VLOp)) { + LLVM_DEBUG(dbgs() << " Abort because user is passthru in " + "instruction with demanded tail\n"); + return std::nullopt; + } + } + // Instructions like reductions may use a vector register as a scalar // register. In this case, we should treat it as only reading the first lane. 
if (isVectorOpUsedAsScalarOp(UserOp)) { @@ -1200,12 +1219,6 @@ RISCVVLOptimizer::getMinimumVLForUser(MachineOperand &UserOp) { return MachineOperand::CreateImm(1); } - unsigned VLOpNum = RISCVII::getVLOpNum(Desc); - const MachineOperand &VLOp = UserMI.getOperand(VLOpNum); - // Looking for an immediate or a register VL that isn't X0. - assert((!VLOp.isReg() || VLOp.getReg() != RISCV::X0) && - "Did not expect X0 VL"); - // If we know the demanded VL of UserMI, then we can reduce the VL it // requires. if (auto DemandedVL = DemandedVLs[&UserMI]) { @@ -1227,12 +1240,6 @@ std::optional RISCVVLOptimizer::checkUsers(MachineInstr &MI) { return std::nullopt; } - // If used as a passthru, elements past VL will be read. - if (UserOp.isTied()) { - LLVM_DEBUG(dbgs() << " Abort because user used as tied operand\n"); - return std::nullopt; - } - auto VLOp = getMinimumVLForUser(UserOp); if (!VLOp) return std::nullopt; diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll index 82cb72d5996e2..f0b05d2420b1a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll @@ -195,14 +195,22 @@ define @dont_optimize_tied_def( %a, %a, %b, %c, ptr %p, iXLen %vl) { -; CHECK-LABEL: optimize_ternary_use: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v14, v8 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmadd.vv v14, v10, v12 -; CHECK-NEXT: vse32.v v14, (a0) -; CHECK-NEXT: ret +; NOVLOPT-LABEL: optimize_ternary_use: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vzext.vf2 v14, v8 +; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; NOVLOPT-NEXT: vmadd.vv v14, v10, v12 +; NOVLOPT-NEXT: vse32.v v14, (a0) +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: optimize_ternary_use: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; VLOPT-NEXT: vzext.vf2 v14, v8 +; VLOPT-NEXT: vmadd.vv v14, v10, v12 +; VLOPT-NEXT: 
vse32.v v14, (a0) +; VLOPT-NEXT: ret %1 = zext %a to %2 = mul %b, %1 %3 = add %2, %c diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir index 229372bb3e1b2..78054c73d848f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir @@ -216,7 +216,7 @@ name: passthru_not_demanded body: | bb.0: ; CHECK-LABEL: name: passthru_not_demanded - ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ @@ -242,7 +242,7 @@ name: passthru_not_demanded_passthru_chain body: | bb.0: ; CHECK-LABEL: name: passthru_not_demanded_passthru_chain - ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 %y, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %a:vr = PseudoVADD_VV_M1 %z, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll index eb74b238c01b9..a29af3d5b54b0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll @@ -1638,9 +1638,8 @@ define @vmadd_vx_nxv1i64( %a, i64 %b, @vmadd_vx_nxv1i64_unmasked( %a, i64 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v 
v10, (a0), zero ; RV32-NEXT: vmadd.vv v10, v8, v9 ; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; RV32-NEXT: vmv.v.v v8, v10 @@ -1713,9 +1711,8 @@ define @vmadd_vx_nxv1i64_ta( %a, i64 %b, @vmadd_vx_nxv2i64( %a, i64 %b, @vmadd_vx_nxv2i64_unmasked( %a, i64 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vmadd.vv v12, v8, v10 ; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, ma ; RV32-NEXT: vmv.v.v v8, v12 @@ -1851,9 +1846,8 @@ define @vmadd_vx_nxv2i64_ta( %a, i64 %b, @vmadd_vx_nxv4i64( %a, i64 %b, @vmadd_vx_nxv4i64_unmasked( %a, i64 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vmadd.vv v16, v8, v12 ; RV32-NEXT: vsetvli zero, zero, e64, m4, tu, ma ; RV32-NEXT: vmv.v.v v8, v16 @@ -1989,9 +1981,8 @@ define @vmadd_vx_nxv4i64_ta( %a, i64 %b, @vmadd_vx_nxv8i64( %a, i64 %b, @vmadd_vx_nxv8i64_unmasked( %a, i64 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a0), zero ; RV32-NEXT: vmadd.vv v24, v8, v16 ; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV32-NEXT: vmv.v.v v8, v24 @@ -2130,9 +2119,8 @@ define @vmadd_vx_nxv8i64_ta( %a, i64 %b,