diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 6c4d1b48b6ffc..cd866f04af20e 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1188,6 +1188,25 @@ RISCVVLOptimizer::getMinimumVLForUser(MachineOperand &UserOp) {
     return std::nullopt;
   }
 
+  unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
+  const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
+  // Looking for an immediate or a register VL that isn't X0.
+  assert((!VLOp.isReg() || VLOp.getReg() != RISCV::X0) &&
+         "Did not expect X0 VL");
+
+  // If the user is a passthru it will read the elements past VL, so
+  // abort if any of the elements past VL are demanded.
+  if (UserOp.isTied()) {
+    assert(UserOp.getOperandNo() == UserMI.getNumExplicitDefs() &&
+           RISCVII::isFirstDefTiedToFirstUse(UserMI.getDesc()));
+    auto DemandedVL = DemandedVLs[&UserMI];
+    if (!DemandedVL || !RISCV::isVLKnownLE(*DemandedVL, VLOp)) {
+      LLVM_DEBUG(dbgs() << "  Abort because user is passthru in "
+                           "instruction with demanded tail\n");
+      return std::nullopt;
+    }
+  }
+
   // Instructions like reductions may use a vector register as a scalar
   // register. In this case, we should treat it as only reading the first lane.
   if (isVectorOpUsedAsScalarOp(UserOp)) {
@@ -1200,12 +1219,6 @@ RISCVVLOptimizer::getMinimumVLForUser(MachineOperand &UserOp) {
     return MachineOperand::CreateImm(1);
   }
 
-  unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
-  const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
-  // Looking for an immediate or a register VL that isn't X0.
-  assert((!VLOp.isReg() || VLOp.getReg() != RISCV::X0) &&
-         "Did not expect X0 VL");
-
   // If we know the demanded VL of UserMI, then we can reduce the VL it
   // requires.
   if (auto DemandedVL = DemandedVLs[&UserMI]) {
@@ -1227,12 +1240,6 @@ std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
       return std::nullopt;
     }
 
-    // If used as a passthru, elements past VL will be read.
-    if (UserOp.isTied()) {
-      LLVM_DEBUG(dbgs() << "  Abort because user used as tied operand\n");
-      return std::nullopt;
-    }
-
     auto VLOp = getMinimumVLForUser(UserOp);
     if (!VLOp)
       return std::nullopt;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
index 3e49da014d56f..f0b05d2420b1a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
@@ -194,3 +194,26 @@ define <vscale x 4 x i32> @dont_optimize_tied_def(<vscale x 4 x i32> %a,
   ret <vscale x 4 x i32> %2
 }
 
+define void @optimize_ternary_use(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, ptr %p, iXLen %vl) {
+; NOVLOPT-LABEL: optimize_ternary_use:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vzext.vf2 v14, v8
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmadd.vv v14, v10, v12
+; NOVLOPT-NEXT:    vse32.v v14, (a0)
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: optimize_ternary_use:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vzext.vf2 v14, v8
+; VLOPT-NEXT:    vmadd.vv v14, v10, v12
+; VLOPT-NEXT:    vse32.v v14, (a0)
+; VLOPT-NEXT:    ret
+  %1 = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
+  %2 = mul <vscale x 4 x i32> %b, %1
+  %3 = add <vscale x 4 x i32> %2, %c
+  call void @llvm.riscv.vse(<vscale x 4 x i32> %3, ptr %p, iXLen %vl)
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
index e753a0b7ea378..78054c73d848f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -209,3 +209,64 @@ body: |
   bb.1:
     %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     PseudoRET
+...
+---
+# Can reduce %x even though %y uses it as a passthru, because %y's inactive elements aren't demanded
+name: passthru_not_demanded
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: passthru_not_demanded
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+...
+---
+# Can't reduce %x because %y uses it as a passthru, and %y's inactive elements are demanded by %z
+name: passthru_demanded
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: passthru_demanded
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */
+    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */
+...
+---
+# Can reduce %x even though %y uses it as a passthru, because %y's inactive elements aren't demanded
+name: passthru_not_demanded_passthru_chain
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: passthru_not_demanded_passthru_chain
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 %y, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %a:vr = PseudoVADD_VV_M1 %z, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %b:vr = PseudoVADD_VV_M1 $noreg, %a, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    %z:vr = PseudoVADD_VV_M1 %y, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    %a:vr = PseudoVADD_VV_M1 %z, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    %b:vr = PseudoVADD_VV_M1 $noreg, %a, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+...
+---
+# Can't reduce %x because %y uses it as a passthru, and %y's inactive elements are ultimately demanded in %b
+name: passthru_demanded_passthru_chain
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: passthru_demanded_passthru_chain
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 %y, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %a:vr = PseudoVADD_VV_M1 %z, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %b:vr = PseudoVADD_VV_M1 $noreg, %a, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */
+    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    %z:vr = PseudoVADD_VV_M1 %y, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    %a:vr = PseudoVADD_VV_M1 %z, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    %b:vr = PseudoVADD_VV_M1 $noreg, %a, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */
+...
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll
index eb74b238c01b9..a29af3d5b54b0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll
@@ -1638,9 +1638,8 @@ define @vmadd_vx_nxv1i64( %a, i64 %b, @vmadd_vx_nxv1i64_unmasked( %a, i64
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT:    vlse64.v v10, (a0), zero
 ; RV32-NEXT:    vmadd.vv v10, v8, v9
 ; RV32-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
 ; RV32-NEXT:    vmv.v.v v8, v10
@@ -1713,9 +1711,8 @@ define @vmadd_vx_nxv1i64_ta( %a, i64 %b, @vmadd_vx_nxv2i64( %a, i64 %b, @vmadd_vx_nxv2i64_unmasked( %a, i64
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT:    vlse64.v v12, (a0), zero
 ; RV32-NEXT:    vmadd.vv v12, v8, v10
 ; RV32-NEXT:    vsetvli zero, zero, e64, m2, tu, ma
 ; RV32-NEXT:    vmv.v.v v8, v12
@@ -1851,9 +1846,8 @@ define @vmadd_vx_nxv2i64_ta( %a, i64 %b, @vmadd_vx_nxv4i64( %a, i64 %b, @vmadd_vx_nxv4i64_unmasked( %a, i64
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vmadd.vv v16, v8, v12
 ; RV32-NEXT:    vsetvli zero, zero, e64, m4, tu, ma
 ; RV32-NEXT:    vmv.v.v v8, v16
@@ -1989,9 +1981,8 @@ define @vmadd_vx_nxv4i64_ta( %a, i64 %b, @vmadd_vx_nxv8i64( %a, i64 %b, @vmadd_vx_nxv8i64_unmasked( %a, i64
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a0), zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v24, (a0), zero
 ; RV32-NEXT:    vmadd.vv v24, v8, v16
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, tu, ma
 ; RV32-NEXT:    vmv.v.v v8, v24
@@ -2130,9 +2119,8 @@ define @vmadd_vx_nxv8i64_ta( %a, i64 %b,