diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index 4d5fa79389ea6..b0282b72c6a8d 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -33,6 +33,7 @@ add_llvm_target(RISCVCodeGen RISCVMakeCompressible.cpp RISCVExpandAtomicPseudoInsts.cpp RISCVExpandPseudoInsts.cpp + RISCVFoldMasks.cpp RISCVFrameLowering.cpp RISCVGatherScatterLowering.cpp RISCVInsertVSETVLI.cpp diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h index 3d8e33dc716ea..4e870d444120c 100644 --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -45,6 +45,9 @@ void initializeRISCVMakeCompressibleOptPass(PassRegistry &); FunctionPass *createRISCVGatherScatterLoweringPass(); void initializeRISCVGatherScatterLoweringPass(PassRegistry &); +FunctionPass *createRISCVFoldMasksPass(); +void initializeRISCVFoldMasksPass(PassRegistry &); + FunctionPass *createRISCVOptWInstrsPass(); void initializeRISCVOptWInstrsPass(PassRegistry &); diff --git a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp new file mode 100644 index 0000000000000..d1c77a6cc7756 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp @@ -0,0 +1,174 @@ +//===- RISCVFoldMasks.cpp - MI Vector Pseudo Mask Peepholes ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// +// +// This pass performs various peephole optimisations that fold masks into vector +// pseudo instructions after instruction selection. 
+// +// Currently it converts +// PseudoVMERGE_VVM %false, %false, %true, %allonesmask, %vl, %sew +// -> +// PseudoVMV_V_V %false, %true, %vl, %sew +// +//===---------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVSubtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "riscv-fold-masks" + +namespace { + +class RISCVFoldMasks : public MachineFunctionPass { +public: + static char ID; + const TargetInstrInfo *TII; + MachineRegisterInfo *MRI; + const TargetRegisterInfo *TRI; + RISCVFoldMasks() : MachineFunctionPass(ID) { + initializeRISCVFoldMasksPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::IsSSA); + } + + StringRef getPassName() const override { return "RISC-V Fold Masks"; } + +private: + bool convertVMergeToVMv(MachineInstr &MI, MachineInstr *MaskDef); + + bool isAllOnesMask(MachineInstr *MaskCopy); +}; + +} // namespace + +char RISCVFoldMasks::ID = 0; + +INITIALIZE_PASS(RISCVFoldMasks, DEBUG_TYPE, "RISC-V Fold Masks", false, false) + +bool RISCVFoldMasks::isAllOnesMask(MachineInstr *MaskCopy) { + if (!MaskCopy) + return false; + assert(MaskCopy->isCopy() && MaskCopy->getOperand(0).getReg() == RISCV::V0); + Register SrcReg = + TRI->lookThruCopyLike(MaskCopy->getOperand(1).getReg(), MRI); + if (!SrcReg.isVirtual()) + return false; + MachineInstr *SrcDef = MRI->getVRegDef(SrcReg); + if (!SrcDef) + return false; + + // TODO: Check that the VMSET is the expected bitwidth? The pseudo has + // undefined behaviour if it's the wrong bitwidth, so we could choose to + // assume that it's all-ones? Same applies to its VL. 
+ switch (SrcDef->getOpcode()) { + case RISCV::PseudoVMSET_M_B1: + case RISCV::PseudoVMSET_M_B2: + case RISCV::PseudoVMSET_M_B4: + case RISCV::PseudoVMSET_M_B8: + case RISCV::PseudoVMSET_M_B16: + case RISCV::PseudoVMSET_M_B32: + case RISCV::PseudoVMSET_M_B64: + return true; + default: + return false; + } +} + +// Transform (VMERGE_VVM_ false, false, true, allones, vl, sew) to +// (VMV_V_V_ false, true, vl, sew). It may decrease uses of VMSET. +bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI, MachineInstr *V0Def) { +#define CASE_VMERGE_TO_VMV(lmul) \ + case RISCV::PseudoVMERGE_VVM_##lmul: \ + NewOpc = RISCV::PseudoVMV_V_V_##lmul; \ + break; + unsigned NewOpc; + switch (MI.getOpcode()) { + default: + llvm_unreachable("Expected VMERGE_VVM_ instruction."); + CASE_VMERGE_TO_VMV(MF8) + CASE_VMERGE_TO_VMV(MF4) + CASE_VMERGE_TO_VMV(MF2) + CASE_VMERGE_TO_VMV(M1) + CASE_VMERGE_TO_VMV(M2) + CASE_VMERGE_TO_VMV(M4) + CASE_VMERGE_TO_VMV(M8) + } + + Register MergeReg = MI.getOperand(1).getReg(); + Register FalseReg = MI.getOperand(2).getReg(); + // Check merge == false (or merge == undef) + if (MergeReg != RISCV::NoRegister && TRI->lookThruCopyLike(MergeReg, MRI) != + TRI->lookThruCopyLike(FalseReg, MRI)) + return false; + + assert(MI.getOperand(4).isReg() && MI.getOperand(4).getReg() == RISCV::V0); + if (!isAllOnesMask(V0Def)) + return false; + + MI.setDesc(TII->get(NewOpc)); + MI.removeOperand(1); // Merge operand + MI.tieOperands(0, 1); // Tie false to dest + MI.removeOperand(3); // Mask operand + MI.addOperand( + MachineOperand::CreateImm(RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)); + + // vmv.v.v doesn't have a mask operand, so we may be able to inflate the + // register class for the destination and merge operands e.g. 
VRNoV0 -> VR + MRI->recomputeRegClass(MI.getOperand(0).getReg()); + MRI->recomputeRegClass(MI.getOperand(1).getReg()); + return true; +} + +bool RISCVFoldMasks::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + // Skip if the vector extension is not enabled. + const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>(); + if (!ST.hasVInstructions()) + return false; + + TII = ST.getInstrInfo(); + MRI = &MF.getRegInfo(); + TRI = MRI->getTargetRegisterInfo(); + + bool Changed = false; + + // Masked pseudos coming out of isel will have their mask operand in the form: + // + // $v0:vr = COPY %mask:vr + // %x:vr = Pseudo_MASK %a:vr, %b:vr, $v0:vr + // + // Because $v0 isn't in SSA, keep track of it so we can check the mask operand + // on each pseudo. + MachineInstr *CurrentV0Def; + for (MachineBasicBlock &MBB : MF) { + CurrentV0Def = nullptr; + for (MachineInstr &MI : MBB) { + unsigned BaseOpc = RISCV::getRVVMCOpcode(MI.getOpcode()); + if (BaseOpc == RISCV::VMERGE_VVM) + Changed |= convertVMergeToVMv(MI, CurrentV0Def); + + if (MI.definesRegister(RISCV::V0, TRI)) + CurrentV0Def = &MI; + } + } + + return Changed; +} + +FunctionPass *llvm::createRISCVFoldMasksPass() { return new RISCVFoldMasks(); } diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 94d1994895325..c2cac993fe13c 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3685,40 +3685,6 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { return true; } -// Transform (VMERGE_VVM_ false, false, true, allones, vl, sew) to -// (VMV_V_V_ false, true, vl, sew). It may decrease uses of VMSET. 
-bool RISCVDAGToDAGISel::performVMergeToVMv(SDNode *N) { -#define CASE_VMERGE_TO_VMV(lmul) \ - case RISCV::PseudoVMERGE_VVM_##lmul: \ - NewOpc = RISCV::PseudoVMV_V_V_##lmul; \ - break; - unsigned NewOpc; - switch (N->getMachineOpcode()) { - default: - llvm_unreachable("Expected VMERGE_VVM_ instruction."); - CASE_VMERGE_TO_VMV(MF8) - CASE_VMERGE_TO_VMV(MF4) - CASE_VMERGE_TO_VMV(MF2) - CASE_VMERGE_TO_VMV(M1) - CASE_VMERGE_TO_VMV(M2) - CASE_VMERGE_TO_VMV(M4) - CASE_VMERGE_TO_VMV(M8) - } - - if (!usesAllOnesMask(N, /* MaskOpIdx */ 3)) - return false; - - SDLoc DL(N); - SDValue PolicyOp = - CurDAG->getTargetConstant(/*TUMU*/ 0, DL, Subtarget->getXLenVT()); - SDNode *Result = CurDAG->getMachineNode( - NewOpc, DL, N->getValueType(0), - {N->getOperand(1), N->getOperand(2), N->getOperand(4), N->getOperand(5), - PolicyOp}); - ReplaceUses(N, Result); - return true; -} - bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() { bool MadeChange = false; SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); @@ -3730,8 +3696,6 @@ bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() { if (IsVMerge(N) || IsVMv(N)) MadeChange |= performCombineVMergeAndVOps(N); - if (IsVMerge(N) && N->getOperand(0) == N->getOperand(1)) - MadeChange |= performVMergeToVMv(N); } return MadeChange; } diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 953ac097b9150..85683a3adc968 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -101,6 +101,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { initializeRISCVOptWInstrsPass(*PR); initializeRISCVPreRAExpandPseudoPass(*PR); initializeRISCVExpandPseudoPass(*PR); + initializeRISCVFoldMasksPass(*PR); initializeRISCVInsertVSETVLIPass(*PR); initializeRISCVInsertReadWriteCSRPass(*PR); initializeRISCVDAGToDAGISelPass(*PR); @@ -414,7 +415,10 @@ void RISCVPassConfig::addPreEmitPass2() { } void 
RISCVPassConfig::addMachineSSAOptimization() { + addPass(createRISCVFoldMasksPass()); + TargetPassConfig::addMachineSSAOptimization(); + if (EnableMachineCombiner) addPass(&MachineCombinerID); diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index cf0826096bd41..414b721661021 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -82,6 +82,7 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: RISC-V DAG->DAG Pattern Instruction Selection ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions +; CHECK-NEXT: RISC-V Fold Masks ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Early Tail Duplication ; CHECK-NEXT: Optimize machine instruction PHIs diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir new file mode 100644 index 0000000000000..442419efb83ca --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir @@ -0,0 +1,71 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 +# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v -run-pass=riscv-fold-masks \ +# RUN: -verify-machineinstrs | FileCheck %s + +--- +name: undef_passthru +body: | + bb.0: + liveins: $x1, $v8, $v9 + ; CHECK-LABEL: name: undef_passthru + ; CHECK: liveins: $x1, $v8, $v9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %false:vr = COPY $v8 + ; CHECK-NEXT: %true:vr = COPY $v9 + ; CHECK-NEXT: %avl:gprnox0 = COPY $x1 + ; CHECK-NEXT: %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5 /* e32 */ + ; CHECK-NEXT: $v0 = COPY %mask + ; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %false, %true, %avl, 5 /* e32 */, 0 /* tu, mu */ + %false:vr = COPY $v8 + %true:vr = COPY $v9 + %avl:gprnox0 = COPY $x1 + %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5 + $v0 = COPY %mask + %x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, $v0, %avl, 5 +... 
+--- +name: undef_false +body: | + bb.0: + liveins: $x1, $v8, $v9 + ; CHECK-LABEL: name: undef_false + ; CHECK: liveins: $x1, $v8, $v9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %pt:vrnov0 = COPY $v8 + ; CHECK-NEXT: %false:vr = COPY $noreg + ; CHECK-NEXT: %true:vr = COPY $v9 + ; CHECK-NEXT: %avl:gprnox0 = COPY $x1 + ; CHECK-NEXT: %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5 /* e32 */ + ; CHECK-NEXT: $v0 = COPY %mask + ; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, %avl, 5 /* e32 */ + %pt:vrnov0 = COPY $v8 + %false:vr = COPY $noreg + %true:vr = COPY $v9 + %avl:gprnox0 = COPY $x1 + %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5 + $v0 = COPY %mask + %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, %avl, 5 +... +--- +name: equal_passthru_false +body: | + bb.0: + liveins: $x1, $v8, $v9 + ; CHECK-LABEL: name: equal_passthru_false + ; CHECK: liveins: $x1, $v8, $v9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %false:vr = COPY $v8 + ; CHECK-NEXT: %pt:vrnov0 = COPY $v8 + ; CHECK-NEXT: %true:vr = COPY $v9 + ; CHECK-NEXT: %avl:gprnox0 = COPY $x1 + ; CHECK-NEXT: %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5 /* e32 */ + ; CHECK-NEXT: $v0 = COPY %mask + ; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %false, %true, %avl, 5 /* e32 */, 0 /* tu, mu */ + %false:vr = COPY $v8 + %pt:vrnov0 = COPY $v8 + %true:vr = COPY $v9 + %avl:gprnox0 = COPY $x1 + %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5 + $v0 = COPY %mask + %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, %avl, 5 +...