From b322a5a47b21482687feb10c8825cc5cfe0bd0f7 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Tue, 31 Oct 2023 14:15:34 +0000
Subject: [PATCH 1/6] [RISCV] Move performCombineVMergeAndVOps into
 RISCVFoldMasks

Continuing on from 72e6c1c70d5e07bbc8cb7cae2ed915108daf93aa, this moves the
vmerge fold peephole into RISCVFoldMasks.

Instead of reasoning about chain and glue nodes, this sinks the True operand
to just before the vmerge pseudo so it can reuse its VL/mask/false/passthru
operands. Most of the time we're able to do this without any hitches because
we check that the vmerge is the only user of True. However, in certain cases
an instruction between True and the vmerge has a write-after-read dependency
on a physical register, and that instruction needs to be sunk too, e.g. fcsr
in @vmerge_vfcvt_rm in
test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll

We also need to implement the masked -> unmasked peephole, since the vmerge
fold peephole can produce new masked pseudos with all-ones masks that should
then be unmasked. However, that peephole is still kept in RISCVISelDAGToDAG
to minimize the diff for this patch. It will be switched over in a later
patch.
---
 llvm/lib/Target/RISCV/RISCVFoldMasks.cpp      | 362 +++++++++++++++++-
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp   | 303 ---------------
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h     |   2 -
 .../RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll |   2 +-
 .../RISCV/rvv/rvv-peephole-vmerge-vops.ll     |  18 +-
 5 files changed, 354 insertions(+), 333 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
index 871fc76d7d988..2ec561c7bad2f 100644
--- a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
@@ -9,16 +9,29 @@
 // This pass performs various peephole optimisations that fold masks into vector
 // pseudo instructions after instruction selection.
 //
-// Currently it converts
-// PseudoVMERGE_VVM %false, %false, %true, %allonesmask, %vl, %sew
+// It performs the following transforms:
+//
+// %true = PseudoFOO %false, ..., %vl, %sew
+// %x = PseudoVMERGE_VVM %false, %false, %true, %mask, %vl, %sew
+// ->
+// %x = PseudoFOO_MASK %false, ..., %mask, %vl, %sew
+//
+// %x = PseudoFOO_MASK ..., %allonesmask, %vl, %sew
+// ->
+// %x = PseudoFOO ..., %vl, %sew
+//
+// %x = PseudoVMERGE_VVM %false, %false, %true, %allonesmask, %vl, %sew
 // ->
-// PseudoVMV_V_V %false, %true, %vl, %sew
+// %x = PseudoVMV_V_V %false, %true, %vl, %sew
 //
 //===---------------------------------------------------------------------===//

 #include "RISCV.h"
 #include "RISCVISelDAGToDAG.h"
+#include "RISCVInstrInfo.h"
 #include "RISCVSubtarget.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
@@ -49,10 +62,12 @@ class RISCVFoldMasks : public MachineFunctionPass {
   StringRef getPassName() const override { return "RISC-V Fold Masks"; }

 private:
-  bool convertToUnmasked(MachineInstr &MI, MachineInstr *MaskDef);
+  bool foldVMergeIntoOps(MachineInstr &MI, MachineInstr *MaskDef);
   bool convertVMergeToVMv(MachineInstr &MI, MachineInstr *MaskDef);
+  bool convertToUnmasked(MachineInstr &MI, MachineInstr *MaskDef);

   bool isAllOnesMask(MachineInstr *MaskDef);
+  bool isOpSameAs(const MachineOperand &LHS, const MachineOperand &RHS);
 };

 } // namespace
@@ -64,13 +79,16 @@ INITIALIZE_PASS(RISCVFoldMasks, DEBUG_TYPE, "RISC-V Fold Masks", false, false)
 bool RISCVFoldMasks::isAllOnesMask(MachineInstr *MaskDef) {
   if (!MaskDef)
     return false;
-  assert(MaskDef->isCopy() && MaskDef->getOperand(0).getReg() == RISCV::V0);
-  Register SrcReg = TRI->lookThruCopyLike(MaskDef->getOperand(1).getReg(), MRI);
-  if (!SrcReg.isVirtual())
-    return false;
-  MaskDef = MRI->getVRegDef(SrcReg);
-  if (!MaskDef)
-    return false;
+  if (MaskDef->isCopy()) {
+    assert(MaskDef->getOperand(0).getReg() == RISCV::V0);
+    Register SrcReg =
+        TRI->lookThruCopyLike(MaskDef->getOperand(1).getReg(), MRI);
+    if (!SrcReg.isVirtual())
+      return false;
+    MaskDef = MRI->getVRegDef(SrcReg);
+    if (!MaskDef)
+      return false;
+  }

   // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
   // undefined behaviour if it's the wrong bitwidth, so we could choose to
@@ -89,6 +107,313 @@ bool RISCVFoldMasks::isAllOnesMask(MachineInstr *MaskDef) {
   }
 }

+static unsigned getVMSetForLMul(RISCVII::VLMUL LMUL) {
+  switch (LMUL) {
+  case RISCVII::LMUL_F8:
+    return RISCV::PseudoVMSET_M_B1;
+  case RISCVII::LMUL_F4:
+    return RISCV::PseudoVMSET_M_B2;
+  case RISCVII::LMUL_F2:
+    return RISCV::PseudoVMSET_M_B4;
+  case RISCVII::LMUL_1:
+    return RISCV::PseudoVMSET_M_B8;
+  case RISCVII::LMUL_2:
+    return RISCV::PseudoVMSET_M_B16;
+  case RISCVII::LMUL_4:
+    return RISCV::PseudoVMSET_M_B32;
+  case RISCVII::LMUL_8:
+    return RISCV::PseudoVMSET_M_B64;
+  case RISCVII::LMUL_RESERVED:
+    llvm_unreachable("Unexpected LMUL");
+  }
+  llvm_unreachable("Unknown VLMUL enum");
+}
+
+/// Inserts an operand at Idx in MI, pushing back any operands.
+static void insertOperand(MachineInstr &MI, MachineOperand MO, unsigned Idx) {
+  SmallVector<MachineOperand> OpsToAddBack;
+  unsigned NumTailOps = MI.getNumOperands() - Idx;
+  for (unsigned I = 0; I < NumTailOps; I++) {
+    OpsToAddBack.push_back(MI.getOperand(Idx));
+    MI.removeOperand(Idx);
+  }
+  MI.addOperand(MO);
+  for (MachineOperand &TailOp : OpsToAddBack)
+    MI.addOperand(TailOp);
+}
+
+// Try to sink From to before To, also sinking any instructions between From and
+// To where there is a write-after-read dependency on a physical register.
+static bool sinkInstructionAndDeps(MachineInstr &From, MachineInstr &To) {
+  assert(From.getParent() == To.getParent());
+  SmallVector<MachineInstr *> Worklist, ToSink;
+  Worklist.push_back(&From);
+
+  // Rather than compute whether or not we saw a store for every instruction,
+  // just compute it once even if it's more conservative.
+  bool SawStore = false;
+  for (MachineBasicBlock::instr_iterator II = From.getIterator();
+       II != To.getIterator(); II++) {
+    if (II->mayStore()) {
+      SawStore = true;
+      break;
+    }
+  }
+
+  while (!Worklist.empty()) {
+    MachineInstr *MI = Worklist.pop_back_val();
+
+    if (!MI->isSafeToMove(nullptr, SawStore))
+      return false;
+
+    SmallSet<Register, 8> Defs, Uses;
+    for (MachineOperand &Def : MI->all_defs())
+      Defs.insert(Def.getReg());
+    for (MachineOperand &Use : MI->all_uses())
+      Uses.insert(Use.getReg());
+
+    // If anything from [MI, To] uses a definition of MI, we can't sink it.
+    for (MachineBasicBlock::instr_iterator II = MI->getIterator();
+         II != To.getIterator(); II++) {
+      for (MachineOperand &Use : II->all_uses()) {
+        if (Defs.contains(Use.getReg()))
+          return false;
+      }
+    }
+
+    // If MI uses any physical registers, we need to sink any instructions after
+    // it where there might be a write-after-read dependency.
+    for (MachineBasicBlock::instr_iterator II = MI->getIterator();
+         II != To.getIterator(); II++) {
+      bool NeedsSink = any_of(II->all_defs(), [&Uses](MachineOperand &Def) {
+        return Def.getReg().isPhysical() && Uses.contains(Def.getReg());
+      });
+      if (NeedsSink)
+        Worklist.push_back(&*II);
+    }
+
+    ToSink.push_back(MI);
+  }
+
+  for (MachineInstr *MI : ToSink)
+    MI->moveBefore(&To);
+  return true;
+}
+
+// Returns true if LHS is the same register as RHS, or if LHS is undefined.
+bool RISCVFoldMasks::isOpSameAs(const MachineOperand &LHS,
+                                const MachineOperand &RHS) {
+  if (LHS.getReg() == RISCV::NoRegister)
+    return true;
+  if (RHS.getReg() == RISCV::NoRegister)
+    return false;
+  return TRI->lookThruCopyLike(LHS.getReg(), MRI) ==
+         TRI->lookThruCopyLike(RHS.getReg(), MRI);
+}
+
+// Try to fold away VMERGE_VVM instructions. We handle these cases:
+// -Masked TU VMERGE_VVM combined with an unmasked TA instruction folds to a
+// masked TU instruction. VMERGE_VVM must have the merge operand same as the
+// false operand.
+// -Masked TA VMERGE_VVM combined with an unmasked TA instruction folds to a
+// masked TA instruction.
+// -Unmasked TU VMERGE_VVM combined with a masked MU TA instruction folds to a
+// masked TU instruction. Both instructions must have the same merge operand.
+// VMERGE_VVM must have the merge operand same as the false operand.
+// Note: The VMERGE_VVM forms above (TA, and TU) refer to the policy implied,
+// not the pseudo name. That is, a TA VMERGE_VVM can be either the _TU pseudo
+// form with an IMPLICIT_DEF passthrough operand or the unsuffixed (TA) pseudo
+// form.
+bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,
+                                       MachineInstr *MaskDef) {
+  MachineOperand *True;
+  MachineOperand *Merge;
+  MachineOperand *False;
+
+  const unsigned BaseOpc = RISCV::getRVVMCOpcode(MI.getOpcode());
+  // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
+  if (BaseOpc == RISCV::VMV_V_V) {
+    Merge = &MI.getOperand(1);
+    False = &MI.getOperand(1);
+    True = &MI.getOperand(2);
+  } else if (BaseOpc == RISCV::VMERGE_VVM) {
+    Merge = &MI.getOperand(1);
+    False = &MI.getOperand(2);
+    True = &MI.getOperand(3);
+  } else
+    return false;
+
+  MachineInstr &TrueMI = *MRI->getVRegDef(True->getReg());
+
+  // We require that either merge and false are the same, or that merge
+  // is undefined.
+  if (!isOpSameAs(*Merge, *False))
+    return false;
+
+  // MI must be the only user of True.
+  if (!MRI->hasOneUse(True->getReg()))
+    return false;
+
+  unsigned TrueOpc = TrueMI.getOpcode();
+  const MCInstrDesc &TrueMCID = TrueMI.getDesc();
+  bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
+
+  bool IsMasked = false;
+  const RISCV::RISCVMaskedPseudoInfo *Info =
+      RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
+  if (!Info && HasTiedDest) {
+    Info = RISCV::getMaskedPseudoInfo(TrueOpc);
+    IsMasked = true;
+  }
+
+  if (!Info)
+    return false;
+
+  // When Mask is not a true mask, this transformation is illegal for some
+  // operations whose results are affected by mask, like viota.m.
+  if (Info->MaskAffectsResult && BaseOpc == RISCV::VMERGE_VVM &&
+      !isAllOnesMask(MaskDef))
+    return false;
+
+  MachineOperand &TrueMergeOp = TrueMI.getOperand(1);
+  if (HasTiedDest && TrueMergeOp.getReg() != RISCV::NoRegister) {
+    // The vmerge instruction must be TU.
+    // FIXME: This could be relaxed, but we need to handle the policy for the
+    // resulting op correctly.
+    if (Merge->getReg() == RISCV::NoRegister)
+      return false;
+    // Both the vmerge instruction and the True instruction must have the same
+    // merge operand.
+    if (!isOpSameAs(TrueMergeOp, *False))
+      return false;
+  }
+
+  if (IsMasked) {
+    assert(HasTiedDest && "Expected tied dest");
+    // The vmerge instruction must be TU.
+    if (Merge->getReg() == RISCV::NoRegister)
+      return false;
+    // The vmerge instruction must have an all 1s mask since we're going to keep
+    // the mask from the True instruction.
+    // FIXME: Support mask agnostic True instruction which would have an
+    // undef merge operand.
+    if (BaseOpc == RISCV::VMERGE_VVM && !isAllOnesMask(MaskDef))
+      return false;
+  }
+
+  // Skip if True has side effect.
+  // TODO: Support vleff and vlsegff.
+  if (TII->get(TrueOpc).hasUnmodeledSideEffects())
+    return false;
+
+  // The vector policy operand may be present for masked intrinsics
+  const MachineOperand &TrueVL =
+      TrueMI.getOperand(RISCVII::getVLOpNum(TrueMCID));
+
+  auto GetMinVL =
+      [](const MachineOperand &LHS,
+         const MachineOperand &RHS) -> std::optional<MachineOperand> {
+    if (LHS.isReg() && RHS.isReg() && LHS.getReg().isVirtual() &&
+        LHS.getReg() == RHS.getReg())
+      return LHS;
+    if (LHS.isImm() && LHS.getImm() == RISCV::VLMaxSentinel)
+      return RHS;
+    if (RHS.isImm() && RHS.getImm() == RISCV::VLMaxSentinel)
+      return LHS;
+    if (!LHS.isImm() || !RHS.isImm())
+      return std::nullopt;
+    return LHS.getImm() <= RHS.getImm() ? LHS : RHS;
+  };
+
+  // Because MI and True must have the same merge operand (or True's operand is
+  // implicit_def), the "effective" body is the minimum of their VLs.
+  const MachineOperand VL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
+  auto MinVL = GetMinVL(TrueVL, VL);
+  if (!MinVL)
+    return false;
+  bool VLChanged = !MinVL->isIdenticalTo(VL);
+
+  // If we end up changing the VL or mask of True, then we need to make sure it
+  // doesn't raise any observable fp exceptions, since changing the active
+  // elements will affect how fflags is set.
+  if (VLChanged || !IsMasked)
+    if (TrueMCID.mayRaiseFPException() &&
+        !TrueMI.getFlag(MachineInstr::MIFlag::NoFPExcept))
+      return false;
+
+  unsigned MaskedOpc = Info->MaskedPseudo;
+  const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
+#ifndef NDEBUG
+  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
+         "Expected instructions with mask have policy operand.");
+  assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
+                                         MCOI::TIED_TO) == 0 &&
+         "Expected instructions with mask have a tied dest.");
+#endif
+
+  // Sink True down to MI so that it can access MI's operands.
+  if (!sinkInstructionAndDeps(TrueMI, MI))
+    return false;
+
+  // Set the merge to the false operand of the merge.
+  TrueMI.getOperand(1).setReg(False->getReg());
+
+  // If we're converting it to a masked pseudo, reuse MI's mask.
+  if (!IsMasked) {
+    if (BaseOpc == RISCV::VMV_V_V) {
+      // If MI is a vmv.v.v, it won't have a mask operand. So insert an all-ones
+      // mask just before True.
+      unsigned VMSetOpc =
+          getVMSetForLMul(RISCVII::getLMul(MI.getDesc().TSFlags));
+      BuildMI(*MI.getParent(), TrueMI, MI.getDebugLoc(), TII->get(VMSetOpc))
+          .addDef(RISCV::V0)
+          .add(VL)
+          .add(TrueMI.getOperand(RISCVII::getSEWOpNum(TrueMCID)));
+    }
+
+    TrueMI.setDesc(MaskedMCID);
+
+    // TODO: Increment MaskOpIdx by number of explicit defs in tablegen?
+    unsigned MaskOpIdx = Info->MaskOpIdx + TrueMI.getNumExplicitDefs();
+    insertOperand(TrueMI, MachineOperand::CreateReg(RISCV::V0, false),
+                  MaskOpIdx);
+  }
+
+  // Update the AVL.
+  if (MinVL->isReg())
+    TrueMI.getOperand(RISCVII::getVLOpNum(MaskedMCID))
+        .ChangeToRegister(MinVL->getReg(), false);
+  else
+    TrueMI.getOperand(RISCVII::getVLOpNum(MaskedMCID))
+        .ChangeToImmediate(MinVL->getImm());
+
+  // Use a tumu policy, relaxing it to tail agnostic provided that the merge
+  // operand is undefined.
+  //
+  // However, if the VL became smaller than what the vmerge had originally, then
+  // elements past VL that were previously in the vmerge's body will have moved
+  // to the tail. In that case we always need to use tail undisturbed to
+  // preserve them.
+  uint64_t Policy = (Merge->getReg() == RISCV::NoRegister && !VLChanged)
+                        ? RISCVII::TAIL_AGNOSTIC
+                        : RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
+  TrueMI.getOperand(RISCVII::getVecPolicyOpNum(MaskedMCID)).setImm(Policy);
+
+  const TargetRegisterClass *V0RC =
+      TII->getRegClass(MaskedMCID, 0, TRI, *MI.getMF());
+
+  // The destination and passthru can no longer be in V0.
+  MRI->constrainRegClass(TrueMI.getOperand(0).getReg(), V0RC);
+  Register PassthruReg = TrueMI.getOperand(1).getReg();
+  if (PassthruReg != RISCV::NoRegister)
+    MRI->constrainRegClass(PassthruReg, V0RC);
+
+  MRI->replaceRegWith(MI.getOperand(0).getReg(), TrueMI.getOperand(0).getReg());
+  MI.eraseFromParent();
+
+  return true;
+}
+
 // Transform (VMERGE_VVM_<LMUL> false, false, true, allones, vl, sew) to
 // (VMV_V_V_<LMUL> false, true, vl, sew). It may decrease uses of VMSET.
bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI, MachineInstr *V0Def) { @@ -109,11 +434,8 @@ bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI, MachineInstr *V0Def) { CASE_VMERGE_TO_VMV(M8) } - Register MergeReg = MI.getOperand(1).getReg(); - Register FalseReg = MI.getOperand(2).getReg(); // Check merge == false (or merge == undef) - if (MergeReg != RISCV::NoRegister && TRI->lookThruCopyLike(MergeReg, MRI) != - TRI->lookThruCopyLike(FalseReg, MRI)) + if (!isOpSameAs(MI.getOperand(1), MI.getOperand(2))) return false; assert(MI.getOperand(4).isReg() && MI.getOperand(4).getReg() == RISCV::V0); @@ -202,11 +524,17 @@ bool RISCVFoldMasks::runOnMachineFunction(MachineFunction &MF) { // on each pseudo. MachineInstr *CurrentV0Def; for (MachineBasicBlock &MBB : MF) { + CurrentV0Def = nullptr; + for (MachineInstr &MI : make_early_inc_range(MBB)) { + Changed |= foldVMergeIntoOps(MI, CurrentV0Def); + if (MI.definesRegister(RISCV::V0, TRI)) + CurrentV0Def = &MI; + } + CurrentV0Def = nullptr; for (MachineInstr &MI : MBB) { - Changed |= convertToUnmasked(MI, CurrentV0Def); Changed |= convertVMergeToVMv(MI, CurrentV0Def); - + Changed |= convertToUnmasked(MI, CurrentV0Def); if (MI.definesRegister(RISCV::V0, TRI)) CurrentV0Def = &MI; } diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index ae7f1743b5239..07748ace31a81 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -160,8 +160,6 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() { CurDAG->setRoot(Dummy.getValue()); - MadeChange |= doPeepholeMergeVVMFold(); - // After we're done with everything else, convert IMPLICIT_DEF // passthru operands to NoRegister. This is required to workaround // an optimization deficiency in MachineCSE. This really should @@ -3432,307 +3430,6 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) { return true; } -static bool IsVMerge(SDNode *N) { - return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM; -} - -static bool IsVMv(SDNode *N) { - return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V; -} - -static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) { - switch (LMUL) { - case RISCVII::LMUL_F8: - return RISCV::PseudoVMSET_M_B1; - case RISCVII::LMUL_F4: - return RISCV::PseudoVMSET_M_B2; - case RISCVII::LMUL_F2: - return RISCV::PseudoVMSET_M_B4; - case RISCVII::LMUL_1: - return RISCV::PseudoVMSET_M_B8; - case RISCVII::LMUL_2: - return RISCV::PseudoVMSET_M_B16; - case RISCVII::LMUL_4: - return RISCV::PseudoVMSET_M_B32; - case RISCVII::LMUL_8: - return RISCV::PseudoVMSET_M_B64; - case RISCVII::LMUL_RESERVED: - llvm_unreachable("Unexpected LMUL"); - } - llvm_unreachable("Unknown VLMUL enum"); -} - -// Try to fold away VMERGE_VVM instructions. We handle these cases: -// -Masked TU VMERGE_VVM combined with an unmasked TA instruction instruction -// folds to a masked TU instruction. VMERGE_VVM must have have merge operand -// same as false operand. -// -Masked TA VMERGE_VVM combined with an unmasked TA instruction fold to a -// masked TA instruction. -// -Unmasked TU VMERGE_VVM combined with a masked MU TA instruction folds to -// masked TU instruction. Both instructions must have the same merge operand. -// VMERGE_VVM must have have merge operand same as false operand. -// Note: The VMERGE_VVM forms above (TA, and TU) refer to the policy implied, -// not the pseudo name. 
-// That is, a TA VMERGE_VVM can be either the _TU pseudo
-// form with an IMPLICIT_DEF passthrough operand or the unsuffixed (TA) pseudo
-// form.
-bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
-  SDValue Merge, False, True, VL, Mask, Glue;
-  // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
-  if (IsVMv(N)) {
-    Merge = N->getOperand(0);
-    False = N->getOperand(0);
-    True = N->getOperand(1);
-    VL = N->getOperand(2);
-    // A vmv.v.v won't have a Mask or Glue, instead we'll construct an all-ones
-    // mask later below.
-  } else {
-    assert(IsVMerge(N));
-    Merge = N->getOperand(0);
-    False = N->getOperand(1);
-    True = N->getOperand(2);
-    Mask = N->getOperand(3);
-    VL = N->getOperand(4);
-    // We always have a glue node for the mask at v0.
-    Glue = N->getOperand(N->getNumOperands() - 1);
-  }
-  assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
-  assert(!Glue || Glue.getValueType() == MVT::Glue);
-
-  // We require that either merge and false are the same, or that merge
-  // is undefined.
-  if (Merge != False && !isImplicitDef(Merge))
-    return false;
-
-  assert(True.getResNo() == 0 &&
-         "Expect True is the first output of an instruction.");
-
-  // Need N is the exactly one using True.
-  if (!True.hasOneUse())
-    return false;
-
-  if (!True.isMachineOpcode())
-    return false;
-
-  unsigned TrueOpc = True.getMachineOpcode();
-  const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
-  uint64_t TrueTSFlags = TrueMCID.TSFlags;
-  bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
-
-  bool IsMasked = false;
-  const RISCV::RISCVMaskedPseudoInfo *Info =
-      RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
-  if (!Info && HasTiedDest) {
-    Info = RISCV::getMaskedPseudoInfo(TrueOpc);
-    IsMasked = true;
-  }
-
-  if (!Info)
-    return false;
-
-  // When Mask is not a true mask, this transformation is illegal for some
-  // operations whose results are affected by mask, like viota.m.
-  if (Info->MaskAffectsResult && Mask && !usesAllOnesMask(Mask, Glue))
-    return false;
-
-  if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
-    // The vmerge instruction must be TU.
-    // FIXME: This could be relaxed, but we need to handle the policy for the
-    // resulting op correctly.
-    if (isImplicitDef(Merge))
-      return false;
-    SDValue MergeOpTrue = True->getOperand(0);
-    // Both the vmerge instruction and the True instruction must have the same
-    // merge operand.
-    if (False != MergeOpTrue)
-      return false;
-  }
-
-  if (IsMasked) {
-    assert(HasTiedDest && "Expected tied dest");
-    // The vmerge instruction must be TU.
-    if (isImplicitDef(Merge))
-      return false;
-    // The vmerge instruction must have an all 1s mask since we're going to keep
-    // the mask from the True instruction.
-    // FIXME: Support mask agnostic True instruction which would have an
-    // undef merge operand.
-    if (Mask && !usesAllOnesMask(Mask, Glue))
-      return false;
-  }
-
-  // Skip if True has side effect.
-  // TODO: Support vleff and vlsegff.
-  if (TII->get(TrueOpc).hasUnmodeledSideEffects())
-    return false;
-
-  // The last operand of a masked instruction may be glued.
-  bool HasGlueOp = True->getGluedNode() != nullptr;
-
-  // The chain operand may exist either before the glued operands or in the last
-  // position.
-  unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
-  bool HasChainOp =
-      True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
-
-  if (HasChainOp) {
-    // Avoid creating cycles in the DAG. We must ensure that none of the other
-    // operands depend on True through it's Chain.
-    SmallVector<const SDNode *, 4> LoopWorklist;
-    SmallPtrSet<const SDNode *, 16> Visited;
-    LoopWorklist.push_back(False.getNode());
-    if (Mask)
-      LoopWorklist.push_back(Mask.getNode());
-    LoopWorklist.push_back(VL.getNode());
-    if (Glue)
-      LoopWorklist.push_back(Glue.getNode());
-    if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
-      return false;
-  }
-
-  // The vector policy operand may be present for masked intrinsics
-  bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
-  unsigned TrueVLIndex =
-      True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
-  SDValue TrueVL = True.getOperand(TrueVLIndex);
-  SDValue SEW = True.getOperand(TrueVLIndex + 1);
-
-  auto GetMinVL = [](SDValue LHS, SDValue RHS) {
-    if (LHS == RHS)
-      return LHS;
-    if (isAllOnesConstant(LHS))
-      return RHS;
-    if (isAllOnesConstant(RHS))
-      return LHS;
-    auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
-    auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
-    if (!CLHS || !CRHS)
-      return SDValue();
-    return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
-  };
-
-  // Because N and True must have the same merge operand (or True's operand is
-  // implicit_def), the "effective" body is the minimum of their VLs.
-  SDValue OrigVL = VL;
-  VL = GetMinVL(TrueVL, VL);
-  if (!VL)
-    return false;
-
-  // If we end up changing the VL or mask of True, then we need to make sure it
-  // doesn't raise any observable fp exceptions, since changing the active
-  // elements will affect how fflags is set.
-  if (TrueVL != VL || !IsMasked)
-    if (mayRaiseFPException(True.getNode()) &&
-        !True->getFlags().hasNoFPExcept())
-      return false;
-
-  SDLoc DL(N);
-
-  // From the preconditions we checked above, we know the mask and thus glue
-  // for the result node will be taken from True.
-  if (IsMasked) {
-    Mask = True->getOperand(Info->MaskOpIdx);
-    Glue = True->getOperand(True->getNumOperands() - 1);
-    assert(Glue.getValueType() == MVT::Glue);
-  }
-  // If we end up using the vmerge mask the vmerge is actually a vmv.v.v, create
-  // an all-ones mask to use.
-  else if (IsVMv(N)) {
-    unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
-    unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
-    ElementCount EC = N->getValueType(0).getVectorElementCount();
-    MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);
-
-    SDValue AllOnesMask =
-        SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
-    SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
-                                            RISCV::V0, AllOnesMask, SDValue());
-    Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
-    Glue = MaskCopy.getValue(1);
-  }
-
-  unsigned MaskedOpc = Info->MaskedPseudo;
-#ifndef NDEBUG
-  const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
-  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
-         "Expected instructions with mask have policy operand.");
-  assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
-                                         MCOI::TIED_TO) == 0 &&
-         "Expected instructions with mask have a tied dest.");
-#endif
-
-  // Use a tumu policy, relaxing it to tail agnostic provided that the merge
-  // operand is undefined.
-  //
-  // However, if the VL became smaller than what the vmerge had originally, then
-  // elements past VL that were previously in the vmerge's body will have moved
-  // to the tail. In that case we always need to use tail undisturbed to
-  // preserve them.
-  bool MergeVLShrunk = VL != OrigVL;
-  uint64_t Policy = (isImplicitDef(Merge) && !MergeVLShrunk)
-                        ? RISCVII::TAIL_AGNOSTIC
-                        : /*TUMU*/ 0;
-  SDValue PolicyOp =
-      CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
-
-
-  SmallVector<SDValue, 8> Ops;
-  Ops.push_back(False);
-
-  const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
-  const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
-  assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
-  Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
-
-  Ops.push_back(Mask);
-
-  // For unmasked "VOp" with rounding mode operand, that is interfaces like
-  // (..., rm, vl) or (..., rm, vl, policy).
-  // Its masked version is (..., vm, rm, vl, policy).
-  // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td
-  if (HasRoundingMode)
-    Ops.push_back(True->getOperand(TrueVLIndex - 1));
-
-  Ops.append({VL, SEW, PolicyOp});
-
-  // Result node should have chain operand of True.
-  if (HasChainOp)
-    Ops.push_back(True.getOperand(TrueChainOpIdx));
-
-  // Add the glue for the CopyToReg of mask->v0.
-  Ops.push_back(Glue);
-
-  MachineSDNode *Result =
-      CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
-  Result->setFlags(True->getFlags());
-
-  if (!cast<MachineSDNode>(True)->memoperands_empty())
-    CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
-
-  // Replace vmerge.vvm node by Result.
-  ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
-
-  // Replace another value of True. E.g. chain and VL.
-  for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
-    ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
-
-  return true;
-}
-
-bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
-  bool MadeChange = false;
-  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
-
-  while (Position != CurDAG->allnodes_begin()) {
-    SDNode *N = &*--Position;
-    if (N->use_empty() || !N->isMachineOpcode())
-      continue;
-
-    if (IsVMerge(N) || IsVMv(N))
-      MadeChange |= performCombineVMergeAndVOps(N);
-  }
-  return MadeChange;
-}
-
 /// If our passthru is an implicit_def, use noreg instead. This side
 /// steps issues with MachineCSE not being able to CSE expressions with
 /// IMPLICIT_DEF operands while preserving the semantic intent. See
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 77e174135a599..f7551fe4d29a6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -188,9 +188,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
 private:
   bool doPeepholeSExtW(SDNode *Node);
   bool doPeepholeMaskedRVV(MachineSDNode *Node);
-  bool doPeepholeMergeVVMFold();
   bool doPeepholeNoRegPassThru();
-  bool performCombineVMergeAndVOps(SDNode *N);
 };

 namespace RISCV {
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll
index d612298bf50e7..4f88806054dfb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v -stop-after=finalize-isel | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -stop-after=riscv-fold-masks | FileCheck %s

 declare <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
 declare <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index c639f092444fc..5c8b02237382f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -952,22 +952,20 @@ define @vpselect_trunc( %passthru,
  %a = call <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* null, i64 1)

From 26d98c4403b851e6d6a07b8dc804372e68a57ea0 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Thu, 9 Nov 2023 12:30:29 +0800
Subject: [PATCH 2/6] Keep a more canonical form by copying masks from a
 virtual register first
---
 llvm/lib/Target/RISCV/RISCVFoldMasks.cpp | 25 ++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
index 2ec561c7bad2f..913cef586e3ec 100644
--- a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
@@ -79,16 +79,13 @@ INITIALIZE_PASS(RISCVFoldMasks, DEBUG_TYPE, "RISC-V Fold Masks", false, false)
 bool RISCVFoldMasks::isAllOnesMask(MachineInstr *MaskDef) {
   if (!MaskDef)
     return false;
-  if (MaskDef->isCopy()) {
-    assert(MaskDef->getOperand(0).getReg() == RISCV::V0);
-    Register SrcReg =
-        TRI->lookThruCopyLike(MaskDef->getOperand(1).getReg(), MRI);
-    if (!SrcReg.isVirtual())
-      return false;
-    MaskDef = MRI->getVRegDef(SrcReg);
-    if (!MaskDef)
-      return false;
-  }
+  assert(MaskDef->isCopy() && MaskDef->getOperand(0).getReg() == RISCV::V0);
+  Register SrcReg = TRI->lookThruCopyLike(MaskDef->getOperand(1).getReg(), MRI);
+  if (!SrcReg.isVirtual())
+    return false;
+  MaskDef = MRI->getVRegDef(SrcReg);
+  if (!MaskDef)
+    return false;

   // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
   // undefined behaviour if it's the wrong bitwidth, so we could choose to
@@ -365,10 +362,14 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,
       // mask just before True.
       unsigned VMSetOpc =
           getVMSetForLMul(RISCVII::getLMul(MI.getDesc().TSFlags));
-      BuildMI(*MI.getParent(), TrueMI, MI.getDebugLoc(), TII->get(VMSetOpc))
-          .addDef(RISCV::V0)
+      Register Dest = MRI->createVirtualRegister(&RISCV::VRRegClass);
+      BuildMI(*MI.getParent(), TrueMI, MI.getDebugLoc(), TII->get(VMSetOpc),
+              Dest)
           .add(VL)
           .add(TrueMI.getOperand(RISCVII::getSEWOpNum(TrueMCID)));
+      BuildMI(*MI.getParent(), TrueMI, MI.getDebugLoc(), TII->get(RISCV::COPY),
+              RISCV::V0)
+          .addReg(Dest);
     }

     TrueMI.setDesc(MaskedMCID);

From c211c56791986c66083b335bcaf0aab5c6b65a92 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Thu, 9 Nov 2023 12:32:20 +0800
Subject: [PATCH 3/6] Remove unnecessary includes
---
 llvm/lib/Target/RISCV/RISCVFoldMasks.cpp | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
index 913cef586e3ec..14841b7c7eb04 100644
--- a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
@@ -28,14 +28,8 @@

 #include "RISCV.h"
 #include "RISCVISelDAGToDAG.h"
-#include "RISCVInstrInfo.h"
 #include "RISCVSubtarget.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"

 using namespace llvm;

From 121273057999cfbcb58e62c2e5bc5442dd0d50f0 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Thu, 9 Nov 2023 12:39:03 +0800
Subject: [PATCH 4/6] Use MachineInstr::insert
---
 llvm/lib/Target/RISCV/RISCVFoldMasks.cpp | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
index 14841b7c7eb04..9c959b1e1b352 100644
--- a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
@@ -120,19 +120,6 @@ static unsigned getVMSetForLMul(RISCVII::VLMUL LMUL) {
   llvm_unreachable("Unknown VLMUL enum");
 }

-/// Inserts an operand at Idx in MI, pushing back any operands.
-static void insertOperand(MachineInstr &MI, MachineOperand MO, unsigned Idx) {
-  SmallVector<MachineOperand> OpsToAddBack;
-  unsigned NumTailOps = MI.getNumOperands() - Idx;
-  for (unsigned I = 0; I < NumTailOps; I++) {
-    OpsToAddBack.push_back(MI.getOperand(Idx));
-    MI.removeOperand(Idx);
-  }
-  MI.addOperand(MO);
-  for (MachineOperand &TailOp : OpsToAddBack)
-    MI.addOperand(TailOp);
-}
-
 // Try to sink From to before To, also sinking any instructions between From and
 // To where there is a write-after-read dependency on a physical register.
 static bool sinkInstructionAndDeps(MachineInstr &From, MachineInstr &To) {
@@ -370,8 +357,8 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,

     // TODO: Increment MaskOpIdx by number of explicit defs in tablegen?
     unsigned MaskOpIdx = Info->MaskOpIdx + TrueMI.getNumExplicitDefs();
-    insertOperand(TrueMI, MachineOperand::CreateReg(RISCV::V0, false),
-                  MaskOpIdx);
+    TrueMI.insert(&TrueMI.getOperand(MaskOpIdx),
+                  MachineOperand::CreateReg(RISCV::V0, false));
   }

   // Update the AVL.
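To visualise the operand reshuffle that the MachineInstr::insert call above
performs, here is a minimal sketch with the operand order mirrored from the
MIR tests added later in this series (the register names, the e32 SEW
encoding of 5 and the policy immediates are illustrative):

    # Unmasked pseudo: dst, passthru, src1, src2, avl, sew, policy
    %true:vr = PseudoVADD_VV_M1 $noreg, $v9, $v10, %avl, 5 /* e32 */, 3 /* ta, ma */
    # After setDesc(MaskedMCID) and inserting $v0 at MaskOpIdx (Info->MaskOpIdx
    # plus the one explicit def): dst, passthru, src1, src2, mask, avl, sew, policy
    %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $v9, $v10, $v0, %avl, 5 /* e32 */, 1 /* ta, mu */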
From aa4c5db59b277c7b6ae3e7d34cf7cbec414d85f0 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Mon, 20 Nov 2023 15:17:57 +0800
Subject: [PATCH 5/6] Simplify sinking after #72540
---
 llvm/lib/Target/RISCV/RISCVFoldMasks.cpp | 73 +++++------------------
 1 file changed, 13 insertions(+), 60 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
index 9c959b1e1b352..bad44fa2a5cfe 100644
--- a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
@@ -29,7 +29,6 @@
 #include "RISCV.h"
 #include "RISCVISelDAGToDAG.h"
 #include "RISCVSubtarget.h"
-#include "llvm/ADT/SmallSet.h"

 using namespace llvm;

@@ -120,64 +119,6 @@ static unsigned getVMSetForLMul(RISCVII::VLMUL LMUL) {
   llvm_unreachable("Unknown VLMUL enum");
 }

-// Try to sink From to before To, also sinking any instructions between From and
-// To where there is a write-after-read dependency on a physical register.
-static bool sinkInstructionAndDeps(MachineInstr &From, MachineInstr &To) {
-  assert(From.getParent() == To.getParent());
-  SmallVector<MachineInstr *> Worklist, ToSink;
-  Worklist.push_back(&From);
-
-  // Rather than compute whether or not we saw a store for every instruction,
-  // just compute it once even if it's more conservative.
-  bool SawStore = false;
-  for (MachineBasicBlock::instr_iterator II = From.getIterator();
-       II != To.getIterator(); II++) {
-    if (II->mayStore()) {
-      SawStore = true;
-      break;
-    }
-  }
-
-  while (!Worklist.empty()) {
-    MachineInstr *MI = Worklist.pop_back_val();
-
-    if (!MI->isSafeToMove(nullptr, SawStore))
-      return false;
-
-    SmallSet<Register, 8> Defs, Uses;
-    for (MachineOperand &Def : MI->all_defs())
-      Defs.insert(Def.getReg());
-    for (MachineOperand &Use : MI->all_uses())
-      Uses.insert(Use.getReg());
-
-    // If anything from [MI, To] uses a definition of MI, we can't sink it.
-    for (MachineBasicBlock::instr_iterator II = MI->getIterator();
-         II != To.getIterator(); II++) {
-      for (MachineOperand &Use : II->all_uses()) {
-        if (Defs.contains(Use.getReg()))
-          return false;
-      }
-    }
-
-    // If MI uses any physical registers, we need to sink any instructions after
-    // it where there might be a write-after-read dependency.
-    for (MachineBasicBlock::instr_iterator II = MI->getIterator();
-         II != To.getIterator(); II++) {
-      bool NeedsSink = any_of(II->all_defs(), [&Uses](MachineOperand &Def) {
-        return Def.getReg().isPhysical() && Uses.contains(Def.getReg());
-      });
-      if (NeedsSink)
-        Worklist.push_back(&*II);
-    }
-
-    ToSink.push_back(MI);
-  }
-
-  for (MachineInstr *MI : ToSink)
-    MI->moveBefore(&To);
-  return true;
-}
-
 // Returns true if LHS is the same register as RHS, or if LHS is undefined.
 bool RISCVFoldMasks::isOpSameAs(const MachineOperand &LHS,
                                 const MachineOperand &RHS) {
@@ -330,8 +271,18 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,
 #endif

   // Sink True down to MI so that it can access MI's operands.
-  if (!sinkInstructionAndDeps(TrueMI, MI))
+  assert(!TrueMI.hasImplicitDef());
+  bool SawStore = false;
+  for (MachineBasicBlock::instr_iterator II = TrueMI.getIterator();
+       II != MI.getIterator(); II++) {
+    if (II->mayStore()) {
+      SawStore = true;
+      break;
+    }
+  }
+  if (!TrueMI.isSafeToMove(nullptr, SawStore))
     return false;
+  TrueMI.moveBefore(&MI);

   // Set the merge to the false operand of the merge.
 TrueMI.getOperand(1).setReg(False->getReg());

@@ -392,6 +343,8 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,
   MRI->replaceRegWith(MI.getOperand(0).getReg(), TrueMI.getOperand(0).getReg());
   MI.eraseFromParent();

+  if (IsMasked)
+    MaskDef->eraseFromParent();
   return true;
 }

From 1c2617ce0361867bf2da05fe6ab112518f6668ad Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Tue, 28 Nov 2023 18:13:24 +0800
Subject: [PATCH 6/6] Address review comments:

- Instead of erasing the old mask, move it after MI. Even though in
  practice there shouldn't be any other users of V0 due to SelectionDAG
  emitting a copy for every user, this should preserve the existing
  value of V0 anyway. This is safe to do because we know there are no
  other defs of V0 between MaskDef and MI.
- Remove isOpSameAs. It turns out we don't need to check if False is
  NoRegister, because that implicit_def->noreg peephole only operates
  on passthru operands.
- Check that TrueMI and MI are in the same block
---
 llvm/lib/Target/RISCV/RISCVFoldMasks.cpp      |  78 ++++++------
 .../RISCV/rvv/rvv-peephole-fold-vmerge.mir    | 112 ++++++++++++++++++
 2 files changed, 154 insertions(+), 36 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/rvv-peephole-fold-vmerge.mir

diff --git a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
index bad44fa2a5cfe..176dda4a84c75 100644
--- a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
@@ -55,12 +55,11 @@ class RISCVFoldMasks : public MachineFunctionPass {
   StringRef getPassName() const override { return "RISC-V Fold Masks"; }

 private:
+  bool convertToUnmasked(MachineInstr &MI, MachineInstr *MaskDef);
   bool foldVMergeIntoOps(MachineInstr &MI, MachineInstr *MaskDef);
   bool convertVMergeToVMv(MachineInstr &MI, MachineInstr *MaskDef);
-  bool convertToUnmasked(MachineInstr &MI, MachineInstr *MaskDef);

   bool isAllOnesMask(MachineInstr *MaskDef);
-  bool isOpSameAs(const MachineOperand &LHS, const MachineOperand &RHS);
 };

 } // namespace
@@ -119,17 +118,6 @@ static unsigned getVMSetForLMul(RISCVII::VLMUL LMUL) {
   llvm_unreachable("Unknown VLMUL enum");
 }

-// Returns true if LHS is the same register as RHS, or if LHS is undefined.
-bool RISCVFoldMasks::isOpSameAs(const MachineOperand &LHS,
-                                const MachineOperand &RHS) {
-  if (LHS.getReg() == RISCV::NoRegister)
-    return true;
-  if (RHS.getReg() == RISCV::NoRegister)
-    return false;
-  return TRI->lookThruCopyLike(LHS.getReg(), MRI) ==
-         TRI->lookThruCopyLike(RHS.getReg(), MRI);
-}
-
 // Try to fold away VMERGE_VVM instructions. We handle these cases:
 // -Masked TU VMERGE_VVM combined with an unmasked TA instruction folds to a
 // masked TU instruction. VMERGE_VVM must have the merge operand same as the
 // false operand.
@@ -163,10 +151,14 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,
     return false;

   MachineInstr &TrueMI = *MRI->getVRegDef(True->getReg());
+  if (TrueMI.getParent() != MI.getParent())
+    return false;

   // We require that either merge and false are the same, or that merge
   // is undefined.
-  if (!isOpSameAs(*Merge, *False))
+  if (Merge->getReg() != RISCV::NoRegister &&
+      TRI->lookThruCopyLike(Merge->getReg(), MRI) !=
+          TRI->lookThruCopyLike(False->getReg(), MRI))
     return false;

   // MI must be the only user of True.
@@ -177,12 +169,14 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,
   const MCInstrDesc &TrueMCID = TrueMI.getDesc();
   bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);

-  bool IsMasked = false;
+  const bool MIIsMasked =
+      BaseOpc == RISCV::VMERGE_VVM && !isAllOnesMask(MaskDef);
+  bool TrueIsMasked = false;
   const RISCV::RISCVMaskedPseudoInfo *Info =
       RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
   if (!Info && HasTiedDest) {
     Info = RISCV::getMaskedPseudoInfo(TrueOpc);
-    IsMasked = true;
+    TrueIsMasked = true;
   }

   if (!Info)
@@ -190,8 +184,7 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,

   // When Mask is not a true mask, this transformation is illegal for some
   // operations whose results are affected by mask, like viota.m.
-  if (Info->MaskAffectsResult && BaseOpc == RISCV::VMERGE_VVM &&
-      !isAllOnesMask(MaskDef))
+  if (Info->MaskAffectsResult && MIIsMasked)
     return false;

   MachineOperand &TrueMergeOp = TrueMI.getOperand(1);
@@ -203,20 +196,21 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,
     return false;
     // Both the vmerge instruction and the True instruction must have the same
     // merge operand.
-    if (!isOpSameAs(TrueMergeOp, *False))
+    if (TrueMergeOp.getReg() != RISCV::NoRegister &&
+        TrueMergeOp.getReg() != False->getReg())
       return false;
   }

-  if (IsMasked) {
+  if (TrueIsMasked) {
     assert(HasTiedDest && "Expected tied dest");
     // The vmerge instruction must be TU.
     if (Merge->getReg() == RISCV::NoRegister)
       return false;
-    // The vmerge instruction must have an all 1s mask since we're going to keep
-    // the mask from the True instruction.
-    // FIXME: Support mask agnostic True instruction which would have an
-    // undef merge operand.
-    if (BaseOpc == RISCV::VMERGE_VVM && !isAllOnesMask(MaskDef))
+    // MI must have an all 1s mask since we're going to keep the mask from the
+    // True instruction.
+    // FIXME: Support mask agnostic True instruction which would have an undef
+    // merge operand.
+    if (MIIsMasked)
       return false;
   }

@@ -225,10 +219,6 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,
   if (TII->get(TrueOpc).hasUnmodeledSideEffects())
     return false;

-  // The vector policy operand may be present for masked intrinsics
-  const MachineOperand &TrueVL =
-      TrueMI.getOperand(RISCVII::getVLOpNum(TrueMCID));
-
   auto GetMinVL =
       [](const MachineOperand &LHS,
          const MachineOperand &RHS) -> std::optional<MachineOperand> {
@@ -246,7 +236,9 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,

   // Because MI and True must have the same merge operand (or True's operand is
   // implicit_def), the "effective" body is the minimum of their VLs.
-  const MachineOperand VL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
+  const MachineOperand &TrueVL =
+      TrueMI.getOperand(RISCVII::getVLOpNum(TrueMCID));
+  const MachineOperand &VL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
   auto MinVL = GetMinVL(TrueVL, VL);
   if (!MinVL)
     return false;
@@ -255,7 +247,7 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,
   // If we end up changing the VL or mask of True, then we need to make sure it
   // doesn't raise any observable fp exceptions, since changing the active
   // elements will affect how fflags is set.
-  if (VLChanged || !IsMasked)
+  if (VLChanged || !TrueIsMasked)
     if (TrueMCID.mayRaiseFPException() &&
         !TrueMI.getFlag(MachineInstr::MIFlag::NoFPExcept))
       return false;
@@ -287,8 +279,9 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,

   // Set the merge to the false operand of the merge.
TrueMI.getOperand(1).setReg(False->getReg()); + bool NeedToMoveOldMask = TrueIsMasked; // If we're converting it to a masked pseudo, reuse MI's mask. - if (!IsMasked) { + if (!TrueIsMasked) { if (BaseOpc == RISCV::VMV_V_V) { // If MI is a vmv.v.v, it won't have a mask operand. So insert an all-ones // mask just before True. @@ -302,6 +295,7 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI, BuildMI(*MI.getParent(), TrueMI, MI.getDebugLoc(), TII->get(RISCV::COPY), RISCV::V0) .addReg(Dest); + NeedToMoveOldMask = true; } TrueMI.setDesc(MaskedMCID); @@ -342,9 +336,17 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI, MRI->constrainRegClass(PassthruReg, V0RC); MRI->replaceRegWith(MI.getOperand(0).getReg(), TrueMI.getOperand(0).getReg()); + + // We need to move the old mask copy to after MI if: + // - TrueMI is masked and we are using its mask instead + // - We created a new all ones mask that clobbers V0 + if (NeedToMoveOldMask && MaskDef) { + assert(MaskDef->getParent() == MI.getParent()); + MaskDef->removeFromParent(); + MI.getParent()->insertAfter(MI.getIterator(), MaskDef); + } + MI.eraseFromParent(); - if (IsMasked) - MaskDef->eraseFromParent(); return true; } @@ -369,8 +371,11 @@ bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI, MachineInstr *V0Def) { CASE_VMERGE_TO_VMV(M8) } + Register MergeReg = MI.getOperand(1).getReg(); + Register FalseReg = MI.getOperand(2).getReg(); // Check merge == false (or merge == undef) - if (!isOpSameAs(MI.getOperand(1), MI.getOperand(2))) + if (MergeReg != RISCV::NoRegister && TRI->lookThruCopyLike(MergeReg, MRI) != + TRI->lookThruCopyLike(FalseReg, MRI)) return false; assert(MI.getOperand(4).isReg() && MI.getOperand(4).getReg() == RISCV::V0); @@ -468,8 +473,9 @@ bool RISCVFoldMasks::runOnMachineFunction(MachineFunction &MF) { CurrentV0Def = nullptr; for (MachineInstr &MI : MBB) { - Changed |= convertVMergeToVMv(MI, CurrentV0Def); Changed |= convertToUnmasked(MI, CurrentV0Def); + Changed |= convertVMergeToVMv(MI, CurrentV0Def); + if (MI.definesRegister(RISCV::V0, TRI)) CurrentV0Def = &MI; } diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-fold-vmerge.mir b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-fold-vmerge.mir new file mode 100644 index 0000000000000..b9e9111198606 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-fold-vmerge.mir @@ -0,0 +1,112 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 +# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v -run-pass=riscv-fold-masks \ +# RUN: -verify-machineinstrs | FileCheck %s + +--- +name: vmerge_unmasked +body: | + bb.0: + liveins: $x1, $v0, $v8, $v9, $v10 + ; CHECK-LABEL: name: vmerge_unmasked + ; CHECK: liveins: $x1, $v0, $v8, $v9, $v10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %avl:gprnox0 = COPY $x1 + ; CHECK-NEXT: %false:vrnov0 = COPY $v8 + ; CHECK-NEXT: %mask:vmv0 = COPY $v0 + ; CHECK-NEXT: $v0 = COPY %mask + ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $v9, $v10, $v0, %avl, 5 /* e32 */, 1 /* ta, mu */ + %avl:gprnox0 = COPY $x1 + %false:vr = COPY $v8 + %true:vr = PseudoVADD_VV_M1 $noreg, $v9, $v10, %avl, 5, 3 + %mask:vmv0 = COPY $v0 + $v0 = COPY %mask + %x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, $v0, %avl, 5 +... 
+--- +name: vmerge_masked +body: | + bb.0: + liveins: $x1, $v0, $v8, $v9, $v10 + ; CHECK-LABEL: name: vmerge_masked + ; CHECK: liveins: $x1, $v0, $v8, $v9, $v10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %avl:gprnox0 = COPY $x1 + ; CHECK-NEXT: %false:vrnov0 = COPY $v8 + ; CHECK-NEXT: %addmask:vmv0 = COPY $v0 + ; CHECK-NEXT: $v0 = COPY %addmask + ; CHECK-NEXT: %mergemask:vmv0 = PseudoVMSET_M_B8 %avl, 5 /* e32 */ + ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $v9, $v10, $v0, %avl, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v0 = COPY %mergemask + %avl:gprnox0 = COPY $x1 + %false:vrnov0 = COPY $v8 + %addmask:vmv0 = COPY $v0 + $v0 = COPY %addmask + %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $v9, $v10, $v0, %avl, 5, 0 + %mergemask:vmv0 = PseudoVMSET_M_B8 %avl, 5 + $v0 = COPY %mergemask + %x:vrnov0 = PseudoVMERGE_VVM_M1 %false, %false, %true, $v0, %avl, 5 +... +--- +name: vmv_unmasked +body: | + bb.0: + liveins: $x1, $v8, $v9, $v10 + ; CHECK-LABEL: name: vmv_unmasked + ; CHECK: liveins: $x1, $v8, $v9, $v10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %avl:gprnox0 = COPY $x1 + ; CHECK-NEXT: %false:vr = COPY $v8 + ; CHECK-NEXT: [[PseudoVMSET_M_B8_:%[0-9]+]]:vr = PseudoVMSET_M_B8 %avl, 5 /* e32 */ + ; CHECK-NEXT: $v0 = COPY [[PseudoVMSET_M_B8_]] + ; CHECK-NEXT: %true:vr = PseudoVADD_VV_M1 %false, $v9, $v10, %avl, 5 /* e32 */, 0 /* tu, mu */ + %avl:gprnox0 = COPY $x1 + %false:vr = COPY $v8 + %true:vr = PseudoVADD_VV_M1 $noreg, $v9, $v10, %avl, 5, 3 + %x:vr = PseudoVMV_V_V_M1 %false, %true, %avl, 5, 0 +... +--- +name: vmv_masked +body: | + bb.0: + liveins: $x1, $v0, $v8, $v9, $v10 + ; CHECK-LABEL: name: vmv_masked + ; CHECK: liveins: $x1, $v0, $v8, $v9, $v10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %avl:gprnox0 = COPY $x1 + ; CHECK-NEXT: %false:vrnov0 = COPY $v8 + ; CHECK-NEXT: %addmask:vmv0 = COPY $v0 + ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $v9, $v10, $v0, %avl, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v0 = COPY %addmask + %avl:gprnox0 = COPY $x1 + %false:vrnov0 = COPY $v8 + %addmask:vmv0 = COPY $v0 + $v0 = COPY %addmask + %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $v9, $v10, $v0, %avl, 5, 3 + %x:vrnov0 = PseudoVMV_V_V_M1 %false, %true, %avl, 5, 0 +... +--- +# Shouldn't fold this because vadd is in another BB +name: different_bb +body: | + ; CHECK-LABEL: name: different_bb + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $x1, $v0, $v8, $v9, $v10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %avl:gprnox0 = COPY $x1 + ; CHECK-NEXT: %false:vr = COPY $v8 + ; CHECK-NEXT: %true:vr = PseudoVADD_VV_M1 $noreg, $v9, $v10, %avl, 5 /* e32 */, 3 /* ta, ma */ + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: %mask:vmv0 = COPY $v0 + ; CHECK-NEXT: $v0 = COPY %mask + ; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, $v0, %avl, 5 /* e32 */ + bb.0: + liveins: $x1, $v0, $v8, $v9, $v10 + %avl:gprnox0 = COPY $x1 + %false:vr = COPY $v8 + %true:vr = PseudoVADD_VV_M1 $noreg, $v9, $v10, %avl, 5, 3 + bb.1: + %mask:vmv0 = COPY $v0 + $v0 = COPY %mask + %x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, $v0, %avl, 5
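As a closing illustration, here is a minimal IR-level sketch of the kind of
input this series targets (hand-written for illustration rather than taken
from the patch; the function name and element types are assumptions):

    declare <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)

    define <vscale x 2 x i32> @vpmerge_add(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
      ; The add has a single use in the vp.merge, so after instruction
      ; selection the vmerge's false operand, mask and VL can all be folded
      ; into the add itself.
      %a = add <vscale x 2 x i32> %x, %y
      %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
      ret <vscale x 2 x i32> %b
    }

This should select to an unmasked PseudoVADD_VV followed by a
PseudoVMERGE_VVM, which foldVMergeIntoOps then rewrites into a single
PseudoVADD_VV_M1_MASK, as exercised by the MIR tests above.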