diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 0bf3294af92a8..3933e82b718f2 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -5120,6 +5120,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) { case Intrinsic::x86_tileloaddt164_internal: { if (!Subtarget->hasAMXTILE()) break; + auto *MFI = + CurDAG->getMachineFunction().getInfo(); + MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA); unsigned Opc = IntNo == Intrinsic::x86_tileloadd64_internal ? X86::PTILELOADDV : X86::PTILELOADDT1V; @@ -5201,6 +5204,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) { break; } case Intrinsic::x86_tilestored64_internal: { + auto *MFI = + CurDAG->getMachineFunction().getInfo(); + MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA); unsigned Opc = X86::PTILESTOREDV; // _tile_stored_internal(row, col, buf, STRIDE, c) SDValue Base = Node->getOperand(4); @@ -5228,6 +5234,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) { case Intrinsic::x86_tilestored64: { if (!Subtarget->hasAMXTILE()) break; + auto *MFI = + CurDAG->getMachineFunction().getInfo(); + MFI->setAMXProgModel(AMXProgModelEnum::DirectReg); unsigned Opc; switch (IntNo) { default: llvm_unreachable("Unexpected intrinsic!"); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0e377dd53b742..5aa8b015d9d95 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -26776,7 +26776,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, case Intrinsic::swift_async_context_addr: { SDLoc dl(Op); auto &MF = DAG.getMachineFunction(); - auto X86FI = MF.getInfo(); + auto *X86FI = MF.getInfo(); if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF)) { MF.getFrameInfo().setFrameAddressIsTaken(true); X86FI->setHasSwiftAsyncContext(true); @@ -36781,7 +36781,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, } 
case TargetOpcode::PREALLOCATED_SETUP: { assert(Subtarget.is32Bit() && "preallocated only used in 32-bit"); - auto MFI = MF->getInfo(); + auto *MFI = MF->getInfo(); MFI->setHasPreallocatedCall(true); int64_t PreallocatedId = MI.getOperand(0).getImm(); size_t StackAdjustment = MFI->getPreallocatedStackSize(PreallocatedId); @@ -36798,7 +36798,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, assert(Subtarget.is32Bit() && "preallocated calls only used in 32-bit"); int64_t PreallocatedId = MI.getOperand(1).getImm(); int64_t ArgIdx = MI.getOperand(2).getImm(); - auto MFI = MF->getInfo(); + auto *MFI = MF->getInfo(); size_t ArgOffset = MFI->getPreallocatedArgOffsets(PreallocatedId)[ArgIdx]; LLVM_DEBUG(dbgs() << "PREALLOCATED_ARG arg index " << ArgIdx << ", arg offset " << ArgOffset << "\n"); @@ -36841,6 +36841,13 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, unsigned Imm = MI.getOperand(0).getImm(); BuildMI(*BB, MI, MIMD, TII->get(X86::TILEZERO), TMMImmToTMMReg(Imm)); MI.eraseFromParent(); // The pseudo is gone now. 
+ auto *MFI = MF->getInfo(); + MFI->setAMXProgModel(AMXProgModelEnum::DirectReg); + return BB; + } + case X86::PTILEZEROV: { + auto *MFI = MF->getInfo(); + MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA); return BB; } case X86::PTILELOADD: diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td index c47bee070e04f..99deacc811a17 100644 --- a/llvm/lib/Target/X86/X86InstrAMX.td +++ b/llvm/lib/Target/X86/X86InstrAMX.td @@ -74,7 +74,7 @@ let SchedRW = [WriteSystem] in { GR16:$src2, opaquemem:$src3, TILE:$src4), []>; let isPseudo = true, isReMaterializable = 1, isAsCheapAsAMove = 1, - canFoldAsLoad = 1 in + canFoldAsLoad = 1, usesCustomInserter = 1 in def PTILEZEROV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2), [(set TILE:$dst, (int_x86_tilezero_internal GR16:$src1, GR16:$src2))]>; diff --git a/llvm/lib/Target/X86/X86LowerTileCopy.cpp b/llvm/lib/Target/X86/X86LowerTileCopy.cpp index f27676a27e86c..613722b398f44 100644 --- a/llvm/lib/Target/X86/X86LowerTileCopy.cpp +++ b/llvm/lib/Target/X86/X86LowerTileCopy.cpp @@ -19,6 +19,7 @@ #include "X86.h" #include "X86InstrBuilder.h" #include "X86InstrInfo.h" +#include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" #include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -71,6 +72,10 @@ FunctionPass *llvm::createX86LowerTileCopyPass() { } bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) { + X86MachineFunctionInfo *FuncInfo = MF.getInfo(); + if (FuncInfo->getAMXProgModel() != AMXProgModelEnum::ManagedRA) + return false; + const X86Subtarget &ST = MF.getSubtarget(); const X86InstrInfo *TII = ST.getInstrInfo(); const TargetRegisterInfo *TRI = ST.getRegisterInfo(); @@ -81,26 +86,8 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; for (MachineBasicBlock &MBB : MF) { - // There won't be a tile copy if neither tile register live in nor live out. 
- bool HasTileCopy = false; - for (const auto &LI : MBB.liveins()) { - if (TILERegs.test(LI.PhysReg)) { - HasTileCopy = true; - break; - } - } LiveRegUnits UsedRegs(*TRI); UsedRegs.addLiveOuts(MBB); - if (!HasTileCopy) { - for (auto RegT : TILERegs.set_bits()) { - if (UsedRegs.available(RegT)) { - HasTileCopy = true; - break; - } - } - } - if (!HasTileCopy) - continue; for (MachineInstr &MI : llvm::make_early_inc_range(reverse(MBB))) { UsedRegs.stepBackward(MI); if (!MI.isCopy()) diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h index f6e853270e073..8aaa49945f9d4 100644 --- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h +++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h @@ -21,6 +21,8 @@ namespace llvm { +enum AMXProgModelEnum { None = 0, DirectReg = 1, ManagedRA = 2 }; + /// X86MachineFunctionInfo - This class is derived from MachineFunction and /// contains private X86 target-specific information for each MachineFunction. class X86MachineFunctionInfo : public MachineFunctionInfo { @@ -96,6 +98,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// used to address arguments in a function using a base pointer. int SEHFramePtrSaveIndex = 0; + /// The AMX programming model used in the function. + AMXProgModelEnum AMXProgModel = AMXProgModelEnum::None; + /// True if this function has a subset of CSRs that is handled explicitly via /// copies. 
bool IsSplitCSR = false; @@ -219,6 +224,13 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { int getSEHFramePtrSaveIndex() const { return SEHFramePtrSaveIndex; } void setSEHFramePtrSaveIndex(int Index) { SEHFramePtrSaveIndex = Index; } + AMXProgModelEnum getAMXProgModel() const { return AMXProgModel; } + void setAMXProgModel(AMXProgModelEnum Model) { + assert((AMXProgModel == AMXProgModelEnum::None || AMXProgModel == Model) && + "mixed model is not supported"); + AMXProgModel = Model; + } + SmallVectorImpl &getForwardedMustTailRegParms() { return ForwardedMustTailRegParms; }