From 1292e9c51ae4189a0b1c81b9b9d08da0cf7a2ddb Mon Sep 17 00:00:00 2001 From: vikhegde Date: Fri, 14 Feb 2025 12:35:03 +0530 Subject: [PATCH] [AMDGPU][NewPM] Port "SIFormMemoryClauses" to NPM --- llvm/lib/Target/AMDGPU/AMDGPU.h | 4 +- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 2 +- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3 +- .../lib/Target/AMDGPU/SIFormMemoryClauses.cpp | 102 ++++++++++-------- llvm/lib/Target/AMDGPU/SIFormMemoryClauses.h | 22 ++++ .../AMDGPU/limit-soft-clause-reg-pressure.mir | 1 + llvm/test/CodeGen/AMDGPU/memory_clause.mir | 1 + .../CodeGen/AMDGPU/reserved-reg-in-clause.mir | 1 + .../AMDGPU/smem-no-clause-coalesced.mir | 1 + .../CodeGen/AMDGPU/soft-clause-dbg-value.mir | 1 + 10 files changed, 92 insertions(+), 46 deletions(-) create mode 100644 llvm/lib/Target/AMDGPU/SIFormMemoryClauses.h diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index de3253e64b978..4a0e5ef58ac93 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -50,7 +50,7 @@ FunctionPass *createLowerWWMCopiesPass(); FunctionPass *createSIMemoryLegalizerPass(); FunctionPass *createSIInsertWaitcntsPass(); FunctionPass *createSIPreAllocateWWMRegsLegacyPass(); -FunctionPass *createSIFormMemoryClausesPass(); +FunctionPass *createSIFormMemoryClausesLegacyPass(); FunctionPass *createSIPostRABundlerPass(); FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *); @@ -425,7 +425,7 @@ extern char &SIInsertHardClausesID; void initializeSIInsertWaitcntsPass(PassRegistry&); extern char &SIInsertWaitcntsID; -void initializeSIFormMemoryClausesPass(PassRegistry&); +void initializeSIFormMemoryClausesLegacyPass(PassRegistry &); extern char &SIFormMemoryClausesID; void initializeSIPostRABundlerPass(PassRegistry&); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 14b35a4fd8327..225f84725874b 100644 --- 
a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -104,6 +104,7 @@ MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass()) MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass()) MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", SIFixVGPRCopiesPass()) MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass()); +MACHINE_FUNCTION_PASS("si-form-memory-clauses", SIFormMemoryClausesPass()) MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass()) MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass()) MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass()) @@ -124,7 +125,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizations DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass()) DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass()) -DUMMY_MACHINE_FUNCTION_PASS("si-form-memory-clauses", SIFormMemoryClausesPass()) DUMMY_MACHINE_FUNCTION_PASS("si-insert-hard-clauses", SIInsertHardClausesPass()) DUMMY_MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass()) DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 7c0f1040a8156..eb488843b53e0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -42,6 +42,7 @@ #include "SIFixSGPRCopies.h" #include "SIFixVGPRCopies.h" #include "SIFoldOperands.h" +#include "SIFormMemoryClauses.h" #include "SILoadStoreOptimizer.h" #include "SILowerControlFlow.h" #include "SILowerSGPRSpills.h" @@ -540,7 +541,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSIMemoryLegalizerPass(*PR); initializeSIOptimizeExecMaskingLegacyPass(*PR); initializeSIPreAllocateWWMRegsLegacyPass(*PR); - 
initializeSIFormMemoryClausesPass(*PR); + initializeSIFormMemoryClausesLegacyPass(*PR); initializeSIPostRABundlerPass(*PR); initializeGCNCreateVOPDPass(*PR); initializeAMDGPUUnifyDivergentExitNodesPass(*PR); diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp index 926657b8a1e7b..bbc0280aed42e 100644 --- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp +++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp @@ -14,6 +14,7 @@ /// //===----------------------------------------------------------------------===// +#include "SIFormMemoryClauses.h" #include "AMDGPU.h" #include "GCNRegPressure.h" #include "SIMachineFunctionInfo.h" @@ -31,15 +32,37 @@ MaxClause("amdgpu-max-memory-clause", cl::Hidden, cl::init(15), namespace { -class SIFormMemoryClauses : public MachineFunctionPass { +class SIFormMemoryClausesImpl { using RegUse = DenseMap<unsigned, std::pair<unsigned, LaneBitmask>>; + bool canBundle(const MachineInstr &MI, const RegUse &Defs, + const RegUse &Uses) const; + bool checkPressure(const MachineInstr &MI, GCNDownwardRPTracker &RPT); + void collectRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses) const; + bool processRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses, + GCNDownwardRPTracker &RPT); + + const GCNSubtarget *ST; + const SIRegisterInfo *TRI; + const MachineRegisterInfo *MRI; + SIMachineFunctionInfo *MFI; + LiveIntervals *LIS; + + unsigned LastRecordedOccupancy; + unsigned MaxVGPRs; + unsigned MaxSGPRs; + public: - static char ID; + SIFormMemoryClausesImpl(LiveIntervals *LS) : LIS(LS) {} + bool run(MachineFunction &MF); +}; +class SIFormMemoryClausesLegacy : public MachineFunctionPass { public: - SIFormMemoryClauses() : MachineFunctionPass(ID) { - initializeSIFormMemoryClausesPass(*PassRegistry::getPassRegistry()); + static char ID; + + SIFormMemoryClausesLegacy() : MachineFunctionPass(ID) { + initializeSIFormMemoryClausesLegacyPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; 
@@ -58,40 +81,22 @@ class SIFormMemoryClauses : public MachineFunctionPass { return MachineFunctionProperties().set( MachineFunctionProperties::Property::IsSSA); } - -private: - bool canBundle(const MachineInstr &MI, const RegUse &Defs, - const RegUse &Uses) const; - bool checkPressure(const MachineInstr &MI, GCNDownwardRPTracker &RPT); - void collectRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses) const; - bool processRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses, - GCNDownwardRPTracker &RPT); - - const GCNSubtarget *ST; - const SIRegisterInfo *TRI; - const MachineRegisterInfo *MRI; - SIMachineFunctionInfo *MFI; - - unsigned LastRecordedOccupancy; - unsigned MaxVGPRs; - unsigned MaxSGPRs; }; } // End anonymous namespace. -INITIALIZE_PASS_BEGIN(SIFormMemoryClauses, DEBUG_TYPE, +INITIALIZE_PASS_BEGIN(SIFormMemoryClausesLegacy, DEBUG_TYPE, "SI Form memory clauses", false, false) INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) -INITIALIZE_PASS_END(SIFormMemoryClauses, DEBUG_TYPE, +INITIALIZE_PASS_END(SIFormMemoryClausesLegacy, DEBUG_TYPE, "SI Form memory clauses", false, false) +char SIFormMemoryClausesLegacy::ID = 0; -char SIFormMemoryClauses::ID = 0; - -char &llvm::SIFormMemoryClausesID = SIFormMemoryClauses::ID; +char &llvm::SIFormMemoryClausesID = SIFormMemoryClausesLegacy::ID; -FunctionPass *llvm::createSIFormMemoryClausesPass() { - return new SIFormMemoryClauses(); +FunctionPass *llvm::createSIFormMemoryClausesLegacyPass() { + return new SIFormMemoryClausesLegacy(); } static bool isVMEMClauseInst(const MachineInstr &MI) { @@ -147,8 +152,9 @@ static unsigned getMopState(const MachineOperand &MO) { // Returns false if there is a use of a def already in the map. // In this case we must break the clause. 
-bool SIFormMemoryClauses::canBundle(const MachineInstr &MI, const RegUse &Defs, - const RegUse &Uses) const { +bool SIFormMemoryClausesImpl::canBundle(const MachineInstr &MI, + const RegUse &Defs, + const RegUse &Uses) const { // Check interference with defs. for (const MachineOperand &MO : MI.operands()) { // TODO: Prologue/Epilogue Insertion pass does not process bundled @@ -184,8 +190,8 @@ bool SIFormMemoryClauses::canBundle(const MachineInstr &MI, const RegUse &Defs, // Since all defs in the clause are early clobber we can run out of registers. // Function returns false if pressure would hit the limit if instruction is // bundled into a memory clause. -bool SIFormMemoryClauses::checkPressure(const MachineInstr &MI, - GCNDownwardRPTracker &RPT) { +bool SIFormMemoryClausesImpl::checkPressure(const MachineInstr &MI, + GCNDownwardRPTracker &RPT) { // NB: skip advanceBeforeNext() call. Since all defs will be marked // early-clobber they will all stay alive at least to the end of the // clause. Therefor we should not decrease pressure even if load @@ -213,8 +219,8 @@ bool SIFormMemoryClauses::checkPressure(const MachineInstr &MI, } // Collect register defs and uses along with their lane masks and states. -void SIFormMemoryClauses::collectRegUses(const MachineInstr &MI, - RegUse &Defs, RegUse &Uses) const { +void SIFormMemoryClausesImpl::collectRegUses(const MachineInstr &MI, + RegUse &Defs, RegUse &Uses) const { for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; @@ -239,9 +245,9 @@ void SIFormMemoryClauses::collectRegUses(const MachineInstr &MI, // Check register def/use conflicts, occupancy limits and collect def/use maps. // Return true if instruction can be bundled with previous. If it cannot // def/use maps are not updated. 
-bool SIFormMemoryClauses::processRegUses(const MachineInstr &MI, - RegUse &Defs, RegUse &Uses, - GCNDownwardRPTracker &RPT) { +bool SIFormMemoryClausesImpl::processRegUses(const MachineInstr &MI, + RegUse &Defs, RegUse &Uses, + GCNDownwardRPTracker &RPT) { if (!canBundle(MI, Defs, Uses)) return false; @@ -252,10 +258,7 @@ bool SIFormMemoryClauses::processRegUses(const MachineInstr &MI, return true; } -bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(MF.getFunction())) - return false; - +bool SIFormMemoryClausesImpl::run(MachineFunction &MF) { ST = &MF.getSubtarget<GCNSubtarget>(); if (!ST->isXNACKEnabled()) return false; @@ -264,7 +267,6 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) { TRI = ST->getRegisterInfo(); MRI = &MF.getRegInfo(); MFI = MF.getInfo<SIMachineFunctionInfo>(); - LiveIntervals *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); SlotIndexes *Ind = LIS->getSlotIndexes(); bool Changed = false; @@ -416,3 +418,19 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) { return Changed; } + +bool SIFormMemoryClausesLegacy::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + LiveIntervals *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); + return SIFormMemoryClausesImpl(LIS).run(MF); +} + +PreservedAnalyses +SIFormMemoryClausesPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + LiveIntervals &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF); + SIFormMemoryClausesImpl(&LIS).run(MF); + return PreservedAnalyses::all(); +} diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.h b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.h new file mode 100644 index 0000000000000..c50a46f9ac2fb --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.h @@ -0,0 +1,22 @@ +//===- SIFormMemoryClauses.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_SIFORMMEMORYCLAUSES_H +#define LLVM_LIB_TARGET_AMDGPU_SIFORMMEMORYCLAUSES_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { +class SIFormMemoryClausesPass : public PassInfoMixin<SIFormMemoryClausesPass> { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); +}; +} // namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_SIFORMMEMORYCLAUSES_H diff --git a/llvm/test/CodeGen/AMDGPU/limit-soft-clause-reg-pressure.mir b/llvm/test/CodeGen/AMDGPU/limit-soft-clause-reg-pressure.mir index 46fe85a5f13f3..bd46754d10683 100644 --- a/llvm/test/CodeGen/AMDGPU/limit-soft-clause-reg-pressure.mir +++ b/llvm/test/CodeGen/AMDGPU/limit-soft-clause-reg-pressure.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack -run-pass=si-form-memory-clauses -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack -passes="si-form-memory-clauses" -o - %s | FileCheck %s # This previously would produce a bundle that could not be satisfied # due to using nearly the entire register budget and not considering diff --git a/llvm/test/CodeGen/AMDGPU/memory_clause.mir b/llvm/test/CodeGen/AMDGPU/memory_clause.mir index 4b0226a0f6586..e50c3146068f3 100644 --- a/llvm/test/CodeGen/AMDGPU/memory_clause.mir +++ b/llvm/test/CodeGen/AMDGPU/memory_clause.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple=amdgcn -mcpu=gfx902 -verify-machineinstrs -run-pass=si-form-memory-clauses %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx902 -passes="si-form-memory-clauses" %s -o - | FileCheck -check-prefix=GCN %s # GCN-LABEL: {{^}}name: vector_clause{{$}} # GCN: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/reserved-reg-in-clause.mir 
b/llvm/test/CodeGen/AMDGPU/reserved-reg-in-clause.mir index 5346aea592348..fb1da2da0a8ff 100644 --- a/llvm/test/CodeGen/AMDGPU/reserved-reg-in-clause.mir +++ b/llvm/test/CodeGen/AMDGPU/reserved-reg-in-clause.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -run-pass si-form-memory-clauses %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -passes="si-form-memory-clauses" %s -o - | FileCheck -check-prefix=GCN %s # Make sure we do not produce early-clobber list with odd subregs. diff --git a/llvm/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir b/llvm/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir index 8a4450926471e..7608e066d1169 100644 --- a/llvm/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir +++ b/llvm/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple=amdgcn -mcpu=gfx902 -o - %s -run-pass si-form-memory-clauses -verify-machineinstrs | FileCheck -check-prefix=XNACK %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx902 -o - %s -passes="si-form-memory-clauses" | FileCheck -check-prefix=XNACK %s # The SIFormMemoryClauses pass must not form a clause (indicated by BUNDLE) # from the two adjacent smem instructions, because the first one has its diff --git a/llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir index 728f3874a5be3..af9ff4bae8292 100644 --- a/llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir +++ b/llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack -run-pass=si-form-memory-clauses -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack -passes="si-form-memory-clauses" -o - %s | FileCheck %s # Make sure that debug instructions do not change the bundling, and # the dbg_values which break the clause are 
inserted after the new