diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 35549c47ed01d..00a807192eb0c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -213,7 +213,7 @@ extern char &SILowerControlFlowLegacyID; void initializeSIPreEmitPeepholePass(PassRegistry &); extern char &SIPreEmitPeepholeID; -void initializeSILateBranchLoweringPass(PassRegistry &); +void initializeSILateBranchLoweringLegacyPass(PassRegistry &); extern char &SILateBranchLoweringPassID; void initializeSIOptimizeExecMaskingLegacyPass(PassRegistry &); @@ -391,6 +391,14 @@ class SIInsertHardClausesPass : public PassInfoMixin<SIInsertHardClausesPass> { MachineFunctionAnalysisManager &MFAM); }; +class SILateBranchLoweringPass + : public PassInfoMixin<SILateBranchLoweringPass> { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); + static bool isRequired() { return true; } +}; + FunctionPass *createAMDGPUAnnotateUniformValuesLegacy(); ModulePass *createAMDGPUPrintfRuntimeBinding(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 3f6817b17943c..6a45392b5f099 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -113,6 +113,7 @@ MACHINE_FUNCTION_PASS("si-form-memory-clauses", SIFormMemoryClausesPass()) MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass()) MACHINE_FUNCTION_PASS("si-insert-hard-clauses", SIInsertHardClausesPass()) MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass()) +MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass()) MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass()) MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass()) MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass()) @@ -134,7 +135,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizations
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass()) DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass()) -DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass()) DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass()) // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it // already exists. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index bc686de08a4ed..4937b434bc955 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -541,7 +541,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSIWholeQuadModeLegacyPass(*PR); initializeSILowerControlFlowLegacyPass(*PR); initializeSIPreEmitPeepholePass(*PR); - initializeSILateBranchLoweringPass(*PR); + initializeSILateBranchLoweringLegacyPass(*PR); initializeSIMemoryLegalizerLegacyPass(*PR); initializeSIOptimizeExecMaskingLegacyPass(*PR); initializeSIPreAllocateWWMRegsLegacyPass(*PR); @@ -2166,7 +2166,8 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const { // TODO: addPass(SIInsertHardClausesPass()); } - // addPass(SILateBranchLoweringPass()); + addPass(SILateBranchLoweringPass()); + if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less)) { // TODO: addPass(AMDGPUSetWavePriorityPass()); } diff --git a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp index 1a21d0c1d2777..3f7b0eab6bb8c 100644 --- a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp @@ -16,6 +16,7 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachinePassManager.h" using namespace llvm; @@ -23,7 +24,7 @@ using namespace 
llvm; namespace { -class SILateBranchLowering : public MachineFunctionPass { +class SILateBranchLowering { private: const SIRegisterInfo *TRI = nullptr; const SIInstrInfo *TII = nullptr; @@ -34,14 +35,23 @@ class SILateBranchLowering : public MachineFunctionPass { void earlyTerm(MachineInstr &MI, MachineBasicBlock *EarlyExitBlock); public: - static char ID; + explicit SILateBranchLowering(MachineDominatorTree *MDT) : MDT(MDT) {} + + bool run(MachineFunction &MF); unsigned MovOpc; Register ExecReg; +}; - SILateBranchLowering() : MachineFunctionPass(ID) {} +class SILateBranchLoweringLegacy : public MachineFunctionPass { +public: + static char ID; + SILateBranchLoweringLegacy() : MachineFunctionPass(ID) {} - bool runOnMachineFunction(MachineFunction &MF) override; + bool runOnMachineFunction(MachineFunction &MF) override { + auto *MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); + return SILateBranchLowering(MDT).run(MF); + } StringRef getPassName() const override { return "SI Final Branch Preparation"; @@ -56,15 +66,15 @@ class SILateBranchLowering : public MachineFunctionPass { } // end anonymous namespace -char SILateBranchLowering::ID = 0; +char SILateBranchLoweringLegacy::ID = 0; -INITIALIZE_PASS_BEGIN(SILateBranchLowering, DEBUG_TYPE, +INITIALIZE_PASS_BEGIN(SILateBranchLoweringLegacy, DEBUG_TYPE, "SI insert s_cbranch_execz instructions", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) -INITIALIZE_PASS_END(SILateBranchLowering, DEBUG_TYPE, +INITIALIZE_PASS_END(SILateBranchLoweringLegacy, DEBUG_TYPE, "SI insert s_cbranch_execz instructions", false, false) -char &llvm::SILateBranchLoweringPassID = SILateBranchLowering::ID; +char &llvm::SILateBranchLoweringPassID = SILateBranchLoweringLegacy::ID; static void generateEndPgm(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, @@ -192,11 +202,21 @@ void SILateBranchLowering::earlyTerm(MachineInstr &MI, MDT->insertEdge(&MBB, EarlyExitBlock); } -bool
SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) { +PreservedAnalyses +llvm::SILateBranchLoweringPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + auto *MDT = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF); + if (!SILateBranchLowering(MDT).run(MF)) + return PreservedAnalyses::all(); + + return getMachineFunctionPassPreservedAnalyses() + .preserve<MachineDominatorTreeAnalysis>(); +} + +bool SILateBranchLowering::run(MachineFunction &MF) { const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); TII = ST.getInstrInfo(); TRI = &TII->getRegisterInfo(); - MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; ExecReg = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; diff --git a/llvm/test/CodeGen/AMDGPU/early-term.mir b/llvm/test/CodeGen/AMDGPU/early-term.mir index 77bc9729ee845..2c43a15a109a7 100644 --- a/llvm/test/CodeGen/AMDGPU/early-term.mir +++ b/llvm/test/CodeGen/AMDGPU/early-term.mir @@ -2,6 +2,8 @@ # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX10 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX11 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -passes=si-late-branch-lowering %s -o - | FileCheck -check-prefixes=GCN,GFX11 %s + --- | define amdgpu_ps void @early_term_scc0_end_block() { ret void diff --git a/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir b/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir index 6a286eafa6d58..86685c265dff3 100644 --- a/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir +++ b/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir @@ -1,4 +1,5 @@ # RUN: llc -o - %s -mtriple=amdgcn -mcpu=fiji -run-pass=si-late-branch-lowering -verify-machineinstrs | FileCheck -check-prefix=GCN %s +# RUN: llc -o - %s -mtriple=amdgcn -mcpu=fiji -passes=si-late-branch-lowering | FileCheck -check-prefix=GCN %s #
GCN-LABEL: readlane_exec0 # GCN: bb.0