diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 89356df39724a..5d9a830f041a7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -216,8 +216,8 @@ extern char &SIPreEmitPeepholeID;
 void initializeSILateBranchLoweringPass(PassRegistry &);
 extern char &SILateBranchLoweringPassID;
 
-void initializeSIOptimizeExecMaskingPass(PassRegistry &);
-extern char &SIOptimizeExecMaskingID;
+void initializeSIOptimizeExecMaskingLegacyPass(PassRegistry &);
+extern char &SIOptimizeExecMaskingLegacyID;
 
 void initializeSIPreAllocateWWMRegsLegacyPass(PassRegistry &);
 extern char &SIPreAllocateWWMRegsLegacyID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index fbcf83e2fdd60..09a39d23d801b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -105,6 +105,7 @@ MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
 MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass())
 MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
 MACHINE_FUNCTION_PASS("si-opt-vgpr-liverange", SIOptimizeVGPRLiveRangePass())
+MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
 MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
 MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
 MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 98268b848f5ce..53ec80b8f7204 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -42,6 +42,7 @@
 #include "SILowerSGPRSpills.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIMachineScheduler.h"
+#include "SIOptimizeExecMasking.h"
 #include "SIOptimizeVGPRLiveRange.h"
 #include "SIPeepholeSDWA.h"
 #include "SIPreAllocateWWMRegs.h"
@@ -528,7 +529,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeSIPreEmitPeepholePass(*PR);
   initializeSILateBranchLoweringPass(*PR);
   initializeSIMemoryLegalizerPass(*PR);
-  initializeSIOptimizeExecMaskingPass(*PR);
+  initializeSIOptimizeExecMaskingLegacyPass(*PR);
   initializeSIPreAllocateWWMRegsLegacyPass(*PR);
   initializeSIFormMemoryClausesPass(*PR);
   initializeSIPostRABundlerPass(*PR);
@@ -1634,7 +1635,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
 void GCNPassConfig::addPostRegAlloc() {
   addPass(&SIFixVGPRCopiesID);
   if (getOptLevel() > CodeGenOptLevel::None)
-    addPass(&SIOptimizeExecMaskingID);
+    addPass(&SIOptimizeExecMaskingLegacyID);
   TargetPassConfig::addPostRegAlloc();
 }
 
@@ -2105,6 +2106,13 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization(
   addPass(SIShrinkInstructionsPass());
 }
 
+void AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const {
+  // addPass(SIFixVGPRCopiesID);
+  if (TM.getOptLevel() > CodeGenOptLevel::None)
+    addPass(SIOptimizeExecMaskingPass());
+  Base::addPostRegAlloc(addPass);
+}
+
 bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
                                              CodeGenOptLevel Level) const {
   if (Opt.getNumOccurrences())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 5ba58a92621ed..24b4da3a68f67 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -176,6 +176,7 @@ class AMDGPUCodeGenPassBuilder
   void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
   Error addInstSelector(AddMachinePass &) const;
   void addMachineSSAOptimization(AddMachinePass &) const;
+  void addPostRegAlloc(AddMachinePass &) const;
 
   /// Check if a pass is enabled given \p Opt option. The option always
   /// overrides defaults if explicitly used. Otherwise its default will be used
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index 93b70fa4ba974..3fb8d5b560496 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "SIOptimizeExecMasking.h"
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -23,7 +24,7 @@ using namespace llvm;
 
 namespace {
 
-class SIOptimizeExecMasking : public MachineFunctionPass {
+class SIOptimizeExecMasking {
   MachineFunction *MF = nullptr;
   const GCNSubtarget *ST = nullptr;
   const SIRegisterInfo *TRI = nullptr;
@@ -61,11 +62,16 @@ class SIOptimizeExecMasking : public MachineFunctionPass {
   void tryRecordOrSaveexecXorSequence(MachineInstr &MI);
   bool optimizeOrSaveexecXorSequences();
 
+public:
+  bool run(MachineFunction &MF);
+};
+
+class SIOptimizeExecMaskingLegacy : public MachineFunctionPass {
 public:
   static char ID;
 
-  SIOptimizeExecMasking() : MachineFunctionPass(ID) {
-    initializeSIOptimizeExecMaskingPass(*PassRegistry::getPassRegistry());
+  SIOptimizeExecMaskingLegacy() : MachineFunctionPass(ID) {
+    initializeSIOptimizeExecMaskingLegacyPass(*PassRegistry::getPassRegistry());
   }
 
   bool runOnMachineFunction(MachineFunction &MF) override;
@@ -82,15 +88,28 @@ class SIOptimizeExecMasking : public MachineFunctionPass {
 
 } // End anonymous namespace.
 
-INITIALIZE_PASS_BEGIN(SIOptimizeExecMasking, DEBUG_TYPE,
+PreservedAnalyses
+SIOptimizeExecMaskingPass::run(MachineFunction &MF,
+                               MachineFunctionAnalysisManager &) {
+  SIOptimizeExecMasking Impl;
+
+  if (!Impl.run(MF))
+    return PreservedAnalyses::all();
+
+  auto PA = getMachineFunctionPassPreservedAnalyses();
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}
+
+INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingLegacy, DEBUG_TYPE,
                       "SI optimize exec mask operations", false, false)
 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
-INITIALIZE_PASS_END(SIOptimizeExecMasking, DEBUG_TYPE,
+INITIALIZE_PASS_END(SIOptimizeExecMaskingLegacy, DEBUG_TYPE,
                     "SI optimize exec mask operations", false, false)
 
-char SIOptimizeExecMasking::ID = 0;
+char SIOptimizeExecMaskingLegacy::ID = 0;
 
-char &llvm::SIOptimizeExecMaskingID = SIOptimizeExecMasking::ID;
+char &llvm::SIOptimizeExecMaskingLegacyID = SIOptimizeExecMaskingLegacy::ID;
 
 /// If \p MI is a copy from exec, return the register copied to.
 Register SIOptimizeExecMasking::isCopyFromExec(const MachineInstr &MI) const {
@@ -786,10 +805,14 @@ bool SIOptimizeExecMasking::optimizeOrSaveexecXorSequences() {
   return Changed;
 }
 
-bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
+bool SIOptimizeExecMaskingLegacy::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
 
+  return SIOptimizeExecMasking().run(MF);
+}
+
+bool SIOptimizeExecMasking::run(MachineFunction &MF) {
   this->MF = &MF;
   ST = &MF.getSubtarget<GCNSubtarget>();
   TRI = ST->getRegisterInfo();
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.h b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.h
new file mode 100644
index 0000000000000..f170a4733279b
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.h
@@ -0,0 +1,23 @@
+//===- SIOptimizeExecMasking.h ----------------------------------*- C++- *-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKING_H
+#define LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKING_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class SIOptimizeExecMaskingPass
+    : public PassInfoMixin<SIOptimizeExecMaskingPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKING_H
diff --git a/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir b/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir
index 8fb3e9ea3609b..acc9bf78a3401 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir
+++ b/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir
@@ -2,6 +2,8 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck %s
 
 ---
 name: lower_term_opcodes
 tracksRegLiveness: false
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-copies-extra-insts-after-copy.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-copies-extra-insts-after-copy.mir
index ada5cfd5668ee..f2534a93da302 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-exec-copies-extra-insts-after-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-copies-extra-insts-after-copy.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=si-optimize-exec-masking -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -passes=si-optimize-exec-masking -o - %s | FileCheck %s
 
 # Make sure we can still optimize writes to exec when there are
 # additional terminators after the exec write. This can happen with
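
The patch follows the usual shape for these new-pass-manager ports: the transformation logic lives in a plain `SIOptimizeExecMasking` implementation class, and two thin wrappers (the legacy `MachineFunctionPass` and the new-PM `PassInfoMixin` pass) both delegate to its `run(MachineFunction &)`. With the `MACHINE_FUNCTION_PASS("si-optimize-exec-masking", ...)` registry entry added above, the new pass is reachable via `llc -passes=si-optimize-exec-masking`, which is exactly what the updated MIR tests exercise. Below is a minimal generic sketch of that wrapper shape, using hypothetical `FooImpl`/`FooPass` names that are not part of the patch; the preserved-analyses handling mirrors what `SIOptimizeExecMaskingPass::run` does above.

```cpp
#include "llvm/CodeGen/MachinePassManager.h" // MachineFunctionAnalysisManager,
                                             // getMachineFunctionPassPreservedAnalyses()
#include "llvm/IR/Analysis.h"                // CFGAnalyses

namespace llvm {

// Shared implementation class: no pass-manager coupling, so both the legacy
// wrapper and the new-PM wrapper can reuse it. The body of run() (the actual
// transformation) is defined elsewhere and omitted from this sketch.
class FooImpl {
public:
  bool run(MachineFunction &MF); // returns true iff MF was modified
};

// New-PM wrapper (hypothetical name), same shape as SIOptimizeExecMaskingPass.
class FooPass : public PassInfoMixin<FooPass> {
public:
  PreservedAnalyses run(MachineFunction &MF,
                        MachineFunctionAnalysisManager &) {
    if (!FooImpl().run(MF))
      return PreservedAnalyses::all(); // nothing changed: keep all analyses

    // Something changed: report the machine-pass defaults and additionally
    // mark CFG-only analyses as preserved, as the run() above does.
    auto PA = getMachineFunctionPassPreservedAnalyses();
    PA.preserveSet<CFGAnalyses>();
    return PA;
  }
};

} // namespace llvm
```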