Skip to content

Commit cf54cae

Browse files
authored
AMDGPU/NewPM: Port SIFixSGPRCopies to new pass manager (#102614)
This allows some tests that rely on -stop-after=amdgpu-isel to check -stop-after=finalize-isel instead, which will more reliably pass the verifier.
1 parent 5bc1f9e commit cf54cae

8 files changed

+104
-32
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ FunctionPass *createSIWholeQuadModePass();
4343
FunctionPass *createSIFixControlFlowLiveIntervalsPass();
4444
FunctionPass *createSIOptimizeExecMaskingPreRAPass();
4545
FunctionPass *createSIOptimizeVGPRLiveRangePass();
46-
FunctionPass *createSIFixSGPRCopiesPass();
46+
FunctionPass *createSIFixSGPRCopiesLegacyPass();
4747
FunctionPass *createLowerWWMCopiesPass();
4848
FunctionPass *createSIMemoryLegalizerPass();
4949
FunctionPass *createSIInsertWaitcntsPass();
@@ -164,8 +164,8 @@ extern char &SIPeepholeSDWAID;
164164
void initializeSIShrinkInstructionsPass(PassRegistry&);
165165
extern char &SIShrinkInstructionsID;
166166

167-
void initializeSIFixSGPRCopiesPass(PassRegistry &);
168-
extern char &SIFixSGPRCopiesID;
167+
void initializeSIFixSGPRCopiesLegacyPass(PassRegistry &);
168+
extern char &SIFixSGPRCopiesLegacyID;
169169

170170
void initializeSIFixVGPRCopiesPass(PassRegistry &);
171171
extern char &SIFixVGPRCopiesID;

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "AMDGPUCodeGenPassBuilder.h"
1010
#include "AMDGPUISelDAGToDAG.h"
1111
#include "AMDGPUTargetMachine.h"
12+
#include "SIFixSGPRCopies.h"
1213
#include "llvm/Analysis/UniformityAnalysis.h"
1314

1415
using namespace llvm;
@@ -38,5 +39,6 @@ void AMDGPUCodeGenPassBuilder::addAsmPrinter(AddMachinePass &addPass,
3839

3940
Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const {
4041
addPass(AMDGPUISelDAGToDAGPass(TM));
42+
addPass(SIFixSGPRCopiesPass());
4143
return Error::success();
4244
}

llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,4 +76,5 @@ FUNCTION_PASS_WITH_PARAMS(
7676
#define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
7777
#endif
7878
MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
79+
MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
7980
#undef MACHINE_FUNCTION_PASS

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "R600.h"
3333
#include "R600MachineFunctionInfo.h"
3434
#include "R600TargetMachine.h"
35+
#include "SIFixSGPRCopies.h"
3536
#include "SIMachineFunctionInfo.h"
3637
#include "SIMachineScheduler.h"
3738
#include "TargetInfo/AMDGPUTargetInfo.h"
@@ -399,7 +400,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
399400
initializeSILowerWWMCopiesPass(*PR);
400401
initializeAMDGPUMarkLastScratchLoadPass(*PR);
401402
initializeSILowerSGPRSpillsPass(*PR);
402-
initializeSIFixSGPRCopiesPass(*PR);
403+
initializeSIFixSGPRCopiesLegacyPass(*PR);
403404
initializeSIFixVGPRCopiesPass(*PR);
404405
initializeSIFoldOperandsPass(*PR);
405406
initializeSIPeepholeSDWAPass(*PR);
@@ -1268,7 +1269,7 @@ bool GCNPassConfig::addILPOpts() {
12681269

12691270
bool GCNPassConfig::addInstSelector() {
12701271
AMDGPUPassConfig::addInstSelector();
1271-
addPass(&SIFixSGPRCopiesID);
1272+
addPass(&SIFixSGPRCopiesLegacyID);
12721273
addPass(createSILowerI1CopiesPass());
12731274
return false;
12741275
}

llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp

Lines changed: 41 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
/// ultimately led to the creation of an illegal COPY.
6565
//===----------------------------------------------------------------------===//
6666

67+
#include "SIFixSGPRCopies.h"
6768
#include "AMDGPU.h"
6869
#include "GCNSubtarget.h"
6970
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -118,7 +119,7 @@ class V2SCopyInfo {
118119
#endif
119120
};
120121

121-
class SIFixSGPRCopies : public MachineFunctionPass {
122+
class SIFixSGPRCopies {
122123
MachineDominatorTree *MDT;
123124
SmallVector<MachineInstr*, 4> SCCCopies;
124125
SmallVector<MachineInstr*, 4> RegSequences;
@@ -129,15 +130,13 @@ class SIFixSGPRCopies : public MachineFunctionPass {
129130
DenseMap<MachineInstr *, SetVector<unsigned>> SiblingPenalty;
130131

131132
public:
132-
static char ID;
133-
134133
MachineRegisterInfo *MRI;
135134
const SIRegisterInfo *TRI;
136135
const SIInstrInfo *TII;
137136

138-
SIFixSGPRCopies() : MachineFunctionPass(ID) {}
137+
SIFixSGPRCopies(MachineDominatorTree *MDT) : MDT(MDT) {}
139138

140-
bool runOnMachineFunction(MachineFunction &MF) override;
139+
bool run(MachineFunction &MF);
141140
void fixSCCCopies(MachineFunction &MF);
142141
void prepareRegSequenceAndPHIs(MachineFunction &MF);
143142
unsigned getNextVGPRToSGPRCopyId() { return ++NextVGPRToSGPRCopyID; }
@@ -158,6 +157,20 @@ class SIFixSGPRCopies : public MachineFunctionPass {
158157
bool tryMoveVGPRConstToSGPR(MachineOperand &MO, Register NewDst,
159158
MachineBasicBlock *BlockToInsertTo,
160159
MachineBasicBlock::iterator PointToInsertTo);
160+
};
161+
162+
class SIFixSGPRCopiesLegacy : public MachineFunctionPass {
163+
public:
164+
static char ID;
165+
166+
SIFixSGPRCopiesLegacy() : MachineFunctionPass(ID) {}
167+
168+
bool runOnMachineFunction(MachineFunction &MF) override {
169+
MachineDominatorTree *MDT =
170+
&getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
171+
SIFixSGPRCopies Impl(MDT);
172+
return Impl.run(MF);
173+
}
161174

162175
StringRef getPassName() const override { return "SI Fix SGPR copies"; }
163176

@@ -171,18 +184,18 @@ class SIFixSGPRCopies : public MachineFunctionPass {
171184

172185
} // end anonymous namespace
173186

174-
INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE,
175-
"SI Fix SGPR copies", false, false)
187+
INITIALIZE_PASS_BEGIN(SIFixSGPRCopiesLegacy, DEBUG_TYPE, "SI Fix SGPR copies",
188+
false, false)
176189
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
177-
INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE,
178-
"SI Fix SGPR copies", false, false)
190+
INITIALIZE_PASS_END(SIFixSGPRCopiesLegacy, DEBUG_TYPE, "SI Fix SGPR copies",
191+
false, false)
179192

180-
char SIFixSGPRCopies::ID = 0;
193+
char SIFixSGPRCopiesLegacy::ID = 0;
181194

182-
char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID;
195+
char &llvm::SIFixSGPRCopiesLegacyID = SIFixSGPRCopiesLegacy::ID;
183196

184-
FunctionPass *llvm::createSIFixSGPRCopiesPass() {
185-
return new SIFixSGPRCopies();
197+
FunctionPass *llvm::createSIFixSGPRCopiesLegacyPass() {
198+
return new SIFixSGPRCopiesLegacy();
186199
}
187200

188201
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
@@ -602,7 +615,7 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
602615
return Changed;
603616
}
604617

605-
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
618+
bool SIFixSGPRCopies::run(MachineFunction &MF) {
606619
// Only need to run this in SelectionDAG path.
607620
if (MF.getProperties().hasProperty(
608621
MachineFunctionProperties::Property::Selected))
@@ -612,7 +625,6 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
612625
MRI = &MF.getRegInfo();
613626
TRI = ST.getRegisterInfo();
614627
TII = ST.getInstrInfo();
615-
MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
616628

617629
for (MachineBasicBlock &MBB : MF) {
618630
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;
@@ -1133,3 +1145,17 @@ void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) {
11331145
}
11341146
}
11351147
}
1148+
1149+
PreservedAnalyses
1150+
SIFixSGPRCopiesPass::run(MachineFunction &MF,
1151+
MachineFunctionAnalysisManager &MFAM) {
1152+
MachineDominatorTree &MDT = MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
1153+
SIFixSGPRCopies Impl(&MDT);
1154+
bool Changed = Impl.run(MF);
1155+
if (!Changed)
1156+
return PreservedAnalyses::all();
1157+
1158+
// TODO: We could detect whether the CFG changed.
1159+
auto PA = getMachineFunctionPassPreservedAnalyses();
1160+
return PA;
1161+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
//===- SIFixSGPRCopies.h ----------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIB_TARGET_AMDGPU_SIFIXSGPRCOPIES_H
10+
#define LLVM_LIB_TARGET_AMDGPU_SIFIXSGPRCOPIES_H
11+
12+
#include "llvm/CodeGen/MachinePassManager.h"
13+
14+
namespace llvm {
15+
16+
class SIFixSGPRCopiesPass : public PassInfoMixin<SIFixSGPRCopiesPass> {
17+
public:
18+
SIFixSGPRCopiesPass() = default;
19+
PreservedAnalyses run(MachineFunction &MF,
20+
MachineFunctionAnalysisManager &MFAM);
21+
};
22+
23+
} // namespace llvm
24+
25+
#endif // LLVM_LIB_TARGET_AMDGPU_SIFIXSGPRCOPIES_H

llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
3-
; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
4-
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
5-
; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=finalize-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
3+
; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=finalize-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
4+
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=finalize-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
5+
; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -enable-new-pm -stop-after=finalize-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
66

77
define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_intrinsic(ptr %ptr, double %data) {
88
; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_no_rtn_intrinsic
@@ -13,8 +13,12 @@ define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_intrinsic(ptr %ptr, double %d
1313
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1414
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1515
; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
16-
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
17-
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
16+
; GFX90A_GFX940-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
17+
; GFX90A_GFX940-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
18+
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
19+
; GFX90A_GFX940-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
20+
; GFX90A_GFX940-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
21+
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
1822
; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
1923
; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
2024
; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s64) on %ir.ptr)
@@ -32,8 +36,12 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_intrinsic(ptr %ptr, double %da
3236
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3337
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3438
; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
35-
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
36-
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
39+
; GFX90A_GFX940-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
40+
; GFX90A_GFX940-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
41+
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
42+
; GFX90A_GFX940-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
43+
; GFX90A_GFX940-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
44+
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
3745
; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
3846
; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
3947
; GFX90A_GFX940-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s64) on %ir.ptr)
@@ -55,8 +63,12 @@ define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %d
5563
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
5664
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
5765
; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
58-
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
59-
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
66+
; GFX90A_GFX940-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
67+
; GFX90A_GFX940-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
68+
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
69+
; GFX90A_GFX940-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
70+
; GFX90A_GFX940-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
71+
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
6072
; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
6173
; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
6274
; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
@@ -74,8 +86,12 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %da
7486
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
7587
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
7688
; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
77-
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
78-
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
89+
; GFX90A_GFX940-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
90+
; GFX90A_GFX940-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
91+
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
92+
; GFX90A_GFX940-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
93+
; GFX90A_GFX940-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
94+
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
7995
; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
8096
; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
8197
; GFX90A_GFX940-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)

llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# RUN: llc -mtriple=amdgcn -run-pass si-fix-sgpr-copies %s -o - | FileCheck %s -check-prefixes=GCN
2+
# RUN: llc -mtriple=amdgcn -passes=si-fix-sgpr-copies %s -o - | FileCheck %s -check-prefixes=GCN
23

34
---
45

0 commit comments

Comments
 (0)