Skip to content

Commit f5d2996

Browse files
authored
AMDGPU: Fix trying to query end iterator for DebugLoc (#129886)
1 parent 760eeac commit f5d2996

File tree

3 files changed

+103
-8
lines changed

3 files changed

+103
-8
lines changed

llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -155,7 +155,8 @@ class SIFixSGPRCopies {
155155
// have any other uses.
156156
bool tryMoveVGPRConstToSGPR(MachineOperand &MO, Register NewDst,
157157
MachineBasicBlock *BlockToInsertTo,
158-
MachineBasicBlock::iterator PointToInsertTo);
158+
MachineBasicBlock::iterator PointToInsertTo,
159+
const DebugLoc &DL);
159160
};
160161

161162
class SIFixSGPRCopiesLegacy : public MachineFunctionPass {
@@ -682,11 +683,11 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) {
682683
MachineBasicBlock::iterator PointToInsertCopy =
683684
MI.isPHI() ? BlockToInsertCopy->getFirstInstrTerminator() : I;
684685

686+
const DebugLoc &DL = MI.getDebugLoc();
685687
if (!tryMoveVGPRConstToSGPR(MO, NewDst, BlockToInsertCopy,
686-
PointToInsertCopy)) {
688+
PointToInsertCopy, DL)) {
687689
MachineInstr *NewCopy =
688-
BuildMI(*BlockToInsertCopy, PointToInsertCopy,
689-
PointToInsertCopy->getDebugLoc(),
690+
BuildMI(*BlockToInsertCopy, PointToInsertCopy, DL,
690691
TII->get(AMDGPU::COPY), NewDst)
691692
.addReg(MO.getReg());
692693
MO.setReg(NewDst);
@@ -855,7 +856,7 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
855856
bool SIFixSGPRCopies::tryMoveVGPRConstToSGPR(
856857
MachineOperand &MaybeVGPRConstMO, Register DstReg,
857858
MachineBasicBlock *BlockToInsertTo,
858-
MachineBasicBlock::iterator PointToInsertTo) {
859+
MachineBasicBlock::iterator PointToInsertTo, const DebugLoc &DL) {
859860

860861
MachineInstr *DefMI = MRI->getVRegDef(MaybeVGPRConstMO.getReg());
861862
if (!DefMI || !DefMI->isMoveImmediate())
@@ -869,8 +870,7 @@ bool SIFixSGPRCopies::tryMoveVGPRConstToSGPR(
869870
MRI->getRegClass(MaybeVGPRConstMO.getReg());
870871
unsigned MoveSize = TRI->getRegSizeInBits(*SrcRC);
871872
unsigned MoveOp = MoveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
872-
BuildMI(*BlockToInsertTo, PointToInsertTo, PointToInsertTo->getDebugLoc(),
873-
TII->get(MoveOp), DstReg)
873+
BuildMI(*BlockToInsertTo, PointToInsertTo, DL, TII->get(MoveOp), DstReg)
874874
.add(*SrcConst);
875875
if (MRI->hasOneUse(MaybeVGPRConstMO.getReg()))
876876
DefMI->eraseFromParent();
@@ -896,7 +896,7 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI,
896896
.add(MI.getOperand(1));
897897
MI.getOperand(1).setReg(TmpReg);
898898
} else if (tryMoveVGPRConstToSGPR(MI.getOperand(1), DstReg, MI.getParent(),
899-
MI)) {
899+
MI, MI.getDebugLoc())) {
900900
I = std::next(I);
901901
MI.eraseFromParent();
902902
}
Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,49 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
3+
4+
define i32 @rocrand_regression(ptr addrspace(1) %arg, i32 %arg0, i1 %cmp7) {
5+
; CHECK-LABEL: rocrand_regression:
6+
; CHECK: ; %bb.0: ; %entry
7+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8+
; CHECK-NEXT: v_and_b32_e32 v0, 1, v3
9+
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
10+
; CHECK-NEXT: s_xor_b64 s[4:5], vcc, -1
11+
; CHECK-NEXT: s_mov_b32 s8, 0
12+
; CHECK-NEXT: .LBB0_1: ; %do.body
13+
; CHECK-NEXT: ; =>This Loop Header: Depth=1
14+
; CHECK-NEXT: ; Child Loop BB0_2 Depth 2
15+
; CHECK-NEXT: s_mov_b64 s[6:7], 0
16+
; CHECK-NEXT: .LBB0_2: ; %while.cond
17+
; CHECK-NEXT: ; Parent Loop BB0_1 Depth=1
18+
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
19+
; CHECK-NEXT: s_and_b64 s[10:11], exec, s[4:5]
20+
; CHECK-NEXT: s_or_b64 s[6:7], s[10:11], s[6:7]
21+
; CHECK-NEXT: s_andn2_b64 exec, exec, s[6:7]
22+
; CHECK-NEXT: s_cbranch_execnz .LBB0_2
23+
; CHECK-NEXT: ; %bb.3: ; %do.cond
24+
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
25+
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
26+
; CHECK-NEXT: s_or_b32 s8, s8, 1
27+
; CHECK-NEXT: s_cbranch_execnz .LBB0_1
28+
; CHECK-NEXT: ; %bb.4: ; %DummyReturnBlock
29+
; CHECK-NEXT: s_setpc_b64 s[30:31]
30+
entry:
31+
br label %do.body
32+
33+
do.body: ; preds = %do.cond, %entry
34+
%phi.0 = phi i32 [ %arg0, %do.cond ], [ 0, %entry ]
35+
%phi.1 = phi i32 [ %add6, %do.cond ], [ 0, %entry ]
36+
%add6 = or i32 %phi.1, 1
37+
store i32 %phi.1, ptr addrspace(1) %arg, align 4
38+
br label %while.cond
39+
40+
while.cond: ; preds = %while.cond, %do.body
41+
%phi.2 = phi i32 [ %phi.0, %do.body ], [ 0, %while.cond ]
42+
br i1 %cmp7, label %while.cond, label %do.cond
43+
44+
do.cond: ; preds = %while.cond
45+
br i1 true, label %do.body, label %do.end
46+
47+
do.end: ; preds = %do.cond
48+
ret i32 %phi.2
49+
}

llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir

Lines changed: 46 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -89,3 +89,49 @@ body: |
8989
S_ENDPGM 0, implicit %1
9090
9191
...
92+
93+
# GCN-LABEL: name: find_debug_loc_end_iterator_regression
94+
# GCN: %6:vreg_1 = COPY %4
95+
# GCN: %14:sgpr_32 = S_MOV_B32 0
96+
97+
# GCN: %7:vgpr_32 = PHI %5, %bb.0, %1, %bb.3
98+
# GCN: %8:sreg_32 = PHI %14, %bb.0, %9, %bb.3
99+
100+
# GCN: %11:sreg_64 = PHI %10, %bb.1, %12, %bb.2
101+
# GCN: %13:sreg_64 = COPY %6
102+
---
103+
name: find_debug_loc_end_iterator_regression
104+
tracksRegLiveness: true
105+
body: |
106+
bb.0:
107+
liveins: $vgpr2, $vgpr3
108+
109+
%0:vgpr_32 = COPY $vgpr3
110+
%1:vgpr_32 = COPY $vgpr2
111+
%2:sreg_64 = V_CMP_EQ_U32_e64 killed %0, 1, implicit $exec
112+
%3:sreg_64 = S_MOV_B64 -1
113+
%4:sreg_64 = S_XOR_B64 killed %2, killed %3, implicit-def dead $scc
114+
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
115+
%6:vreg_1 = COPY %4
116+
117+
bb.1:
118+
%7:vgpr_32 = PHI %5, %bb.0, %1, %bb.3
119+
%8:sreg_32 = PHI %5, %bb.0, %9, %bb.3
120+
%10:sreg_64 = S_MOV_B64 0
121+
122+
bb.2:
123+
%11:sreg_64 = PHI %10, %bb.1, %12, %bb.2
124+
%13:sreg_64 = COPY %6
125+
%12:sreg_64 = SI_IF_BREAK %13, %11, implicit-def dead $scc
126+
SI_LOOP %12, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
127+
S_BRANCH %bb.3
128+
129+
bb.3:
130+
%9:sreg_32 = S_OR_B32 %8, 1, implicit-def dead $scc
131+
S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
132+
S_BRANCH %bb.4
133+
134+
bb.4:
135+
SI_RETURN
136+
137+
...

0 commit comments

Comments
 (0)