-
Notifications
You must be signed in to change notification settings - Fork 13.6k
AMDGPU: Fix trying to query end iterator for DebugLoc #129886
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Fix trying to query end iterator for DebugLoc #129886
Conversation
When inserting instructions by iterator, you always need the separate DebugLoc argument. This fixes a reported regression after 39bf765.
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesWhen inserting instructions by iterator, you always need the separate Full diff: https://github.com/llvm/llvm-project/pull/129886.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 52d00485385c2..4342e7a369c13 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -155,7 +155,8 @@ class SIFixSGPRCopies {
// have any other uses.
bool tryMoveVGPRConstToSGPR(MachineOperand &MO, Register NewDst,
MachineBasicBlock *BlockToInsertTo,
- MachineBasicBlock::iterator PointToInsertTo);
+ MachineBasicBlock::iterator PointToInsertTo,
+ const DebugLoc &DL);
};
class SIFixSGPRCopiesLegacy : public MachineFunctionPass {
@@ -682,11 +683,11 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) {
MachineBasicBlock::iterator PointToInsertCopy =
MI.isPHI() ? BlockToInsertCopy->getFirstInstrTerminator() : I;
+ const DebugLoc &DL = MI.getDebugLoc();
if (!tryMoveVGPRConstToSGPR(MO, NewDst, BlockToInsertCopy,
- PointToInsertCopy)) {
+ PointToInsertCopy, DL)) {
MachineInstr *NewCopy =
- BuildMI(*BlockToInsertCopy, PointToInsertCopy,
- PointToInsertCopy->getDebugLoc(),
+ BuildMI(*BlockToInsertCopy, PointToInsertCopy, DL,
TII->get(AMDGPU::COPY), NewDst)
.addReg(MO.getReg());
MO.setReg(NewDst);
@@ -855,7 +856,7 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
bool SIFixSGPRCopies::tryMoveVGPRConstToSGPR(
MachineOperand &MaybeVGPRConstMO, Register DstReg,
MachineBasicBlock *BlockToInsertTo,
- MachineBasicBlock::iterator PointToInsertTo) {
+ MachineBasicBlock::iterator PointToInsertTo, const DebugLoc &DL) {
MachineInstr *DefMI = MRI->getVRegDef(MaybeVGPRConstMO.getReg());
if (!DefMI || !DefMI->isMoveImmediate())
@@ -869,8 +870,7 @@ bool SIFixSGPRCopies::tryMoveVGPRConstToSGPR(
MRI->getRegClass(MaybeVGPRConstMO.getReg());
unsigned MoveSize = TRI->getRegSizeInBits(*SrcRC);
unsigned MoveOp = MoveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
- BuildMI(*BlockToInsertTo, PointToInsertTo, PointToInsertTo->getDebugLoc(),
- TII->get(MoveOp), DstReg)
+ BuildMI(*BlockToInsertTo, PointToInsertTo, DL, TII->get(MoveOp), DstReg)
.add(*SrcConst);
if (MRI->hasOneUse(MaybeVGPRConstMO.getReg()))
DefMI->eraseFromParent();
@@ -896,7 +896,7 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI,
.add(MI.getOperand(1));
MI.getOperand(1).setReg(TmpReg);
} else if (tryMoveVGPRConstToSGPR(MI.getOperand(1), DstReg, MI.getParent(),
- MI)) {
+ MI, MI.getDebugLoc())) {
I = std::next(I);
MI.eraseFromParent();
}
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-phi-block-end-iterator-debugloc.ll b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-phi-block-end-iterator-debugloc.ll
new file mode 100644
index 0000000000000..13184cf17a2e5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-phi-block-end-iterator-debugloc.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+
+define i32 @rocrand_regression(ptr addrspace(1) %arg, i32 %arg0, i1 %cmp7) {
+; CHECK-LABEL: rocrand_regression:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_and_b32_e32 v0, 1, v3
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
+; CHECK-NEXT: s_xor_b64 s[4:5], vcc, -1
+; CHECK-NEXT: s_mov_b32 s8, 0
+; CHECK-NEXT: .LBB0_1: ; %do.body
+; CHECK-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NEXT: ; Child Loop BB0_2 Depth 2
+; CHECK-NEXT: s_mov_b64 s[6:7], 0
+; CHECK-NEXT: .LBB0_2: ; %while.cond
+; CHECK-NEXT: ; Parent Loop BB0_1 Depth=1
+; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NEXT: s_and_b64 s[10:11], exec, s[4:5]
+; CHECK-NEXT: s_or_b64 s[6:7], s[10:11], s[6:7]
+; CHECK-NEXT: s_andn2_b64 exec, exec, s[6:7]
+; CHECK-NEXT: s_cbranch_execnz .LBB0_2
+; CHECK-NEXT: ; %bb.3: ; %do.cond
+; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
+; CHECK-NEXT: s_or_b32 s8, s8, 1
+; CHECK-NEXT: s_cbranch_execnz .LBB0_1
+; CHECK-NEXT: ; %bb.4: ; %DummyReturnBlock
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+entry:
+ br label %do.body
+
+do.body: ; preds = %do.cond, %entry
+ %phi.0 = phi i32 [ %arg0, %do.cond ], [ 0, %entry ]
+ %phi.1 = phi i32 [ %add6, %do.cond ], [ 0, %entry ]
+ %add6 = or i32 %phi.1, 1
+ store i32 %phi.1, ptr addrspace(1) %arg, align 4
+ br label %while.cond
+
+while.cond: ; preds = %while.cond, %do.body
+ %phi.2 = phi i32 [ %phi.0, %do.body ], [ 0, %while.cond ]
+ br i1 %cmp7, label %while.cond, label %do.cond
+
+do.cond: ; preds = %while.cond
+ br i1 true, label %do.body, label %do.end
+
+do.end: ; preds = %do.cond
+ ret i32 %phi.2
+}
diff --git a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
index f1b88c7616298..767942b7dca7f 100644
--- a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
@@ -89,3 +89,49 @@ body: |
S_ENDPGM 0, implicit %1
...
+
+# GCN-LABEL: name: find_debug_loc_end_iterator_regression
+# GCN: %6:vreg_1 = COPY %4
+# GCN: %14:sgpr_32 = S_MOV_B32 0
+
+# GCN: %7:vgpr_32 = PHI %5, %bb.0, %1, %bb.3
+# GCN: %8:sreg_32 = PHI %14, %bb.0, %9, %bb.3
+
+# GCN: %11:sreg_64 = PHI %10, %bb.1, %12, %bb.2
+# GCN: %13:sreg_64 = COPY %6
+---
+name: find_debug_loc_end_iterator_regression
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr2, $vgpr3
+
+ %0:vgpr_32 = COPY $vgpr3
+ %1:vgpr_32 = COPY $vgpr2
+ %2:sreg_64 = V_CMP_EQ_U32_e64 killed %0, 1, implicit $exec
+ %3:sreg_64 = S_MOV_B64 -1
+ %4:sreg_64 = S_XOR_B64 killed %2, killed %3, implicit-def dead $scc
+ %5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %6:vreg_1 = COPY %4
+
+ bb.1:
+ %7:vgpr_32 = PHI %5, %bb.0, %1, %bb.3
+ %8:sreg_32 = PHI %5, %bb.0, %9, %bb.3
+ %10:sreg_64 = S_MOV_B64 0
+
+ bb.2:
+ %11:sreg_64 = PHI %10, %bb.1, %12, %bb.2
+ %13:sreg_64 = COPY %6
+ %12:sreg_64 = SI_IF_BREAK %13, %11, implicit-def dead $scc
+ SI_LOOP %12, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.3:
+ %9:sreg_32 = S_OR_B32 %8, 1, implicit-def dead $scc
+ S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
+ S_BRANCH %bb.4
+
+ bb.4:
+ SI_RETURN
+
+...
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, this is fixing CI issues.
When inserting instructions by iterator, you always need the separate
DebugLoc argument. This fixes a reported regression after
39bf765.