diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 52d00485385c2..4342e7a369c13 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -155,7 +155,8 @@ class SIFixSGPRCopies {
   // have any other uses.
   bool tryMoveVGPRConstToSGPR(MachineOperand &MO, Register NewDst,
                               MachineBasicBlock *BlockToInsertTo,
-                              MachineBasicBlock::iterator PointToInsertTo);
+                              MachineBasicBlock::iterator PointToInsertTo,
+                              const DebugLoc &DL);
 };
 
 class SIFixSGPRCopiesLegacy : public MachineFunctionPass {
@@ -682,11 +683,11 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) {
         MachineBasicBlock::iterator PointToInsertCopy =
             MI.isPHI() ? BlockToInsertCopy->getFirstInstrTerminator() : I;
 
+        const DebugLoc &DL = MI.getDebugLoc(); // PointToInsertCopy may be end()
         if (!tryMoveVGPRConstToSGPR(MO, NewDst, BlockToInsertCopy,
-                                    PointToInsertCopy)) {
+                                    PointToInsertCopy, DL)) {
           MachineInstr *NewCopy =
-              BuildMI(*BlockToInsertCopy, PointToInsertCopy,
-                      PointToInsertCopy->getDebugLoc(),
+              BuildMI(*BlockToInsertCopy, PointToInsertCopy, DL,
                       TII->get(AMDGPU::COPY), NewDst)
                   .addReg(MO.getReg());
           MO.setReg(NewDst);
@@ -855,7 +856,7 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
 bool SIFixSGPRCopies::tryMoveVGPRConstToSGPR(
     MachineOperand &MaybeVGPRConstMO, Register DstReg,
     MachineBasicBlock *BlockToInsertTo,
-    MachineBasicBlock::iterator PointToInsertTo) {
+    MachineBasicBlock::iterator PointToInsertTo, const DebugLoc &DL) {
 
   MachineInstr *DefMI = MRI->getVRegDef(MaybeVGPRConstMO.getReg());
   if (!DefMI || !DefMI->isMoveImmediate())
@@ -869,8 +870,7 @@ bool SIFixSGPRCopies::tryMoveVGPRConstToSGPR(
         MRI->getRegClass(MaybeVGPRConstMO.getReg());
     unsigned MoveSize = TRI->getRegSizeInBits(*SrcRC);
     unsigned MoveOp = MoveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
-    BuildMI(*BlockToInsertTo, PointToInsertTo, PointToInsertTo->getDebugLoc(),
-            TII->get(MoveOp), DstReg)
+    BuildMI(*BlockToInsertTo, PointToInsertTo, DL, TII->get(MoveOp), DstReg)
         .add(*SrcConst);
     if (MRI->hasOneUse(MaybeVGPRConstMO.getReg()))
       DefMI->eraseFromParent();
@@ -896,7 +896,7 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI,
           .add(MI.getOperand(1));
       MI.getOperand(1).setReg(TmpReg);
     } else if (tryMoveVGPRConstToSGPR(MI.getOperand(1), DstReg, MI.getParent(),
-                                      MI)) {
+                                      MI, MI.getDebugLoc())) {
       I = std::next(I);
       MI.eraseFromParent();
     }
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-phi-block-end-iterator-debugloc.ll b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-phi-block-end-iterator-debugloc.ll
new file mode 100644
index 0000000000000..13184cf17a2e5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-phi-block-end-iterator-debugloc.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+; Regression test (per the file name): SIFixSGPRCopies DebugLoc lookup at a PHI block-end iterator.
+define i32 @rocrand_regression(ptr addrspace(1) %arg, i32 %arg0, i1 %cmp7) {
+; CHECK-LABEL: rocrand_regression:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_and_b32_e32 v0, 1, v3
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
+; CHECK-NEXT: s_xor_b64 s[4:5], vcc, -1
+; CHECK-NEXT: s_mov_b32 s8, 0
+; CHECK-NEXT: .LBB0_1: ; %do.body
+; CHECK-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NEXT: ; Child Loop BB0_2 Depth 2
+; CHECK-NEXT: s_mov_b64 s[6:7], 0
+; CHECK-NEXT: .LBB0_2: ; %while.cond
+; CHECK-NEXT: ; Parent Loop BB0_1 Depth=1
+; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NEXT: s_and_b64 s[10:11], exec, s[4:5]
+; CHECK-NEXT: s_or_b64 s[6:7], s[10:11], s[6:7]
+; CHECK-NEXT: s_andn2_b64 exec, exec, s[6:7]
+; CHECK-NEXT: s_cbranch_execnz .LBB0_2
+; CHECK-NEXT: ; %bb.3: ; %do.cond
+; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
+; CHECK-NEXT: s_or_b32 s8, s8, 1
+; CHECK-NEXT: s_cbranch_execnz .LBB0_1
+; CHECK-NEXT: ; %bb.4: ; %DummyReturnBlock
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+entry:
+  br label %do.body
+
+do.body: ; preds = %do.cond, %entry
+  %phi.0 = phi i32 [ %arg0, %do.cond ], [ 0, %entry ]
+  %phi.1 = phi i32 [ %add6, %do.cond ], [ 0, %entry ]
+  %add6 = or i32 %phi.1, 1
+  store i32 %phi.1, ptr addrspace(1) %arg, align 4
+  br label %while.cond
+
+while.cond: ; preds = %while.cond, %do.body
+  %phi.2 = phi i32 [ %phi.0, %do.body ], [ 0, %while.cond ]
+  br i1 %cmp7, label %while.cond, label %do.cond
+
+do.cond: ; preds = %while.cond
+  br i1 true, label %do.body, label %do.end
+
+do.end: ; preds = %do.cond
+  ret i32 %phi.2
+}
diff --git a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
index f1b88c7616298..767942b7dca7f 100644
--- a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
@@ -89,3 +89,49 @@ body: |
     S_ENDPGM 0, implicit %1
 
 ...
+# Regression test: SGPR PHI %8 reads the VGPR constant %5 from bb.0, which ends without a terminator.
+# GCN-LABEL: name: find_debug_loc_end_iterator_regression
+# GCN: %6:vreg_1 = COPY %4
+# GCN: %14:sgpr_32 = S_MOV_B32 0
+
+# GCN: %7:vgpr_32 = PHI %5, %bb.0, %1, %bb.3
+# GCN: %8:sreg_32 = PHI %14, %bb.0, %9, %bb.3
+
+# GCN: %11:sreg_64 = PHI %10, %bb.1, %12, %bb.2
+# GCN: %13:sreg_64 = COPY %6
+---
+name: find_debug_loc_end_iterator_regression
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr2, $vgpr3
+
+    %0:vgpr_32 = COPY $vgpr3
+    %1:vgpr_32 = COPY $vgpr2
+    %2:sreg_64 = V_CMP_EQ_U32_e64 killed %0, 1, implicit $exec
+    %3:sreg_64 = S_MOV_B64 -1
+    %4:sreg_64 = S_XOR_B64 killed %2, killed %3, implicit-def dead $scc
+    %5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; VGPR constant feeding both PHIs in bb.1
+    %6:vreg_1 = COPY %4
+
+  bb.1:
+    %7:vgpr_32 = PHI %5, %bb.0, %1, %bb.3
+    %8:sreg_32 = PHI %5, %bb.0, %9, %bb.3 ; expected to use a new S_MOV_B32 0 (%14) instead of %5
+    %10:sreg_64 = S_MOV_B64 0
+
+  bb.2:
+    %11:sreg_64 = PHI %10, %bb.1, %12, %bb.2
+    %13:sreg_64 = COPY %6
+    %12:sreg_64 = SI_IF_BREAK %13, %11, implicit-def dead $scc
+    SI_LOOP %12, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.3:
+    %9:sreg_32 = S_OR_B32 %8, 1, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
+    S_BRANCH %bb.4
+
+  bb.4:
+    SI_RETURN
+
+...