Skip to content

Commit d996f0a

Browse files
committed
[AMDGPU] Fix interaction between WQM and llvm.amdgcn.init.exec
Whole quad mode requires inserting a copy of the initial EXEC mask. In a function that also uses llvm.amdgcn.init.exec, insert the COPY after initializing EXEC.
1 parent 2448b0e commit d996f0a

File tree

2 files changed

+19
-7
lines changed

2 files changed

+19
-7
lines changed

llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ class SIWholeQuadMode : public MachineFunctionPass {
225225
void lowerCopyInstrs();
226226
void lowerKillInstrs(bool IsWQM);
227227
void lowerInitExec(MachineInstr &MI);
228-
void lowerInitExecInstrs();
228+
MachineBasicBlock::iterator lowerInitExecInstrs(MachineBasicBlock &Entry);
229229

230230
public:
231231
static char ID;
@@ -1648,9 +1648,23 @@ void SIWholeQuadMode::lowerInitExec(MachineInstr &MI) {
16481648
LIS->createAndComputeVirtRegInterval(CountReg);
16491649
}
16501650

1651-
void SIWholeQuadMode::lowerInitExecInstrs() {
1652-
for (MachineInstr *MI : InitExecInstrs)
1651+
/// Lower INIT_EXEC instructions. Return a suitable insert point in \p Entry
1652+
/// for instructions that depend on EXEC.
1653+
MachineBasicBlock::iterator
1654+
SIWholeQuadMode::lowerInitExecInstrs(MachineBasicBlock &Entry) {
1655+
MachineBasicBlock::iterator InsertPt = Entry.getFirstNonPHI();
1656+
1657+
for (MachineInstr *MI : InitExecInstrs) {
1658+
// Try to handle undefined cases gracefully:
1659+
// - multiple INIT_EXEC instructions
1660+
// - INIT_EXEC instructions not in the entry block
1661+
if (MI->getParent() == &Entry)
1662+
InsertPt = std::next(MI->getIterator());
1663+
16531664
lowerInitExec(*MI);
1665+
}
1666+
1667+
return InsertPt;
16541668
}
16551669

16561670
bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
@@ -1709,10 +1723,8 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
17091723
return !LiveMaskQueries.empty();
17101724
}
17111725

1712-
lowerInitExecInstrs();
1713-
17141726
MachineBasicBlock &Entry = MF.front();
1715-
MachineBasicBlock::iterator EntryMI = Entry.getFirstNonPHI();
1727+
MachineBasicBlock::iterator EntryMI = lowerInitExecInstrs(Entry);
17161728

17171729
// Store a copy of the original live mask when required
17181730
if (NeedsLiveMask || (GlobalFlags & StateWQM)) {

llvm/test/CodeGen/AMDGPU/wqm.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3334,8 +3334,8 @@ define amdgpu_gs void @wqm_init_exec() {
33343334
;
33353335
; GFX10-W32-LABEL: wqm_init_exec:
33363336
; GFX10-W32: ; %bb.0: ; %bb
3337-
; GFX10-W32-NEXT: s_mov_b32 s1, exec_lo
33383337
; GFX10-W32-NEXT: s_mov_b32 exec_lo, -1
3338+
; GFX10-W32-NEXT: s_mov_b32 s1, exec_lo
33393339
; GFX10-W32-NEXT: v_mov_b32_e32 v0, 0
33403340
; GFX10-W32-NEXT: s_mov_b32 s0, 0
33413341
; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo

0 commit comments

Comments
 (0)