Skip to content

Commit 7792b4a

Browse files
authored
Revert "Reland "[amdgpu] Add llvm.amdgcn.init.whole.wave intrinsic" (#108054)"" (#108341)
Reverts #108173. The test si-init-whole-wave.mir crashes on some buildbots (although it passed both locally with sanitizers enabled and in pre-merge tests). Investigating.
1 parent 42494e5 commit 7792b4a

22 files changed

+5
-1528
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -208,16 +208,6 @@ def int_amdgcn_init_exec_from_input : Intrinsic<[],
208208
[IntrConvergent, IntrHasSideEffects, IntrNoMem, IntrNoCallback,
209209
IntrNoFree, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
210210

211-
// Sets the function into whole-wave-mode and returns whether the lane was
212-
// active when entering the function. A branch depending on this return will
213-
// revert the EXEC mask to what it was when entering the function, thus
214-
// resulting in a no-op. This pattern is used to optimize branches when function
215-
// tails need to be run in whole-wave-mode. It may also have other consequences
216-
// (mostly related to WWM CSR handling) that differentiate it from using
217-
// a plain `amdgcn.init.exec -1`.
218-
def int_amdgcn_init_whole_wave : Intrinsic<[llvm_i1_ty], [], [
219-
IntrHasSideEffects, IntrNoMem, IntrConvergent]>;
220-
221211
def int_amdgcn_wavefrontsize :
222212
ClangBuiltin<"__builtin_amdgcn_wavefrontsize">,
223213
DefaultAttrsIntrinsic<[llvm_i32_ty], [], [NoUndef<RetIndex>, IntrNoMem, IntrSpeculatable]>;

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2738,11 +2738,6 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
27382738
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
27392739
SelectDSBvhStackIntrinsic(N);
27402740
return;
2741-
case Intrinsic::amdgcn_init_whole_wave:
2742-
CurDAG->getMachineFunction()
2743-
.getInfo<SIMachineFunctionInfo>()
2744-
->setInitWholeWave();
2745-
break;
27462741
}
27472742

27482743
SelectCode(N);

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1772,14 +1772,6 @@ bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
17721772
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
17731773
}
17741774

1775-
bool AMDGPUInstructionSelector::selectInitWholeWave(MachineInstr &MI) const {
1776-
MachineFunction *MF = MI.getParent()->getParent();
1777-
SIMachineFunctionInfo *MFInfo = MF->getInfo<SIMachineFunctionInfo>();
1778-
1779-
MFInfo->setInitWholeWave();
1780-
return selectImpl(MI, *CoverageInfo);
1781-
}
1782-
17831775
bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
17841776
if (TM.getOptLevel() > CodeGenOptLevel::None) {
17851777
unsigned WGSize = STI.getFlatWorkGroupSizes(MF->getFunction()).second;
@@ -2107,8 +2099,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
21072099
return selectDSAppendConsume(I, true);
21082100
case Intrinsic::amdgcn_ds_consume:
21092101
return selectDSAppendConsume(I, false);
2110-
case Intrinsic::amdgcn_init_whole_wave:
2111-
return selectInitWholeWave(I);
21122102
case Intrinsic::amdgcn_s_barrier:
21132103
return selectSBarrier(I);
21142104
case Intrinsic::amdgcn_raw_buffer_load_lds:

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,6 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
120120
bool selectDSOrderedIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const;
121121
bool selectDSGWSIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const;
122122
bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const;
123-
bool selectInitWholeWave(MachineInstr &MI) const;
124123
bool selectSBarrier(MachineInstr &MI) const;
125124
bool selectDSBvhStackIntrinsic(MachineInstr &MI) const;
126125

llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,6 @@ class AMDGPUMachineFunction : public MachineFunctionInfo {
6767
// Kernel may need limited waves per EU for better performance.
6868
bool WaveLimiter = false;
6969

70-
bool HasInitWholeWave = false;
71-
7270
public:
7371
AMDGPUMachineFunction(const Function &F, const AMDGPUSubtarget &ST);
7472

@@ -111,9 +109,6 @@ class AMDGPUMachineFunction : public MachineFunctionInfo {
111109
return WaveLimiter;
112110
}
113111

114-
bool hasInitWholeWave() const { return HasInitWholeWave; }
115-
void setInitWholeWave() { HasInitWholeWave = true; }
116-
117112
unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV) {
118113
return allocateLDSGlobal(DL, GV, DynLDSAlign);
119114
}

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4997,7 +4997,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
49974997
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
49984998
break;
49994999
}
5000-
case Intrinsic::amdgcn_init_whole_wave:
50015000
case Intrinsic::amdgcn_live_mask: {
50025001
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
50035002
break;

llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,6 @@ def : SourceOfDivergence<int_amdgcn_mov_dpp>;
329329
def : SourceOfDivergence<int_amdgcn_mov_dpp8>;
330330
def : SourceOfDivergence<int_amdgcn_update_dpp>;
331331
def : SourceOfDivergence<int_amdgcn_writelane>;
332-
def : SourceOfDivergence<int_amdgcn_init_whole_wave>;
333332

334333
foreach intr = AMDGPUMFMAIntrinsics908 in
335334
def : SourceOfDivergence<intr>;

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1740,9 +1740,6 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
17401740
? DenormalMode::IEEE
17411741
: DenormalMode::PreserveSign;
17421742

1743-
if (YamlMFI.HasInitWholeWave)
1744-
MFI->setInitWholeWave();
1745-
17461743
return false;
17471744
}
17481745

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1343,14 +1343,10 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
13431343

13441344
// Allocate spill slots for WWM reserved VGPRs.
13451345
// For chain functions, we only need to do this if we have calls to
1346-
// llvm.amdgcn.cs.chain (otherwise there's no one to save them for, since
1347-
// chain functions do not return) and the function did not contain a call to
1348-
// llvm.amdgcn.init.whole.wave (since in that case there are no inactive lanes
1349-
// when entering the function).
1350-
bool IsChainWithoutRestores =
1351-
FuncInfo->isChainFunction() &&
1352-
(!MF.getFrameInfo().hasTailCall() || FuncInfo->hasInitWholeWave());
1353-
if (!FuncInfo->isEntryFunction() && !IsChainWithoutRestores) {
1346+
// llvm.amdgcn.cs.chain.
1347+
bool IsChainWithoutCalls =
1348+
FuncInfo->isChainFunction() && !MF.getFrameInfo().hasTailCall();
1349+
if (!FuncInfo->isEntryFunction() && !IsChainWithoutCalls) {
13541350
for (Register Reg : FuncInfo->getWWMReservedRegs()) {
13551351
const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
13561352
FuncInfo->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -570,16 +570,6 @@ def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI <
570570
let Defs = [EXEC];
571571
}
572572

573-
// Sets EXEC to all lanes and returns the previous EXEC.
574-
def SI_INIT_WHOLE_WAVE : SPseudoInstSI <
575-
(outs SReg_1:$dst), (ins),
576-
[(set i1:$dst, (int_amdgcn_init_whole_wave))]> {
577-
let Defs = [EXEC];
578-
let Uses = [EXEC];
579-
580-
let isConvergent = 1;
581-
}
582-
583573
// Return for returning shaders to a shader variant epilog.
584574
def SI_RETURN_TO_EPILOG : SPseudoInstSI <
585575
(outs), (ins variable_ops), [(AMDGPUreturn_to_epilog)]> {

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -295,8 +295,6 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
295295
StringValue SGPRForEXECCopy;
296296
StringValue LongBranchReservedReg;
297297

298-
bool HasInitWholeWave = false;
299-
300298
SIMachineFunctionInfo() = default;
301299
SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
302300
const TargetRegisterInfo &TRI,
@@ -344,7 +342,6 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
344342
StringValue()); // Don't print out when it's empty.
345343
YamlIO.mapOptional("longBranchReservedReg", MFI.LongBranchReservedReg,
346344
StringValue());
347-
YamlIO.mapOptional("hasInitWholeWave", MFI.HasInitWholeWave, false);
348345
}
349346
};
350347

llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp

Lines changed: 1 addition & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -586,8 +586,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
586586
KillInstrs.push_back(&MI);
587587
BBI.NeedsLowering = true;
588588
} else if (Opcode == AMDGPU::SI_INIT_EXEC ||
589-
Opcode == AMDGPU::SI_INIT_EXEC_FROM_INPUT ||
590-
Opcode == AMDGPU::SI_INIT_WHOLE_WAVE) {
589+
Opcode == AMDGPU::SI_INIT_EXEC_FROM_INPUT) {
591590
InitExecInstrs.push_back(&MI);
592591
} else if (WQMOutputs) {
593592
// The function is in machine SSA form, which means that physical
@@ -1572,33 +1571,6 @@ void SIWholeQuadMode::lowerInitExec(MachineInstr &MI) {
15721571
MachineBasicBlock *MBB = MI.getParent();
15731572
bool IsWave32 = ST->isWave32();
15741573

1575-
if (MI.getOpcode() == AMDGPU::SI_INIT_WHOLE_WAVE) {
1576-
assert(MBB == &MBB->getParent()->front() &&
1577-
"init whole wave not in entry block");
1578-
Register EntryExec = MRI->createVirtualRegister(TRI->getBoolRC());
1579-
MachineInstr *SaveExec =
1580-
BuildMI(*MBB, MBB->begin(), MI.getDebugLoc(),
1581-
TII->get(IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32
1582-
: AMDGPU::S_OR_SAVEEXEC_B64),
1583-
EntryExec)
1584-
.addImm(-1);
1585-
1586-
// Replace all uses of MI's destination reg with EntryExec.
1587-
MRI->replaceRegWith(MI.getOperand(0).getReg(), EntryExec);
1588-
1589-
if (LIS) {
1590-
LIS->RemoveMachineInstrFromMaps(MI);
1591-
}
1592-
1593-
MI.eraseFromParent();
1594-
1595-
if (LIS) {
1596-
LIS->InsertMachineInstrInMaps(*SaveExec);
1597-
LIS->createAndComputeVirtRegInterval(EntryExec);
1598-
}
1599-
return;
1600-
}
1601-
16021574
if (MI.getOpcode() == AMDGPU::SI_INIT_EXEC) {
16031575
// This should be before all vector instructions.
16041576
MachineInstr *InitMI =

0 commit comments

Comments
 (0)