Skip to content

Commit 241c519

Browse files
committed
[AMDGPU] Teach iterative schedulers about IGLP (llvm#134953)
This adds the IGLP mutation to the iterative schedulers (`gcn-iterative-max-occupancy-experimental`, `gcn-iterative-minreg`, and `gcn-iterative-ilp`). The `gcn-iterative-minreg` and `gcn-iterative-ilp` schedulers never actually applied the mutations that were added to them, so this change also has the effect of teaching them about mutations in general. The `gcn-iterative-max-occupancy-experimental` scheduler calls `ScheduleDAGMILive::schedule()`, so before this change its mutations were applied at that point. Mutations are now applied during calls to `BuildDAG`, with IGLP superseding other mutations (similar to the other schedulers). We may end up scheduling a region multiple times, with mutations being applied each time, so we need to track whether a region is being re-scheduled and create the IGLP mutation with `AMDGPU::SchedulingPhase::PreRAReentry` in that case (and with `AMDGPU::SchedulingPhase::Initial` otherwise).
1 parent c0b2239 commit 241c519

File tree

8 files changed

+1001
-27
lines changed

8 files changed

+1001
-27
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -563,12 +563,15 @@ createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
563563
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
564564
if (ST.shouldClusterStores())
565565
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
566+
DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
566567
return DAG;
567568
}
568569

569570
static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) {
570-
return new GCNIterativeScheduler(C,
571-
GCNIterativeScheduler::SCHEDULE_MINREGFORCED);
571+
auto *DAG = new GCNIterativeScheduler(
572+
C, GCNIterativeScheduler::SCHEDULE_MINREGFORCED);
573+
DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
574+
return DAG;
572575
}
573576

574577
static ScheduleDAGInstrs *
@@ -580,6 +583,7 @@ createIterativeILPMachineScheduler(MachineSchedContext *C) {
580583
if (ST.shouldClusterStores())
581584
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
582585
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
586+
DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
583587
return DAG;
584588
}
585589

llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
//===----------------------------------------------------------------------===//
1313

1414
#include "GCNIterativeScheduler.h"
15+
#include "AMDGPUIGroupLP.h"
1516
#include "GCNSchedStrategy.h"
1617
#include "SIMachineFunctionInfo.h"
1718

@@ -118,21 +119,42 @@ void GCNIterativeScheduler::printSchedRP(raw_ostream &OS,
118119
}
119120
#endif
120121

122+
void GCNIterativeScheduler::swapIGLPMutations(const Region &R, bool IsReentry) {
123+
bool HasIGLPInstrs = false;
124+
const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(TII);
125+
for (MachineBasicBlock::iterator I = R.Begin; I != R.End; I++) {
126+
if (SII->isIGLPMutationOnly(I->getOpcode())) {
127+
HasIGLPInstrs = true;
128+
break;
129+
}
130+
}
131+
132+
if (HasIGLPInstrs) {
133+
SavedMutations.clear();
134+
SavedMutations.swap(Mutations);
135+
auto SchedPhase = IsReentry ? AMDGPU::SchedulingPhase::PreRAReentry
136+
: AMDGPU::SchedulingPhase::Initial;
137+
138+
addMutation(createIGroupLPDAGMutation(SchedPhase));
139+
}
140+
}
141+
121142
// DAG builder helper
122143
class GCNIterativeScheduler::BuildDAG {
123144
GCNIterativeScheduler &Sch;
124145
SmallVector<SUnit *, 8> TopRoots;
125146

126147
SmallVector<SUnit*, 8> BotRoots;
127148
public:
128-
BuildDAG(const Region &R, GCNIterativeScheduler &_Sch)
129-
: Sch(_Sch) {
130-
auto BB = R.Begin->getParent();
149+
BuildDAG(const Region &R, GCNIterativeScheduler &_Sch, bool IsReentry = false)
150+
: Sch(_Sch) {
151+
auto *BB = R.Begin->getParent();
131152
Sch.BaseClass::startBlock(BB);
132153
Sch.BaseClass::enterRegion(BB, R.Begin, R.End, R.NumRegionInstrs);
133-
154+
Sch.swapIGLPMutations(R, IsReentry);
134155
Sch.buildSchedGraph(Sch.AA, nullptr, nullptr, nullptr,
135156
/*TrackLaneMask*/true);
157+
Sch.postProcessDAG();
136158
Sch.Topo.InitDAGTopologicalSorting();
137159
Sch.findRootsAndBiasEdges(TopRoots, BotRoots);
138160
}
@@ -432,13 +454,15 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
432454

433455
auto NewOcc = TargetOcc;
434456
for (auto *R : Regions) {
457+
// Always build the DAG to add mutations
458+
BuildDAG DAG(*R, *this);
459+
435460
if (R->MaxPressure.getOccupancy(ST) >= NewOcc)
436-
break;
461+
continue;
437462

438463
LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
439464
printLivenessInfo(dbgs(), R->Begin, R->End, LIS));
440465

441-
BuildDAG DAG(*R, *this);
442466
const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);
443467
const auto MaxRP = getSchedulePressure(*R, MinSchedule);
444468
LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n";
@@ -469,8 +493,11 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
469493
sortRegionsByPressure(TgtOcc);
470494
auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
471495

472-
if (TryMaximizeOccupancy && Occ < TgtOcc)
496+
bool IsReentry = false;
497+
if (TryMaximizeOccupancy && Occ < TgtOcc) {
473498
Occ = tryMaximizeOccupancy(TgtOcc);
499+
IsReentry = true;
500+
}
474501

475502
// This is really weird but for some magic scheduling regions twice
476503
// gives performance improvement
@@ -489,7 +516,8 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
489516
LStrgy.setTargetOccupancy(I == 0 ? 0 : TgtOcc);
490517
for (auto *R : Regions) {
491518
OverrideLegacyStrategy Ovr(*R, LStrgy, *this);
492-
519+
IsReentry |= I > 0;
520+
swapIGLPMutations(*R, IsReentry);
493521
Ovr.schedule();
494522
const auto RP = getRegionPressure(*R);
495523
LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
@@ -556,8 +584,11 @@ void GCNIterativeScheduler::scheduleILP(
556584
sortRegionsByPressure(TgtOcc);
557585
auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
558586

559-
if (TryMaximizeOccupancy && Occ < TgtOcc)
587+
bool IsReentry = false;
588+
if (TryMaximizeOccupancy && Occ < TgtOcc) {
560589
Occ = tryMaximizeOccupancy(TgtOcc);
590+
IsReentry = true;
591+
}
561592

562593
TgtOcc = std::min(Occ, TgtOcc);
563594
LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
@@ -566,7 +597,7 @@ void GCNIterativeScheduler::scheduleILP(
566597

567598
unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
568599
for (auto *R : Regions) {
569-
BuildDAG DAG(*R, *this);
600+
BuildDAG DAG(*R, *this, IsReentry);
570601
const auto ILPSchedule = makeGCNILPScheduler(DAG.getBottomRoots(), *this);
571602

572603
const auto RP = getSchedulePressure(*R, ILPSchedule);

llvm/lib/Target/AMDGPU/GCNIterativeScheduler.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ class GCNIterativeScheduler : public ScheduleDAGMILive {
7777
const StrategyKind Strategy;
7878
mutable GCNUpwardRPTracker UPTracker;
7979

80+
std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
81+
8082
class BuildDAG;
8183
class OverrideLegacyStrategy;
8284

@@ -91,6 +93,7 @@ class GCNIterativeScheduler : public ScheduleDAGMILive {
9193
return getRegionPressure(R.Begin, R.End);
9294
}
9395

96+
void swapIGLPMutations(const Region &R, bool IsReentry);
9497
void setBestSchedule(Region &R,
9598
ScheduleRef Schedule,
9699
const GCNRegPressure &MaxRP = GCNRegPressure());

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -188,12 +188,6 @@ static void getRegisterPressures(
188188
Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.getAGPRNum();
189189
}
190190

191-
// Return true if the instruction is mutually exclusive with all non-IGLP DAG
192-
// mutations, requiring all other mutations to be disabled.
193-
static bool isIGLPMutationOnly(unsigned Opcode) {
194-
return Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
195-
}
196-
197191
void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
198192
bool AtTop,
199193
const RegPressureTracker &RPTracker,
@@ -1018,9 +1012,10 @@ bool GCNSchedStage::initGCNRegion() {
10181012
Unsched.reserve(DAG.NumRegionInstrs);
10191013
if (StageID == GCNSchedStageID::OccInitialSchedule ||
10201014
StageID == GCNSchedStageID::ILPInitialSchedule) {
1015+
const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(DAG.TII);
10211016
for (auto &I : DAG) {
10221017
Unsched.push_back(&I);
1023-
if (isIGLPMutationOnly(I.getOpcode()))
1018+
if (SII->isIGLPMutationOnly(I.getOpcode()))
10241019
DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
10251020
}
10261021
} else {
@@ -1754,8 +1749,9 @@ void GCNScheduleDAGMILive::updateRegionBoundaries(
17541749
}
17551750

17561751
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
1757-
return any_of(*DAG, [](MachineBasicBlock::iterator MI) {
1758-
return isIGLPMutationOnly(MI->getOpcode());
1752+
const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(DAG->TII);
1753+
return any_of(*DAG, [SII](MachineBasicBlock::iterator MI) {
1754+
return SII->isIGLPMutationOnly(MI->getOpcode());
17591755
});
17601756
}
17611757

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -986,6 +986,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
986986

987987
bool isIGLP(const MachineInstr &MI) const { return isIGLP(MI.getOpcode()); }
988988

989+
// Return true if the instruction is mutually exclusive with all non-IGLP DAG
990+
// mutations, requiring all other mutations to be disabled.
991+
bool isIGLPMutationOnly(unsigned Opcode) const {
992+
return Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
993+
}
994+
989995
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) {
990996
switch (Opcode) {
991997
case AMDGPU::S_WAITCNT_soft:

llvm/test/CodeGen/AMDGPU/iglp.opt.reentry.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -O3 < %s | FileCheck %s
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -O3 -misched=gcn-iterative-max-occupancy-experimental < %s | FileCheck %s
3+
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -O3 -misched=gcn-iterative-ilp < %s | FileCheck %s
24

35
; Test should not result in build failure
46
; CHECK-LABEL: shouldNotReApply

0 commit comments

Comments
 (0)