Skip to content

Commit 1d8a94c

Browse files
committed
[AMDGPU] SILowerControlFlow: fix preservation of LiveIntervals
In emitElse live interval for SI_ELSE source must be recalculated as SI_ELSE is removed, and new user is placed at block start. In emitIfBreak live interval for new created AndReg must be computed. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D158141
1 parent 7b74706 commit 1d8a94c

File tree

3 files changed

+376
-44
lines changed

3 files changed

+376
-44
lines changed

llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp

Lines changed: 37 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ class SILowerControlFlow : public MachineFunctionPass {
7979
SetVector<MachineInstr*> LoweredEndCf;
8080
DenseSet<Register> LoweredIf;
8181
SmallSet<MachineBasicBlock *, 4> KillBlocks;
82+
SmallSet<Register, 8> RecomputeRegs;
8283

8384
const TargetRegisterClass *BoolRC = nullptr;
8485
unsigned AndOpc;
@@ -297,8 +298,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
297298
// FIXME: Is there a better way of adjusting the liveness? It shouldn't be
298299
// hard to add another def here but I'm not sure how to correctly update the
299300
// valno.
300-
LIS->removeInterval(SaveExecReg);
301-
LIS->createAndComputeVirtRegInterval(SaveExecReg);
301+
RecomputeRegs.insert(SaveExecReg);
302302
LIS->createAndComputeVirtRegInterval(Tmp);
303303
if (!SimpleIf)
304304
LIS->createAndComputeVirtRegInterval(CopyReg);
@@ -309,6 +309,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
309309
const DebugLoc &DL = MI.getDebugLoc();
310310

311311
Register DstReg = MI.getOperand(0).getReg();
312+
Register SrcReg = MI.getOperand(1).getReg();
312313

313314
MachineBasicBlock::iterator Start = MBB.begin();
314315

@@ -319,7 +320,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
319320
BuildMI(MBB, Start, DL, TII->get(OrSaveExecOpc), SaveReg)
320321
.add(MI.getOperand(1)); // Saved EXEC
321322
if (LV)
322-
LV->replaceKillInstruction(MI.getOperand(1).getReg(), MI, *OrSaveExec);
323+
LV->replaceKillInstruction(SrcReg, MI, *OrSaveExec);
323324

324325
MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();
325326

@@ -331,9 +332,6 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
331332
.addReg(Exec)
332333
.addReg(SaveReg);
333334

334-
if (LIS)
335-
LIS->InsertMachineInstrInMaps(*And);
336-
337335
MachineInstr *Xor =
338336
BuildMI(MBB, ElsePt, DL, TII->get(XorTermrOpc), Exec)
339337
.addReg(Exec)
@@ -356,12 +354,13 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
356354
MI.eraseFromParent();
357355

358356
LIS->InsertMachineInstrInMaps(*OrSaveExec);
357+
LIS->InsertMachineInstrInMaps(*And);
359358

360359
LIS->InsertMachineInstrInMaps(*Xor);
361360
LIS->InsertMachineInstrInMaps(*Branch);
362361

363-
LIS->removeInterval(DstReg);
364-
LIS->createAndComputeVirtRegInterval(DstReg);
362+
RecomputeRegs.insert(SrcReg);
363+
RecomputeRegs.insert(DstReg);
365364
LIS->createAndComputeVirtRegInterval(SaveReg);
366365

367366
// Let this be recomputed.
@@ -388,8 +387,9 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
388387
// AND the break condition operand with exec, then OR that into the "loop
389388
// exit" mask.
390389
MachineInstr *And = nullptr, *Or = nullptr;
390+
Register AndReg;
391391
if (!SkipAnding) {
392-
Register AndReg = MRI->createVirtualRegister(BoolRC);
392+
AndReg = MRI->createVirtualRegister(BoolRC);
393393
And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), AndReg)
394394
.addReg(Exec)
395395
.add(MI.getOperand(1));
@@ -398,8 +398,6 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
398398
Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
399399
.addReg(AndReg)
400400
.add(MI.getOperand(2));
401-
if (LIS)
402-
LIS->createAndComputeVirtRegInterval(AndReg);
403401
} else {
404402
Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
405403
.add(MI.getOperand(1))
@@ -411,9 +409,13 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
411409
LV->replaceKillInstruction(MI.getOperand(2).getReg(), MI, *Or);
412410

413411
if (LIS) {
414-
if (And)
415-
LIS->InsertMachineInstrInMaps(*And);
416412
LIS->ReplaceMachineInstrInMaps(MI, *Or);
413+
if (And) {
414+
// Read of original operand 1 is on And now not Or.
415+
RecomputeRegs.insert(And->getOperand(2).getReg());
416+
LIS->InsertMachineInstrInMaps(*And);
417+
LIS->createAndComputeVirtRegInterval(AndReg);
418+
}
417419
}
418420

419421
MI.eraseFromParent();
@@ -436,6 +438,7 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {
436438
.add(MI.getOperand(1));
437439

438440
if (LIS) {
441+
RecomputeRegs.insert(MI.getOperand(0).getReg());
439442
LIS->ReplaceMachineInstrInMaps(MI, *AndN2);
440443
LIS->InsertMachineInstrInMaps(*Branch);
441444
}
@@ -714,11 +717,13 @@ void SILowerControlFlow::lowerInitExec(MachineBasicBlock *MBB,
714717

715718
if (MI.getOpcode() == AMDGPU::SI_INIT_EXEC) {
716719
// This should be before all vector instructions.
717-
BuildMI(*MBB, MBB->begin(), MI.getDebugLoc(),
720+
MachineInstr *InitMI = BuildMI(*MBB, MBB->begin(), MI.getDebugLoc(),
718721
TII->get(IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64), Exec)
719722
.addImm(MI.getOperand(0).getImm());
720-
if (LIS)
723+
if (LIS) {
721724
LIS->RemoveMachineInstrFromMaps(MI);
725+
LIS->InsertMachineInstrInMaps(*InitMI);
726+
}
722727
MI.eraseFromParent();
723728
return;
724729
}
@@ -789,8 +794,7 @@ void SILowerControlFlow::lowerInitExec(MachineBasicBlock *MBB,
789794
LIS->InsertMachineInstrInMaps(*CmpMI);
790795
LIS->InsertMachineInstrInMaps(*CmovMI);
791796

792-
LIS->removeInterval(InputReg);
793-
LIS->createAndComputeVirtRegInterval(InputReg);
797+
RecomputeRegs.insert(InputReg);
794798
LIS->createAndComputeVirtRegInterval(CountReg);
795799
}
796800

@@ -807,7 +811,7 @@ bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
807811

808812
while (!MBB.predecessors().empty()) {
809813
MachineBasicBlock *P = *MBB.pred_begin();
810-
if (P->getFallThrough() == &MBB)
814+
if (P->getFallThrough(false) == &MBB)
811815
FallThrough = P;
812816
P->ReplaceUsesOfBlockWith(&MBB, Succ);
813817
}
@@ -828,14 +832,13 @@ bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
828832
MBB.clear();
829833
MBB.eraseFromParent();
830834
if (FallThrough && !FallThrough->isLayoutSuccessor(Succ)) {
831-
if (!Succ->canFallThrough()) {
832-
MachineFunction *MF = FallThrough->getParent();
833-
MachineFunction::iterator FallThroughPos(FallThrough);
834-
MF->splice(std::next(FallThroughPos), Succ);
835-
} else
836-
BuildMI(*FallThrough, FallThrough->end(),
837-
FallThrough->findBranchDebugLoc(), TII->get(AMDGPU::S_BRANCH))
838-
.addMBB(Succ);
835+
// Note: we cannot update block layout and preserve live intervals;
836+
// hence we must insert a branch.
837+
MachineInstr *BranchMI = BuildMI(*FallThrough, FallThrough->end(),
838+
FallThrough->findBranchDebugLoc(), TII->get(AMDGPU::S_BRANCH))
839+
.addMBB(Succ);
840+
if (LIS)
841+
LIS->InsertMachineInstrInMaps(*BranchMI);
839842
}
840843

841844
return true;
@@ -947,6 +950,14 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
947950

948951
optimizeEndCf();
949952

953+
if (LIS) {
954+
for (Register Reg : RecomputeRegs) {
955+
LIS->removeInterval(Reg);
956+
LIS->createAndComputeVirtRegInterval(Reg);
957+
}
958+
}
959+
960+
RecomputeRegs.clear();
950961
LoweredEndCf.clear();
951962
LoweredIf.clear();
952963
KillBlocks.clear();

llvm/test/CodeGen/AMDGPU/collapse-endcf.mir

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -446,15 +446,16 @@ body: |
446446
; GCN-NEXT: bb.2:
447447
; GCN-NEXT: successors: %bb.5(0x80000000)
448448
; GCN-NEXT: {{ $}}
449+
; GCN-NEXT: S_BRANCH %bb.5
450+
; GCN-NEXT: {{ $}}
451+
; GCN-NEXT: bb.4:
452+
; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc
453+
; GCN-NEXT: S_ENDPGM 0
449454
; GCN-NEXT: {{ $}}
450455
; GCN-NEXT: bb.5:
451456
; GCN-NEXT: successors: %bb.4(0x80000000)
452457
; GCN-NEXT: {{ $}}
453458
; GCN-NEXT: S_BRANCH %bb.4
454-
; GCN-NEXT: {{ $}}
455-
; GCN-NEXT: bb.4:
456-
; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc
457-
; GCN-NEXT: S_ENDPGM 0
458459
bb.0:
459460
successors: %bb.1, %bb.4
460461
@@ -923,7 +924,6 @@ body: |
923924
S_BRANCH %bb.1
924925
925926
bb.1:
926-
; predecessors: %bb.0
927927
successors: %bb.2, %bb.6
928928
929929
%3:vgpr_32 = IMPLICIT_DEF
@@ -932,7 +932,6 @@ body: |
932932
S_BRANCH %bb.2
933933
934934
bb.2:
935-
; predecessors: %bb.1
936935
successors: %bb.3, %bb.7
937936
938937
%6:vgpr_32 = IMPLICIT_DEF
@@ -941,7 +940,6 @@ body: |
941940
S_BRANCH %bb.3
942941
943942
bb.3:
944-
; predecessors: %bb.2
945943
successors: %bb.4, %bb.5
946944
947945
%9:vgpr_32 = IMPLICIT_DEF
@@ -950,40 +948,34 @@ body: |
950948
S_BRANCH %bb.4
951949
952950
bb.4:
953-
; predecessors: %bb.3
954951
successors: %bb.5
955952
956953
S_BRANCH %bb.5
957954
958955
bb.5:
959-
; predecessors: %bb.3, %bb.4
960956
successors: %bb.7
961957
962958
SI_END_CF %11:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
963959
S_BRANCH %bb.7
964960
965961
bb.6:
966-
; predecessors: %bb.1, %bb.13
967962
successors: %bb.14
968963
969964
SI_END_CF %5:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
970965
S_BRANCH %bb.14
971966
972967
bb.7:
973-
; predecessors: %bb2, %bb.5
974968
successors: %bb.8
975969
976970
SI_END_CF %8:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
977971
S_BRANCH %bb.8
978972
979973
bb.8:
980-
; predecessors: %bb.7
981974
successors: %bb.9
982975
983976
S_BRANCH %bb.9
984977
985978
bb.9:
986-
; predecessors: %bb.8
987979
successors: %bb.11, %bb.12
988980
989981
%12:vgpr_32 = IMPLICIT_DEF
@@ -992,33 +984,28 @@ body: |
992984
S_BRANCH %bb.11
993985
994986
bb.10:
995-
; predecessors: %bb.12
996987
successors: %bb.13
997988
998989
S_BRANCH %bb.13
999990
1000991
bb.11:
1001-
; predecessors: %bb.9
1002992
successors: %bb.12
1003993
1004994
S_BRANCH %bb.12
1005995
1006996
bb.12:
1007-
; predecessors: %bb.9, %bb.11
1008997
successors: %bb.10, %bb.13
1009998
1010999
%15:sreg_64 = SI_ELSE %14:sreg_64, %bb.13, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
10111000
S_BRANCH %bb.10
10121001
10131002
bb.13:
1014-
; predecessors: %bb.10, %bb.12
10151003
successors: %bb.6
10161004
10171005
SI_END_CF %15:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
10181006
S_BRANCH %bb.6
10191007
10201008
bb.14:
1021-
; predecessors: %bb.0, %bb.6
10221009
10231010
SI_END_CF %2:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
10241011
S_ENDPGM 0

0 commit comments

Comments
 (0)