Skip to content

Commit bf536cc

Browse files
authored
[AMDGPU] Fix unwanted LICM/CSE of llvm.amdgcn.pops.exiting.wave.id (#96190)
Mark both the intrinsic and the selected MachineInstr as having side effects to prevent MachineLICM and MachineCSE from moving/removing them.
1 parent 0ae2370 commit bf536cc

7 files changed

+31
-38
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

+2-3
Original file line numberDiff line numberDiff line change
@@ -2345,10 +2345,9 @@ class AMDGPUGlobalLoadLDS :
23452345
"", [SDNPMemOperand]>;
23462346
def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS;
23472347

2348-
// Use read/write of inaccessible memory to model the fact that this reads a
2349-
// volatile value.
2348+
// This is IntrHasSideEffects because it reads from a volatile hardware register.
23502349
def int_amdgcn_pops_exiting_wave_id :
2351-
DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>;
2350+
DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrHasSideEffects]>;
23522351

23532352
//===----------------------------------------------------------------------===//
23542353
// GFX10 Intrinsics

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

-12
Original file line numberDiff line numberDiff line change
@@ -2583,15 +2583,6 @@ void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
25832583
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
25842584
}
25852585

2586-
void AMDGPUDAGToDAGISel::SelectPOPSExitingWaveID(SDNode *N) {
2587-
// TODO: Select this with a tablegen pattern. This is tricky because the
2588-
// intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
2589-
// mayLoad/mayStore and tablegen complains about the mismatch.
2590-
SDValue Reg = CurDAG->getRegister(AMDGPU::SRC_POPS_EXITING_WAVE_ID, MVT::i32);
2591-
SDValue Chain = N->getOperand(0);
2592-
CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, N->getVTList(), {Reg, Chain});
2593-
}
2594-
25952586
static unsigned gwsIntrinToOpcode(unsigned IntrID) {
25962587
switch (IntrID) {
25972588
case Intrinsic::amdgcn_ds_gws_init:
@@ -2748,9 +2739,6 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
27482739
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
27492740
SelectDSBvhStackIntrinsic(N);
27502741
return;
2751-
case Intrinsic::amdgcn_pops_exiting_wave_id:
2752-
SelectPOPSExitingWaveID(N);
2753-
return;
27542742
}
27552743

27562744
SelectCode(N);

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h

-1
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,6 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
269269
void SelectFP_EXTEND(SDNode *N);
270270
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
271271
void SelectDSBvhStackIntrinsic(SDNode *N);
272-
void SelectPOPSExitingWaveID(SDNode *N);
273272
void SelectDS_GWS(SDNode *N, unsigned IntrID);
274273
void SelectInterpP1F16(SDNode *N);
275274
void SelectINTRINSIC_W_CHAIN(SDNode *N);

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

-17
Original file line numberDiff line numberDiff line change
@@ -2079,21 +2079,6 @@ bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
20792079
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
20802080
}
20812081

2082-
bool AMDGPUInstructionSelector::selectPOPSExitingWaveID(
2083-
MachineInstr &MI) const {
2084-
Register Dst = MI.getOperand(0).getReg();
2085-
const DebugLoc &DL = MI.getDebugLoc();
2086-
MachineBasicBlock *MBB = MI.getParent();
2087-
2088-
// TODO: Select this with a tablegen pattern. This is tricky because the
2089-
// intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
2090-
// mayLoad/mayStore and tablegen complains about the mismatch.
2091-
auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst)
2092-
.addReg(AMDGPU::SRC_POPS_EXITING_WAVE_ID);
2093-
MI.eraseFromParent();
2094-
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2095-
}
2096-
20972082
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
20982083
MachineInstr &I) const {
20992084
Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
@@ -2144,8 +2129,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
21442129
return selectSBarrierSignalIsfirst(I, IntrinsicID);
21452130
case Intrinsic::amdgcn_s_barrier_leave:
21462131
return selectSBarrierLeave(I);
2147-
case Intrinsic::amdgcn_pops_exiting_wave_id:
2148-
return selectPOPSExitingWaveID(I);
21492132
}
21502133
return selectImpl(I, *CoverageInfo);
21512134
}

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

-1
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,6 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
125125
bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const;
126126
bool selectSBarrier(MachineInstr &MI) const;
127127
bool selectDSBvhStackIntrinsic(MachineInstr &MI) const;
128-
bool selectPOPSExitingWaveID(MachineInstr &MI) const;
129128

130129
bool selectImageIntrinsic(MachineInstr &MI,
131130
const AMDGPU::ImageDimIntrinsicInfo *Intr) const;

llvm/lib/Target/AMDGPU/SOPInstructions.td

+11
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,11 @@ let isMoveImm = 1 in {
215215
} // End Uses = [SCC]
216216
} // End isMoveImm = 1
217217

218+
// Variant of S_MOV_B32 used for reading from volatile registers like
219+
// SRC_POPS_EXITING_WAVE_ID.
220+
let hasSideEffects = 1 in
221+
def S_MOV_B32_sideeffects : SOP1_32 <"s_mov_b32">;
222+
218223
let Defs = [SCC] in {
219224
def S_NOT_B32 : SOP1_32 <"s_not_b32",
220225
[(set i32:$sdst, (UniformUnaryFrag<not> i32:$src0))]
@@ -1880,6 +1885,12 @@ let SubtargetPredicate = isNotGFX9Plus in {
18801885
def : GetFPModePat<fpmode_mask_gfx6plus>;
18811886
}
18821887

1888+
let SubtargetPredicate = isGFX9GFX10 in
1889+
def : GCNPat<
1890+
(int_amdgcn_pops_exiting_wave_id),
1891+
(S_MOV_B32_sideeffects (i32 SRC_POPS_EXITING_WAVE_ID))
1892+
>;
1893+
18831894
//===----------------------------------------------------------------------===//
18841895
// SOP2 Patterns
18851896
//===----------------------------------------------------------------------===//

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll

+18-4
Original file line numberDiff line numberDiff line change
@@ -36,29 +36,29 @@ define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) {
3636
define amdgpu_ps void @test_loop() {
3737
; SDAG-LABEL: test_loop:
3838
; SDAG: ; %bb.0:
39-
; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
4039
; SDAG-NEXT: .LBB1_1: ; %loop
4140
; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
41+
; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
4242
; SDAG-NEXT: s_cmp_eq_u32 s0, 0
4343
; SDAG-NEXT: s_cbranch_scc1 .LBB1_1
4444
; SDAG-NEXT: ; %bb.2: ; %exit
4545
; SDAG-NEXT: s_endpgm
4646
;
4747
; GFX9-GISEL-LABEL: test_loop:
4848
; GFX9-GISEL: ; %bb.0:
49-
; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
5049
; GFX9-GISEL-NEXT: .LBB1_1: ; %loop
5150
; GFX9-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
51+
; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
5252
; GFX9-GISEL-NEXT: s_cmp_eq_u32 s0, 0
5353
; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB1_1
5454
; GFX9-GISEL-NEXT: ; %bb.2: ; %exit
5555
; GFX9-GISEL-NEXT: s_endpgm
5656
;
5757
; GFX10-GISEL-LABEL: test_loop:
5858
; GFX10-GISEL: ; %bb.0:
59-
; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
6059
; GFX10-GISEL-NEXT: .LBB1_1: ; %loop
6160
; GFX10-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
61+
; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
6262
; GFX10-GISEL-NEXT: s_cmp_eq_u32 s0, 0
6363
; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB1_1
6464
; GFX10-GISEL-NEXT: ; %bb.2: ; %exit
@@ -77,21 +77,35 @@ define amdgpu_ps i32 @test_if(i1 inreg %cond) {
7777
; SDAG: ; %bb.0: ; %entry
7878
; SDAG-NEXT: s_bitcmp0_b32 s0, 0
7979
; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
80+
; SDAG-NEXT: s_cbranch_scc1 .LBB2_2
81+
; SDAG-NEXT: ; %bb.1: ; %body
82+
; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
83+
; SDAG-NEXT: .LBB2_2: ; %exit
8084
; SDAG-NEXT: ; return to shader part epilog
8185
;
8286
; GFX9-GISEL-LABEL: test_if:
8387
; GFX9-GISEL: ; %bb.0: ; %entry
8488
; GFX9-GISEL-NEXT: s_mov_b32 s1, s0
85-
; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
8689
; GFX9-GISEL-NEXT: s_xor_b32 s1, s1, 1
8790
; GFX9-GISEL-NEXT: s_and_b32 s1, s1, 1
91+
; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
92+
; GFX9-GISEL-NEXT: s_cmp_lg_u32 s1, 0
93+
; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB2_2
94+
; GFX9-GISEL-NEXT: ; %bb.1: ; %body
95+
; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
96+
; GFX9-GISEL-NEXT: .LBB2_2: ; %exit
8897
; GFX9-GISEL-NEXT: ; return to shader part epilog
8998
;
9099
; GFX10-GISEL-LABEL: test_if:
91100
; GFX10-GISEL: ; %bb.0: ; %entry
92101
; GFX10-GISEL-NEXT: s_xor_b32 s0, s0, 1
93102
; GFX10-GISEL-NEXT: s_and_b32 s1, s0, 1
94103
; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
104+
; GFX10-GISEL-NEXT: s_cmp_lg_u32 s1, 0
105+
; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB2_2
106+
; GFX10-GISEL-NEXT: ; %bb.1: ; %body
107+
; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
108+
; GFX10-GISEL-NEXT: .LBB2_2: ; %exit
95109
; GFX10-GISEL-NEXT: ; return to shader part epilog
96110
entry:
97111
%id1 = call i32 @llvm.amdgcn.pops.exiting.wave.id()

0 commit comments

Comments
 (0)