Skip to content

Commit fc21387

Browse files
authored
[AMDGPU] Enable constant offset promotion to immediate FLAT (#93884)
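Previously, this promotion was supported only for FLAT Global instructions; this change extends it to generic FLAT instructions as well (FLAT Scratch remains unsupported).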
1 parent 3320249 commit fc21387

File tree

3 files changed

+86
-10
lines changed

3 files changed

+86
-10
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {
223223
SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
224224
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
225225

226-
bool isLegalFlatAddressingMode(const AddrMode &AM, unsigned AddrSpace) const;
227226
bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
228227

229228
unsigned isCFIntrinsic(const SDNode *Intr) const;
@@ -315,6 +314,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
315314
SmallVectorImpl<Value*> &/*Ops*/,
316315
Type *&/*AccessTy*/) const override;
317316

317+
bool isLegalFlatAddressingMode(const AddrMode &AM, unsigned AddrSpace) const;
318318
bool isLegalGlobalAddressingMode(const AddrMode &AM) const;
319319
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
320320
unsigned AS,

llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2055,10 +2055,16 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
20552055
if (!(MI.mayLoad() ^ MI.mayStore()))
20562056
return false;
20572057

2058-
// TODO: Support flat and scratch.
2059-
if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0)
2058+
if (!STM->hasFlatInstOffsets() || !SIInstrInfo::isFLAT(MI))
20602059
return false;
20612060

2061+
// TODO: Support FLAT_SCRATCH. Currently code expects 64-bit pointers.
2062+
if (SIInstrInfo::isFLATScratch(MI))
2063+
return false;
2064+
2065+
unsigned AS = SIInstrInfo::isFLATGlobal(MI) ? AMDGPUAS::GLOBAL_ADDRESS
2066+
: AMDGPUAS::FLAT_ADDRESS;
2067+
20622068
if (MI.mayLoad() &&
20632069
TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != nullptr)
20642070
return false;
@@ -2157,7 +2163,7 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
21572163
TargetLoweringBase::AddrMode AM;
21582164
AM.HasBaseReg = true;
21592165
AM.BaseOffs = Dist;
2160-
if (TLI->isLegalGlobalAddressingMode(AM) &&
2166+
if (TLI->isLegalFlatAddressingMode(AM, AS) &&
21612167
(uint32_t)std::abs(Dist) > MaxDist) {
21622168
MaxDist = std::abs(Dist);
21632169

@@ -2183,7 +2189,7 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
21832189
AM.HasBaseReg = true;
21842190
AM.BaseOffs = OtherOffset - AnchorAddr.Offset;
21852191

2186-
if (TLI->isLegalGlobalAddressingMode(AM)) {
2192+
if (TLI->isLegalFlatAddressingMode(AM, AS)) {
21872193
LLVM_DEBUG(dbgs() << " Promote Offset(" << OtherOffset; dbgs() << ")";
21882194
OtherMI->dump());
21892195
updateBaseAndOffset(*OtherMI, Base, OtherOffset - AnchorAddr.Offset);

llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir

Lines changed: 75 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
1-
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX9 %s
1+
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
23

3-
# GFX9-LABEL: name: diffoporder_add
4+
# GCN-LABEL: name: diffoporder_add
45
# GFX9: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, -2048, 0
56
# GFX9: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
67

8+
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
9+
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
10+
711
name: diffoporder_add
812
body: |
913
bb.0.entry:
@@ -43,7 +47,7 @@ body: |
4347
...
4448
---
4549

46-
# GFX9-LABEL: name: LowestInMiddle
50+
# GCN-LABEL: name: LowestInMiddle
4751
# GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11200
4852
# GFX9: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
4953
# GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
@@ -57,6 +61,11 @@ body: |
5761
# GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0,
5862
# GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0,
5963

64+
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
65+
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
66+
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
67+
68+
6069
name: LowestInMiddle
6170
body: |
6271
bb.0.entry:
@@ -101,7 +110,7 @@ body: |
101110
...
102111
---
103112

104-
# GFX9-LABEL: name: NegativeDistance
113+
# GCN-LABEL: name: NegativeDistance
105114
# GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 10240
106115
# GFX9: [[V_ADD_CO_U32_e64_4:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
107116
# GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
@@ -110,6 +119,10 @@ body: |
110119
# GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0
111120
# GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0
112121

122+
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
123+
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
124+
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
125+
113126
name: NegativeDistance
114127
body: |
115128
bb.0.entry:
@@ -190,10 +203,13 @@ body: |
190203
...
191204
---
192205

193-
# GFX9-LABEL: name: diffoporder_add_store
206+
# GCN-LABEL: name: diffoporder_add_store
194207
# GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0,
195208
# GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0,
196209

210+
# GFX8: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 0, 0
211+
# GFX8: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0
212+
197213
name: diffoporder_add_store
198214
body: |
199215
bb.0.entry:
@@ -212,3 +228,57 @@ body: |
212228
%13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
213229
GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec
214230
...
231+
---
232+
233+
# GCN-LABEL: name: diffoporder_add_flat_load
234+
# GFX9: FLAT_LOAD_DWORD %{{[0-9]+}}, 1000, 0,
235+
# GFX9: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0,
236+
237+
# GFX8: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0,
238+
# GFX8: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0,
239+
240+
name: diffoporder_add_flat_load
241+
body: |
242+
bb.0.entry:
243+
244+
%0:vreg_64 = COPY $vgpr0_vgpr1
245+
246+
%1:sgpr_32 = S_MOV_B32 4000
247+
%2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
248+
%4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
249+
%6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
250+
%14:vgpr_32 = FLAT_LOAD_DWORD %6, 0, 0, implicit $exec, implicit $flat_scr
251+
252+
%8:sgpr_32 = S_MOV_B32 3000
253+
%9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
254+
%11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
255+
%13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
256+
%15:vgpr_32 = FLAT_LOAD_DWORD %13, 0, 0, implicit $exec, implicit $flat_scr
257+
...
258+
---
259+
260+
# GCN-LABEL: name: diffoporder_add_flat_store
261+
# GFX9: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0,
262+
# GFX9: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0,
263+
264+
# GFX8: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub0, 0, 0,
265+
# GFX8: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0,
266+
267+
name: diffoporder_add_flat_store
268+
body: |
269+
bb.0.entry:
270+
271+
%0:vreg_64 = COPY $vgpr0_vgpr1
272+
273+
%1:sgpr_32 = S_MOV_B32 4000
274+
%2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
275+
%4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
276+
%6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
277+
FLAT_STORE_DWORD %6, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
278+
279+
%8:sgpr_32 = S_MOV_B32 3000
280+
%9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
281+
%11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
282+
%13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
283+
FLAT_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec, implicit $flat_scr
284+
...

0 commit comments

Comments
 (0)