-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[AMDGPU] Enable constant offset promotion to immediate FLAT #93884
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
23290d1
438dc6f
d8f6ddb
2e938ef
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2055,10 +2055,16 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm( | |
if (!(MI.mayLoad() ^ MI.mayStore())) | ||
return false; | ||
|
||
// TODO: Support flat and scratch. | ||
if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If nothing else, checking for the existence of a global SADDR form is a very obscure and indirect way of checking support for immediate offsets and address size. This is actually how I discovered this piece of code: it started to fail with the experimental patch. |
||
if (!STM->hasFlatInstOffsets() || !SIInstrInfo::isFLAT(MI)) | ||
return false; | ||
|
||
// TODO: Support FLAT_SCRATCH. Currently code expects 64-bit pointers. | ||
if (SIInstrInfo::isFLATScratch(MI)) | ||
return false; | ||
|
||
unsigned AS = SIInstrInfo::isFLATGlobal(MI) ? AMDGPUAS::GLOBAL_ADDRESS | ||
: AMDGPUAS::FLAT_ADDRESS; | ||
|
||
if (MI.mayLoad() && | ||
TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != nullptr) | ||
return false; | ||
|
@@ -2157,7 +2163,7 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm( | |
TargetLoweringBase::AddrMode AM; | ||
AM.HasBaseReg = true; | ||
AM.BaseOffs = Dist; | ||
if (TLI->isLegalGlobalAddressingMode(AM) && | ||
if (TLI->isLegalFlatAddressingMode(AM, AS) && | ||
(uint32_t)std::abs(Dist) > MaxDist) { | ||
MaxDist = std::abs(Dist); | ||
|
||
|
@@ -2183,7 +2189,7 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm( | |
AM.HasBaseReg = true; | ||
AM.BaseOffs = OtherOffset - AnchorAddr.Offset; | ||
|
||
if (TLI->isLegalGlobalAddressingMode(AM)) { | ||
if (TLI->isLegalFlatAddressingMode(AM, AS)) { | ||
LLVM_DEBUG(dbgs() << " Promote Offset(" << OtherOffset; dbgs() << ")"; | ||
OtherMI->dump()); | ||
updateBaseAndOffset(*OtherMI, Base, OtherOffset - AnchorAddr.Offset); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,13 @@ | ||
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX9 %s | ||
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s | ||
# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s | ||
|
||
# GFX9-LABEL: name: diffoporder_add | ||
# GCN-LABEL: name: diffoporder_add | ||
# GFX9: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, -2048, 0 | ||
# GFX9: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0 | ||
|
||
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0 | ||
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0 | ||
|
||
name: diffoporder_add | ||
body: | | ||
bb.0.entry: | ||
|
@@ -43,7 +47,7 @@ body: | | |
... | ||
--- | ||
|
||
# GFX9-LABEL: name: LowestInMiddle | ||
# GCN-LABEL: name: LowestInMiddle | ||
# GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11200 | ||
# GFX9: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]] | ||
# GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]] | ||
|
@@ -57,6 +61,11 @@ body: | | |
# GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0, | ||
# GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0, | ||
|
||
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0 | ||
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0 | ||
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0 | ||
|
||
|
||
name: LowestInMiddle | ||
body: | | ||
bb.0.entry: | ||
|
@@ -101,7 +110,7 @@ body: | | |
... | ||
--- | ||
|
||
# GFX9-LABEL: name: NegativeDistance | ||
# GCN-LABEL: name: NegativeDistance | ||
# GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 10240 | ||
# GFX9: [[V_ADD_CO_U32_e64_4:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]] | ||
# GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]] | ||
|
@@ -110,6 +119,10 @@ body: | | |
# GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0 | ||
# GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0 | ||
|
||
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0 | ||
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0 | ||
# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0 | ||
|
||
name: NegativeDistance | ||
body: | | ||
bb.0.entry: | ||
|
@@ -190,10 +203,13 @@ body: | | |
... | ||
--- | ||
|
||
# GFX9-LABEL: name: diffoporder_add_store | ||
# GCN-LABEL: name: diffoporder_add_store | ||
# GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0, | ||
# GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0, | ||
|
||
# GFX8: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 0, 0 | ||
# GFX8: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0 | ||
|
||
name: diffoporder_add_store | ||
body: | | ||
bb.0.entry: | ||
|
@@ -212,3 +228,57 @@ body: | | |
%13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1 | ||
GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec | ||
... | ||
--- | ||
|
||
# GCN-LABEL: name: diffoporder_add_flat_load | ||
# GFX9: FLAT_LOAD_DWORD %{{[0-9]+}}, 1000, 0, | ||
# GFX9: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0, | ||
|
||
Comment on lines
+234
to
+235
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Needs a gfx8 run line to make sure the offset isn't introduced. |
||
# GFX8: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0, | ||
# GFX8: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0, | ||
|
||
name: diffoporder_add_flat_load | ||
body: | | ||
bb.0.entry: | ||
|
||
%0:vreg_64 = COPY $vgpr0_vgpr1 | ||
|
||
%1:sgpr_32 = S_MOV_B32 4000 | ||
%2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec | ||
%4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec | ||
%6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1 | ||
%14:vgpr_32 = FLAT_LOAD_DWORD %6, 0, 0, implicit $exec, implicit $flat_scr | ||
|
||
%8:sgpr_32 = S_MOV_B32 3000 | ||
%9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec | ||
%11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec | ||
%13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1 | ||
%15:vgpr_32 = FLAT_LOAD_DWORD %13, 0, 0, implicit $exec, implicit $flat_scr | ||
... | ||
--- | ||
|
||
# GCN-LABEL: name: diffoporder_add_flat_store | ||
# GFX9: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0, | ||
# GFX9: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0, | ||
|
||
# GFX8: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub0, 0, 0, | ||
# GFX8: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0, | ||
|
||
name: diffoporder_add_flat_store | ||
body: | | ||
bb.0.entry: | ||
|
||
%0:vreg_64 = COPY $vgpr0_vgpr1 | ||
|
||
%1:sgpr_32 = S_MOV_B32 4000 | ||
%2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec | ||
%4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec | ||
%6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1 | ||
FLAT_STORE_DWORD %6, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr | ||
|
||
%8:sgpr_32 = S_MOV_B32 3000 | ||
%9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec | ||
%11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec | ||
%13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1 | ||
FLAT_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec, implicit $flat_scr | ||
... |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just out of curiosity: why are atomics not supported?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Atomics have identical addressing modes, so they should be handled the same way.