[AMDGPU] Enable constant offset promotion to immediate FLAT #93884
Conversation
Currently constant offset promotion is only supported for FLAT Global.
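For context: SILoadStoreOptimizer::promoteConstantOffsetToImm looks for memory accesses whose addresses differ by a constant, re-bases them on a common anchor address, and folds each per-access delta into the instruction's immediate offset operand. A minimal before/after sketch of the effect, modeled on the MIR test added below (register names are illustrative, not taken from the patch):

# Before: two full 64-bit address computations, base+4000 and base+3000.
%a:vgpr_32 = FLAT_LOAD_DWORD %addr4000, 0, 0, implicit $exec, implicit $flat_scr
%b:vgpr_32 = FLAT_LOAD_DWORD %addr3000, 0, 0, implicit $exec, implicit $flat_scr

# After: one anchor address (base+3000); the 1000-byte delta moves into the
# immediate offset operand, and the second VALU add chain becomes dead.
%a:vgpr_32 = FLAT_LOAD_DWORD %anchor, 1000, 0, implicit $exec, implicit $flat_scr
%b:vgpr_32 = FLAT_LOAD_DWORD %anchor, 0, 0, implicit $exec, implicit $flat_scr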
@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)

Changes: Currently constant offset promotion is only supported for FLAT Global.

Full diff: https://github.com/llvm/llvm-project/pull/93884.diff

3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 292b17da93583..8b018a649e6a9 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -223,8 +223,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- bool isLegalFlatAddressingMode(const AddrMode &AM, unsigned AddrSpace,
- uint64_t FlatVariant) const;
bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
unsigned isCFIntrinsic(const SDNode *Intr) const;
@@ -316,6 +314,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SmallVectorImpl<Value*> &/*Ops*/,
Type *&/*AccessTy*/) const override;
+ bool isLegalFlatAddressingMode(const AddrMode &AM, unsigned AddrSpace,
+ uint64_t FlatVariant) const;
bool isLegalGlobalAddressingMode(const AddrMode &AM) const;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS,
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 774920aac2f08..c7c4f3efb410f 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -2055,10 +2055,20 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
if (!(MI.mayLoad() ^ MI.mayStore()))
return false;
- // TODO: Support flat and scratch.
- if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0)
+ if (!STM->hasFlatInstOffsets() || !SIInstrInfo::isFLAT(MI))
return false;
+ // TODO: Support FLAT_SCRATCH. Currently code expects 64-bit pointers.
+ if (SIInstrInfo::isFLATScratch(MI))
+ return false;
+
+ unsigned AS = AMDGPUAS::FLAT_ADDRESS;
+ uint64_t FlatVariant = SIInstrFlags::FLAT;
+ if (SIInstrInfo::isFLATGlobal(MI)) {
+ AS = AMDGPUAS::GLOBAL_ADDRESS;
+ FlatVariant = SIInstrFlags::FlatGlobal;
+ }
+
if (MI.mayLoad() &&
TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != nullptr)
return false;
@@ -2157,7 +2167,7 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
TargetLoweringBase::AddrMode AM;
AM.HasBaseReg = true;
AM.BaseOffs = Dist;
- if (TLI->isLegalGlobalAddressingMode(AM) &&
+ if (TLI->isLegalFlatAddressingMode(AM, AS, FlatVariant) &&
(uint32_t)std::abs(Dist) > MaxDist) {
MaxDist = std::abs(Dist);
@@ -2183,7 +2193,7 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
AM.HasBaseReg = true;
AM.BaseOffs = OtherOffset - AnchorAddr.Offset;
- if (TLI->isLegalGlobalAddressingMode(AM)) {
+ if (TLI->isLegalFlatAddressingMode(AM, AS, FlatVariant)) {
LLVM_DEBUG(dbgs() << " Promote Offset(" << OtherOffset; dbgs() << ")";
OtherMI->dump());
updateBaseAndOffset(*OtherMI, Base, OtherOffset - AnchorAddr.Offset);
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
index 1a751839e2947..a74faf4ff2c52 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
@@ -212,3 +212,51 @@ body: |
%13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec
...
+---
+
+# GFX9-LABEL: name: diffoporder_add_flat_load
+# GFX9: FLAT_LOAD_DWORD %{{[0-9]+}}, 1000, 0,
+# GFX9: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0,
+
+name: diffoporder_add_flat_load
+body: |
+ bb.0.entry:
+
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+
+ %1:sgpr_32 = S_MOV_B32 4000
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
+ %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
+ %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
+ %14:vgpr_32 = FLAT_LOAD_DWORD %6, 0, 0, implicit $exec, implicit $flat_scr
+
+ %8:sgpr_32 = S_MOV_B32 3000
+ %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
+ %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+ %15:vgpr_32 = FLAT_LOAD_DWORD %13, 0, 0, implicit $exec, implicit $flat_scr
+...
+---
+
+# GFX9-LABEL: name: diffoporder_add_flat_store
+# GFX9: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0,
+# GFX9: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0,
+
+name: diffoporder_add_flat_store
+body: |
+ bb.0.entry:
+
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+
+ %1:sgpr_32 = S_MOV_B32 4000
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
+ %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
+ %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
+ FLAT_STORE_DWORD %6, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
+
+ %8:sgpr_32 = S_MOV_B32 3000
+ %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
+ %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+ FLAT_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec, implicit $flat_scr
+...
@@ -2055,10 +2055,20 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
   if (!(MI.mayLoad() ^ MI.mayStore()))
     return false;

-  // TODO: Support flat and scratch.
-  if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0)
If nothing else, checking for the existence of a global SADDR form is a very obscure and indirect way of checking support for an immediate offset and address size. This is actually how I discovered this piece of code: it just started to fail with the experimental patch.
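Condensing the two guards being contrasted (paraphrased from the diff above, not new code):

// Old guard: bail out unless the opcode has a global SADDR variant -- an
// indirect proxy for "this instruction supports an immediate offset".
if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0)
  return false;

// New guard: ask the subtarget directly whether FLAT encodings carry
// immediate offsets, and restrict the transform to FLAT instructions.
if (!STM->hasFlatInstOffsets() || !SIInstrInfo::isFLAT(MI))
  return false;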
@@ -2055,10 +2055,16 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
   if (!(MI.mayLoad() ^ MI.mayStore()))
Just out of curiosity: why are atomics not supported?
Atomics have identical addressing modes; they should be handled the same way.
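A plausible reading of why atomics fall out today (an assumption based on the guard quoted above): atomic read-modify-write instructions report both mayLoad() and mayStore(), so the XOR check rejects them. Supporting them would presumably mean relaxing that guard, e.g.:

// Hypothetical relaxation (not part of this patch): accept loads, stores,
// and atomic RMW (which both loads and stores), rejecting only instructions
// that access memory in neither direction.
if (!MI.mayLoad() && !MI.mayStore())
  return false;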
lgtm except for the untested no-offset case
# GFX9: FLAT_LOAD_DWORD %{{[0-9]+}}, 1000, 0,
# GFX9: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0,
Needs a gfx8 run line to make sure the offset isn't introduced
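A sketch of what such a run line could look like, assuming the file's existing llc conventions (the -mcpu value and check prefix here are hypothetical, not taken from the patch): GFX8 lacks FLAT instruction offsets (hasFlatInstOffsets() is false), so both loads should keep an immediate offset of 0:

# RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass si-load-store-opt -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s
# GFX8: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0,
# GFX8: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0,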