[AMDGPU] Enable constant offset promotion to immediate FLAT #93884


Merged
merged 4 commits into llvm:main from rampitec:promote-flat-constant-offset on May 31, 2024

Conversation

rampitec
Collaborator

Currently it is only supported for FLAT Global.

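For context, promoteConstantOffsetToImm looks for memory instructions whose addresses differ only by a constant, picks one as an anchor, and folds each other instruction's delta into its immediate offset field. A minimal before/after sketch (editor's illustration mirroring the new tests in the diff below; register names and the 1000-byte delta are illustrative):

    ; before: two flat loads from base+4000 and base+3000, both with immediate offset 0
    %a:vgpr_32 = FLAT_LOAD_DWORD %addr4000, 0, 0, implicit $exec, implicit $flat_scr
    %b:vgpr_32 = FLAT_LOAD_DWORD %addr3000, 0, 0, implicit $exec, implicit $flat_scr

    ; after: both loads address off one shared base; the 1000-byte delta
    ; between them becomes an immediate offset
    %a:vgpr_32 = FLAT_LOAD_DWORD %anchor, 1000, 0, implicit $exec, implicit $flat_scr
    %b:vgpr_32 = FLAT_LOAD_DWORD %anchor, 0, 0, implicit $exec, implicit $flat_scr

Before this patch the rewrite only fired for FLAT Global instructions; plain FLAT loads and stores kept their redundant 64-bit address arithmetic.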
@llvmbot
Member

llvmbot commented May 30, 2024

@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)

Changes

Currently it is only supported for FLAT Global.


Full diff: https://github.com/llvm/llvm-project/pull/93884.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SIISelLowering.h (+2-2)
  • (modified) llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp (+14-4)
  • (modified) llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir (+48)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 292b17da93583..8b018a649e6a9 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -223,8 +223,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
-  bool isLegalFlatAddressingMode(const AddrMode &AM, unsigned AddrSpace,
-                                 uint64_t FlatVariant) const;
   bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
 
   unsigned isCFIntrinsic(const SDNode *Intr) const;
@@ -316,6 +314,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
                             SmallVectorImpl<Value*> &/*Ops*/,
                             Type *&/*AccessTy*/) const override;
 
+  bool isLegalFlatAddressingMode(const AddrMode &AM, unsigned AddrSpace,
+                                 uint64_t FlatVariant) const;
   bool isLegalGlobalAddressingMode(const AddrMode &AM) const;
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                              unsigned AS,
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 774920aac2f08..c7c4f3efb410f 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -2055,10 +2055,20 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
   if (!(MI.mayLoad() ^ MI.mayStore()))
     return false;
 
-  // TODO: Support flat and scratch.
-  if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0)
+  if (!STM->hasFlatInstOffsets() || !SIInstrInfo::isFLAT(MI))
     return false;
 
+  // TODO: Support FLAT_SCRATCH. Currently code expects 64-bit pointers.
+  if (SIInstrInfo::isFLATScratch(MI))
+    return false;
+
+  unsigned AS = AMDGPUAS::FLAT_ADDRESS;
+  uint64_t FlatVariant = SIInstrFlags::FLAT;
+  if (SIInstrInfo::isFLATGlobal(MI)) {
+    AS = AMDGPUAS::GLOBAL_ADDRESS;
+    FlatVariant = SIInstrFlags::FlatGlobal;
+  }
+
   if (MI.mayLoad() &&
       TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != nullptr)
     return false;
@@ -2157,7 +2167,7 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
     TargetLoweringBase::AddrMode AM;
     AM.HasBaseReg = true;
     AM.BaseOffs = Dist;
-    if (TLI->isLegalGlobalAddressingMode(AM) &&
+    if (TLI->isLegalFlatAddressingMode(AM, AS, FlatVariant) &&
         (uint32_t)std::abs(Dist) > MaxDist) {
       MaxDist = std::abs(Dist);
 
@@ -2183,7 +2193,7 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
       AM.HasBaseReg = true;
       AM.BaseOffs = OtherOffset - AnchorAddr.Offset;
 
-      if (TLI->isLegalGlobalAddressingMode(AM)) {
+      if (TLI->isLegalFlatAddressingMode(AM, AS, FlatVariant)) {
         LLVM_DEBUG(dbgs() << "  Promote Offset(" << OtherOffset; dbgs() << ")";
                    OtherMI->dump());
         updateBaseAndOffset(*OtherMI, Base, OtherOffset - AnchorAddr.Offset);
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
index 1a751839e2947..a74faf4ff2c52 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
@@ -212,3 +212,51 @@ body:             |
     %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
     GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec
 ...
+---
+
+# GFX9-LABEL: name: diffoporder_add_flat_load
+# GFX9: FLAT_LOAD_DWORD %{{[0-9]+}}, 1000, 0,
+# GFX9: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0,
+
+name: diffoporder_add_flat_load
+body:             |
+  bb.0.entry:
+
+    %0:vreg_64 = COPY $vgpr0_vgpr1
+
+    %1:sgpr_32 = S_MOV_B32 4000
+    %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
+    %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
+    %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
+    %14:vgpr_32 = FLAT_LOAD_DWORD %6, 0, 0, implicit $exec, implicit $flat_scr
+
+    %8:sgpr_32 = S_MOV_B32 3000
+    %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
+    %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
+    %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+    %15:vgpr_32 = FLAT_LOAD_DWORD %13, 0, 0, implicit $exec, implicit $flat_scr
+...
+---
+
+# GFX9-LABEL: name: diffoporder_add_flat_store
+# GFX9: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0,
+# GFX9: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0,
+
+name: diffoporder_add_flat_store
+body:             |
+  bb.0.entry:
+
+    %0:vreg_64 = COPY $vgpr0_vgpr1
+
+    %1:sgpr_32 = S_MOV_B32 4000
+    %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
+    %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
+    %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
+    FLAT_STORE_DWORD %6, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
+
+    %8:sgpr_32 = S_MOV_B32 3000
+    %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
+    %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
+    %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+    FLAT_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec, implicit $flat_scr
+...
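Pulling the key pieces of the change together (condensed from the diff above, not new logic): the pass now classifies the instruction once and threads that classification through every legality query:

    // Classify once: plain FLAT vs. FLAT Global (FLAT scratch was rejected
    // earlier because the code assumes 64-bit pointers).
    unsigned AS = AMDGPUAS::FLAT_ADDRESS;
    uint64_t FlatVariant = SIInstrFlags::FLAT;
    if (SIInstrInfo::isFLATGlobal(MI)) {
      AS = AMDGPUAS::GLOBAL_ADDRESS;
      FlatVariant = SIInstrFlags::FlatGlobal;
    }

    // Each candidate offset is then validated against the matching variant
    // rather than unconditionally against the global addressing mode:
    TargetLoweringBase::AddrMode AM;
    AM.HasBaseReg = true;
    AM.BaseOffs = Dist;
    if (TLI->isLegalFlatAddressingMode(AM, AS, FlatVariant) && ...)

The distinction matters because plain FLAT and FLAT Global can have different legal immediate-offset ranges (for example, the signedness of the offset differs between the two variants on some subtargets), so the legality check has to know which variant it is validating.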

@@ -2055,10 +2055,20 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
  if (!(MI.mayLoad() ^ MI.mayStore()))
    return false;

  // TODO: Support flat and scratch.
  if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0)
rampitec (Collaborator, Author)
If nothing else, checking for the existence of a global SADDR form is a very obscure and indirect way of checking support for an immediate offset and address size. This is actually how I discovered this piece of code; it just started to fail with the experimental patch.
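Condensed from the diff above (the same code, shown side by side for contrast):

    // Old guard: infers immediate-offset support from whether a global SADDR
    // opcode variant exists for this instruction -- an indirect proxy.
    if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0)
      return false;

    // New guard: asks the subtarget directly and restricts to FLAT-class
    // instructions; FLAT scratch is filtered out just below.
    if (!STM->hasFlatInstOffsets() || !SIInstrInfo::isFLAT(MI))
      return false;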

rampitec requested review from vpykhtin May 30, 2024 23:55
@@ -2055,10 +2055,16 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
  if (!(MI.mayLoad() ^ MI.mayStore()))
rampitec (Collaborator, Author)
Just out of curiosity: why are atomics not supported?

Contributor
Atomics have identical addressing modes; they should be handled the same way.
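(A plausible reading, not stated explicitly in the thread: the guard quoted above, if (!(MI.mayLoad() ^ MI.mayStore())), is what currently filters atomics out, since a read-modify-write atomic both loads and stores and so fails the XOR. Supporting atomics would mean relaxing that check.)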

arsenm (Contributor) left a comment
lgtm except for the untested no-offset case

Comment on lines +218 to +219
# GFX9: FLAT_LOAD_DWORD %{{[0-9]+}}, 1000, 0,
# GFX9: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0,
Contributor
Needs a gfx8 run line to make sure the offset isn't introduced
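A hedged sketch of what such a run line and checks could look like (the pass name matches SILoadStoreOptimizer's registered name; the tonga CPU choice and the GFX8 prefix are assumptions, not taken from the PR):

    # RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX8 %s

    # GFX8-LABEL: name: diffoporder_add_flat_load
    # GFX8-NOT: FLAT_LOAD_DWORD %{{[0-9]+}}, 1000,

GFX8 predates FLAT instruction offsets (STM->hasFlatInstOffsets() is false there), so the check asserts that the 1000-byte immediate is never introduced on that subtarget.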

jayfoad requested review from perlfu and piotrAMD May 31, 2024 10:54
rampitec merged commit fc21387 into llvm:main May 31, 2024
7 checks passed
rampitec deleted the promote-flat-constant-offset branch May 31, 2024 19:23