diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index c5f3a98e69061..4c02bb1b306e5 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -223,7 +223,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
-  bool isLegalFlatAddressingMode(const AddrMode &AM, unsigned AddrSpace) const;
   bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
 
   unsigned isCFIntrinsic(const SDNode *Intr) const;
@@ -315,6 +314,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
                             SmallVectorImpl<Value*> &/*Ops*/,
                             Type *&/*AccessTy*/) const override;
 
+  bool isLegalFlatAddressingMode(const AddrMode &AM, unsigned AddrSpace) const;
   bool isLegalGlobalAddressingMode(const AddrMode &AM) const;
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                              Type *Ty, unsigned AS,
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 774920aac2f08..c47eea20563df 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -2055,10 +2055,16 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
   if (!(MI.mayLoad() ^ MI.mayStore()))
     return false;
 
-  // TODO: Support flat and scratch.
-  if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0)
+  if (!STM->hasFlatInstOffsets() || !SIInstrInfo::isFLAT(MI))
     return false;
 
+  // TODO: Support FLAT_SCRATCH. Currently code expects 64-bit pointers.
+  if (SIInstrInfo::isFLATScratch(MI))
+    return false;
+
+  unsigned AS = SIInstrInfo::isFLATGlobal(MI) ? AMDGPUAS::GLOBAL_ADDRESS
+                                              : AMDGPUAS::FLAT_ADDRESS;
+
   if (MI.mayLoad() &&
       TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != nullptr)
     return false;
@@ -2157,7 +2163,7 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
     TargetLoweringBase::AddrMode AM;
     AM.HasBaseReg = true;
     AM.BaseOffs = Dist;
-    if (TLI->isLegalGlobalAddressingMode(AM) &&
+    if (TLI->isLegalFlatAddressingMode(AM, AS) &&
         (uint32_t)std::abs(Dist) > MaxDist) {
       MaxDist = std::abs(Dist);
 
@@ -2183,7 +2189,7 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
     AM.HasBaseReg = true;
     AM.BaseOffs = OtherOffset - AnchorAddr.Offset;
 
-    if (TLI->isLegalGlobalAddressingMode(AM)) {
+    if (TLI->isLegalFlatAddressingMode(AM, AS)) {
       LLVM_DEBUG(dbgs() << " Promote Offset(" << OtherOffset; dbgs() << ")";
                  OtherMI->dump());
       updateBaseAndOffset(*OtherMI, Base, OtherOffset - AnchorAddr.Offset);
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
index 1a751839e2947..4ecce2842455b 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
@@ -1,9 +1,13 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
 
-# GFX9-LABEL: name: diffoporder_add
+# GCN-LABEL: name: diffoporder_add
 # GFX9: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, -2048, 0
 # GFX9: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
 
+# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
+# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
+
 name: diffoporder_add
 body: |
   bb.0.entry:
@@ -43,7 +47,7 @@ body: |
 ...
 ---
 
-# GFX9-LABEL: name: LowestInMiddle
+# GCN-LABEL: name: LowestInMiddle
 # GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11200
 # GFX9: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
 # GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
@@ -57,6 +61,11 @@ body: |
 # GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0,
 # GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0,
 
+# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
+# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
+# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
+
+
 name: LowestInMiddle
 body: |
   bb.0.entry:
@@ -101,7 +110,7 @@ body: |
 ...
 ---
 
-# GFX9-LABEL: name: NegativeDistance
+# GCN-LABEL: name: NegativeDistance
 # GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 10240
 # GFX9: [[V_ADD_CO_U32_e64_4:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
 # GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
@@ -110,6 +119,10 @@ body: |
 # GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0
 # GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0
 
+# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
+# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
+# GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
+
 name: NegativeDistance
 body: |
   bb.0.entry:
@@ -190,10 +203,13 @@ body: |
 ...
 ---
 
-# GFX9-LABEL: name: diffoporder_add_store
+# GCN-LABEL: name: diffoporder_add_store
 # GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0,
 # GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0,
 
+# GFX8: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 0, 0
+# GFX8: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0
+
 name: diffoporder_add_store
 body: |
   bb.0.entry:
@@ -212,3 +228,57 @@ body: |
     %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
     GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec
 ...
+---
+
+# GCN-LABEL: name: diffoporder_add_flat_load
+# GFX9: FLAT_LOAD_DWORD %{{[0-9]+}}, 1000, 0,
+# GFX9: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0,
+
+# GFX8: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0,
+# GFX8: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0,
+
+name: diffoporder_add_flat_load
+body: |
+  bb.0.entry:
+
+    %0:vreg_64 = COPY $vgpr0_vgpr1
+
+    %1:sgpr_32 = S_MOV_B32 4000
+    %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
+    %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
+    %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
+    %14:vgpr_32 = FLAT_LOAD_DWORD %6, 0, 0, implicit $exec, implicit $flat_scr
+
+    %8:sgpr_32 = S_MOV_B32 3000
+    %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
+    %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
+    %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+    %15:vgpr_32 = FLAT_LOAD_DWORD %13, 0, 0, implicit $exec, implicit $flat_scr
+...
+---
+
+# GCN-LABEL: name: diffoporder_add_flat_store
+# GFX9: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0,
+# GFX9: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0,
+
+# GFX8: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub0, 0, 0,
+# GFX8: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0,
+
+name: diffoporder_add_flat_store
+body: |
+  bb.0.entry:
+
+    %0:vreg_64 = COPY $vgpr0_vgpr1
+
+    %1:sgpr_32 = S_MOV_B32 4000
+    %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
+    %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
+    %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
+    FLAT_STORE_DWORD %6, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
+
+    %8:sgpr_32 = S_MOV_B32 3000
+    %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
+    %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
+    %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+    FLAT_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec, implicit $flat_scr
+...