diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 780dfaae11ef3..de029a4d6bedd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -4248,7 +4248,8 @@ bool AMDGPULegalizerInfo::loadInputValue( AMDGPU::isEntryFunctionCC(CC) && !MFI->hasWorkGroupIDZ() ? ~0u : 0xFFFFu); const ArgDescriptor WorkGroupIDZ = ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u); - if (ST.hasArchitectedSGPRs() && AMDGPU::isCompute(CC)) { + if (ST.hasArchitectedSGPRs() && + (AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx)) { switch (ArgType) { case AMDGPUFunctionArgInfo::WORKGROUP_ID_X: Arg = &WorkGroupIDX; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 17b6e0cb9c3b4..50159d435aef7 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2124,7 +2124,8 @@ SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG, AMDGPU::isEntryFunctionCC(CC) && !MFI.hasWorkGroupIDZ() ? ~0u : 0xFFFFu); const ArgDescriptor WorkGroupIDZ = ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u); - if (Subtarget->hasArchitectedSGPRs() && AMDGPU::isCompute(CC)) { + if (Subtarget->hasArchitectedSGPRs() && + (AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx)) { switch (PVID) { case AMDGPUFunctionArgInfo::WORKGROUP_ID_X: Reg = &WorkGroupIDX; @@ -2798,7 +2799,9 @@ SDValue SITargetLowering::LowerFormalArguments( (void)UserSGPRInfo; if (!Subtarget->enableFlatScratch()) assert(!UserSGPRInfo.hasFlatScratchInit()); - if (CallConv != CallingConv::AMDGPU_CS || !Subtarget->hasArchitectedSGPRs()) + if ((CallConv != CallingConv::AMDGPU_CS && + CallConv != CallingConv::AMDGPU_Gfx) || + !Subtarget->hasArchitectedSGPRs()) assert(!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() && !Info->hasWorkGroupIDZ()); } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 12433dc83c489..bf4a501cc3159 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -110,7 +110,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F, } if (!AMDGPU::isGraphics(CC) || - (CC == CallingConv::AMDGPU_CS && ST.hasArchitectedSGPRs())) { + ((CC == CallingConv::AMDGPU_CS || CC == CallingConv::AMDGPU_CS) && + ST.hasArchitectedSGPRs())) { if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x")) WorkGroupIDX = true; diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll index cfff0a969da9e..14fe4e5f48c67 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-SDAG %s -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-GISEL %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s @@ -156,10 +156,37 @@ define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace( ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX9ARCH-LABEL: workgroup_ids_gfx: -; GFX9ARCH: ; %bb.0: -; GFX9ARCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9ARCH-NEXT: s_setpc_b64 s[30:31] +; GFX9ARCH-SDAG-LABEL: workgroup_ids_gfx: +; GFX9ARCH-SDAG: ; %bb.0: +; GFX9ARCH-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v6, ttmp9 +; GFX9ARCH-SDAG-NEXT: s_and_b32 s34, ttmp7, 0xffff +; GFX9ARCH-SDAG-NEXT: global_store_dword v[0:1], v6, off +; GFX9ARCH-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, s34 +; GFX9ARCH-SDAG-NEXT: s_lshr_b32 s34, ttmp7, 16 +; GFX9ARCH-SDAG-NEXT: global_store_dword v[2:3], v0, off +; GFX9ARCH-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, s34 +; GFX9ARCH-SDAG-NEXT: global_store_dword v[4:5], v0, off +; GFX9ARCH-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX9ARCH-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9ARCH-GISEL-LABEL: workgroup_ids_gfx: +; GFX9ARCH-GISEL: ; %bb.0: +; GFX9ARCH-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v6, ttmp9 +; GFX9ARCH-GISEL-NEXT: s_and_b32 s34, ttmp7, 0xffff +; GFX9ARCH-GISEL-NEXT: s_lshr_b32 s35, ttmp7, 16 +; GFX9ARCH-GISEL-NEXT: global_store_dword v[0:1], v6, off +; GFX9ARCH-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, s34 +; GFX9ARCH-GISEL-NEXT: global_store_dword v[2:3], v0, off +; GFX9ARCH-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, s35 +; GFX9ARCH-GISEL-NEXT: global_store_dword v[4:5], v0, off +; GFX9ARCH-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9ARCH-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: workgroup_ids_gfx: ; GFX12: ; %bb.0: @@ -168,6 +195,18 @@ define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace( ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_and_b32 s0, ttmp7, 0xffff +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: v_dual_mov_b32 v6, ttmp9 :: v_dual_mov_b32 v7, s0 +; GFX12-NEXT: s_lshr_b32 s1, ttmp7, 16 +; GFX12-NEXT: v_mov_b32_e32 v8, s1 +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_store_b32 v[0:1], v6, off scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_store_b32 v[2:3], v7, off scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_store_b32 v[4:5], v8, off scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] %id.x = call i32 @llvm.amdgcn.workgroup.id.x() %id.y = call i32 @llvm.amdgcn.workgroup.id.y() @@ -177,11 +216,3 @@ define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace( store volatile i32 %id.z, ptr addrspace(1) %outz ret void } - -declare i32 @llvm.amdgcn.workgroup.id.x() -declare i32 @llvm.amdgcn.workgroup.id.y() -declare i32 @llvm.amdgcn.workgroup.id.z() -declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg) -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GFX9-GISEL: {{.*}} -; GFX9-SDAG: {{.*}}