From c885e017ae70665c04a307d9920a0d7668d57efd Mon Sep 17 00:00:00 2001 From: Stefan Stipanovic Date: Wed, 4 Sep 2024 20:15:29 +0200 Subject: [PATCH] [AMDGPU] Overload image atomic swap to allow float as well. --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 33 +++++++++---------- .../AMDGPU/llvm.amdgcn.image.atomic.dim.ll | 13 ++++++++ 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index dc13a35c66f9a..618fc3cdf87d2 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -799,18 +799,13 @@ class AMDGPUDimNoSampleProfile dataargs> : AMDGPUDimProfile { - let RetTypes = [llvm_anyint_ty]; + list dataargs, + LLVMType rettype> : AMDGPUDimProfile { + let RetTypes = [rettype]; let DataArgs = dataargs; let IsAtomic = true; } -class AMDGPUDimAtomicFloatProfile dataargs> - : AMDGPUDimAtomicProfile { - let RetTypes = [llvm_anyfloat_ty]; -} - class AMDGPUDimGetResInfoProfile : AMDGPUDimProfile<"GET_RESINFO", dim> { let RetTypes = [llvm_anyfloat_ty]; @@ -1023,26 +1018,28 @@ defset list AMDGPUImageDimIntrinsics = { ////////////////////////////////////////////////////////////////////////// defset list AMDGPUImageDimAtomicIntrinsics = { multiclass AMDGPUImageDimAtomicX dataargs, - int isFloat = 0> { + LLVMType rettype = llvm_anyint_ty> { foreach dim = AMDGPUDims.All in { - def !strconcat(NAME, "_", dim.Name): AMDGPUImageDimIntrinsic< - !if (isFloat, AMDGPUDimAtomicFloatProfile, - AMDGPUDimAtomicProfile), - [], [SDNPMemOperand]>; + def !strconcat(NAME, "_", dim.Name): + AMDGPUImageDimIntrinsic, + [], [SDNPMemOperand]>; } } - multiclass AMDGPUImageDimAtomic { + multiclass AMDGPUImageDimAtomic { defm "" - : AMDGPUImageDimAtomicX, "vdata">], - isFloat>; + : AMDGPUImageDimAtomicX, "vdata">], rettype>; } multiclass AMDGPUImageDimFloatAtomic { - defm "" : AMDGPUImageDimAtomic; + defm "" : AMDGPUImageDimAtomic; + } + + multiclass AMDGPUImageDimAnyAtomic { + defm "" : AMDGPUImageDimAtomic; } - defm int_amdgcn_image_atomic_swap : AMDGPUImageDimAtomic<"ATOMIC_SWAP">; + defm int_amdgcn_image_atomic_swap : AMDGPUImageDimAnyAtomic<"ATOMIC_SWAP">; defm int_amdgcn_image_atomic_add : AMDGPUImageDimAtomic<"ATOMIC_ADD">; defm int_amdgcn_image_atomic_sub : AMDGPUImageDimAtomic<"ATOMIC_SUB">; defm int_amdgcn_image_atomic_smin : AMDGPUImageDimAtomic<"ATOMIC_SMIN">; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll index f13b897971707..a661730ba2d1b 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll @@ -30,6 +30,17 @@ main_body: ret <2 x float> %out } +; GCN-LABEL: {{^}}atomic_swap_1d_float: +; GFX6789: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_swap v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX10: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; +; GFX12: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN ; +define amdgpu_ps float @atomic_swap_1d_float(<8 x i32> inreg %rsrc, float %data, i32 %s) { +main_body: + %v = call float @llvm.amdgcn.image.atomic.swap.1d.f32.i32(float %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret float %v +} + ; GCN-LABEL: {{^}}atomic_add_1d: ; GFX6789: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX90A: image_atomic_add v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} @@ -299,6 +310,8 @@ declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32 declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64, i32, <8 x i32>, i32, i32) #0 declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64, i64, i32, <8 x i32>, i32, i32) #0 +declare float @llvm.amdgcn.image.atomic.swap.1d.f32.i32(float, i32, <8 x i32>, i32, i32) #0 + declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0 declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0 declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0