Skip to content

Commit 914ab36

Browse files
authored
[AMDGPU] Overload image atomic swap to allow float as well. (#107283)
LLPC can generate the llvm.amdgcn.image.atomic.swap intrinsic with a float data argument and a float return type, not only with integer types. This went unnoticed until CreateIntrinsic with implicit mangling was used.
1 parent ea2da57 commit 914ab36

File tree

2 files changed

+28
-18
lines changed

2 files changed

+28
-18
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -799,18 +799,13 @@ class AMDGPUDimNoSampleProfile<string opmod,
799799

800800
class AMDGPUDimAtomicProfile<string opmod,
801801
AMDGPUDimProps dim,
802-
list<AMDGPUArg> dataargs> : AMDGPUDimProfile<opmod, dim> {
803-
let RetTypes = [llvm_anyint_ty];
802+
list<AMDGPUArg> dataargs,
803+
LLVMType rettype> : AMDGPUDimProfile<opmod, dim> {
804+
let RetTypes = [rettype];
804805
let DataArgs = dataargs;
805806
let IsAtomic = true;
806807
}
807808

808-
class AMDGPUDimAtomicFloatProfile<string opmod, AMDGPUDimProps dim,
809-
list<AMDGPUArg> dataargs>
810-
: AMDGPUDimAtomicProfile<opmod, dim, dataargs> {
811-
let RetTypes = [llvm_anyfloat_ty];
812-
}
813-
814809
class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim>
815810
: AMDGPUDimProfile<"GET_RESINFO", dim> {
816811
let RetTypes = [llvm_anyfloat_ty];
@@ -1023,26 +1018,28 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
10231018
//////////////////////////////////////////////////////////////////////////
10241019
defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimAtomicIntrinsics = {
10251020
multiclass AMDGPUImageDimAtomicX<string opmod, list<AMDGPUArg> dataargs,
1026-
int isFloat = 0> {
1021+
LLVMType rettype = llvm_anyint_ty> {
10271022
foreach dim = AMDGPUDims.All in {
1028-
def !strconcat(NAME, "_", dim.Name): AMDGPUImageDimIntrinsic<
1029-
!if (isFloat, AMDGPUDimAtomicFloatProfile<opmod, dim, dataargs>,
1030-
AMDGPUDimAtomicProfile<opmod, dim, dataargs>),
1031-
[], [SDNPMemOperand]>;
1023+
def !strconcat(NAME, "_", dim.Name):
1024+
AMDGPUImageDimIntrinsic<AMDGPUDimAtomicProfile<opmod, dim, dataargs, rettype>,
1025+
[], [SDNPMemOperand]>;
10321026
}
10331027
}
10341028

1035-
multiclass AMDGPUImageDimAtomic<string opmod, int isFloat = 0> {
1029+
multiclass AMDGPUImageDimAtomic<string opmod, LLVMType rettype = llvm_anyint_ty> {
10361030
defm ""
1037-
: AMDGPUImageDimAtomicX<opmod, [AMDGPUArg<LLVMMatchType<0>, "vdata">],
1038-
isFloat>;
1031+
: AMDGPUImageDimAtomicX<opmod, [AMDGPUArg<LLVMMatchType<0>, "vdata">], rettype>;
10391032
}
10401033

10411034
multiclass AMDGPUImageDimFloatAtomic<string opmod> {
1042-
defm "" : AMDGPUImageDimAtomic<opmod, 1 /*isFloat*/>;
1035+
defm "" : AMDGPUImageDimAtomic<opmod, llvm_anyfloat_ty>;
1036+
}
1037+
1038+
multiclass AMDGPUImageDimAnyAtomic<string opmod> {
1039+
defm "" : AMDGPUImageDimAtomic<opmod, llvm_any_ty>;
10431040
}
10441041

1045-
defm int_amdgcn_image_atomic_swap : AMDGPUImageDimAtomic<"ATOMIC_SWAP">;
1042+
defm int_amdgcn_image_atomic_swap : AMDGPUImageDimAnyAtomic<"ATOMIC_SWAP">;
10461043
defm int_amdgcn_image_atomic_add : AMDGPUImageDimAtomic<"ATOMIC_ADD">;
10471044
defm int_amdgcn_image_atomic_sub : AMDGPUImageDimAtomic<"ATOMIC_SUB">;
10481045
defm int_amdgcn_image_atomic_smin : AMDGPUImageDimAtomic<"ATOMIC_SMIN">;

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,17 @@ main_body:
3030
ret <2 x float> %out
3131
}
3232

33+
; GCN-LABEL: {{^}}atomic_swap_1d_float:
34+
; GFX6789: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
35+
; GFX90A: image_atomic_swap v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}}
36+
; GFX10: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ;
37+
; GFX12: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN ;
38+
define amdgpu_ps float @atomic_swap_1d_float(<8 x i32> inreg %rsrc, float %data, i32 %s) {
39+
main_body:
40+
%v = call float @llvm.amdgcn.image.atomic.swap.1d.f32.i32(float %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
41+
ret float %v
42+
}
43+
3344
; GCN-LABEL: {{^}}atomic_add_1d:
3445
; GFX6789: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
3546
; GFX90A: image_atomic_add v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}}
@@ -299,6 +310,8 @@ declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32
299310
declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64, i32, <8 x i32>, i32, i32) #0
300311
declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64, i64, i32, <8 x i32>, i32, i32) #0
301312

313+
declare float @llvm.amdgcn.image.atomic.swap.1d.f32.i32(float, i32, <8 x i32>, i32, i32) #0
314+
302315
declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
303316
declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
304317
declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0

0 commit comments

Comments
 (0)