-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AMDGPU] Overload image atomic swap to allow float as well. #107283
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-backend-amdgpu Author: None (sstipanovic) ChangesLLPC can generate llvm.amdgcn.image.atomic.swap intrinsic with data argument as float type as well as float return type. This went unnoticed until CreateIntrinsic with implicit mangling was used. Full diff: https://github.com/llvm/llvm-project/pull/107283.diff 2 Files Affected:
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index dc13a35c66f9ab..4cf967ed77d642 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -811,6 +811,12 @@ class AMDGPUDimAtomicFloatProfile<string opmod, AMDGPUDimProps dim,
let RetTypes = [llvm_anyfloat_ty];
}
+class AMDGPUDimAtomicAnyProfile<string opmod, AMDGPUDimProps dim,
+ list<AMDGPUArg> dataargs>
+ : AMDGPUDimAtomicProfile<opmod, dim, dataargs> {
+ let RetTypes = [llvm_any_ty];
+}
+
class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim>
: AMDGPUDimProfile<"GET_RESINFO", dim> {
let RetTypes = [llvm_anyfloat_ty];
@@ -1023,26 +1029,31 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
//////////////////////////////////////////////////////////////////////////
defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimAtomicIntrinsics = {
multiclass AMDGPUImageDimAtomicX<string opmod, list<AMDGPUArg> dataargs,
- int isFloat = 0> {
+ int isFloat = 0, int isAny = 0> {
foreach dim = AMDGPUDims.All in {
def !strconcat(NAME, "_", dim.Name): AMDGPUImageDimIntrinsic<
!if (isFloat, AMDGPUDimAtomicFloatProfile<opmod, dim, dataargs>,
- AMDGPUDimAtomicProfile<opmod, dim, dataargs>),
+ !if (isAny, AMDGPUDimAtomicAnyProfile<opmod, dim, dataargs>,
+ AMDGPUDimAtomicProfile<opmod, dim, dataargs>)),
[], [SDNPMemOperand]>;
}
}
- multiclass AMDGPUImageDimAtomic<string opmod, int isFloat = 0> {
+ multiclass AMDGPUImageDimAtomic<string opmod, int isFloat = 0, int isAny = 0 > {
defm ""
: AMDGPUImageDimAtomicX<opmod, [AMDGPUArg<LLVMMatchType<0>, "vdata">],
- isFloat>;
+ isFloat, isAny>;
}
multiclass AMDGPUImageDimFloatAtomic<string opmod> {
defm "" : AMDGPUImageDimAtomic<opmod, 1 /*isFloat*/>;
}
- defm int_amdgcn_image_atomic_swap : AMDGPUImageDimAtomic<"ATOMIC_SWAP">;
+ multiclass AMDGPUImageDimAnyAtomic<string opmod> {
+ defm "" : AMDGPUImageDimAtomic<opmod, 0 /*isFloat*/, 1 /*isAny*/>;
+ }
+
+ defm int_amdgcn_image_atomic_swap : AMDGPUImageDimAnyAtomic<"ATOMIC_SWAP">;
defm int_amdgcn_image_atomic_add : AMDGPUImageDimAtomic<"ATOMIC_ADD">;
defm int_amdgcn_image_atomic_sub : AMDGPUImageDimAtomic<"ATOMIC_SUB">;
defm int_amdgcn_image_atomic_smin : AMDGPUImageDimAtomic<"ATOMIC_SMIN">;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
index f13b897971707a..a661730ba2d1b5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
@@ -30,6 +30,17 @@ main_body:
ret <2 x float> %out
}
+; GCN-LABEL: {{^}}atomic_swap_1d_float:
+; GFX6789: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX90A: image_atomic_swap v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX10: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ;
+; GFX12: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN ;
+define amdgpu_ps float @atomic_swap_1d_float(<8 x i32> inreg %rsrc, float %data, i32 %s) {
+main_body:
+ %v = call float @llvm.amdgcn.image.atomic.swap.1d.f32.i32(float %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret float %v
+}
+
; GCN-LABEL: {{^}}atomic_add_1d:
; GFX6789: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
; GFX90A: image_atomic_add v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}}
@@ -299,6 +310,8 @@ declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32
declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64, i32, <8 x i32>, i32, i32) #0
declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64, i64, i32, <8 x i32>, i32, i32) #0
+declare float @llvm.amdgcn.image.atomic.swap.1d.f32.i32(float, i32, <8 x i32>, i32, i32) #0
+
declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why doesn't this need any codegen / pattern changes?
float version was already used out there, so codegen for float was already implemented. |
LLPC can generate llvm.amdgcn.image.atomic.swap intrinsic with data argument as float type as well as float return type. This went unnoticed until CreateIntrinsic with implicit mangling was used.