[AMDGPU] Overload image atomic swap to allow float as well. #107283

sstipano · 2024-09-04T18:22:42Z

LLPC can generate llvm.amdgcn.image.atomic.swap intrinsic with data argument as float type as well as float return type. This went unnoticed until CreateIntrinsic with implicit mangling was used.

llvmbot · 2024-09-04T18:23:16Z

@llvm/pr-subscribers-llvm-ir

@llvm/pr-subscribers-backend-amdgpu

Author: None (sstipanovic)

Changes

LLPC can generate llvm.amdgcn.image.atomic.swap intrinsic with data argument as float type as well as float return type. This went unnoticed until CreateIntrinsic with implicit mangling was used.

Full diff: https://github.com/llvm/llvm-project/pull/107283.diff

2 Files Affected:

(modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+16-5)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll (+13)

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index dc13a35c66f9ab..4cf967ed77d642 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -811,6 +811,12 @@ class AMDGPUDimAtomicFloatProfile<string opmod, AMDGPUDimProps dim,
   let RetTypes = [llvm_anyfloat_ty];
 }
 
+class AMDGPUDimAtomicAnyProfile<string opmod, AMDGPUDimProps dim,
+                                  list<AMDGPUArg> dataargs>
+    : AMDGPUDimAtomicProfile<opmod, dim, dataargs> {
+  let RetTypes = [llvm_any_ty];
+}
+
 class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim>
     : AMDGPUDimProfile<"GET_RESINFO", dim> {
   let RetTypes = [llvm_anyfloat_ty];
@@ -1023,26 +1029,31 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
 //////////////////////////////////////////////////////////////////////////
 defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimAtomicIntrinsics = {
   multiclass AMDGPUImageDimAtomicX<string opmod, list<AMDGPUArg> dataargs,
-                                   int isFloat = 0> {
+                                   int isFloat = 0, int isAny = 0> {
         foreach dim = AMDGPUDims.All in {
           def !strconcat(NAME, "_", dim.Name): AMDGPUImageDimIntrinsic<
               !if (isFloat, AMDGPUDimAtomicFloatProfile<opmod, dim, dataargs>,
-                   AMDGPUDimAtomicProfile<opmod, dim, dataargs>),
+                   !if (isAny, AMDGPUDimAtomicAnyProfile<opmod, dim, dataargs>,
+                        AMDGPUDimAtomicProfile<opmod, dim, dataargs>)),
               [], [SDNPMemOperand]>;
         }
   }
 
-  multiclass AMDGPUImageDimAtomic<string opmod, int isFloat = 0> {
+  multiclass AMDGPUImageDimAtomic<string opmod, int isFloat = 0, int isAny = 0 > {
     defm ""
         : AMDGPUImageDimAtomicX<opmod, [AMDGPUArg<LLVMMatchType<0>, "vdata">],
-                                isFloat>;
+                                isFloat, isAny>;
   }
 
   multiclass AMDGPUImageDimFloatAtomic<string opmod> {
     defm "" : AMDGPUImageDimAtomic<opmod, 1 /*isFloat*/>;
   }
 
-  defm int_amdgcn_image_atomic_swap : AMDGPUImageDimAtomic<"ATOMIC_SWAP">;
+  multiclass AMDGPUImageDimAnyAtomic<string opmod> {
+    defm "" : AMDGPUImageDimAtomic<opmod, 0 /*isFloat*/, 1 /*isAny*/>;
+  }
+
+  defm int_amdgcn_image_atomic_swap : AMDGPUImageDimAnyAtomic<"ATOMIC_SWAP">;
   defm int_amdgcn_image_atomic_add : AMDGPUImageDimAtomic<"ATOMIC_ADD">;
   defm int_amdgcn_image_atomic_sub : AMDGPUImageDimAtomic<"ATOMIC_SUB">;
   defm int_amdgcn_image_atomic_smin : AMDGPUImageDimAtomic<"ATOMIC_SMIN">;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
index f13b897971707a..a661730ba2d1b5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
@@ -30,6 +30,17 @@ main_body:
   ret <2 x float> %out
 }
 
+; GCN-LABEL: {{^}}atomic_swap_1d_float:
+; GFX6789: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX90A: image_atomic_swap v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX10: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ;
+; GFX12: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN ;
+define amdgpu_ps float @atomic_swap_1d_float(<8 x i32> inreg %rsrc, float %data, i32 %s) {
+main_body:
+  %v = call float @llvm.amdgcn.image.atomic.swap.1d.f32.i32(float %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret float %v
+}
+
 ; GCN-LABEL: {{^}}atomic_add_1d:
 ; GFX6789: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
 ; GFX90A: image_atomic_add v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}}
@@ -299,6 +310,8 @@ declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32
 declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64, i32, <8 x i32>, i32, i32) #0
 declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64, i64, i32, <8 x i32>, i32, i32) #0
 
+declare float @llvm.amdgcn.image.atomic.swap.1d.f32.i32(float, i32, <8 x i32>, i32, i32) #0
+
 declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
 declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
 declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

arsenm

Why doesn't this need any codegen / pattern changes?

sstipano · 2024-09-09T15:54:18Z

Why doesn't this need any codegen / pattern changes?

float version was already used out there, so codegen for float was already implemented.

sstipano requested review from jayfoad, nhaehnle and dstutt September 4, 2024 18:22

llvmbot added backend:AMDGPU llvm:ir labels Sep 4, 2024

arsenm reviewed Sep 4, 2024

View reviewed changes

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll Show resolved Hide resolved

jayfoad reviewed Sep 5, 2024

View reviewed changes

llvm/include/llvm/IR/IntrinsicsAMDGPU.td Outdated Show resolved Hide resolved

[AMDGPU] Overload image atomic swap to allow float as well.

c885e01

sstipano force-pushed the atomic_any branch from b1f0725 to c885e01 Compare September 6, 2024 19:39

sstipano requested a review from jayfoad September 6, 2024 19:40

arsenm approved these changes Sep 7, 2024

View reviewed changes

sstipano merged commit 914ab36 into llvm:main Sep 9, 2024
8 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AMDGPU] Overload image atomic swap to allow float as well. #107283

[AMDGPU] Overload image atomic swap to allow float as well. #107283

Uh oh!

sstipano commented Sep 4, 2024

Uh oh!

llvmbot commented Sep 4, 2024 •

edited

Loading

Uh oh!

Uh oh!

Uh oh!

arsenm left a comment

Uh oh!

sstipano commented Sep 9, 2024

Uh oh!

Uh oh!

Uh oh!

[AMDGPU] Overload image atomic swap to allow float as well. #107283

[AMDGPU] Overload image atomic swap to allow float as well. #107283

Uh oh!

Conversation

sstipano commented Sep 4, 2024

Uh oh!

llvmbot commented Sep 4, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

Uh oh!

arsenm left a comment

Choose a reason for hiding this comment

Uh oh!

sstipano commented Sep 9, 2024

Uh oh!

Uh oh!

Uh oh!

llvmbot commented Sep 4, 2024 •

edited

Loading