AMDGPU: Replace ptr addrspace(8) undef uses with poison #130904

arsenm · 2025-03-12T06:27:44Z

No description provided.

arsenm · 2025-03-12T06:28:07Z

This stack of pull requests is managed by Graphite. Learn more about stacking.

llvmbot · 2025-03-12T06:30:07Z

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Patch is 39.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/130904.diff

16 Files Affected:

(modified) llvm/test/CodeGen/AMDGPU/amdpal.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll (+6-6)
(modified) llvm/test/CodeGen/AMDGPU/else.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll (+4-4)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/merge-store-crash.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/required-export-priority.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/wave32.ll (+4-4)
(modified) llvm/test/CodeGen/AMDGPU/wqm.ll (+55-55)

diff --git a/llvm/test/CodeGen/AMDGPU/amdpal.ll b/llvm/test/CodeGen/AMDGPU/amdpal.ll
index 171df029615ed..fd9227d2f4319 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal.ll
@@ -72,7 +72,7 @@ entry:
   %e = getelementptr [2 x i32], ptr addrspace(5) %v1, i32 0, i32 %idx
   %x = load i32, ptr addrspace(5) %e
   %xf = bitcast i32 %x to float
-  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %xf, ptr addrspace(8) undef, i32 0, i32 0, i32 0)
+  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %xf, ptr addrspace(8) poison, i32 0, i32 0, i32 0)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
index b42542db6dbd8..f8227f0039af7 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
@@ -66,7 +66,7 @@ define i32 @combine_add_zext_xor() {
   br i1 undef, label %bb9, label %bb
 
 bb:                                               ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
@@ -146,7 +146,7 @@ define i32 @combine_sub_zext_xor() {
   br i1 undef, label %bb9, label %bb
 
 bb:                                               ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
@@ -229,7 +229,7 @@ define i32 @combine_add_zext_or() {
   br i1 undef, label %bb9, label %bb
 
 bb:                                               ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
@@ -313,7 +313,7 @@ define i32 @combine_sub_zext_or() {
   br i1 undef, label %bb9, label %bb
 
 bb:                                               ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
@@ -392,7 +392,7 @@ define i32 @combine_add_zext_and() {
   br i1 undef, label %bb9, label %bb
 
 bb:                                               ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
@@ -471,7 +471,7 @@ define i32 @combine_sub_zext_and() {
   br i1 undef, label %bb9, label %bb
 
 bb:                                               ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
diff --git a/llvm/test/CodeGen/AMDGPU/else.ll b/llvm/test/CodeGen/AMDGPU/else.ll
index 4a3018e67b17d..884f5305407a1 100644
--- a/llvm/test/CodeGen/AMDGPU/else.ll
+++ b/llvm/test/CodeGen/AMDGPU/else.ll
@@ -47,7 +47,7 @@ else:
 
 end:
   %r = phi float [ %v.if, %if ], [ %v.else, %else ]
-  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %r, ptr addrspace(8) undef, i32 0, i32 0, i32 0)
+  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %r, ptr addrspace(8) poison, i32 0, i32 0, i32 0)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
index a44231f39ae6f..37a2e9e24dbd6 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
@@ -13,7 +13,7 @@ define amdgpu_hs void @main(ptr addrspace(6) inreg %arg) {
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
   ; GCN-NEXT:   [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
-  ; GCN-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from `ptr addrspace(8) undef`, align 1, addrspace 8)
+  ; GCN-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from `ptr addrspace(8) poison`, align 1, addrspace 8)
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
   ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
@@ -22,14 +22,14 @@ define amdgpu_hs void @main(ptr addrspace(6) inreg %arg) {
   ; GCN-NEXT:   [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
   ; GCN-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[DEF2]]
   ; GCN-NEXT:   [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
-  ; GCN-NEXT:   BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s96) into `ptr addrspace(8) undef`, align 1, addrspace 8)
+  ; GCN-NEXT:   BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s96) into `ptr addrspace(8) poison`, align 1, addrspace 8)
   ; GCN-NEXT:   S_ENDPGM 0
 main_body:
-  %tmp25 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) undef, i32 undef, i32 0, i32 0)
+  %tmp25 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) poison, i32 undef, i32 0, i32 0)
   %tmp27 = bitcast <4 x float> %tmp25 to <16 x i8>
   %tmp28 = shufflevector <16 x i8> %tmp27, <16 x i8> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
   %tmp29 = bitcast <12 x i8> %tmp28 to <3 x i32>
-  call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %tmp29, ptr addrspace(8) undef, i32 undef, i32 0, i32 0) #3
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %tmp29, ptr addrspace(8) poison, i32 undef, i32 0, i32 0) #3
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
index c120c58ce6f33..c506e08855470 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
@@ -623,7 +623,7 @@ define amdgpu_kernel void @test_export_pos_before_param_ordered(float %x, float
 define amdgpu_kernel void @test_export_pos_before_param_across_load(i32 %idx) #0 {
   call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
-  %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0)
+  %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx, i32 0, i32 0)
   call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false)
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll
index 9c9cf36baecf1..843ad563df69e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll
@@ -103,7 +103,7 @@ main_body:
 ;CHECK: buffer_atomic_add v0,
 define amdgpu_ps float @test4() {
 main_body:
-  %v = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 1, ptr addrspace(8) undef, i32 4, i32 0, i32 0)
+  %v = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 1, ptr addrspace(8) poison, i32 4, i32 0, i32 0)
   %v.float = bitcast i32 %v to float
   ret float %v.float
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll
index fb44515595a55..607f6001f99b4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll
@@ -115,7 +115,7 @@ main_body:
 ;CHECK: buffer_atomic_add v0,
 define amdgpu_ps float @test4() {
 main_body:
-  %v = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 1, ptr addrspace(8) undef, i32 0, i32 4, i32 0, i32 0)
+  %v = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 1, ptr addrspace(8) poison, i32 0, i32 4, i32 0, i32 0)
   %v.float = bitcast i32 %v to float
   ret float %v.float
 }
diff --git a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
index a407cd20bf762..2ce612b176587 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
@@ -46,7 +46,7 @@ loop:
   br i1 %tmp27, label %then, label %endif
 
 then:                                             ; preds = %bb
-  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float undef, ptr addrspace(8) undef, i32 0, i32 undef, i32 0)
+  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float undef, ptr addrspace(8) poison, i32 0, i32 undef, i32 0)
   br label %endif
 
 endif:                                             ; preds = %bb28, %bb
@@ -85,7 +85,7 @@ loop:
   %tmp23phi = phi i32 [ %tmp23, %loop ], [ 0, %entry ]
   %tmp23 = add nuw i32 %tmp23phi, 1
   %tmp27 = icmp ult i32 %arg, %tmp23
-  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float undef, ptr addrspace(8) undef, i32 0, i32 undef, i32 0)
+  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float undef, ptr addrspace(8) poison, i32 0, i32 undef, i32 0)
   br i1 %tmp27, label %loop, label %loopexit
 
 loopexit:
@@ -136,7 +136,7 @@ loop:
   br i1 %tmp27, label %then, label %endif
 
 then:                                             ; preds = %bb
-  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float undef, ptr addrspace(8) undef, i32 0, i32 undef, i32 0)
+  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float undef, ptr addrspace(8) poison, i32 0, i32 undef, i32 0)
   br label %endif
 
 endif:                                             ; preds = %bb28, %bb
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
index 018cdee038b91..a62427b2ce922 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
@@ -61,7 +61,7 @@ define amdgpu_kernel void @workgroup_ids_kernel() {
   %ielemx = insertelement <3 x i32> poison, i32 %idx, i64 0
   %ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1
   %ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2
-  call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) undef, i32 0, i32 0, i32 0)
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) poison, i32 0, i32 0, i32 0)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
index dcec7288686b1..52b1d5e6c5b17 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
@@ -68,7 +68,7 @@ define amdgpu_cs void @_amdgpu_cs_main() {
   %ielemx = insertelement <3 x i32> poison, i32 %idx, i64 0
   %ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1
   %ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2
-  call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) undef, i32 0, i32 0, i32 0)
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) poison, i32 0, i32 0, i32 0)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll b/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll
index 0809891aec72b..66a217cb7be45 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll
@@ -26,7 +26,7 @@ main_body:
   %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp7, i32 1
   %tmp10 = insertelement <4 x i32> %tmp9, i32 poison, i32 2
   %tmp11 = insertelement <4 x i32> %tmp10, i32 poison, i32 3
-  call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4i32(<4 x i32> %tmp11, ptr addrspace(8) undef, i32 0, i32 0, i32 %arg, i32 78, i32 3) #2
+  call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4i32(<4 x i32> %tmp11, ptr addrspace(8) poison, i32 0, i32 0, i32 %arg, i32 78, i32 3) #2
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll b/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll
index 88cc5334d5c73..ae4fd6674f466 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll
+++ b/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll
@@ -10,7 +10,7 @@ define amdgpu_vs void @test1(i32 %v) #0 {
 
   store i32 %v, ptr addrspace(3) null
 
-  call void @llvm.amdgcn.raw.ptr.tbuffer.store.i32(i32 %v, ptr addrspace(8) undef, i32 0, i32 0, i32 68, i32 1)
+  call void @llvm.amdgcn.raw.ptr.tbuffer.store.i32(i32 %v, ptr addrspace(8) poison, i32 0, i32 0, i32 68, i32 1)
 
   %w = load i32, ptr addrspace(3) null
   store i32 %w, ptr addrspace(3) %p1
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
index a37e2bf4eb294..f14cd4488ef1e 100644
--- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
@@ -254,7 +254,7 @@ define amdgpu_ps void @test_export_pos_before_param_across_load(i32 %idx) #0 {
 ; GCN-NEXT:    s_endpgm
   call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
-  %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0)
+  %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx, i32 0, i32 0)
   call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false)
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
index 096dfcc2c7667..4e6aa03d91876 100644
--- a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
@@ -318,7 +318,7 @@ define amdgpu_vs void @reorder_local_load_tbuffer_store_local_load(ptr addrspace
 
   %vdata = insertelement <4 x i32> poison, i32 %a1, i32 0
   %vaddr.add = add i32 %vaddr, 32
-  call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4i32(<4 x i32> %vdata, ptr addrspace(8) undef, i32 %vaddr.add, i32 0, i32 0, i32 228, i32 3)
+  call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4i32(<4 x i32> %vdata, ptr addrspace(8) poison, i32 %vaddr.add, i32 0, i32 0, i32 228, i32 3)
 
   %tmp2 = load i32, ptr addrspace(3) %ptr2, align 4
 
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index 94c4eebe32281..846e347a4947e 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -1959,7 +1959,7 @@ main_body:
   br i1 %cc, label %endif, label %if
 
 if:
-  %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0, i32 0)
+  %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx, i32 0, i32 0, i32 0)
   %out = fadd float %src, %src
   %out.0 = call float @llvm.amdgcn.wwm.f32(float %out)
   %out.1 = fadd float %src, %out.0
@@ -2046,7 +2046,7 @@ main_body:
   br i1 %cc, label %endif, label %if
 
 if:
-  %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0, i32 0)
+  %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx, i32 0, i32 0, i32 0)
   %out = fadd float %src, %src
   %out.0 = call float @llvm.amdgcn.strict.wwm.f32(float %out)
   %out.1 = fadd float %src, %out.0
@@ -2128,8 +2128,8 @@ define amdgpu_ps float @test_wqm2(i32 inreg %idx0, i32 inreg %idx1) #0 {
 ; GFX1064-NEXT:    s_and_b64 exec, exec, s[2:3]
 ; GFX1064-NEXT:    ; return to shader part epilog
 main_body:
-  %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0)
-  %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx1, i32 0, i32 0, i32 0)
+  %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx0, i32 0, i32 0, i32 0)
+  %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx1, i32 0, i32 0, i32 0)
   %out = fadd float %src0, %src1
   %out.0 = bitcast float %out to i32
   %out.1 = call i32 @llvm.amdgcn.wqm.i32(i32 %out.0)
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll
index 3845db2edf73f..abf690048bae3 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -125,7 +125,7 @@ main_body:
   %tex.1 = bitcast <4 x float> %tex to <4 x i32>
   %tex.2 = extractelement <4 x i32> %tex.1, i32 0
 
-  call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %tex, ptr addrspace(8) undef, i32 %tex.2, i32 0, i32 0, i32 0)
+  call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %tex, ptr addrspace(8) poison, i32 %tex.2, i32 0, i32 0, i32 0)
 
   ret <4 x float> %tex
 }
@@ -247,7 +247,7 @@ define amdgpu_ps <4 x float> @test4_ptr_buf(<8 x i32> inreg %rsrc, <4 x i32> inr
 main_body:
   %c.1 = mul i32 %c, %d
 
-  call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> undef, ptr addrspace(8) undef, i32 %c.1, i32 0, i32 0, i32 0)
+  call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> undef, ptr addrspace(8) poison, i32 %c.1, i32 0, i32 0, i32 0)
   %c.1.bc = bitcast i32 %c.1 to float
   %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.1.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
   %tex0 = extractelement <4 x float> %tex, i32 0
@@ -327,8 +327,8 @@ define amdgpu_ps float @test5_ptr_buf(i32 inreg %idx0, i32 inreg %idx1) {
 ; GFX10-W32-NEXT:    s_and_b32 exec_lo, exec_lo, s2
 ; GFX10-W32-NEXT:    ; return to shader part epilog
 main_body:
-  %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0)
-  %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx1, i32 0, i32 0, i32 0)
+  %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx0, i32 0, i32 0, i32 0)
+  %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx1, i32 0, i32 0, i32 0)
   %out = fadd float %src0, %src1
   %out.0 = call float @llvm.amdgcn.wqm.f32(float %out)
   ret float %out.0
@@ -406,8 +406,8 @@ define amdgpu_ps float @test6_ptr_buf(i32 inreg %idx0, i32 inreg %idx1) {
 ; GFX10-W32-NEXT:    s_and_b32 exec_lo, exec_lo, s2
 ; GFX10-W32-NEXT:    ; return to shader part epilog
 main_body:
-  %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0)
-  %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx1, i32 0, i32 0, i32 0)
+  %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx0, i32 0, i32 0, i32 0)
+  %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx1, i32 0, i32 0, i32 0)
   %out = fadd float %src0, %src1
   %out.0 = bitcast float %out to i32
   %out.1 = call i32 @llvm.amdgcn.wqm.i32(i32 %out.0)
@@ -447,8 +447,8 @@ define amdgpu_ps float @test_wwm1(i32 inreg %idx0, i32 inreg %idx1) {
 ; G...
[truncated]

llvmbot · 2025-03-12T06:30:08Z

@llvm/pr-subscribers-llvm-globalisel

Author: Matt Arsenault (arsenm)

Changes

Patch is 39.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/130904.diff

16 Files Affected:

(modified) llvm/test/CodeGen/AMDGPU/amdpal.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll (+6-6)
(modified) llvm/test/CodeGen/AMDGPU/else.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll (+4-4)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/merge-store-crash.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/required-export-priority.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/wave32.ll (+4-4)
(modified) llvm/test/CodeGen/AMDGPU/wqm.ll (+55-55)

diff --git a/llvm/test/CodeGen/AMDGPU/amdpal.ll b/llvm/test/CodeGen/AMDGPU/amdpal.ll
index 171df029615ed..fd9227d2f4319 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal.ll
@@ -72,7 +72,7 @@ entry:
   %e = getelementptr [2 x i32], ptr addrspace(5) %v1, i32 0, i32 %idx
   %x = load i32, ptr addrspace(5) %e
   %xf = bitcast i32 %x to float
-  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %xf, ptr addrspace(8) undef, i32 0, i32 0, i32 0)
+  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %xf, ptr addrspace(8) poison, i32 0, i32 0, i32 0)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
index b42542db6dbd8..f8227f0039af7 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
@@ -66,7 +66,7 @@ define i32 @combine_add_zext_xor() {
   br i1 undef, label %bb9, label %bb
 
 bb:                                               ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
@@ -146,7 +146,7 @@ define i32 @combine_sub_zext_xor() {
   br i1 undef, label %bb9, label %bb
 
 bb:                                               ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
@@ -229,7 +229,7 @@ define i32 @combine_add_zext_or() {
   br i1 undef, label %bb9, label %bb
 
 bb:                                               ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
@@ -313,7 +313,7 @@ define i32 @combine_sub_zext_or() {
   br i1 undef, label %bb9, label %bb
 
 bb:                                               ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
@@ -392,7 +392,7 @@ define i32 @combine_add_zext_and() {
   br i1 undef, label %bb9, label %bb
 
 bb:                                               ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
@@ -471,7 +471,7 @@ define i32 @combine_sub_zext_and() {
   br i1 undef, label %bb9, label %bb
 
 bb:                                               ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
diff --git a/llvm/test/CodeGen/AMDGPU/else.ll b/llvm/test/CodeGen/AMDGPU/else.ll
index 4a3018e67b17d..884f5305407a1 100644
--- a/llvm/test/CodeGen/AMDGPU/else.ll
+++ b/llvm/test/CodeGen/AMDGPU/else.ll
@@ -47,7 +47,7 @@ else:
 
 end:
   %r = phi float [ %v.if, %if ], [ %v.else, %else ]
-  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %r, ptr addrspace(8) undef, i32 0, i32 0, i32 0)
+  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %r, ptr addrspace(8) poison, i32 0, i32 0, i32 0)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
index a44231f39ae6f..37a2e9e24dbd6 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
@@ -13,7 +13,7 @@ define amdgpu_hs void @main(ptr addrspace(6) inreg %arg) {
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
   ; GCN-NEXT:   [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
-  ; GCN-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from `ptr addrspace(8) undef`, align 1, addrspace 8)
+  ; GCN-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from `ptr addrspace(8) poison`, align 1, addrspace 8)
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
   ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
@@ -22,14 +22,14 @@ define amdgpu_hs void @main(ptr addrspace(6) inreg %arg) {
   ; GCN-NEXT:   [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
   ; GCN-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[DEF2]]
   ; GCN-NEXT:   [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
-  ; GCN-NEXT:   BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s96) into `ptr addrspace(8) undef`, align 1, addrspace 8)
+  ; GCN-NEXT:   BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s96) into `ptr addrspace(8) poison`, align 1, addrspace 8)
   ; GCN-NEXT:   S_ENDPGM 0
 main_body:
-  %tmp25 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) undef, i32 undef, i32 0, i32 0)
+  %tmp25 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) poison, i32 undef, i32 0, i32 0)
   %tmp27 = bitcast <4 x float> %tmp25 to <16 x i8>
   %tmp28 = shufflevector <16 x i8> %tmp27, <16 x i8> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
   %tmp29 = bitcast <12 x i8> %tmp28 to <3 x i32>
-  call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %tmp29, ptr addrspace(8) undef, i32 undef, i32 0, i32 0) #3
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %tmp29, ptr addrspace(8) poison, i32 undef, i32 0, i32 0) #3
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
index c120c58ce6f33..c506e08855470 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
@@ -623,7 +623,7 @@ define amdgpu_kernel void @test_export_pos_before_param_ordered(float %x, float
 define amdgpu_kernel void @test_export_pos_before_param_across_load(i32 %idx) #0 {
   call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
-  %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0)
+  %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx, i32 0, i32 0)
   call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false)
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll
index 9c9cf36baecf1..843ad563df69e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll
@@ -103,7 +103,7 @@ main_body:
 ;CHECK: buffer_atomic_add v0,
 define amdgpu_ps float @test4() {
 main_body:
-  %v = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 1, ptr addrspace(8) undef, i32 4, i32 0, i32 0)
+  %v = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 1, ptr addrspace(8) poison, i32 4, i32 0, i32 0)
   %v.float = bitcast i32 %v to float
   ret float %v.float
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll
index fb44515595a55..607f6001f99b4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll
@@ -115,7 +115,7 @@ main_body:
 ;CHECK: buffer_atomic_add v0,
 define amdgpu_ps float @test4() {
 main_body:
-  %v = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 1, ptr addrspace(8) undef, i32 0, i32 4, i32 0, i32 0)
+  %v = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 1, ptr addrspace(8) poison, i32 0, i32 4, i32 0, i32 0)
   %v.float = bitcast i32 %v to float
   ret float %v.float
 }
diff --git a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
index a407cd20bf762..2ce612b176587 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
@@ -46,7 +46,7 @@ loop:
   br i1 %tmp27, label %then, label %endif
 
 then:                                             ; preds = %bb
-  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float undef, ptr addrspace(8) undef, i32 0, i32 undef, i32 0)
+  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float undef, ptr addrspace(8) poison, i32 0, i32 undef, i32 0)
   br label %endif
 
 endif:                                             ; preds = %bb28, %bb
@@ -85,7 +85,7 @@ loop:
   %tmp23phi = phi i32 [ %tmp23, %loop ], [ 0, %entry ]
   %tmp23 = add nuw i32 %tmp23phi, 1
   %tmp27 = icmp ult i32 %arg, %tmp23
-  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float undef, ptr addrspace(8) undef, i32 0, i32 undef, i32 0)
+  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float undef, ptr addrspace(8) poison, i32 0, i32 undef, i32 0)
   br i1 %tmp27, label %loop, label %loopexit
 
 loopexit:
@@ -136,7 +136,7 @@ loop:
   br i1 %tmp27, label %then, label %endif
 
 then:                                             ; preds = %bb
-  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float undef, ptr addrspace(8) undef, i32 0, i32 undef, i32 0)
+  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float undef, ptr addrspace(8) poison, i32 0, i32 undef, i32 0)
   br label %endif
 
 endif:                                             ; preds = %bb28, %bb
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
index 018cdee038b91..a62427b2ce922 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
@@ -61,7 +61,7 @@ define amdgpu_kernel void @workgroup_ids_kernel() {
   %ielemx = insertelement <3 x i32> poison, i32 %idx, i64 0
   %ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1
   %ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2
-  call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) undef, i32 0, i32 0, i32 0)
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) poison, i32 0, i32 0, i32 0)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
index dcec7288686b1..52b1d5e6c5b17 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
@@ -68,7 +68,7 @@ define amdgpu_cs void @_amdgpu_cs_main() {
   %ielemx = insertelement <3 x i32> poison, i32 %idx, i64 0
   %ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1
   %ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2
-  call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) undef, i32 0, i32 0, i32 0)
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) poison, i32 0, i32 0, i32 0)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll b/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll
index 0809891aec72b..66a217cb7be45 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll
@@ -26,7 +26,7 @@ main_body:
   %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp7, i32 1
   %tmp10 = insertelement <4 x i32> %tmp9, i32 poison, i32 2
   %tmp11 = insertelement <4 x i32> %tmp10, i32 poison, i32 3
-  call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4i32(<4 x i32> %tmp11, ptr addrspace(8) undef, i32 0, i32 0, i32 %arg, i32 78, i32 3) #2
+  call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4i32(<4 x i32> %tmp11, ptr addrspace(8) poison, i32 0, i32 0, i32 %arg, i32 78, i32 3) #2
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll b/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll
index 88cc5334d5c73..ae4fd6674f466 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll
+++ b/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll
@@ -10,7 +10,7 @@ define amdgpu_vs void @test1(i32 %v) #0 {
 
   store i32 %v, ptr addrspace(3) null
 
-  call void @llvm.amdgcn.raw.ptr.tbuffer.store.i32(i32 %v, ptr addrspace(8) undef, i32 0, i32 0, i32 68, i32 1)
+  call void @llvm.amdgcn.raw.ptr.tbuffer.store.i32(i32 %v, ptr addrspace(8) poison, i32 0, i32 0, i32 68, i32 1)
 
   %w = load i32, ptr addrspace(3) null
   store i32 %w, ptr addrspace(3) %p1
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
index a37e2bf4eb294..f14cd4488ef1e 100644
--- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
@@ -254,7 +254,7 @@ define amdgpu_ps void @test_export_pos_before_param_across_load(i32 %idx) #0 {
 ; GCN-NEXT:    s_endpgm
   call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
-  %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0)
+  %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx, i32 0, i32 0)
   call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false)
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
index 096dfcc2c7667..4e6aa03d91876 100644
--- a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
@@ -318,7 +318,7 @@ define amdgpu_vs void @reorder_local_load_tbuffer_store_local_load(ptr addrspace
 
   %vdata = insertelement <4 x i32> poison, i32 %a1, i32 0
   %vaddr.add = add i32 %vaddr, 32
-  call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4i32(<4 x i32> %vdata, ptr addrspace(8) undef, i32 %vaddr.add, i32 0, i32 0, i32 228, i32 3)
+  call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4i32(<4 x i32> %vdata, ptr addrspace(8) poison, i32 %vaddr.add, i32 0, i32 0, i32 228, i32 3)
 
   %tmp2 = load i32, ptr addrspace(3) %ptr2, align 4
 
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index 94c4eebe32281..846e347a4947e 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -1959,7 +1959,7 @@ main_body:
   br i1 %cc, label %endif, label %if
 
 if:
-  %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0, i32 0)
+  %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx, i32 0, i32 0, i32 0)
   %out = fadd float %src, %src
   %out.0 = call float @llvm.amdgcn.wwm.f32(float %out)
   %out.1 = fadd float %src, %out.0
@@ -2046,7 +2046,7 @@ main_body:
   br i1 %cc, label %endif, label %if
 
 if:
-  %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0, i32 0)
+  %src = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx, i32 0, i32 0, i32 0)
   %out = fadd float %src, %src
   %out.0 = call float @llvm.amdgcn.strict.wwm.f32(float %out)
   %out.1 = fadd float %src, %out.0
@@ -2128,8 +2128,8 @@ define amdgpu_ps float @test_wqm2(i32 inreg %idx0, i32 inreg %idx1) #0 {
 ; GFX1064-NEXT:    s_and_b64 exec, exec, s[2:3]
 ; GFX1064-NEXT:    ; return to shader part epilog
 main_body:
-  %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0)
-  %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx1, i32 0, i32 0, i32 0)
+  %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx0, i32 0, i32 0, i32 0)
+  %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx1, i32 0, i32 0, i32 0)
   %out = fadd float %src0, %src1
   %out.0 = bitcast float %out to i32
   %out.1 = call i32 @llvm.amdgcn.wqm.i32(i32 %out.0)
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll
index 3845db2edf73f..abf690048bae3 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -125,7 +125,7 @@ main_body:
   %tex.1 = bitcast <4 x float> %tex to <4 x i32>
   %tex.2 = extractelement <4 x i32> %tex.1, i32 0
 
-  call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %tex, ptr addrspace(8) undef, i32 %tex.2, i32 0, i32 0, i32 0)
+  call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %tex, ptr addrspace(8) poison, i32 %tex.2, i32 0, i32 0, i32 0)
 
   ret <4 x float> %tex
 }
@@ -247,7 +247,7 @@ define amdgpu_ps <4 x float> @test4_ptr_buf(<8 x i32> inreg %rsrc, <4 x i32> inr
 main_body:
   %c.1 = mul i32 %c, %d
 
-  call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> undef, ptr addrspace(8) undef, i32 %c.1, i32 0, i32 0, i32 0)
+  call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> undef, ptr addrspace(8) poison, i32 %c.1, i32 0, i32 0, i32 0)
   %c.1.bc = bitcast i32 %c.1 to float
   %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.1.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
   %tex0 = extractelement <4 x float> %tex, i32 0
@@ -327,8 +327,8 @@ define amdgpu_ps float @test5_ptr_buf(i32 inreg %idx0, i32 inreg %idx1) {
 ; GFX10-W32-NEXT:    s_and_b32 exec_lo, exec_lo, s2
 ; GFX10-W32-NEXT:    ; return to shader part epilog
 main_body:
-  %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0)
-  %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx1, i32 0, i32 0, i32 0)
+  %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx0, i32 0, i32 0, i32 0)
+  %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx1, i32 0, i32 0, i32 0)
   %out = fadd float %src0, %src1
   %out.0 = call float @llvm.amdgcn.wqm.f32(float %out)
   ret float %out.0
@@ -406,8 +406,8 @@ define amdgpu_ps float @test6_ptr_buf(i32 inreg %idx0, i32 inreg %idx1) {
 ; GFX10-W32-NEXT:    s_and_b32 exec_lo, exec_lo, s2
 ; GFX10-W32-NEXT:    ; return to shader part epilog
 main_body:
-  %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx0, i32 0, i32 0, i32 0)
-  %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx1, i32 0, i32 0, i32 0)
+  %src0 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx0, i32 0, i32 0, i32 0)
+  %src1 = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %idx1, i32 0, i32 0, i32 0)
   %out = fadd float %src0, %src1
   %out.0 = bitcast float %out to i32
   %out.1 = call i32 @llvm.amdgcn.wqm.i32(i32 %out.0)
@@ -447,8 +447,8 @@ define amdgpu_ps float @test_wwm1(i32 inreg %idx0, i32 inreg %idx1) {
 ; G...
[truncated]

github-actions · 2025-03-12T06:30:58Z

⚠️ undef deprecator found issues in your code. ⚠️

You can test this locally with the following command:

git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' af90b6633c6a449b82992bc2d17d02984fac2f7c 39c40c749726f0baee23b6403fdb13237c91e014 llvm/test/CodeGen/AMDGPU/amdpal.ll llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll llvm/test/CodeGen/AMDGPU/else.ll llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll llvm/test/CodeGen/AMDGPU/merge-store-crash.ll llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll llvm/test/CodeGen/AMDGPU/required-export-priority.ll llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll llvm/test/CodeGen/AMDGPU/wave32.ll llvm/test/CodeGen/AMDGPU/wqm.ll

The following files introduce new uses of undef:

llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
llvm/test/CodeGen/AMDGPU/wqm.ll

Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. You should use poison values for placeholders instead.

In tests, avoid using undef and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead.

For example, this is considered a bad practice:

define void @fn() {
  ...
  br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.

arsenm · 2025-03-13T01:21:55Z

Merge activity

Mar 12, 9:21 PM EDT: A user started a stack merge that includes this pull request via Graphite.
Mar 12, 9:32 PM EDT: Graphite rebased this pull request as part of a merge.
Mar 12, 9:35 PM EDT: Graphite rebased this pull request as part of a merge.
Mar 12, 9:37 PM EDT: A user merged this pull request with Graphite.

arsenm mentioned this pull request Mar 12, 2025

AMDGPU: Replace <8 x i32> undef uses in tests with poison #130903

Merged

arsenm added backend:AMDGPU llvm:globalisel labels Mar 12, 2025 — with Graphite App

arsenm requested review from jayfoad, krzysz00 and shiltian March 12, 2025 06:29

arsenm marked this pull request as ready for review March 12, 2025 06:29

arsenm force-pushed the users/arsenm/amdgpu/tests-replace-p8-undef-with-poison branch from 5aef79a to fa3c82b Compare March 12, 2025 08:00

arsenm force-pushed the users/arsenm/amdgpu/tests-replace-v8i32-undef-with-poison branch from dee83a2 to 602e8ab Compare March 12, 2025 08:00

arsenm force-pushed the users/arsenm/amdgpu/tests-replace-p8-undef-with-poison branch from fa3c82b to b2b21aa Compare March 12, 2025 09:15

arsenm force-pushed the users/arsenm/amdgpu/tests-replace-v8i32-undef-with-poison branch from 602e8ab to 4cb9b21 Compare March 12, 2025 09:15

shiltian approved these changes Mar 12, 2025

View reviewed changes

arsenm force-pushed the users/arsenm/amdgpu/tests-replace-v8i32-undef-with-poison branch from 4cb9b21 to aeb2f61 Compare March 12, 2025 13:49

arsenm force-pushed the users/arsenm/amdgpu/tests-replace-p8-undef-with-poison branch from b2b21aa to 1fe990c Compare March 12, 2025 13:49

arsenm force-pushed the users/arsenm/amdgpu/tests-replace-v8i32-undef-with-poison branch from aeb2f61 to af90b66 Compare March 13, 2025 01:26

arsenm force-pushed the users/arsenm/amdgpu/tests-replace-p8-undef-with-poison branch from 1fe990c to 39c40c7 Compare March 13, 2025 01:26

This was referenced Mar 13, 2025

AMDGPU: Replace ptr addrspace(3) undef in tests with poison #131049

Merged

AMDGPU: Replace some undef uses in test metadata with poison #131052

Merged

AMDGPU: Replace ptr undef in tests with poison #131050

Merged

arsenm mentioned this pull request Mar 13, 2025

AMDGPU: Replace undef global initializers in tests with poison #131051

Merged

arsenm force-pushed the users/arsenm/amdgpu/tests-replace-v8i32-undef-with-poison branch from af90b66 to 062160c Compare March 13, 2025 01:29

Base automatically changed from users/arsenm/amdgpu/tests-replace-v8i32-undef-with-poison to main March 13, 2025 01:31

arsenm force-pushed the users/arsenm/amdgpu/tests-replace-p8-undef-with-poison branch from 39c40c7 to df55f0c Compare March 13, 2025 01:32

AMDGPU: Replace ptr addrspace(8) undef uses with poison

ffe17d4

arsenm force-pushed the users/arsenm/amdgpu/tests-replace-p8-undef-with-poison branch from df55f0c to ffe17d4 Compare March 13, 2025 01:34

arsenm merged commit 12fe5ae into main Mar 13, 2025
5 of 9 checks passed

arsenm deleted the users/arsenm/amdgpu/tests-replace-p8-undef-with-poison branch March 13, 2025 01:37

frederik-h pushed a commit to frederik-h/llvm-project that referenced this pull request Mar 18, 2025

AMDGPU: Replace ptr addrspace(8) undef uses with poison (llvm#130904)

853e221

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

AMDGPU: Replace ptr addrspace(8) undef uses with poison #130904

AMDGPU: Replace ptr addrspace(8) undef uses with poison #130904

Uh oh!

arsenm commented Mar 12, 2025

Uh oh!

arsenm commented Mar 12, 2025 •

edited

Loading

Uh oh!

llvmbot commented Mar 12, 2025

Uh oh!

llvmbot commented Mar 12, 2025

Uh oh!

github-actions bot commented Mar 12, 2025 •

edited

Loading

Uh oh!

arsenm commented Mar 13, 2025 •

edited

Loading

Uh oh!

Uh oh!

Uh oh!

AMDGPU: Replace ptr addrspace(8) undef uses with poison #130904

AMDGPU: Replace ptr addrspace(8) undef uses with poison #130904

Uh oh!

Conversation

arsenm commented Mar 12, 2025

Uh oh!

arsenm commented Mar 12, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Mar 12, 2025

Uh oh!

llvmbot commented Mar 12, 2025

Uh oh!

github-actions bot commented Mar 12, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

arsenm commented Mar 13, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Merge activity

Uh oh!

Uh oh!

Uh oh!

arsenm commented Mar 12, 2025 •

edited

Loading

github-actions bot commented Mar 12, 2025 •

edited

Loading

arsenm commented Mar 13, 2025 •

edited

Loading