
Commit a34a3fb

AMDGPU: Add baseline tests for cmpxchg custom expansion
We need a non-atomic path if flat may access private.
1 parent 6ff8091 commit a34a3fb

5 files changed: +382 -25 lines
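For context on the commit message: scratch (private) memory has no hardware cmpxchg, so when a flat pointer may point at private memory the atomic expansion has to branch to a plain load/compare/store path. A rough sketch of the kind of IR such an expansion would need to produce is below; the function name, block names, return-value handling, and the !0 node are illustrative assumptions, not the exact AtomicExpand output. It only relies on @llvm.amdgcn.is.private, which already appears in the CHECK lines of the tests changed here, and it assumes !0 excludes the private address space (5).

define i64 @cmpxchg_flat_may_be_private(ptr %p, i64 %old, i64 %in) {
entry:
  %is.private = call i1 @llvm.amdgcn.is.private(ptr %p)
  br i1 %is.private, label %atomicrmw.private, label %atomicrmw.global

atomicrmw.private:                                ; non-atomic path for scratch
  %p.priv = addrspacecast ptr %p to ptr addrspace(5)
  %loaded.priv = load i64, ptr addrspace(5) %p.priv, align 8
  %eq = icmp eq i64 %loaded.priv, %old
  %new.priv = select i1 %eq, i64 %in, i64 %loaded.priv
  store i64 %new.priv, ptr addrspace(5) %p.priv, align 8
  br label %done

atomicrmw.global:                                 ; pointer known not to be private here
  %pair = cmpxchg ptr %p, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
  %loaded.global = extractvalue { i64, i1 } %pair, 0
  br label %done

done:
  %result = phi i64 [ %loaded.priv, %atomicrmw.private ], [ %loaded.global, %atomicrmw.global ]
  ret i64 %result
}

declare i1 @llvm.amdgcn.is.private(ptr)

!0 = !{i32 5, i32 6}  ; assumed node: access is not in address-space range [5, 6), i.e. not private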

llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll

+17 -17
@@ -5005,7 +5005,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_offset(ptr %out, i64 %in, i64 %old
 ; GFX12-NEXT:    s_endpgm
 entry:
   %gep = getelementptr i64, ptr %out, i64 4
-  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
+  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
   ret void
 }

@@ -5061,7 +5061,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(ptr %out, i64 %in, i64 %ol
 ; GFX12-NEXT:    s_endpgm
 entry:
   %gep = getelementptr i64, ptr %out, i64 9000
-  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
+  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
   ret void
 }

@@ -5121,7 +5121,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(ptr %out, ptr %out2, i6
 ; GFX12-NEXT:    s_endpgm
 entry:
   %gep = getelementptr i64, ptr %out, i64 4
-  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
+  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
   %extract0 = extractvalue { i64, i1 } %val, 0
   store i64 %extract0, ptr %out2
   ret void

@@ -5184,7 +5184,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(ptr %out, i64 %in, i
 entry:
   %ptr = getelementptr i64, ptr %out, i64 %index
   %gep = getelementptr i64, ptr %ptr, i64 4
-  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
+  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
   ret void
 }

@@ -5257,7 +5257,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(ptr %out, ptr %o
 entry:
   %ptr = getelementptr i64, ptr %out, i64 %index
   %gep = getelementptr i64, ptr %ptr, i64 4
-  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
+  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
   %extract0 = extractvalue { i64, i1 } %val, 0
   store i64 %extract0, ptr %out2
   ret void

@@ -5310,7 +5310,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64(ptr %out, i64 %in, i64 %old) {
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_endpgm
 entry:
-  %val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
+  %val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
   ret void
 }

@@ -5365,7 +5365,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret(ptr %out, ptr %out2, i64 %in,
 ; GFX12-NEXT:    flat_store_b64 v[2:3], v[0:1]
 ; GFX12-NEXT:    s_endpgm
 entry:
-  %val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
+  %val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
   %extract0 = extractvalue { i64, i1 } %val, 0
   store i64 %extract0, ptr %out2
   ret void

@@ -5423,7 +5423,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(ptr %out, i64 %in, i64 %ind
 ; GFX12-NEXT:    s_endpgm
 entry:
   %ptr = getelementptr i64, ptr %out, i64 %index
-  %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
+  %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
   ret void
 }

@@ -5491,7 +5491,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(ptr %out, ptr %out2, i6
 ; GFX12-NEXT:    s_endpgm
 entry:
   %ptr = getelementptr i64, ptr %out, i64 %index
-  %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
+  %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
   %extract0 = extractvalue { i64, i1 } %val, 0
   store i64 %extract0, ptr %out2
   ret void

@@ -5543,7 +5543,7 @@ define amdgpu_kernel void @atomic_load_f64_offset(ptr %in, ptr %out) {
 ; GFX12-NEXT:    s_endpgm
 entry:
   %gep = getelementptr double, ptr %in, i64 4
-  %val = load atomic double, ptr %gep seq_cst, align 8
+  %val = load atomic double, ptr %gep seq_cst, align 8, !noalias.addrspace !0
   store double %val, ptr %out
   ret void
 }

@@ -5589,7 +5589,7 @@ define amdgpu_kernel void @atomic_load_f64(ptr %in, ptr %out) {
 ; GFX12-NEXT:    flat_store_b64 v[2:3], v[0:1]
 ; GFX12-NEXT:    s_endpgm
 entry:
-  %val = load atomic double, ptr %in syncscope("agent") seq_cst, align 8
+  %val = load atomic double, ptr %in syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
   store double %val, ptr %out
   ret void
 }

@@ -5654,7 +5654,7 @@ define amdgpu_kernel void @atomic_load_f64_addr64_offset(ptr %in, ptr %out, i64
 entry:
   %ptr = getelementptr double, ptr %in, i64 %index
   %gep = getelementptr double, ptr %ptr, i64 4
-  %val = load atomic double, ptr %gep seq_cst, align 8
+  %val = load atomic double, ptr %gep seq_cst, align 8, !noalias.addrspace !0
   store double %val, ptr %out
   ret void
 }

@@ -5714,7 +5714,7 @@ define amdgpu_kernel void @atomic_load_f64_addr64(ptr %in, ptr %out, i64 %index)
 ; GFX12-NEXT:    s_endpgm
 entry:
   %ptr = getelementptr double, ptr %in, i64 %index
-  %val = load atomic double, ptr %ptr seq_cst, align 8
+  %val = load atomic double, ptr %ptr seq_cst, align 8, !noalias.addrspace !0
   store double %val, ptr %out
   ret void
 }

@@ -5757,7 +5757,7 @@ define amdgpu_kernel void @atomic_store_f64_offset(double %in, ptr %out) {
 ; GFX12-NEXT:    s_endpgm
 entry:
   %gep = getelementptr double, ptr %out, i64 4
-  store atomic double %in, ptr %gep seq_cst, align 8
+  store atomic double %in, ptr %gep seq_cst, align 8, !noalias.addrspace !0
   ret void
 }

@@ -5794,7 +5794,7 @@ define amdgpu_kernel void @atomic_store_f64(double %in, ptr %out) {
 ; GFX12-NEXT:    flat_store_b64 v[2:3], v[0:1] scope:SCOPE_SYS
 ; GFX12-NEXT:    s_endpgm
 entry:
-  store atomic double %in, ptr %out seq_cst, align 8
+  store atomic double %in, ptr %out seq_cst, align 8, !noalias.addrspace !0
   ret void
 }

@@ -5850,7 +5850,7 @@ define amdgpu_kernel void @atomic_store_f64_addr64_offset(double %in, ptr %out,
 entry:
   %ptr = getelementptr double, ptr %out, i64 %index
   %gep = getelementptr double, ptr %ptr, i64 4
-  store atomic double %in, ptr %gep seq_cst, align 8
+  store atomic double %in, ptr %gep seq_cst, align 8, !noalias.addrspace !0
   ret void
 }

@@ -5901,7 +5901,7 @@ define amdgpu_kernel void @atomic_store_f64_addr64(double %in, ptr %out, i64 %in
 ; GFX12-NEXT:    s_endpgm
 entry:
   %ptr = getelementptr double, ptr %out, i64 %index
-  store atomic double %in, ptr %ptr seq_cst, align 8
+  store atomic double %in, ptr %ptr seq_cst, align 8, !noalias.addrspace !0
   ret void
 }

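A note on the metadata added throughout this file: !noalias.addrspace marks a memory access as not touching the listed half-open ranges of address spaces. In these "noprivate" tests the intent is to assert that the flat pointer never refers to private (scratch) memory, so the non-atomic expansion path is not required. The !0 node itself is not visible in these hunks; on AMDGPU it would presumably be defined along the lines of the following (an assumption, since the definition is outside the shown context):

!0 = !{i32 5, i32 6}  ; assumed: access is not in address-space range [5, 6), i.e. not private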
llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll

+7 -5
@@ -126,12 +126,12 @@ define i16 @test_cmpxchg_i16_global_agent_align4(ptr addrspace(1) %out, i16 %in,
 
 define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
 ; GFX90A-LABEL: define void @syncscope_workgroup_nortn(
-; GFX90A-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR1:[0-9]+]] {
+; GFX90A-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0]] {
 ; GFX90A-NEXT:    [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR]])
 ; GFX90A-NEXT:    br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
 ; GFX90A:       atomicrmw.shared:
 ; GFX90A-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(3)
-; GFX90A-NEXT:    [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]]
+; GFX90A-NEXT:    [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.ignore.denormal.mode [[META3]]
 ; GFX90A-NEXT:    br label [[ATOMICRMW_PHI:%.*]]
 ; GFX90A:       atomicrmw.check.private:
 ; GFX90A-NEXT:    [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[ADDR]])

@@ -144,16 +144,16 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
 ; GFX90A-NEXT:    br label [[ATOMICRMW_PHI]]
 ; GFX90A:       atomicrmw.global:
 ; GFX90A-NEXT:    [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
-; GFX90A-NEXT:    [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]]
+; GFX90A-NEXT:    [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
 ; GFX90A-NEXT:    br label [[ATOMICRMW_PHI]]
 ; GFX90A:       atomicrmw.phi:
 ; GFX90A-NEXT:    br label [[ATOMICRMW_END:%.*]]
 ; GFX90A:       atomicrmw.end:
 ; GFX90A-NEXT:    ret void
 ;
 ; GFX1100-LABEL: define void @syncscope_workgroup_nortn(
-; GFX1100-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR1:[0-9]+]] {
-; GFX1100-NEXT:    [[RES:%.*]] = atomicrmw fadd ptr [[ADDR]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]]
+; GFX1100-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1100-NEXT:    [[RES:%.*]] = atomicrmw fadd ptr [[ADDR]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.ignore.denormal.mode [[META3]]
 ; GFX1100-NEXT:    ret void
 ;
   %res = atomicrmw fadd ptr %addr, float %val syncscope("workgroup") seq_cst, !mmra !2, !amdgpu.no.fine.grained.memory !3, !amdgpu.ignore.denormal.mode !3

@@ -193,8 +193,10 @@ define i32 @atomic_load_global_align1(ptr addrspace(1) %ptr) {
 ; GFX90A: [[META0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]]}
 ; GFX90A: [[META1]] = !{!"foo", !"bar"}
 ; GFX90A: [[META2]] = !{!"bux", !"baz"}
+; GFX90A: [[META3]] = !{}
 ;.
 ; GFX1100: [[META0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]]}
 ; GFX1100: [[META1]] = !{!"foo", !"bar"}
 ; GFX1100: [[META2]] = !{!"bux", !"baz"}
+; GFX1100: [[META3]] = !{}
 ;.

llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll

+2 -2
@@ -163,7 +163,7 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
 ; GFX908-NEXT:    br label [[ATOMICRMW_PHI]]
 ; GFX908:       atomicrmw.global:
 ; GFX908-NEXT:    [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
-; GFX908-NEXT:    [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX908-NEXT:    [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
 ; GFX908-NEXT:    br label [[ATOMICRMW_PHI]]
 ; GFX908:       atomicrmw.phi:
 ; GFX908-NEXT:    br label [[ATOMICRMW_END:%.*]]

@@ -188,7 +188,7 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
 ; GFX90A-NEXT:    br label [[ATOMICRMW_PHI]]
 ; GFX90A:       atomicrmw.global:
 ; GFX90A-NEXT:    [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
-; GFX90A-NEXT:    [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX90A-NEXT:    [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
 ; GFX90A-NEXT:    br label [[ATOMICRMW_PHI]]
 ; GFX90A:       atomicrmw.phi:
 ; GFX90A-NEXT:    br label [[ATOMICRMW_END:%.*]]
