Skip to content

Commit caecd58

Browse files
committed
AMDGPU: Add baseline tests for cmpxchg custom expansion
We need a non-atomic path if a flat pointer may access private memory.
1 parent 1fb2aae commit caecd58

5 files changed

+382
-25
lines changed

llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -5088,7 +5088,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_offset(ptr %out, i64 %in, i64 %old
50885088
; GFX12-NEXT: s_endpgm
50895089
entry:
50905090
%gep = getelementptr i64, ptr %out, i64 4
5091-
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
5091+
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
50925092
ret void
50935093
}
50945094

@@ -5145,7 +5145,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(ptr %out, i64 %in, i64 %ol
51455145
; GFX12-NEXT: s_endpgm
51465146
entry:
51475147
%gep = getelementptr i64, ptr %out, i64 9000
5148-
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
5148+
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
51495149
ret void
51505150
}
51515151

@@ -5206,7 +5206,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(ptr %out, ptr %out2, i6
52065206
; GFX12-NEXT: s_endpgm
52075207
entry:
52085208
%gep = getelementptr i64, ptr %out, i64 4
5209-
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
5209+
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
52105210
%extract0 = extractvalue { i64, i1 } %val, 0
52115211
store i64 %extract0, ptr %out2
52125212
ret void
@@ -5270,7 +5270,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(ptr %out, i64 %in, i
52705270
entry:
52715271
%ptr = getelementptr i64, ptr %out, i64 %index
52725272
%gep = getelementptr i64, ptr %ptr, i64 4
5273-
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
5273+
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
52745274
ret void
52755275
}
52765276

@@ -5344,7 +5344,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(ptr %out, ptr %o
53445344
entry:
53455345
%ptr = getelementptr i64, ptr %out, i64 %index
53465346
%gep = getelementptr i64, ptr %ptr, i64 4
5347-
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
5347+
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
53485348
%extract0 = extractvalue { i64, i1 } %val, 0
53495349
store i64 %extract0, ptr %out2
53505350
ret void
@@ -5398,7 +5398,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64(ptr %out, i64 %in, i64 %old) {
53985398
; GFX12-NEXT: global_inv scope:SCOPE_DEV
53995399
; GFX12-NEXT: s_endpgm
54005400
entry:
5401-
%val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
5401+
%val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
54025402
ret void
54035403
}
54045404

@@ -5454,7 +5454,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret(ptr %out, ptr %out2, i64 %in,
54545454
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
54555455
; GFX12-NEXT: s_endpgm
54565456
entry:
5457-
%val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
5457+
%val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
54585458
%extract0 = extractvalue { i64, i1 } %val, 0
54595459
store i64 %extract0, ptr %out2
54605460
ret void
@@ -5513,7 +5513,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(ptr %out, i64 %in, i64 %ind
55135513
; GFX12-NEXT: s_endpgm
55145514
entry:
55155515
%ptr = getelementptr i64, ptr %out, i64 %index
5516-
%val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
5516+
%val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
55175517
ret void
55185518
}
55195519

@@ -5582,7 +5582,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(ptr %out, ptr %out2, i6
55825582
; GFX12-NEXT: s_endpgm
55835583
entry:
55845584
%ptr = getelementptr i64, ptr %out, i64 %index
5585-
%val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
5585+
%val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
55865586
%extract0 = extractvalue { i64, i1 } %val, 0
55875587
store i64 %extract0, ptr %out2
55885588
ret void
@@ -5634,7 +5634,7 @@ define amdgpu_kernel void @atomic_load_f64_offset(ptr %in, ptr %out) {
56345634
; GFX12-NEXT: s_endpgm
56355635
entry:
56365636
%gep = getelementptr double, ptr %in, i64 4
5637-
%val = load atomic double, ptr %gep seq_cst, align 8
5637+
%val = load atomic double, ptr %gep seq_cst, align 8, !noalias.addrspace !0
56385638
store double %val, ptr %out
56395639
ret void
56405640
}
@@ -5680,7 +5680,7 @@ define amdgpu_kernel void @atomic_load_f64(ptr %in, ptr %out) {
56805680
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
56815681
; GFX12-NEXT: s_endpgm
56825682
entry:
5683-
%val = load atomic double, ptr %in syncscope("agent") seq_cst, align 8
5683+
%val = load atomic double, ptr %in syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
56845684
store double %val, ptr %out
56855685
ret void
56865686
}
@@ -5745,7 +5745,7 @@ define amdgpu_kernel void @atomic_load_f64_addr64_offset(ptr %in, ptr %out, i64
57455745
entry:
57465746
%ptr = getelementptr double, ptr %in, i64 %index
57475747
%gep = getelementptr double, ptr %ptr, i64 4
5748-
%val = load atomic double, ptr %gep seq_cst, align 8
5748+
%val = load atomic double, ptr %gep seq_cst, align 8, !noalias.addrspace !0
57495749
store double %val, ptr %out
57505750
ret void
57515751
}
@@ -5805,7 +5805,7 @@ define amdgpu_kernel void @atomic_load_f64_addr64(ptr %in, ptr %out, i64 %index)
58055805
; GFX12-NEXT: s_endpgm
58065806
entry:
58075807
%ptr = getelementptr double, ptr %in, i64 %index
5808-
%val = load atomic double, ptr %ptr seq_cst, align 8
5808+
%val = load atomic double, ptr %ptr seq_cst, align 8, !noalias.addrspace !0
58095809
store double %val, ptr %out
58105810
ret void
58115811
}
@@ -5848,7 +5848,7 @@ define amdgpu_kernel void @atomic_store_f64_offset(double %in, ptr %out) {
58485848
; GFX12-NEXT: s_endpgm
58495849
entry:
58505850
%gep = getelementptr double, ptr %out, i64 4
5851-
store atomic double %in, ptr %gep seq_cst, align 8
5851+
store atomic double %in, ptr %gep seq_cst, align 8, !noalias.addrspace !0
58525852
ret void
58535853
}
58545854

@@ -5885,7 +5885,7 @@ define amdgpu_kernel void @atomic_store_f64(double %in, ptr %out) {
58855885
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] scope:SCOPE_SYS
58865886
; GFX12-NEXT: s_endpgm
58875887
entry:
5888-
store atomic double %in, ptr %out seq_cst, align 8
5888+
store atomic double %in, ptr %out seq_cst, align 8, !noalias.addrspace !0
58895889
ret void
58905890
}
58915891

@@ -5941,7 +5941,7 @@ define amdgpu_kernel void @atomic_store_f64_addr64_offset(double %in, ptr %out,
59415941
entry:
59425942
%ptr = getelementptr double, ptr %out, i64 %index
59435943
%gep = getelementptr double, ptr %ptr, i64 4
5944-
store atomic double %in, ptr %gep seq_cst, align 8
5944+
store atomic double %in, ptr %gep seq_cst, align 8, !noalias.addrspace !0
59455945
ret void
59465946
}
59475947

@@ -5992,7 +5992,7 @@ define amdgpu_kernel void @atomic_store_f64_addr64(double %in, ptr %out, i64 %in
59925992
; GFX12-NEXT: s_endpgm
59935993
entry:
59945994
%ptr = getelementptr double, ptr %out, i64 %index
5995-
store atomic double %in, ptr %ptr seq_cst, align 8
5995+
store atomic double %in, ptr %ptr seq_cst, align 8, !noalias.addrspace !0
59965996
ret void
59975997
}
59985998

llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -126,12 +126,12 @@ define i16 @test_cmpxchg_i16_global_agent_align4(ptr addrspace(1) %out, i16 %in,
126126

127127
define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
128128
; GFX90A-LABEL: define void @syncscope_workgroup_nortn(
129-
; GFX90A-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR1:[0-9]+]] {
129+
; GFX90A-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0]] {
130130
; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR]])
131131
; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
132132
; GFX90A: atomicrmw.shared:
133133
; GFX90A-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(3)
134-
; GFX90A-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]]
134+
; GFX90A-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.ignore.denormal.mode [[META3]]
135135
; GFX90A-NEXT: br label [[ATOMICRMW_PHI:%.*]]
136136
; GFX90A: atomicrmw.check.private:
137137
; GFX90A-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[ADDR]])
@@ -144,16 +144,16 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
144144
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
145145
; GFX90A: atomicrmw.global:
146146
; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
147-
; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]]
147+
; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
148148
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
149149
; GFX90A: atomicrmw.phi:
150150
; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]]
151151
; GFX90A: atomicrmw.end:
152152
; GFX90A-NEXT: ret void
153153
;
154154
; GFX1100-LABEL: define void @syncscope_workgroup_nortn(
155-
; GFX1100-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR1:[0-9]+]] {
156-
; GFX1100-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]]
155+
; GFX1100-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0]] {
156+
; GFX1100-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.ignore.denormal.mode [[META3]]
157157
; GFX1100-NEXT: ret void
158158
;
159159
%res = atomicrmw fadd ptr %addr, float %val syncscope("workgroup") seq_cst, !mmra !2, !amdgpu.no.fine.grained.memory !3, !amdgpu.ignore.denormal.mode !3
@@ -193,8 +193,10 @@ define i32 @atomic_load_global_align1(ptr addrspace(1) %ptr) {
193193
; GFX90A: [[META0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]]}
194194
; GFX90A: [[META1]] = !{!"foo", !"bar"}
195195
; GFX90A: [[META2]] = !{!"bux", !"baz"}
196+
; GFX90A: [[META3]] = !{}
196197
;.
197198
; GFX1100: [[META0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]]}
198199
; GFX1100: [[META1]] = !{!"foo", !"bar"}
199200
; GFX1100: [[META2]] = !{!"bux", !"baz"}
201+
; GFX1100: [[META3]] = !{}
200202
;.

llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
163163
; GFX908-NEXT: br label [[ATOMICRMW_PHI]]
164164
; GFX908: atomicrmw.global:
165165
; GFX908-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
166-
; GFX908-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
166+
; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
167167
; GFX908-NEXT: br label [[ATOMICRMW_PHI]]
168168
; GFX908: atomicrmw.phi:
169169
; GFX908-NEXT: br label [[ATOMICRMW_END:%.*]]
@@ -188,7 +188,7 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
188188
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
189189
; GFX90A: atomicrmw.global:
190190
; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
191-
; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
191+
; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
192192
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
193193
; GFX90A: atomicrmw.phi:
194194
; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]]

0 commit comments

Comments
 (0)