AMDGPU: Add baseline tests for cmpxchg custom expansion #109408

Merged
34 changes: 17 additions & 17 deletions llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll
@@ -5005,7 +5005,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_offset(ptr %out, i64 %in, i64 %old
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
ret void
}
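A minimal standalone sketch of the pattern these tests now exercise, not copied verbatim from the patch: the !noalias.addrspace annotation on the flat cmpxchg tells the backend that the pointer never refers to private (scratch) memory, so the private expansion path is not needed. The payload of !0 is not visible in these hunks; on AMDGPU private is address space 5, so a [5, 6) range excluding it is assumed here.

define amdgpu_kernel void @cmpxchg_flat_noprivate_sketch(ptr %out, i64 %in, i64 %old) {
entry:
  ; flat pointer, annotated as never pointing into private memory
  %gep = getelementptr i64, ptr %out, i64 4
  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
  ret void
}

!0 = !{i32 5, i32 6} ; assumed range: address spaces in [5, 6) are never accessed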

@@ -5061,7 +5061,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(ptr %out, i64 %in, i64 %ol
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 9000
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
ret void
}

@@ -5121,7 +5121,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(ptr %out, ptr %out2, i6
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, ptr %out2
ret void
@@ -5184,7 +5184,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(ptr %out, i64 %in, i
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%gep = getelementptr i64, ptr %ptr, i64 4
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
ret void
}

@@ -5257,7 +5257,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(ptr %out, ptr %o
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%gep = getelementptr i64, ptr %ptr, i64 4
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, ptr %out2
ret void
@@ -5310,7 +5310,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64(ptr %out, i64 %in, i64 %old) {
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
ret void
}

@@ -5365,7 +5365,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret(ptr %out, ptr %out2, i64 %in,
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
entry:
%val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, ptr %out2
ret void
@@ -5423,7 +5423,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(ptr %out, i64 %in, i64 %ind
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
ret void
}

@@ -5491,7 +5491,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(ptr %out, ptr %out2, i6
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
%val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
%val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, ptr %out2
ret void
@@ -5543,7 +5543,7 @@ define amdgpu_kernel void @atomic_load_f64_offset(ptr %in, ptr %out) {
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr double, ptr %in, i64 4
%val = load atomic double, ptr %gep seq_cst, align 8
%val = load atomic double, ptr %gep seq_cst, align 8, !noalias.addrspace !0
store double %val, ptr %out
ret void
}
@@ -5589,7 +5589,7 @@ define amdgpu_kernel void @atomic_load_f64(ptr %in, ptr %out) {
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
entry:
%val = load atomic double, ptr %in syncscope("agent") seq_cst, align 8
%val = load atomic double, ptr %in syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
store double %val, ptr %out
ret void
}
@@ -5654,7 +5654,7 @@ define amdgpu_kernel void @atomic_load_f64_addr64_offset(ptr %in, ptr %out, i64
entry:
%ptr = getelementptr double, ptr %in, i64 %index
%gep = getelementptr double, ptr %ptr, i64 4
%val = load atomic double, ptr %gep seq_cst, align 8
%val = load atomic double, ptr %gep seq_cst, align 8, !noalias.addrspace !0
store double %val, ptr %out
ret void
}
@@ -5714,7 +5714,7 @@ define amdgpu_kernel void @atomic_load_f64_addr64(ptr %in, ptr %out, i64 %index)
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr double, ptr %in, i64 %index
%val = load atomic double, ptr %ptr seq_cst, align 8
%val = load atomic double, ptr %ptr seq_cst, align 8, !noalias.addrspace !0
store double %val, ptr %out
ret void
}
@@ -5757,7 +5757,7 @@ define amdgpu_kernel void @atomic_store_f64_offset(double %in, ptr %out) {
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr double, ptr %out, i64 4
store atomic double %in, ptr %gep seq_cst, align 8
store atomic double %in, ptr %gep seq_cst, align 8, !noalias.addrspace !0
ret void
}

@@ -5794,7 +5794,7 @@ define amdgpu_kernel void @atomic_store_f64(double %in, ptr %out) {
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] scope:SCOPE_SYS
; GFX12-NEXT: s_endpgm
entry:
store atomic double %in, ptr %out seq_cst, align 8
store atomic double %in, ptr %out seq_cst, align 8, !noalias.addrspace !0
ret void
}

@@ -5850,7 +5850,7 @@ define amdgpu_kernel void @atomic_store_f64_addr64_offset(double %in, ptr %out,
entry:
%ptr = getelementptr double, ptr %out, i64 %index
%gep = getelementptr double, ptr %ptr, i64 4
store atomic double %in, ptr %gep seq_cst, align 8
store atomic double %in, ptr %gep seq_cst, align 8, !noalias.addrspace !0
ret void
}

@@ -5901,7 +5901,7 @@ define amdgpu_kernel void @atomic_store_f64_addr64(double %in, ptr %out, i64 %in
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr double, ptr %out, i64 %index
store atomic double %in, ptr %ptr seq_cst, align 8
store atomic double %in, ptr %ptr seq_cst, align 8, !noalias.addrspace !0
ret void
}
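The f64 atomic load and store tests in this file pick up the same annotation. A short illustrative sketch of that shape, with hypothetical function and value names and the same assumed !0 payload as above:

define amdgpu_kernel void @load_store_f64_noprivate_sketch(ptr %in, ptr %out, double %v) {
entry:
  ; both the atomic load and the atomic store carry the no-private hint
  %val = load atomic double, ptr %in syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
  store atomic double %v, ptr %out seq_cst, align 8, !noalias.addrspace !0
  ret void
}

!0 = !{i32 5, i32 6} ; assumed: never private (address space 5)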

12 changes: 7 additions & 5 deletions llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll
@@ -126,12 +126,12 @@ define i16 @test_cmpxchg_i16_global_agent_align4(ptr addrspace(1) %out, i16 %in,

define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
; GFX90A-LABEL: define void @syncscope_workgroup_nortn(
; GFX90A-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR1:[0-9]+]] {
; GFX90A-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0]] {
; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR]])
; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
; GFX90A: atomicrmw.shared:
; GFX90A-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(3)
; GFX90A-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]]
; GFX90A-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.ignore.denormal.mode [[META3]]
; GFX90A-NEXT: br label [[ATOMICRMW_PHI:%.*]]
; GFX90A: atomicrmw.check.private:
; GFX90A-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[ADDR]])
@@ -144,16 +144,16 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.global:
; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]]
; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.phi:
; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]]
; GFX90A: atomicrmw.end:
; GFX90A-NEXT: ret void
;
; GFX1100-LABEL: define void @syncscope_workgroup_nortn(
; GFX1100-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR1:[0-9]+]] {
; GFX1100-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]]
; GFX1100-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0]] {
; GFX1100-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]], !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.ignore.denormal.mode [[META3]]
; GFX1100-NEXT: ret void
;
%res = atomicrmw fadd ptr %addr, float %val syncscope("workgroup") seq_cst, !mmra !2, !amdgpu.no.fine.grained.memory !3, !amdgpu.ignore.denormal.mode !3
@@ -193,8 +193,10 @@ define i32 @atomic_load_global_align1(ptr addrspace(1) %ptr) {
; GFX90A: [[META0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]]}
; GFX90A: [[META1]] = !{!"foo", !"bar"}
; GFX90A: [[META2]] = !{!"bux", !"baz"}
; GFX90A: [[META3]] = !{}
;.
; GFX1100: [[META0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]]}
; GFX1100: [[META1]] = !{!"foo", !"bar"}
; GFX1100: [[META2]] = !{!"bux", !"baz"}
; GFX1100: [[META3]] = !{}
;.
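The [[META0]] through [[META3]] placeholders in these regenerated check lines (tests of this kind are typically updated with llvm/utils/update_test_checks.py) bind to metadata nodes printed at the end of the output module. A sketch of the nodes the GFX90A and GFX1100 checks above describe; the node numbering is illustrative rather than taken from the test:

!0 = !{!"foo", !"bar"}  ; [[META1]]: an MMRA ("prefix", "suffix") tag pair
!1 = !{!"bux", !"baz"}  ; [[META2]]
!2 = !{!0, !1}          ; [[META0]]: the !mmra list attached to the atomics
!3 = !{}                ; [[META3]]: empty node used by !amdgpu.no.fine.grained.memory and !amdgpu.ignore.denormal.mode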
@@ -163,7 +163,7 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
; GFX908-NEXT: br label [[ATOMICRMW_PHI]]
; GFX908: atomicrmw.global:
; GFX908-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
; GFX908-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX908-NEXT: br label [[ATOMICRMW_PHI]]
; GFX908: atomicrmw.phi:
; GFX908-NEXT: br label [[ATOMICRMW_END:%.*]]
@@ -188,7 +188,7 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.global:
; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.phi:
; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]]
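The hunks from this last test file show only the regenerated FileCheck lines; the IR input they verify is not visible here. A hedged reconstruction of what the pre-expansion atomicrmw in @syncscope_workgroup_nortn presumably looks like, inferred from the function signature and the metadata kinds the checks reference (operand names and the metadata slot number are assumptions):

define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
  ; flat fadd that AtomicExpand splits into shared/private/global paths
  %res = atomicrmw fadd ptr %addr, float %val syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
  ret void
}

!0 = !{} ; empty node; only the presence of these annotations matters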