-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[AMDGPU] Remove amdgpu-no-heap-ptr and amdgpu-no-lds-kernel-id attributes from lowered kernels in amdgpu-sw-lower-lds pass #120887
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Chaitanya (skc7) Changes'amdgpu-sw-lower-lds' pass internally calls '__asan_malloc_impl' for heap memory allocation. This patch removes 'amdgpu-no-heap-ptr' and 'amdgpu-no-lds-kernel-id' from all kernels lowered by the pass. Patch is 21.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/120887.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
index c3f41a4cfcf0a1..17207773b4858c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
@@ -1176,10 +1176,13 @@ bool AMDGPUSwLowerLDS::run() {
LDSParams.IndirectAccess.DynamicLDSGlobals.empty()) {
Changed = false;
} else {
- removeFnAttrFromReachable(CG, Func,
- {"amdgpu-no-workitem-id-x",
- "amdgpu-no-workitem-id-y",
- "amdgpu-no-workitem-id-z"});
+ removeFnAttrFromReachable(
+ CG, Func,
+ {"amdgpu-no-workitem-id-x", "amdgpu-no-workitem-id-y",
+ "amdgpu-no-workitem-id-z", "amdgpu-no-heap-ptr"});
+ if (!LDSParams.IndirectAccess.StaticLDSGlobals.empty() ||
+ !LDSParams.IndirectAccess.DynamicLDSGlobals.empty())
+ removeFnAttrFromReachable(CG, Func, {"amdgpu-no-lds-kernel-id"});
reorderStaticDynamicIndirectLDSSet(LDSParams);
buildSwLDSGlobal(Func);
buildSwDynLDSGlobal(Func);
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-no-kernel-lds-id.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-no-kernel-lds-id.ll
new file mode 100644
index 00000000000000..f1a8a369aae393
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-no-kernel-lds-id.ll
@@ -0,0 +1,130 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
+
+; Test to check if static LDS is lowered correctly when a non-kernel with LDS accesses is called from kernel.
+; Also checks if amdgpu-no-lds-kernel-id attribute is removed from the list of attributes
+@lds_1 = internal addrspace(3) global [1 x i8] poison, align 1
+@lds_2 = internal addrspace(3) global [1 x i32] poison, align 2
+@lds_3 = external addrspace(3) global [3 x i8], align 4
+@lds_4 = external addrspace(3) global [4 x i8], align 8
+
+;.
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 3, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 4, i32 32 } }, no_sanitize_address
+; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
+;.
+define void @use_variables() sanitize_address {
+; CHECK-LABEL: define void @use_variables(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP3]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP5]], align 8
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) [[TMP6]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP10:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP9]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(1) [[TMP10]], align 4
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[TMP11]]
+; CHECK-NEXT: [[X:%.*]] = addrspacecast ptr addrspace(3) [[TMP8]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = addrspacecast ptr addrspace(3) [[TMP8]] to ptr
+; CHECK-NEXT: store i8 3, ptr [[TMP13]], align 4
+; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr addrspace(3) [[TMP12]] to i32
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP14]]
+; CHECK-NEXT: store i8 3, ptr addrspace(1) [[TMP15]], align 8
+; CHECK-NEXT: ret void
+;
+ %X = addrspacecast ptr addrspace(3) @lds_3 to ptr
+ store i8 3, ptr addrspacecast( ptr addrspace(3) @lds_3 to ptr), align 4
+ store i8 3, ptr addrspace(3) @lds_4, align 8
+ ret void
+}
+
+define amdgpu_kernel void @k0() sanitize_address #1 {
+; CHECK-LABEL: define amdgpu_kernel void @k0(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
+; CHECK-NEXT: WId:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP24:%.*]]
+; CHECK: Malloc:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
+; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 33
+; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 31)
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
+; CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr addrspace(1) [[TMP18]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP19]], i64 28)
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 99
+; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr addrspace(1) [[TMP20]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP21]], i64 29)
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 132
+; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 28)
+; CHECK-NEXT: br label [[TMP24]]
+; CHECK: 24:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: [[TMP25:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP26]]
+; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP28]]
+; CHECK-NEXT: call void @use_variables()
+; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr addrspace(3) [[TMP27]] to i32
+; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP25]], i32 [[TMP30]]
+; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP31]], align 1
+; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(3) [[TMP29]] to i32
+; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP25]], i32 [[TMP32]]
+; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP33]], align 2
+; CHECK-NEXT: br label [[CONDFREE:%.*]]
+; CHECK: CondFree:
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
+; CHECK: Free:
+; CHECK-NEXT: [[TMP34:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP35:%.*]] = ptrtoint ptr [[TMP34]] to i64
+; CHECK-NEXT: [[TMP36:%.*]] = ptrtoint ptr addrspace(1) [[TMP25]] to i64
+; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP36]], i64 [[TMP35]])
+; CHECK-NEXT: br label [[END]]
+; CHECK: End:
+; CHECK-NEXT: ret void
+;
+ call void @use_variables()
+ store i8 7, ptr addrspace(3) @lds_1, align 1
+ store i32 8, ptr addrspace(3) @lds_2, align 2
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 4, !"nosanitize_address", i32 1}
+attributes #1 = { "amdgpu-no-lds-kernel-id" }
+;.
+; CHECK: attributes #[[ATTR0]] = { sanitize_address }
+; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="8" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+;.
+; CHECK: [[META0]] = !{i32 0, i32 1}
+; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
+; CHECK: [[META2]] = !{i32 0}
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-no-heap-ptr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-no-heap-ptr.ll
new file mode 100644
index 00000000000000..e4fdeb6853978d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-no-heap-ptr.ll
@@ -0,0 +1,161 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
+
+; Test to check if static LDS accesses in kernel are lowered correctly. Also checks if amdgpu-no-heap-ptr attribute
+; is removed from the list of attributes
+@lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
+@lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
+
+;.
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 } }, no_sanitize_address
+;.
+define amdgpu_kernel void @k0() sanitize_address #1 {
+; CHECK-LABEL: define amdgpu_kernel void @k0(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: WId:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP20:%.*]]
+; CHECK: Malloc:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
+; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 33
+; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 31)
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
+; CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr addrspace(1) [[TMP18]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP19]], i64 28)
+; CHECK-NEXT: br label [[TMP20]]
+; CHECK: 20:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP22]]
+; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP24]]
+; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr addrspace(3) [[TMP23]] to i32
+; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP26]]
+; CHECK-NEXT: [[TMP28:%.*]] = ptrtoint ptr addrspace(1) [[TMP27]] to i64
+; CHECK-NEXT: [[TMP29:%.*]] = lshr i64 [[TMP28]], 3
+; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[TMP29]], 2147450880
+; CHECK-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP30]] to ptr
+; CHECK-NEXT: [[TMP32:%.*]] = load i8, ptr [[TMP31]], align 1
+; CHECK-NEXT: [[TMP33:%.*]] = icmp ne i8 [[TMP32]], 0
+; CHECK-NEXT: [[TMP34:%.*]] = and i64 [[TMP28]], 7
+; CHECK-NEXT: [[TMP35:%.*]] = trunc i64 [[TMP34]] to i8
+; CHECK-NEXT: [[TMP36:%.*]] = icmp sge i8 [[TMP35]], [[TMP32]]
+; CHECK-NEXT: [[TMP37:%.*]] = and i1 [[TMP33]], [[TMP36]]
+; CHECK-NEXT: [[TMP38:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP37]])
+; CHECK-NEXT: [[TMP39:%.*]] = icmp ne i64 [[TMP38]], 0
+; CHECK-NEXT: br i1 [[TMP39]], label [[ASAN_REPORT:%.*]], label [[TMP42:%.*]], !prof [[PROF2:![0-9]+]]
+; CHECK: asan.report:
+; CHECK-NEXT: br i1 [[TMP37]], label [[TMP40:%.*]], label [[TMP41:%.*]]
+; CHECK: 40:
+; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP28]]) #[[ATTR6:[0-9]+]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label [[TMP41]]
+; CHECK: 41:
+; CHECK-NEXT: br label [[TMP42]]
+; CHECK: 42:
+; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP27]], align 4
+; CHECK-NEXT: [[TMP43:%.*]] = ptrtoint ptr addrspace(3) [[TMP25]] to i32
+; CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP43]]
+; CHECK-NEXT: [[TMP45:%.*]] = ptrtoint ptr addrspace(1) [[TMP44]] to i64
+; CHECK-NEXT: [[TMP46:%.*]] = add i64 [[TMP45]], 3
+; CHECK-NEXT: [[TMP47:%.*]] = inttoptr i64 [[TMP46]] to ptr addrspace(1)
+; CHECK-NEXT: [[TMP48:%.*]] = ptrtoint ptr addrspace(1) [[TMP44]] to i64
+; CHECK-NEXT: [[TMP49:%.*]] = lshr i64 [[TMP48]], 3
+; CHECK-NEXT: [[TMP50:%.*]] = add i64 [[TMP49]], 2147450880
+; CHECK-NEXT: [[TMP51:%.*]] = inttoptr i64 [[TMP50]] to ptr
+; CHECK-NEXT: [[TMP52:%.*]] = load i8, ptr [[TMP51]], align 1
+; CHECK-NEXT: [[TMP53:%.*]] = icmp ne i8 [[TMP52]], 0
+; CHECK-NEXT: [[TMP54:%.*]] = and i64 [[TMP48]], 7
+; CHECK-NEXT: [[TMP55:%.*]] = trunc i64 [[TMP54]] to i8
+; CHECK-NEXT: [[TMP56:%.*]] = icmp sge i8 [[TMP55]], [[TMP52]]
+; CHECK-NEXT: [[TMP57:%.*]] = and i1 [[TMP53]], [[TMP56]]
+; CHECK-NEXT: [[TMP58:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP57]])
+; CHECK-NEXT: [[TMP59:%.*]] = icmp ne i64 [[TMP58]], 0
+; CHECK-NEXT: br i1 [[TMP59]], label [[ASAN_REPORT1:%.*]], label [[TMP62:%.*]], !prof [[PROF2]]
+; CHECK: asan.report1:
+; CHECK-NEXT: br i1 [[TMP57]], label [[TMP60:%.*]], label [[TMP61:%.*]]
+; CHECK: 60:
+; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP48]]) #[[ATTR6]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label [[TMP61]]
+; CHECK: 61:
+; CHECK-NEXT: br label [[TMP62]]
+; CHECK: 62:
+; CHECK-NEXT: [[TMP63:%.*]] = ptrtoint ptr addrspace(1) [[TMP47]] to i64
+; CHECK-NEXT: [[TMP64:%.*]] = lshr i64 [[TMP63]], 3
+; CHECK-NEXT: [[TMP65:%.*]] = add i64 [[TMP64]], 2147450880
+; CHECK-NEXT: [[TMP66:%.*]] = inttoptr i64 [[TMP65]] to ptr
+; CHECK-NEXT: [[TMP67:%.*]] = load i8, ptr [[TMP66]], align 1
+; CHECK-NEXT: [[TMP68:%.*]] = icmp ne i8 [[TMP67]], 0
+; CHECK-NEXT: [[TMP69:%.*]] = and i64 [[TMP63]], 7
+; CHECK-NEXT: [[TMP70:%.*]] = trunc i64 [[TMP69]] to i8
+; CHECK-NEXT: [[TMP71:%.*]] = icmp sge i8 [[TMP70]], [[TMP67]]
+; CHECK-NEXT: [[TMP72:%.*]] = and i1 [[TMP68]], [[TMP71]]
+; CHECK-NEXT: [[TMP73:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP72]])
+; CHECK-NEXT: [[TMP74:%.*]] = icmp ne i64 [[TMP73]], 0
+; CHECK-NEXT: br i1 [[TMP74]], label [[ASAN_REPORT2:%.*]], label [[TMP77:%.*]], !prof [[PROF2]]
+; CHECK: asan.report2:
+; CHECK-NEXT: br i1 [[TMP72]], label [[TMP75:%.*]], label [[TMP76:%.*]]
+; CHECK: 75:
+; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP63]]) #[[ATTR6]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label [[TMP76]]
+; CHECK: 76:
+; CHECK-NEXT: br label [[TMP77]]
+; CHECK: 77:
+; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP44]], align 2
+; CHECK-NEXT: br label [[CONDFREE:%.*]]
+; CHECK: CondFree:
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
+; CHECK: Free:
+; CHECK-NEXT: [[TMP78:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP79:%.*]] = ptrtoint ptr [[TMP78]] to i64
+; CHECK-NEXT: [[TMP80:%.*]] ...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm but please regenerate the tests with the latest update_test_checks
@@ -0,0 +1,161 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Latest version is 5
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated in latest commit. Thanks
llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-no-kernel-lds-id.ll
Outdated
Show resolved
Hide resolved
…utes from kernel in amdgpu-sw-lower-lds pass
e402d05
to
0258fa4
Compare
'amdgpu-sw-lower-lds' pass internally calls '__asan_malloc_impl' for heap memory allocation.
Pass also uses 'amdgcn_lds_kernel_id' for non-kernel lds accesses lowering.
This patch removes 'amdgpu-no-heap-ptr' and 'amdgpu-no-lds-kernel-id' from all kernels lowered by the pass.