diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 7256eec89008a..6bb10ab5bc321 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1174,6 +1174,9 @@ void AMDGPUPassConfig::addIRPasses() { // Replace OpenCL enqueued block function pointers with global variables. addPass(createAMDGPUOpenCLEnqueuedBlockLoweringPass()); + if (TM.getOptLevel() > CodeGenOptLevel::None) + addPass(createInferAddressSpacesPass()); + // Lower LDS accesses to global memory pass if address sanitizer is enabled. if (EnableSwLowerLDS) addPass(createAMDGPUSwLowerLDSLegacyPass(&TM)); @@ -1183,9 +1186,6 @@ void AMDGPUPassConfig::addIRPasses() { addPass(createAMDGPULowerModuleLDSLegacyPass(&TM)); } - if (TM.getOptLevel() > CodeGenOptLevel::None) - addPass(createInferAddressSpacesPass()); - // Run atomic optimizer before Atomic Expand if ((TM.getTargetTriple().getArch() == Triple::amdgcn) && (TM.getOptLevel() >= CodeGenOptLevel::Less) && @@ -1941,13 +1941,17 @@ void AMDGPUCodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const { // TODO: Missing OpenCLEnqueuedBlockLowering + if (TM.getOptLevel() > CodeGenOptLevel::None) + addPass(InferAddressSpacesPass()); + + // Lower LDS accesses to global memory pass if address sanitizer is enabled. + if (EnableSwLowerLDS) + addPass(AMDGPUSwLowerLDSPass(TM)); + // Runs before PromoteAlloca so the latter can account for function uses if (EnableLowerModuleLDS) addPass(AMDGPULowerModuleLDSPass(TM)); - if (TM.getOptLevel() > CodeGenOptLevel::None) - addPass(InferAddressSpacesPass()); - // Run atomic optimizer before Atomic Expand if (TM.getOptLevel() >= CodeGenOptLevel::Less && (AMDGPUAtomicOptimizerStrategy != ScanOptions::None)) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-O0.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-O0.ll new file mode 100644 index 0000000000000..4531dcdcba81c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-O0.ll @@ -0,0 +1,102 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 +; RUN: opt < %s -passes='function(infer-address-spaces),amdgpu-sw-lower-lds' -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s +@lds = internal addrspace(3) global [5 x i32] poison, align 16 + +;. +; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 16, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 20, i32 64 } }, no_sanitize_address +;. +define amdgpu_kernel void @k0() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @k0( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[WID:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]] +; CHECK: [[MALLOC]]: +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]]) +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8 +; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24) +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 52 +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 44) +; CHECK-NEXT: br label %[[BB18]] +; CHECK: [[BB18]]: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP20]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [5 x i32], ptr addrspace(3) [[TMP21]], i64 0, i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(3) [[GEP]] to i32 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr addrspace(1) [[TMP23]] to i64 +; CHECK-NEXT: [[TMP25:%.*]] = lshr i64 [[TMP24]], 3 +; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[TMP25]], 2147450880 +; CHECK-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP26]] to ptr +; CHECK-NEXT: [[TMP28:%.*]] = load i8, ptr [[TMP27]], align 1 +; CHECK-NEXT: [[TMP29:%.*]] = icmp ne i8 [[TMP28]], 0 +; CHECK-NEXT: [[TMP30:%.*]] = and i64 [[TMP24]], 7 +; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[TMP30]], 3 +; CHECK-NEXT: [[TMP32:%.*]] = trunc i64 [[TMP31]] to i8 +; CHECK-NEXT: [[TMP33:%.*]] = icmp sge i8 [[TMP32]], [[TMP28]] +; CHECK-NEXT: [[TMP34:%.*]] = and i1 [[TMP29]], [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP34]]) +; CHECK-NEXT: [[TMP36:%.*]] = icmp ne i64 [[TMP35]], 0 +; CHECK-NEXT: br i1 [[TMP36]], label %[[ASAN_REPORT:.*]], label %[[BB39:.*]], !prof [[PROF2:![0-9]+]] +; CHECK: [[ASAN_REPORT]]: +; CHECK-NEXT: br i1 [[TMP34]], label %[[BB37:.*]], label %[[BB38:.*]] +; CHECK: [[BB37]]: +; CHECK-NEXT: call void @__asan_report_store4(i64 [[TMP24]]) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB38]] +; CHECK: [[BB38]]: +; CHECK-NEXT: br label %[[BB39]] +; CHECK: [[BB39]]: +; CHECK-NEXT: store i32 1, ptr addrspace(1) [[TMP23]], align 4 +; CHECK-NEXT: br label %[[CONDFREE:.*]] +; CHECK: [[CONDFREE]]: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]] +; CHECK: [[FREE]]: +; CHECK-NEXT: [[TMP40:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[TMP40]] to i64 +; CHECK-NEXT: [[TMP42:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP42]], i64 [[TMP41]]) +; CHECK-NEXT: br label %[[END]] +; CHECK: [[END]]: +; CHECK-NEXT: ret void +; + %gep = getelementptr inbounds [5 x i32], ptr addrspacecast (ptr addrspace(3) @lds to ptr), i64 0, i64 0 + store i32 1, ptr %gep, align 4 + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"nosanitize_address", i32 1} +;. +; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="16" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind } +; CHECK: attributes #[[ATTR6]] = { nomerge } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575} +;. diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index e77f4f69e265b..ff4f7864eec7d 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -187,10 +187,11 @@ ; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O1-NEXT: Function Alias Analysis Results ; GCN-O1-NEXT: Lower OpenCL enqueued blocks +; GCN-O1-NEXT: FunctionPass Manager +; GCN-O1-NEXT: Infer address spaces ; GCN-O1-NEXT: AMDGPU Software lowering of LDS ; GCN-O1-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O1-NEXT: FunctionPass Manager -; GCN-O1-NEXT: Infer address spaces ; GCN-O1-NEXT: Dominator Tree Construction ; GCN-O1-NEXT: Cycle Info Analysis ; GCN-O1-NEXT: Uniformity Analysis @@ -467,10 +468,11 @@ ; GCN-O1-OPTS-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O1-OPTS-NEXT: Function Alias Analysis Results ; GCN-O1-OPTS-NEXT: Lower OpenCL enqueued blocks +; GCN-O1-OPTS-NEXT: FunctionPass Manager +; GCN-O1-OPTS-NEXT: Infer address spaces ; GCN-O1-OPTS-NEXT: AMDGPU Software lowering of LDS ; GCN-O1-OPTS-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O1-OPTS-NEXT: FunctionPass Manager -; GCN-O1-OPTS-NEXT: Infer address spaces ; GCN-O1-OPTS-NEXT: Dominator Tree Construction ; GCN-O1-OPTS-NEXT: Cycle Info Analysis ; GCN-O1-OPTS-NEXT: Uniformity Analysis @@ -777,10 +779,11 @@ ; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O2-NEXT: Function Alias Analysis Results ; GCN-O2-NEXT: Lower OpenCL enqueued blocks +; GCN-O2-NEXT: FunctionPass Manager +; GCN-O2-NEXT: Infer address spaces ; GCN-O2-NEXT: AMDGPU Software lowering of LDS ; GCN-O2-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O2-NEXT: FunctionPass Manager -; GCN-O2-NEXT: Infer address spaces ; GCN-O2-NEXT: Dominator Tree Construction ; GCN-O2-NEXT: Cycle Info Analysis ; GCN-O2-NEXT: Uniformity Analysis @@ -1091,10 +1094,11 @@ ; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O3-NEXT: Function Alias Analysis Results ; GCN-O3-NEXT: Lower OpenCL enqueued blocks +; GCN-O3-NEXT: FunctionPass Manager +; GCN-O3-NEXT: Infer address spaces ; GCN-O3-NEXT: AMDGPU Software lowering of LDS ; GCN-O3-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O3-NEXT: FunctionPass Manager -; GCN-O3-NEXT: Infer address spaces ; GCN-O3-NEXT: Dominator Tree Construction ; GCN-O3-NEXT: Cycle Info Analysis ; GCN-O3-NEXT: Uniformity Analysis