diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 72f9e5a8174d2..18ac9451576d7 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -58,9 +58,6 @@ Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) { /// COV_NONE : Emit code to load a global variable "__oclc_ABI_version" /// and use its value for COV_4 or COV_5+ approach. It is used for /// compiling device libraries in an ABI-agnostic way. -/// -/// Note: "__oclc_ABI_version" is supposed to be emitted and intialized by -/// clang during compilation of user code. Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) { llvm::LoadInst *LD; diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index e3c8243cbb0b5..db2a2c5740646 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -305,8 +305,6 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo { void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F, CodeGenModule &CGM) const; - void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const override; - void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; unsigned getOpenCLKernelCallingConv() const override; @@ -414,40 +412,6 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes( } } -/// Emits control constants used to change per-architecture behaviour in the -/// AMDGPU ROCm device libraries. -void AMDGPUTargetCodeGenInfo::emitTargetGlobals( - CodeGen::CodeGenModule &CGM) const { - StringRef Name = "__oclc_ABI_version"; - llvm::GlobalVariable *OriginalGV = CGM.getModule().getNamedGlobal(Name); - if (OriginalGV && !llvm::GlobalVariable::isExternalLinkage(OriginalGV->getLinkage())) - return; - - if (CGM.getTarget().getTargetOpts().CodeObjectVersion == - llvm::CodeObjectVersionKind::COV_None) - return; - - auto *Type = llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), 32); - llvm::Constant *COV = llvm::ConstantInt::get( - Type, CGM.getTarget().getTargetOpts().CodeObjectVersion); - - // It needs to be constant weak_odr without externally_initialized so that - // the load instuction can be eliminated by the IPSCCP. - auto *GV = new llvm::GlobalVariable( - CGM.getModule(), Type, true, llvm::GlobalValue::WeakODRLinkage, COV, Name, - nullptr, llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, - CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant)); - GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); - GV->setVisibility(llvm::GlobalValue::VisibilityTypes::HiddenVisibility); - - // Replace any external references to this variable with the new global. - if (OriginalGV) { - OriginalGV->replaceAllUsesWith(GV); - GV->takeName(OriginalGV); - OriginalGV->eraseFromParent(); - } -} - void AMDGPUTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { if (requiresAMDGPUProtectedVisibility(D, GV)) { diff --git a/clang/test/CodeGen/amdgpu-abi-version.c b/clang/test/CodeGen/amdgpu-abi-version.c index 4e5ad87655f23..b9c1de0521b95 100644 --- a/clang/test/CodeGen/amdgpu-abi-version.c +++ b/clang/test/CodeGen/amdgpu-abi-version.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --version 3 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5 // RUN: %clang_cc1 -cc1 -triple amdgcn-amd-amdhsa -emit-llvm -mcode-object-version=none %s -o - | FileCheck %s //. @@ -6,7 +6,7 @@ //. // CHECK-LABEL: define dso_local i32 @foo( // CHECK-SAME: ) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) @__oclc_ABI_version, align 4 @@ -16,8 +16,17 @@ // CHECK-NEXT: [[TMP4:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() // CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP4]], i32 4 // CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP1]], ptr addrspace(4) [[TMP3]], ptr addrspace(4) [[TMP5]] -// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 2, !range [[RNG2:![0-9]+]], !invariant.load !3, !noundef !3 +// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 2, !range [[RNG2:![0-9]+]], !invariant.load [[META3:![0-9]+]], !noundef [[META3]] // CHECK-NEXT: [[CONV:%.*]] = zext i16 [[TMP7]] to i32 // CHECK-NEXT: ret i32 [[CONV]] // int foo() { return __builtin_amdgcn_workgroup_size_x(); } +//. +// CHECK: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +//. +// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +// CHECK: [[RNG2]] = !{i16 1, i16 1025} +// CHECK: [[META3]] = !{} +//. diff --git a/clang/test/CodeGen/amdgpu-address-spaces.cpp b/clang/test/CodeGen/amdgpu-address-spaces.cpp index b121b559f58dc..1d8668a7f0917 100644 --- a/clang/test/CodeGen/amdgpu-address-spaces.cpp +++ b/clang/test/CodeGen/amdgpu-address-spaces.cpp @@ -29,7 +29,6 @@ int [[clang::address_space(999)]] bbb = 1234; // CHECK: @u = addrspace(5) global i32 undef, align 4 // CHECK: @aaa = addrspace(6) global i32 1000, align 4 // CHECK: @bbb = addrspace(999) global i32 1234, align 4 -// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 //. // CHECK-LABEL: define dso_local amdgpu_kernel void @foo( // CHECK-SAME: ) #[[ATTR0:[0-9]+]] { @@ -60,3 +59,10 @@ extern "C" [[clang::amdgpu_kernel]] void foo() { aaa = 0; bbb = 0; } +//. +// CHECK: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +//. +// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} +// CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// CHECK: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +//. diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu deleted file mode 100644 index cb467886c016c..0000000000000 --- a/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu +++ /dev/null @@ -1,133 +0,0 @@ -// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \ -// RUN: -mcode-object-version=4 -DUSER -x hip -o %t_4.bc %s - -// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \ -// RUN: -mcode-object-version=5 -DUSER -x hip -o %t_5.bc %s - -// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \ -// RUN: -mcode-object-version=6 -DUSER -x hip -o %t_6.bc %s - -// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \ -// RUN: -mcode-object-version=none -DDEVICELIB -x hip -o %t_0.bc %s - -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \ -// RUN: %t_4.bc -mlink-builtin-bitcode %t_0.bc -o - |\ -// RUN: FileCheck -check-prefix=LINKED4 %s - -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \ -// RUN: %t_5.bc -mlink-builtin-bitcode %t_0.bc -o - |\ -// RUN: FileCheck -check-prefix=LINKED5 %s - -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \ -// RUN: %t_6.bc -mlink-builtin-bitcode %t_0.bc -o - |\ -// RUN: FileCheck -check-prefix=LINKED6 %s - -#include "Inputs/cuda.h" - -// LINKED4: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400 -// LINKED4-LABEL: bar -// LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} -// LINKED4-NOT: icmp sge i32 %{{.*}}, 500 -// LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -// LINKED4: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12 -// LINKED4: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -// LINKED4: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4 -// LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]] -// LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef - -// LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} -// LINKED4-NOT: icmp sge i32 %{{.*}}, 500 -// LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -// LINKED4: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14 -// LINKED4: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -// LINKED4: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6 -// LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]] -// LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef - -// LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} -// LINKED4-NOT: icmp sge i32 %{{.*}}, 500 -// LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -// LINKED4: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16 -// LINKED4: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -// LINKED4: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8 -// LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]] -// LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef -// LINKED4: "amdhsa_code_object_version", i32 400 - -// LINKED5: __oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 -// LINKED5-LABEL: bar -// LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} -// LINKED5-NOT: icmp sge i32 %{{.*}}, 500 -// LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -// LINKED5: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12 -// LINKED5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -// LINKED5: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4 -// LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]] -// LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef - -// LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} -// LINKED5-NOT: icmp sge i32 %{{.*}}, 500 -// LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -// LINKED5: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14 -// LINKED5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -// LINKED5: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6 -// LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]] -// LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef - -// LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} -// LINKED5-NOT: icmp sge i32 %{{.*}}, 500 -// LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -// LINKED5: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16 -// LINKED5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -// LINKED5: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8 -// LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]] -// LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef -// LINKED5: "amdhsa_code_object_version", i32 500 - -// LINKED6: __oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 -// LINKED6-LABEL: bar -// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} -// LINKED6-NOT: icmp sge i32 %{{.*}}, 500 -// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -// LINKED6: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12 -// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -// LINKED6: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4 -// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]] -// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef - -// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} -// LINKED6-NOT: icmp sge i32 %{{.*}}, 500 -// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -// LINKED6: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14 -// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -// LINKED6: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6 -// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]] -// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef - -// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} -// LINKED6-NOT: icmp sge i32 %{{.*}}, 500 -// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -// LINKED6: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16 -// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -// LINKED6: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8 -// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]] -// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef -// LINKED6: "amdhsa_code_object_version", i32 600 - -#ifdef DEVICELIB -__device__ void bar(int *x, int *y, int *z) -{ - *x = __builtin_amdgcn_workgroup_size_x(); - *y = __builtin_amdgcn_workgroup_size_y(); - *z = __builtin_amdgcn_workgroup_size_z(); -} -#endif - -#ifdef USER -__device__ void bar(int *x, int *y, int *z); -__device__ void foo() -{ - int *x, *y, *z; - bar(x, y, z); -} -#endif diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu index f42b69f492ff8..2d3730cdfc8c9 100644 --- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu +++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu @@ -2,7 +2,6 @@ // RUN: -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefix=PRECOV5 %s - // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ // RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefix=COV5 %s @@ -11,10 +10,6 @@ // RUN: -fcuda-is-device -mcode-object-version=6 -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefix=COV5 %s -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ -// RUN: -fcuda-is-device -mcode-object-version=none -emit-llvm -o - -x hip %s \ -// RUN: | FileCheck -check-prefix=COVNONE %s - #include "Inputs/cuda.h" // PRECOV5-LABEL: test_get_workgroup_size @@ -35,35 +30,6 @@ // COV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16 // COV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef - -// COVNONE-LABEL: test_get_workgroup_size -// COVNONE: load i32, ptr addrspace(4) @__oclc_ABI_version -// COVNONE: [[ABI5_X:%.*]] = icmp sge i32 %{{.*}}, 500 -// COVNONE: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -// COVNONE: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12 -// COVNONE: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -// COVNONE: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4 -// COVNONE: select i1 [[ABI5_X]], ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]] -// COVNONE: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef - -// COVNONE: load i32, ptr addrspace(4) @__oclc_ABI_version -// COVNONE: [[ABI5_Y:%.*]] = icmp sge i32 %{{.*}}, 500 -// COVNONE: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -// COVNONE: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14 -// COVNONE: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -// COVNONE: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6 -// COVNONE: select i1 [[ABI5_Y]], ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]] -// COVNONE: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef - -// COVNONE: load i32, ptr addrspace(4) @__oclc_ABI_version -// COVNONE: [[ABI5_Z:%.*]] = icmp sge i32 %{{.*}}, 500 -// COVNONE: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -// COVNONE: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16 -// COVNONE: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -// COVNONE: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8 -// COVNONE: select i1 [[ABI5_Z]], ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]] -// COVNONE: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef - __device__ void test_get_workgroup_size(int d, int *out) { switch (d) { diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp index 5d49cc0544b9c..8d50c71feb990 100644 --- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp +++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp @@ -13,7 +13,6 @@ B fail; // CHECK: @_ZTI1B = linkonce_odr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds (ptr addrspace(1), ptr addrspace(1) @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr addrspace(1) @_ZTS1B, ptr addrspace(1) @_ZTI1A }, comdat, align 8 // CHECK: @_ZTVN10__cxxabiv120__si_class_type_infoE = external addrspace(1) global [0 x ptr addrspace(1)] // CHECK: @_ZTS1B = linkonce_odr addrspace(1) constant [3 x i8] c"1B\00", comdat, align 1 -// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 //. // WITH-NONZERO-DEFAULT-AS: @_ZTV1B = linkonce_odr unnamed_addr addrspace(1) constant { [3 x ptr addrspace(1)] } { [3 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @_ZTI1B, ptr addrspace(1) addrspacecast (ptr addrspace(4) @_ZN1A1fEv to ptr addrspace(1))] }, comdat, align 8 // WITH-NONZERO-DEFAULT-AS: @fail = addrspace(1) global { ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds inrange(-16, 8) ({ [3 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTV1B, i32 0, i32 0, i32 2) }, align 8 diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip index f4dbad021987f..9aa40f18696c8 100644 --- a/clang/test/CodeGenHIP/default-attributes.hip +++ b/clang/test/CodeGenHIP/default-attributes.hip @@ -8,7 +8,6 @@ //. // OPTNONE: @__hip_cuid_ = addrspace(1) global i8 0 // OPTNONE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata" -// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 //. __device__ void extern_func(); diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index 7e847367e1a13..caae5666de29e 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -68,7 +68,6 @@ kernel void test_target_features_kernel(global int *i) { // CHECK: @__block_literal_global = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_kernel_block_invoke }, align 8 #0 // CHECK: @__test_target_features_kernel_block_invoke_kernel.runtime.handle = internal addrspace(1) externally_initialized constant %block.runtime.handle.t.3 zeroinitializer, section ".amdgpu.kernel.runtime.handle" // CHECK: @llvm.used = appending addrspace(1) global [10 x ptr] [ptr @__test_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle to ptr), ptr @__test_block_invoke_2_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle to ptr), ptr @__test_block_invoke_3_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr @__test_block_invoke_4_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle to ptr), ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr)], section "llvm.metadata" -// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 //. // NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone // NOCPU-LABEL: define {{[^@]+}}@callee diff --git a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp index d728dc1233e2c..bbfb0c4d9b11e 100644 --- a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp +++ b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp @@ -29,7 +29,6 @@ S A; // CHECK: @A = addrspace(1) global %struct.S zeroinitializer, align 4 // CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__sub_I_amdgcn_target_global_constructor.cpp, ptr null }] // CHECK: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__dtor_A, ptr null }] -// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 //. // CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init // CHECK-SAME: () #[[ATTR0:[0-9]+]] { diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake index 7bd3269bd999d..e1945ba2b2230 100644 --- a/compiler-rt/cmake/builtin-config-ix.cmake +++ b/compiler-rt/cmake/builtin-config-ix.cmake @@ -22,7 +22,6 @@ builtin_check_c_compiler_flag(-Wno-pedantic COMPILER_RT_HAS_WNO_PEDANTIC builtin_check_c_compiler_flag(-nogpulib COMPILER_RT_HAS_NOGPULIB_FLAG) builtin_check_c_compiler_flag(-flto COMPILER_RT_HAS_FLTO_FLAG) builtin_check_c_compiler_flag(-fconvergent-functions COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG) -builtin_check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG) builtin_check_c_compiler_flag(-Wbuiltin-declaration-mismatch COMPILER_RT_HAS_WBUILTIN_DECLARATION_MISMATCH_FLAG) builtin_check_c_compiler_flag(/Zl COMPILER_RT_HAS_ZL_FLAG) builtin_check_c_compiler_flag(-fcf-protection=full COMPILER_RT_HAS_FCF_PROTECTION_FLAG) diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index 19316c52d12ce..626b21e30ed6b 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -833,12 +833,6 @@ else () append_list_if(COMPILER_RT_HAS_FLTO_FLAG -flto BUILTIN_CFLAGS) append_list_if(COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG -fconvergent-functions BUILTIN_CFLAGS) - - # AMDGPU targets want to use a generic ABI. - if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn") - append_list_if(COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG - "SHELL:-Xclang -mcode-object-version=none" BUILTIN_CFLAGS) - endif() endif() set(BUILTIN_DEFS "") diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 3e6e98855fe7a..2cb5260334a0f 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -170,62 +170,6 @@ static void addDependentLibs(mlir::ModuleOp mlirModule, CompilerInstance &ci) { } } -// Add to MLIR code target specific items which are dependent on target -// configuration specified by the user. -// Clang equivalent function: AMDGPUTargetCodeGenInfo::emitTargetGlobals -static void addAMDGPUSpecificMLIRItems(mlir::ModuleOp mlirModule, - CompilerInstance &ci) { - const TargetOptions &targetOpts = ci.getInvocation().getTargetOpts(); - const llvm::Triple triple(targetOpts.triple); - const llvm::StringRef codeObjectVersionGlobalOpName = "__oclc_ABI_version"; - - if (!triple.isAMDGPU()) { - return; - } - const CodeGenOptions &codeGenOpts = ci.getInvocation().getCodeGenOpts(); - if (codeGenOpts.CodeObjectVersion == llvm::CodeObjectVersionKind::COV_None) { - return; - } - - mlir::IRRewriter builder(mlirModule.getContext()); - unsigned oclcABIVERsion = codeGenOpts.CodeObjectVersion; - auto int32Type = builder.getI32Type(); - - std::optional originalGV; - - mlirModule.walk([&originalGV, codeObjectVersionGlobalOpName]( - mlir::LLVM::GlobalOp globalOp) { - if (globalOp.getName() == codeObjectVersionGlobalOpName) - originalGV = globalOp; - }); - if (originalGV.has_value()) { - mlir::LLVM::GlobalOp originalGVOp = originalGV.value(); - if (originalGVOp.getLinkage() != mlir::LLVM::Linkage::External) { - return; - } - // Update the variable if it is already present in MLIR but it was marked - // as external linkage variable - originalGVOp.setLinkage(mlir::LLVM::Linkage::WeakODR); - originalGVOp.setValueAttr( - builder.getIntegerAttr(int32Type, oclcABIVERsion)); - originalGVOp.setUnnamedAddr(mlir::LLVM::UnnamedAddr::Local); - originalGVOp.setAddrSpace(llvm::AMDGPUAS::CONSTANT_ADDRESS); - originalGVOp.setVisibility_(mlir::LLVM::Visibility::Hidden); - return; - } - - mlir::LLVM::GlobalOp covInfo = builder.create( - /* Location */ mlirModule.getLoc(), /* Type */ int32Type, - /* IsConstant */ true, /* Linkage */ mlir::LLVM::Linkage::WeakODR, - /* Name */ codeObjectVersionGlobalOpName, - /* Value */ builder.getIntegerAttr(int32Type, oclcABIVERsion)); - covInfo.setUnnamedAddr(mlir::LLVM::UnnamedAddr::Local); - covInfo.setAddrSpace(llvm::AMDGPUAS::CONSTANT_ADDRESS); - covInfo.setVisibility_(mlir::LLVM::Visibility::Hidden); - builder.setInsertionPointToStart(mlirModule.getBody()); - builder.insert(covInfo); -} - bool CodeGenAction::beginSourceFileAction() { llvmCtx = std::make_unique(); CompilerInstance &ci = this->getInstance(); @@ -334,7 +278,6 @@ bool CodeGenAction::beginSourceFileAction() { // Add target specific items like dependent libraries, target specific // constants etc. addDependentLibs(*mlirModule, ci); - addAMDGPUSpecificMLIRItems(*mlirModule, ci); timingScopeMLIRGen.stop(); // run the default passes. diff --git a/flang/test/Lower/AMD/code-object-version.f90 b/flang/test/Lower/AMD/code-object-version.f90 deleted file mode 100644 index 4380734d3d1a5..0000000000000 --- a/flang/test/Lower/AMD/code-object-version.f90 +++ /dev/null @@ -1,14 +0,0 @@ -!REQUIRES: amdgpu-registered-target -!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 %s -o - | FileCheck --check-prefix=COV_DEFAULT %s -!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=none %s -o - | FileCheck --check-prefix=COV_NONE %s -!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=4 %s -o - | FileCheck --check-prefix=COV_4 %s -!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=5 %s -o - | FileCheck --check-prefix=COV_5 %s -!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=6 %s -o - | FileCheck --check-prefix=COV_6 %s - -!COV_DEFAULT: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(500 : i32) {addr_space = 4 : i32} : i32 -!COV_NONE-NOT: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(500 : i32) {addr_space = 4 : i32} : i32 -!COV_4: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32 -!COV_5: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(500 : i32) {addr_space = 4 : i32} : i32 -!COV_6: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(600 : i32) {addr_space = 4 : i32} : i32 -subroutine target_simple -end subroutine target_simple diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake index 0facb0b9be0c1..ddd18ef293c8d 100644 --- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake +++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake @@ -215,8 +215,6 @@ function(_get_common_compile_options output_var flags) if(LIBC_CUDA_ROOT) list(APPEND compile_options "--cuda-path=${LIBC_CUDA_ROOT}") endif() - elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) - list(APPEND compile_options "SHELL:-Xclang -mcode-object-version=none") endif() endif() set(${output_var} ${compile_options} PARENT_SCOPE) diff --git a/libcxx/cmake/caches/AMDGPU.cmake b/libcxx/cmake/caches/AMDGPU.cmake index e7bf3f53891f0..d4aa28b4134ea 100644 --- a/libcxx/cmake/caches/AMDGPU.cmake +++ b/libcxx/cmake/caches/AMDGPU.cmake @@ -32,8 +32,6 @@ set(LIBCXX_TEST_CONFIG "amdgpu-libc++-shared.cfg.in" CACHE STRING "") set(LIBCXX_TEST_PARAMS "optimization=none;long_tests=False;executor=amdhsa-loader" CACHE STRING "") # Necessary compile flags for AMDGPU. -set(LIBCXX_ADDITIONAL_COMPILE_FLAGS - "-nogpulib;-flto;-fconvergent-functions;SHELL:-Xclang -mcode-object-version=none" CACHE STRING "") -set(LIBCXXABI_ADDITIONAL_COMPILE_FLAGS - "-nogpulib;-flto;-fconvergent-functions;SHELL:-Xclang -mcode-object-version=none" CACHE STRING "") +set(LIBCXX_ADDITIONAL_COMPILE_FLAGS "-nogpulib;-flto;-fconvergent-functions" CACHE STRING "") +set(LIBCXXABI_ADDITIONAL_COMPILE_FLAGS "-nogpulib;-flto;-fconvergent-functions" CACHE STRING "") set(CMAKE_REQUIRED_FLAGS "-nogpulib" CACHE STRING "") diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt index 8f2a1fd01fabc..07888217b6c68 100644 --- a/offload/DeviceRTL/CMakeLists.txt +++ b/offload/DeviceRTL/CMakeLists.txt @@ -255,7 +255,7 @@ function(compileDeviceRTLLibrary target_name target_triple) endfunction() add_custom_target(omptarget.devicertl.amdgpu) -compileDeviceRTLLibrary(amdgpu amdgcn-amd-amdhsa -Xclang -mcode-object-version=none) +compileDeviceRTLLibrary(amdgpu amdgcn-amd-amdhsa) add_custom_target(omptarget.devicertl.nvptx) compileDeviceRTLLibrary(nvptx nvptx64-nvidia-cuda --cuda-feature=+ptx63)