-
Notifications
You must be signed in to change notification settings - Fork 13.5k
Revert "[AMDGPU] Make default AMDHSA Code Object Version to be 5 (#65410)" #66060
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang ChangesThis reverts commit 0a8d17e.Patch is 1.92 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66060.diff 109 Files Affected:
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 0484575ca482717..a5f5ca29053b43b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4616,12 +4616,12 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee", NegFlag>, Group; def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group, - HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">, + HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">, Visibility<[ClangOption, CC1Option]>, Values<"none,2,3,4,5">, NormalizedValuesScope<"TargetOptions">, NormalizedValues<["COV_None", "COV_2", "COV_3", "COV_4", "COV_5"]>, - MarshallingInfoEnum, "COV_5">; + MarshallingInfoEnum, "COV_4">; defm cumode : SimpleMFlag<"cumode", "Specify CU wavefront", "Specify WGP wavefront", diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 471ff2541d355b6..1db05b47e11ba6a 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2341,7 +2341,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D, unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D, const llvm::opt::ArgList &Args) { - unsigned CodeObjVer = 5; // default + unsigned CodeObjVer = 4; // default if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer); return CodeObjVer; diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu index 62ccc2bd4d05db3..16505b34c4a6e09 100644 --- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu +++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu @@ -1,7 +1,7 @@ // Create module flag for code object version. // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ -// RUN: -o - %s | FileCheck %s -check-prefix=V5 +// RUN: -o - %s | FileCheck %s -check-prefix=V4 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -mcode-object-version=2 -o - %s | FileCheck -check-prefix=V2 %s diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu index b885b9991a9bd31..c661b06d57b78d7 100644 --- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu +++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu @@ -1,10 +1,10 @@ // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ -// RUN: -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \ +// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefix=PRECOV5 %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ -// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ +// RUN: -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefix=COV5 %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip index 9c9ea521271b99b..80aa1ee0700628f 100644 --- a/clang/test/CodeGenHIP/default-attributes.hip +++ b/clang/test/CodeGenHIP/default-attributes.hip @@ -46,11 +46,11 @@ __global__ void kernel() { // OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" } // OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } //. -// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} +// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} // OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} // OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4} //. -// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} +// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} // OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} // OPT: !2 = !{i32 1, !"wchar_size", i32 4} //. diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index 2cf1286e2b54e8e..e574b1f64c499bd 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -703,7 +703,7 @@ kernel void test_target_features_kernel(global int *i) { // GFX900: attributes #8 = { nounwind } // GFX900: attributes #9 = { convergent nounwind } //. -// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} +// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} // NOCPU: !1 = !{i32 1, !"wchar_size", i32 4} // NOCPU: !2 = !{i32 2, i32 0} // NOCPU: !3 = !{i32 1, i32 0, i32 1, i32 0} @@ -721,7 +721,7 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU: !15 = !{i32 1} // NOCPU: !16 = !{!"int*"} //. -// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} +// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} // GFX900: !1 = !{i32 1, !"wchar_size", i32 4} // GFX900: !2 = !{i32 2, i32 0} // GFX900: !3 = !{!4, !4, i64 0} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 74b5ef03b52f21e..8938642e3b19f8c 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -599,13 +599,13 @@ void test_get_local_id(int d, global int *out) } // CHECK-LABEL: @test_get_workgroup_size( -// CHECK: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 12 +// CHECK: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 4 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef -// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 14 +// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 6 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef -// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 16 -// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 8, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef +// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 8 +// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef void test_get_workgroup_size(int d, global int *out) { switch (d) { diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip index f049bf3467c611c..71d9554da696b42 100644 --- a/clang/test/Driver/hip-device-libs.hip +++ b/clang/test/Driver/hip-device-libs.hip @@ -160,13 +160,13 @@ // Test default code object version. // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5 +// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI4 -// Test default code object version with old device library without abi_version_500.bc -// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ +// Test default code object version with old device library without abi_version_400.bc +// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ // RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5 +// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4 // Test -mcode-object-version=3 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ @@ -193,12 +193,12 @@ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5 -// Test -mcode-object-version=4 with old device library without abi_version_400.bc -// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ -// RUN: -mcode-object-version=4 \ +// Test -mcode-object-version=5 with old device library without abi_version_400.bc +// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ +// RUN: -mcode-object-version=5 \ // RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4 +// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5 // ALL-NOT: error: // ALL: {{"[^"]*clang[^"]*"}} diff --git a/lld/test/ELF/emulation-amdgpu.s b/lld/test/ELF/emulation-amdgpu.s index 329fb1c69b16665..707f0aeb909efae 100644 --- a/lld/test/ELF/emulation-amdgpu.s +++ b/lld/test/ELF/emulation-amdgpu.s @@ -13,7 +13,7 @@ # CHECK-NEXT: DataEncoding: LittleEndian (0x1) # CHECK-NEXT: FileVersion: 1 # CHECK-NEXT: OS/ABI: AMDGPU_HSA (0x40) -# CHECK-NEXT: ABIVersion: 3 +# CHECK-NEXT: ABIVersion: 2 # CHECK-NEXT: Unused: (00 00 00 00 00 00 00) # CHECK-NEXT: } # CHECK-NEXT: Type: Executable (0x2) diff --git a/lld/test/ELF/lto/amdgcn-oses.ll b/lld/test/ELF/lto/amdgcn-oses.ll index a70b678ac25141c..a2f25cdd57d87b5 100644 --- a/lld/test/ELF/lto/amdgcn-oses.ll +++ b/lld/test/ELF/lto/amdgcn-oses.ll @@ -15,7 +15,7 @@ ; RUN: llvm-readobj --file-headers %t/mesa3d.so | FileCheck %s --check-prefixes=GCN,NON-AMDHSA,MESA3D ; AMDHSA: OS/ABI: AMDGPU_HSA (0x40) -; AMDHSA: ABIVersion: 3 +; AMDHSA: ABIVersion: 2 ; AMDPAL: OS/ABI: AMDGPU_PAL (0x41) ; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42) diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 923b8e2e62ab9d3..f733c514ffbee47 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1418,12 +1418,12 @@ The AMDGPU backend uses the following ELF header: * ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA runtime ABI for code object V4. Specify using the Clang option - ``-mcode-object-version=4``. + ``-mcode-object-version=4``. This is the default code object + version if not specified. * ``ELFABIVERSION_AMDGPU_HSA_V5`` is used to specify the version of AMD HSA runtime ABI for code object V5. Specify using the Clang option - ``-mcode-object-version=5``. This is the default code object - version if not specified. + ``-mcode-object-version=5``. * ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL runtime ABI. @@ -3852,10 +3852,6 @@ same *vendor-name*. Code Object V4 Metadata +++++++++++++++++++++++ -. warning:: - Code object V4 is not the default code object version emitted by this version - of LLVM. - Code object V4 metadata is the same as :ref:`amdgpu-amdhsa-code-object-metadata-v3` with the changes and additions defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`. @@ -3886,6 +3882,11 @@ defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`. Code Object V5 Metadata +++++++++++++++++++++++ +.. warning:: + Code object V5 is not the default code object version emitted by this version + of LLVM. + + Code object V5 metadata is the same as :ref:`amdgpu-amdhsa-code-object-metadata-v4` with the changes defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v5`, table diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index d0abd08cc78b8ec..63da86391e5c650 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -34,7 +34,7 @@ static llvm::cl::opt AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidde... |
@llvm/pr-subscribers-backend-amdgpu ChangesThis reverts commit 0a8d17e.Patch is 1.92 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66060.diff 109 Files Affected:
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 0484575ca482717..a5f5ca29053b43b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4616,12 +4616,12 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee", NegFlag>, Group; def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group, - HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">, + HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">, Visibility<[ClangOption, CC1Option]>, Values<"none,2,3,4,5">, NormalizedValuesScope<"TargetOptions">, NormalizedValues<["COV_None", "COV_2", "COV_3", "COV_4", "COV_5"]>, - MarshallingInfoEnum, "COV_5">; + MarshallingInfoEnum, "COV_4">; defm cumode : SimpleMFlag<"cumode", "Specify CU wavefront", "Specify WGP wavefront", diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 471ff2541d355b6..1db05b47e11ba6a 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2341,7 +2341,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D, unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D, const llvm::opt::ArgList &Args) { - unsigned CodeObjVer = 5; // default + unsigned CodeObjVer = 4; // default if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer); return CodeObjVer; diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu index 62ccc2bd4d05db3..16505b34c4a6e09 100644 --- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu +++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu @@ -1,7 +1,7 @@ // Create module flag for code object version. // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ -// RUN: -o - %s | FileCheck %s -check-prefix=V5 +// RUN: -o - %s | FileCheck %s -check-prefix=V4 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -mcode-object-version=2 -o - %s | FileCheck -check-prefix=V2 %s diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu index b885b9991a9bd31..c661b06d57b78d7 100644 --- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu +++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu @@ -1,10 +1,10 @@ // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ -// RUN: -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \ +// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefix=PRECOV5 %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ -// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ +// RUN: -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefix=COV5 %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip index 9c9ea521271b99b..80aa1ee0700628f 100644 --- a/clang/test/CodeGenHIP/default-attributes.hip +++ b/clang/test/CodeGenHIP/default-attributes.hip @@ -46,11 +46,11 @@ __global__ void kernel() { // OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" } // OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } //. -// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} +// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} // OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} // OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4} //. -// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} +// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} // OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} // OPT: !2 = !{i32 1, !"wchar_size", i32 4} //. diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index 2cf1286e2b54e8e..e574b1f64c499bd 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -703,7 +703,7 @@ kernel void test_target_features_kernel(global int *i) { // GFX900: attributes #8 = { nounwind } // GFX900: attributes #9 = { convergent nounwind } //. -// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} +// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} // NOCPU: !1 = !{i32 1, !"wchar_size", i32 4} // NOCPU: !2 = !{i32 2, i32 0} // NOCPU: !3 = !{i32 1, i32 0, i32 1, i32 0} @@ -721,7 +721,7 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU: !15 = !{i32 1} // NOCPU: !16 = !{!"int*"} //. -// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} +// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} // GFX900: !1 = !{i32 1, !"wchar_size", i32 4} // GFX900: !2 = !{i32 2, i32 0} // GFX900: !3 = !{!4, !4, i64 0} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 74b5ef03b52f21e..8938642e3b19f8c 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -599,13 +599,13 @@ void test_get_local_id(int d, global int *out) } // CHECK-LABEL: @test_get_workgroup_size( -// CHECK: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 12 +// CHECK: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 4 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef -// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 14 +// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 6 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef -// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 16 -// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 8, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef +// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 8 +// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef void test_get_workgroup_size(int d, global int *out) { switch (d) { diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip index f049bf3467c611c..71d9554da696b42 100644 --- a/clang/test/Driver/hip-device-libs.hip +++ b/clang/test/Driver/hip-device-libs.hip @@ -160,13 +160,13 @@ // Test default code object version. // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5 +// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI4 -// Test default code object version with old device library without abi_version_500.bc -// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ +// Test default code object version with old device library without abi_version_400.bc +// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ // RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5 +// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4 // Test -mcode-object-version=3 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ @@ -193,12 +193,12 @@ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5 -// Test -mcode-object-version=4 with old device library without abi_version_400.bc -// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ -// RUN: -mcode-object-version=4 \ +// Test -mcode-object-version=5 with old device library without abi_version_400.bc +// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ +// RUN: -mcode-object-version=5 \ // RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4 +// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5 // ALL-NOT: error: // ALL: {{"[^"]*clang[^"]*"}} diff --git a/lld/test/ELF/emulation-amdgpu.s b/lld/test/ELF/emulation-amdgpu.s index 329fb1c69b16665..707f0aeb909efae 100644 --- a/lld/test/ELF/emulation-amdgpu.s +++ b/lld/test/ELF/emulation-amdgpu.s @@ -13,7 +13,7 @@ # CHECK-NEXT: DataEncoding: LittleEndian (0x1) # CHECK-NEXT: FileVersion: 1 # CHECK-NEXT: OS/ABI: AMDGPU_HSA (0x40) -# CHECK-NEXT: ABIVersion: 3 +# CHECK-NEXT: ABIVersion: 2 # CHECK-NEXT: Unused: (00 00 00 00 00 00 00) # CHECK-NEXT: } # CHECK-NEXT: Type: Executable (0x2) diff --git a/lld/test/ELF/lto/amdgcn-oses.ll b/lld/test/ELF/lto/amdgcn-oses.ll index a70b678ac25141c..a2f25cdd57d87b5 100644 --- a/lld/test/ELF/lto/amdgcn-oses.ll +++ b/lld/test/ELF/lto/amdgcn-oses.ll @@ -15,7 +15,7 @@ ; RUN: llvm-readobj --file-headers %t/mesa3d.so | FileCheck %s --check-prefixes=GCN,NON-AMDHSA,MESA3D ; AMDHSA: OS/ABI: AMDGPU_HSA (0x40) -; AMDHSA: ABIVersion: 3 +; AMDHSA: ABIVersion: 2 ; AMDPAL: OS/ABI: AMDGPU_PAL (0x41) ; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42) diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 923b8e2e62ab9d3..f733c514ffbee47 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1418,12 +1418,12 @@ The AMDGPU backend uses the following ELF header: * ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA runtime ABI for code object V4. Specify using the Clang option - ``-mcode-object-version=4``. + ``-mcode-object-version=4``. This is the default code object + version if not specified. * ``ELFABIVERSION_AMDGPU_HSA_V5`` is used to specify the version of AMD HSA runtime ABI for code object V5. Specify using the Clang option - ``-mcode-object-version=5``. This is the default code object - version if not specified. + ``-mcode-object-version=5``. * ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL runtime ABI. @@ -3852,10 +3852,6 @@ same *vendor-name*. Code Object V4 Metadata +++++++++++++++++++++++ -. warning:: - Code object V4 is not the default code object version emitted by this version - of LLVM. - Code object V4 metadata is the same as :ref:`amdgpu-amdhsa-code-object-metadata-v3` with the changes and additions defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`. @@ -3886,6 +3882,11 @@ defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`. Code Object V5 Metadata +++++++++++++++++++++++ +.. warning:: + Code object V5 is not the default code object version emitted by this version + of LLVM. + + Code object V5 metadata is the same as :ref:`amdgpu-amdhsa-code-object-metadata-v4` with the changes defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v5`, table diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index d0abd08cc78b8ec..63da86391e5c650 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -34,7 +34,7 @@ static llvm::cl::opt AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidde... |
@llvm/pr-subscribers-lld-elf ChangesThis reverts commit 0a8d17e.Patch is 1.92 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66060.diff 109 Files Affected:
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 0484575ca482717..a5f5ca29053b43b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4616,12 +4616,12 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee", NegFlag>, Group; def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group, - HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">, + HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">, Visibility<[ClangOption, CC1Option]>, Values<"none,2,3,4,5">, NormalizedValuesScope<"TargetOptions">, NormalizedValues<["COV_None", "COV_2", "COV_3", "COV_4", "COV_5"]>, - MarshallingInfoEnum, "COV_5">; + MarshallingInfoEnum, "COV_4">; defm cumode : SimpleMFlag<"cumode", "Specify CU wavefront", "Specify WGP wavefront", diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 471ff2541d355b6..1db05b47e11ba6a 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2341,7 +2341,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D, unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D, const llvm::opt::ArgList &Args) { - unsigned CodeObjVer = 5; // default + unsigned CodeObjVer = 4; // default if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer); return CodeObjVer; diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu index 62ccc2bd4d05db3..16505b34c4a6e09 100644 --- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu +++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu @@ -1,7 +1,7 @@ // Create module flag for code object version. // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ -// RUN: -o - %s | FileCheck %s -check-prefix=V5 +// RUN: -o - %s | FileCheck %s -check-prefix=V4 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -mcode-object-version=2 -o - %s | FileCheck -check-prefix=V2 %s diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu index b885b9991a9bd31..c661b06d57b78d7 100644 --- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu +++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu @@ -1,10 +1,10 @@ // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ -// RUN: -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \ +// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefix=PRECOV5 %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ -// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ +// RUN: -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefix=COV5 %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip index 9c9ea521271b99b..80aa1ee0700628f 100644 --- a/clang/test/CodeGenHIP/default-attributes.hip +++ b/clang/test/CodeGenHIP/default-attributes.hip @@ -46,11 +46,11 @@ __global__ void kernel() { // OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" } // OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } //. -// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} +// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} // OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} // OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4} //. -// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} +// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} // OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} // OPT: !2 = !{i32 1, !"wchar_size", i32 4} //. diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index 2cf1286e2b54e8e..e574b1f64c499bd 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -703,7 +703,7 @@ kernel void test_target_features_kernel(global int *i) { // GFX900: attributes #8 = { nounwind } // GFX900: attributes #9 = { convergent nounwind } //. -// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} +// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} // NOCPU: !1 = !{i32 1, !"wchar_size", i32 4} // NOCPU: !2 = !{i32 2, i32 0} // NOCPU: !3 = !{i32 1, i32 0, i32 1, i32 0} @@ -721,7 +721,7 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU: !15 = !{i32 1} // NOCPU: !16 = !{!"int*"} //. -// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} +// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} // GFX900: !1 = !{i32 1, !"wchar_size", i32 4} // GFX900: !2 = !{i32 2, i32 0} // GFX900: !3 = !{!4, !4, i64 0} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 74b5ef03b52f21e..8938642e3b19f8c 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -599,13 +599,13 @@ void test_get_local_id(int d, global int *out) } // CHECK-LABEL: @test_get_workgroup_size( -// CHECK: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 12 +// CHECK: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 4 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef -// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 14 +// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 6 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef -// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 16 -// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 8, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef +// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 8 +// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef void test_get_workgroup_size(int d, global int *out) { switch (d) { diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip index f049bf3467c611c..71d9554da696b42 100644 --- a/clang/test/Driver/hip-device-libs.hip +++ b/clang/test/Driver/hip-device-libs.hip @@ -160,13 +160,13 @@ // Test default code object version. // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5 +// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI4 -// Test default code object version with old device library without abi_version_500.bc -// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ +// Test default code object version with old device library without abi_version_400.bc +// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ // RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5 +// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4 // Test -mcode-object-version=3 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ @@ -193,12 +193,12 @@ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5 -// Test -mcode-object-version=4 with old device library without abi_version_400.bc -// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ -// RUN: -mcode-object-version=4 \ +// Test -mcode-object-version=5 with old device library without abi_version_400.bc +// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ +// RUN: -mcode-object-version=5 \ // RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4 +// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5 // ALL-NOT: error: // ALL: {{"[^"]*clang[^"]*"}} diff --git a/lld/test/ELF/emulation-amdgpu.s b/lld/test/ELF/emulation-amdgpu.s index 329fb1c69b16665..707f0aeb909efae 100644 --- a/lld/test/ELF/emulation-amdgpu.s +++ b/lld/test/ELF/emulation-amdgpu.s @@ -13,7 +13,7 @@ # CHECK-NEXT: DataEncoding: LittleEndian (0x1) # CHECK-NEXT: FileVersion: 1 # CHECK-NEXT: OS/ABI: AMDGPU_HSA (0x40) -# CHECK-NEXT: ABIVersion: 3 +# CHECK-NEXT: ABIVersion: 2 # CHECK-NEXT: Unused: (00 00 00 00 00 00 00) # CHECK-NEXT: } # CHECK-NEXT: Type: Executable (0x2) diff --git a/lld/test/ELF/lto/amdgcn-oses.ll b/lld/test/ELF/lto/amdgcn-oses.ll index a70b678ac25141c..a2f25cdd57d87b5 100644 --- a/lld/test/ELF/lto/amdgcn-oses.ll +++ b/lld/test/ELF/lto/amdgcn-oses.ll @@ -15,7 +15,7 @@ ; RUN: llvm-readobj --file-headers %t/mesa3d.so | FileCheck %s --check-prefixes=GCN,NON-AMDHSA,MESA3D ; AMDHSA: OS/ABI: AMDGPU_HSA (0x40) -; AMDHSA: ABIVersion: 3 +; AMDHSA: ABIVersion: 2 ; AMDPAL: OS/ABI: AMDGPU_PAL (0x41) ; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42) diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 923b8e2e62ab9d3..f733c514ffbee47 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1418,12 +1418,12 @@ The AMDGPU backend uses the following ELF header: * ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA runtime ABI for code object V4. Specify using the Clang option - ``-mcode-object-version=4``. + ``-mcode-object-version=4``. This is the default code object + version if not specified. * ``ELFABIVERSION_AMDGPU_HSA_V5`` is used to specify the version of AMD HSA runtime ABI for code object V5. Specify using the Clang option - ``-mcode-object-version=5``. This is the default code object - version if not specified. + ``-mcode-object-version=5``. * ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL runtime ABI. @@ -3852,10 +3852,6 @@ same *vendor-name*. Code Object V4 Metadata +++++++++++++++++++++++ -. warning:: - Code object V4 is not the default code object version emitted by this version - of LLVM. - Code object V4 metadata is the same as :ref:`amdgpu-amdhsa-code-object-metadata-v3` with the changes and additions defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`. @@ -3886,6 +3882,11 @@ defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`. Code Object V5 Metadata +++++++++++++++++++++++ +.. warning:: + Code object V5 is not the default code object version emitted by this version + of LLVM. + + Code object V5 metadata is the same as :ref:`amdgpu-amdhsa-code-object-metadata-v4` with the changes defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v5`, table diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index d0abd08cc78b8ec..63da86391e5c650 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -34,7 +34,7 @@ static llvm::cl::opt AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidde... |
@llvm/pr-subscribers-llvm-globalisel ChangesThis reverts commit 0a8d17e.Patch is 1.92 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66060.diff 109 Files Affected:
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 0484575ca482717..a5f5ca29053b43b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4616,12 +4616,12 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee", NegFlag>, Group; def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group, - HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">, + HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">, Visibility<[ClangOption, CC1Option]>, Values<"none,2,3,4,5">, NormalizedValuesScope<"TargetOptions">, NormalizedValues<["COV_None", "COV_2", "COV_3", "COV_4", "COV_5"]>, - MarshallingInfoEnum, "COV_5">; + MarshallingInfoEnum, "COV_4">; defm cumode : SimpleMFlag<"cumode", "Specify CU wavefront", "Specify WGP wavefront", diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 471ff2541d355b6..1db05b47e11ba6a 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2341,7 +2341,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D, unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D, const llvm::opt::ArgList &Args) { - unsigned CodeObjVer = 5; // default + unsigned CodeObjVer = 4; // default if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer); return CodeObjVer; diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu index 62ccc2bd4d05db3..16505b34c4a6e09 100644 --- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu +++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu @@ -1,7 +1,7 @@ // Create module flag for code object version. // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ -// RUN: -o - %s | FileCheck %s -check-prefix=V5 +// RUN: -o - %s | FileCheck %s -check-prefix=V4 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -mcode-object-version=2 -o - %s | FileCheck -check-prefix=V2 %s diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu index b885b9991a9bd31..c661b06d57b78d7 100644 --- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu +++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu @@ -1,10 +1,10 @@ // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ -// RUN: -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \ +// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefix=PRECOV5 %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ -// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ +// RUN: -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefix=COV5 %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip index 9c9ea521271b99b..80aa1ee0700628f 100644 --- a/clang/test/CodeGenHIP/default-attributes.hip +++ b/clang/test/CodeGenHIP/default-attributes.hip @@ -46,11 +46,11 @@ __global__ void kernel() { // OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" } // OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } //. -// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} +// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} // OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} // OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4} //. -// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} +// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} // OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} // OPT: !2 = !{i32 1, !"wchar_size", i32 4} //. diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index 2cf1286e2b54e8e..e574b1f64c499bd 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -703,7 +703,7 @@ kernel void test_target_features_kernel(global int *i) { // GFX900: attributes #8 = { nounwind } // GFX900: attributes #9 = { convergent nounwind } //. -// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} +// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} // NOCPU: !1 = !{i32 1, !"wchar_size", i32 4} // NOCPU: !2 = !{i32 2, i32 0} // NOCPU: !3 = !{i32 1, i32 0, i32 1, i32 0} @@ -721,7 +721,7 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU: !15 = !{i32 1} // NOCPU: !16 = !{!"int*"} //. -// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} +// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} // GFX900: !1 = !{i32 1, !"wchar_size", i32 4} // GFX900: !2 = !{i32 2, i32 0} // GFX900: !3 = !{!4, !4, i64 0} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 74b5ef03b52f21e..8938642e3b19f8c 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -599,13 +599,13 @@ void test_get_local_id(int d, global int *out) } // CHECK-LABEL: @test_get_workgroup_size( -// CHECK: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 12 +// CHECK: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 4 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef -// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 14 +// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 6 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef -// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 16 -// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 8, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef +// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 8 +// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef void test_get_workgroup_size(int d, global int *out) { switch (d) { diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip index f049bf3467c611c..71d9554da696b42 100644 --- a/clang/test/Driver/hip-device-libs.hip +++ b/clang/test/Driver/hip-device-libs.hip @@ -160,13 +160,13 @@ // Test default code object version. // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5 +// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI4 -// Test default code object version with old device library without abi_version_500.bc -// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ +// Test default code object version with old device library without abi_version_400.bc +// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ // RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5 +// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4 // Test -mcode-object-version=3 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ @@ -193,12 +193,12 @@ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5 -// Test -mcode-object-version=4 with old device library without abi_version_400.bc -// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ -// RUN: -mcode-object-version=4 \ +// Test -mcode-object-version=5 with old device library without abi_version_400.bc +// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ +// RUN: -mcode-object-version=5 \ // RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4 +// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5 // ALL-NOT: error: // ALL: {{"[^"]*clang[^"]*"}} diff --git a/lld/test/ELF/emulation-amdgpu.s b/lld/test/ELF/emulation-amdgpu.s index 329fb1c69b16665..707f0aeb909efae 100644 --- a/lld/test/ELF/emulation-amdgpu.s +++ b/lld/test/ELF/emulation-amdgpu.s @@ -13,7 +13,7 @@ # CHECK-NEXT: DataEncoding: LittleEndian (0x1) # CHECK-NEXT: FileVersion: 1 # CHECK-NEXT: OS/ABI: AMDGPU_HSA (0x40) -# CHECK-NEXT: ABIVersion: 3 +# CHECK-NEXT: ABIVersion: 2 # CHECK-NEXT: Unused: (00 00 00 00 00 00 00) # CHECK-NEXT: } # CHECK-NEXT: Type: Executable (0x2) diff --git a/lld/test/ELF/lto/amdgcn-oses.ll b/lld/test/ELF/lto/amdgcn-oses.ll index a70b678ac25141c..a2f25cdd57d87b5 100644 --- a/lld/test/ELF/lto/amdgcn-oses.ll +++ b/lld/test/ELF/lto/amdgcn-oses.ll @@ -15,7 +15,7 @@ ; RUN: llvm-readobj --file-headers %t/mesa3d.so | FileCheck %s --check-prefixes=GCN,NON-AMDHSA,MESA3D ; AMDHSA: OS/ABI: AMDGPU_HSA (0x40) -; AMDHSA: ABIVersion: 3 +; AMDHSA: ABIVersion: 2 ; AMDPAL: OS/ABI: AMDGPU_PAL (0x41) ; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42) diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 923b8e2e62ab9d3..f733c514ffbee47 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1418,12 +1418,12 @@ The AMDGPU backend uses the following ELF header: * ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA runtime ABI for code object V4. Specify using the Clang option - ``-mcode-object-version=4``. + ``-mcode-object-version=4``. This is the default code object + version if not specified. * ``ELFABIVERSION_AMDGPU_HSA_V5`` is used to specify the version of AMD HSA runtime ABI for code object V5. Specify using the Clang option - ``-mcode-object-version=5``. This is the default code object - version if not specified. + ``-mcode-object-version=5``. * ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL runtime ABI. @@ -3852,10 +3852,6 @@ same *vendor-name*. Code Object V4 Metadata +++++++++++++++++++++++ -. warning:: - Code object V4 is not the default code object version emitted by this version - of LLVM. - Code object V4 metadata is the same as :ref:`amdgpu-amdhsa-code-object-metadata-v3` with the changes and additions defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`. @@ -3886,6 +3882,11 @@ defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`. Code Object V5 Metadata +++++++++++++++++++++++ +.. warning:: + Code object V5 is not the default code object version emitted by this version + of LLVM. + + Code object V5 metadata is the same as :ref:`amdgpu-amdhsa-code-object-metadata-v4` with the changes defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v5`, table diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index d0abd08cc78b8ec..63da86391e5c650 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -34,7 +34,7 @@ static llvm::cl::opt AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidde... |
@llvm/pr-subscribers-clang-driver ChangesThis reverts commit 0a8d17e.Patch is 1.92 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66060.diff 109 Files Affected:
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 0484575ca482717..a5f5ca29053b43b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4616,12 +4616,12 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee", NegFlag>, Group; def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group, - HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">, + HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">, Visibility<[ClangOption, CC1Option]>, Values<"none,2,3,4,5">, NormalizedValuesScope<"TargetOptions">, NormalizedValues<["COV_None", "COV_2", "COV_3", "COV_4", "COV_5"]>, - MarshallingInfoEnum, "COV_5">; + MarshallingInfoEnum, "COV_4">; defm cumode : SimpleMFlag<"cumode", "Specify CU wavefront", "Specify WGP wavefront", diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 471ff2541d355b6..1db05b47e11ba6a 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2341,7 +2341,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D, unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D, const llvm::opt::ArgList &Args) { - unsigned CodeObjVer = 5; // default + unsigned CodeObjVer = 4; // default if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer); return CodeObjVer; diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu index 62ccc2bd4d05db3..16505b34c4a6e09 100644 --- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu +++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu @@ -1,7 +1,7 @@ // Create module flag for code object version. // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ -// RUN: -o - %s | FileCheck %s -check-prefix=V5 +// RUN: -o - %s | FileCheck %s -check-prefix=V4 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -mcode-object-version=2 -o - %s | FileCheck -check-prefix=V2 %s diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu index b885b9991a9bd31..c661b06d57b78d7 100644 --- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu +++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu @@ -1,10 +1,10 @@ // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ -// RUN: -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \ +// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefix=PRECOV5 %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ -// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ +// RUN: -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefix=COV5 %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip index 9c9ea521271b99b..80aa1ee0700628f 100644 --- a/clang/test/CodeGenHIP/default-attributes.hip +++ b/clang/test/CodeGenHIP/default-attributes.hip @@ -46,11 +46,11 @@ __global__ void kernel() { // OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" } // OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } //. -// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} +// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} // OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} // OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4} //. -// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} +// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} // OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} // OPT: !2 = !{i32 1, !"wchar_size", i32 4} //. diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index 2cf1286e2b54e8e..e574b1f64c499bd 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -703,7 +703,7 @@ kernel void test_target_features_kernel(global int *i) { // GFX900: attributes #8 = { nounwind } // GFX900: attributes #9 = { convergent nounwind } //. -// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} +// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} // NOCPU: !1 = !{i32 1, !"wchar_size", i32 4} // NOCPU: !2 = !{i32 2, i32 0} // NOCPU: !3 = !{i32 1, i32 0, i32 1, i32 0} @@ -721,7 +721,7 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU: !15 = !{i32 1} // NOCPU: !16 = !{!"int*"} //. -// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} +// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} // GFX900: !1 = !{i32 1, !"wchar_size", i32 4} // GFX900: !2 = !{i32 2, i32 0} // GFX900: !3 = !{!4, !4, i64 0} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 74b5ef03b52f21e..8938642e3b19f8c 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -599,13 +599,13 @@ void test_get_local_id(int d, global int *out) } // CHECK-LABEL: @test_get_workgroup_size( -// CHECK: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 12 +// CHECK: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 4 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef -// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 14 +// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 6 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef -// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 16 -// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 8, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef +// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 8 +// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef void test_get_workgroup_size(int d, global int *out) { switch (d) { diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip index f049bf3467c611c..71d9554da696b42 100644 --- a/clang/test/Driver/hip-device-libs.hip +++ b/clang/test/Driver/hip-device-libs.hip @@ -160,13 +160,13 @@ // Test default code object version. // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5 +// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI4 -// Test default code object version with old device library without abi_version_500.bc -// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ +// Test default code object version with old device library without abi_version_400.bc +// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ // RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5 +// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4 // Test -mcode-object-version=3 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ @@ -193,12 +193,12 @@ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5 -// Test -mcode-object-version=4 with old device library without abi_version_400.bc -// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ -// RUN: -mcode-object-version=4 \ +// Test -mcode-object-version=5 with old device library without abi_version_400.bc +// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ +// RUN: -mcode-object-version=5 \ // RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4 +// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5 // ALL-NOT: error: // ALL: {{"[^"]*clang[^"]*"}} diff --git a/lld/test/ELF/emulation-amdgpu.s b/lld/test/ELF/emulation-amdgpu.s index 329fb1c69b16665..707f0aeb909efae 100644 --- a/lld/test/ELF/emulation-amdgpu.s +++ b/lld/test/ELF/emulation-amdgpu.s @@ -13,7 +13,7 @@ # CHECK-NEXT: DataEncoding: LittleEndian (0x1) # CHECK-NEXT: FileVersion: 1 # CHECK-NEXT: OS/ABI: AMDGPU_HSA (0x40) -# CHECK-NEXT: ABIVersion: 3 +# CHECK-NEXT: ABIVersion: 2 # CHECK-NEXT: Unused: (00 00 00 00 00 00 00) # CHECK-NEXT: } # CHECK-NEXT: Type: Executable (0x2) diff --git a/lld/test/ELF/lto/amdgcn-oses.ll b/lld/test/ELF/lto/amdgcn-oses.ll index a70b678ac25141c..a2f25cdd57d87b5 100644 --- a/lld/test/ELF/lto/amdgcn-oses.ll +++ b/lld/test/ELF/lto/amdgcn-oses.ll @@ -15,7 +15,7 @@ ; RUN: llvm-readobj --file-headers %t/mesa3d.so | FileCheck %s --check-prefixes=GCN,NON-AMDHSA,MESA3D ; AMDHSA: OS/ABI: AMDGPU_HSA (0x40) -; AMDHSA: ABIVersion: 3 +; AMDHSA: ABIVersion: 2 ; AMDPAL: OS/ABI: AMDGPU_PAL (0x41) ; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42) diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 923b8e2e62ab9d3..f733c514ffbee47 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1418,12 +1418,12 @@ The AMDGPU backend uses the following ELF header: * ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA runtime ABI for code object V4. Specify using the Clang option - ``-mcode-object-version=4``. + ``-mcode-object-version=4``. This is the default code object + version if not specified. * ``ELFABIVERSION_AMDGPU_HSA_V5`` is used to specify the version of AMD HSA runtime ABI for code object V5. Specify using the Clang option - ``-mcode-object-version=5``. This is the default code object - version if not specified. + ``-mcode-object-version=5``. * ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL runtime ABI. @@ -3852,10 +3852,6 @@ same *vendor-name*. Code Object V4 Metadata +++++++++++++++++++++++ -. warning:: - Code object V4 is not the default code object version emitted by this version - of LLVM. - Code object V4 metadata is the same as :ref:`amdgpu-amdhsa-code-object-metadata-v3` with the changes and additions defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`. @@ -3886,6 +3882,11 @@ defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`. Code Object V5 Metadata +++++++++++++++++++++++ +.. warning:: + Code object V5 is not the default code object version emitted by this version + of LLVM. + + Code object V5 metadata is the same as :ref:`amdgpu-amdhsa-code-object-metadata-v4` with the changes defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v5`, table diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index d0abd08cc78b8ec..63da86391e5c650 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -34,7 +34,7 @@ static llvm::cl::opt AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidde... |
Error: Command failed due to missing milestone. |
Error: Command failed due to missing milestone. |
Error: Command failed due to missing milestone. |
Error: Command failed due to missing milestone. |
Error: Command failed due to missing milestone. |
…m#65410)" (llvm#66060) This reverts commit 0a8d17e.
This reverts commit 0a8d17e.