Revert "[AMDGPU] Make default AMDHSA Code Object Version to be 5 (#65410)" #66060

saiislam · 2023-09-12T09:41:48Z

This reverts commit 0a8d17e.

…m#65410)" This reverts commit 0a8d17e.

llvmbot · 2023-09-12T09:42:54Z

@llvm/pr-subscribers-clang

Changes

This reverts commit `0a8d17e`.

Patch is 1.92 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66060.diff

109 Files Affected:

(modified) clang/include/clang/Driver/Options.td (+2-2)
(modified) clang/lib/Driver/ToolChains/CommonArgs.cpp (+1-1)
(modified) clang/test/CodeGenCUDA/amdgpu-code-object-version.cu (+1-1)
(modified) clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu (+2-2)
(modified) clang/test/CodeGenHIP/default-attributes.hip (+2-2)
(modified) clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (+2-2)
(modified) clang/test/CodeGenOpenCL/builtins-amdgcn.cl (+5-5)
(modified) clang/test/Driver/hip-device-libs.hip (+8-8)
(modified) lld/test/ELF/emulation-amdgpu.s (+1-1)
(modified) lld/test/ELF/lto/amdgcn-oses.ll (+1-1)
(modified) llvm/docs/AMDGPUUsage.rst (+8-7)
(modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll (+29-28)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll (+8-8)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll (+55-53)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll (+484-468)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll (+1623-1578)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll (+32-31)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll (+2077-2013)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll (+32-31)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll (+51-51)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll (+18-18)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir (+39-28)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll (+6-5)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll (+6-5)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll (+4-7)
(modified) llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll (+35-75)
(modified) llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll (+20-46)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll (+30-30)
(modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll (+12-12)
(modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll (+7-8)
(modified) llvm/test/CodeGen/AMDGPU/attributor-noopt.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll (+24-22)
(modified) llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll (+366-384)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/call-argument-types.ll (+1463-1221)
(modified) llvm/test/CodeGen/AMDGPU/call-waitcnt.ll (+47-42)
(modified) llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll (+12-8)
(modified) llvm/test/CodeGen/AMDGPU/cc-update.ll (+160-144)
(modified) llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll (+45-45)
(modified) llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/collapse-endcf.ll (+10-10)
(modified) llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll (+32-28)
(modified) llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll (+20-20)
(modified) llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll (+4-2)
(modified) llvm/test/CodeGen/AMDGPU/ds_read2.ll (+32-28)
(modified) llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll (+6-6)
(modified) llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll (+19-17)
(modified) llvm/test/CodeGen/AMDGPU/fneg-fabs.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll (+12-19)
(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll (+605-550)
(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll (+363-330)
(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll (+363-330)
(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll (+605-550)
(modified) llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll (+19-19)
(modified) llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll (+59-57)
(modified) llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll (+32-25)
(modified) llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll (+112-124)
(modified) llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll (+22-48)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll (+7-8)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll (+7-8)
(modified) llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/lower-kernargs.ll (+118-118)
(modified) llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll (+4-2)
(modified) llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll (+9-1)
(modified) llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll (+44-48)
(modified) llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll (+61-33)
(modified) llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll (+8-8)
(modified) llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll (+72-72)
(modified) llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll (+24-24)
(modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll (+23-10)
(modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/spill-m0.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll (+30-30)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll (+167-157)
(modified) llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll (+15-15)
(modified) llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll (+36-25)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s (-4)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s (-3)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s (-3)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s (-3)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s (-1)
(modified) openmp/libomptarget/test/offloading/thread_limit.c (-1)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 0484575ca482717..a5f5ca29053b43b 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4616,12 +4616,12 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
   NegFlag>, Group;
 
 def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group,
-  HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">,
+  HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
   Visibility<[ClangOption, CC1Option]>,
   Values<"none,2,3,4,5">,
   NormalizedValuesScope<"TargetOptions">,
   NormalizedValues<["COV_None", "COV_2", "COV_3", "COV_4", "COV_5"]>,
-  MarshallingInfoEnum, "COV_5">;
+  MarshallingInfoEnum, "COV_4">;
 
 defm cumode : SimpleMFlag<"cumode",
   "Specify CU wavefront", "Specify WGP wavefront",
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 471ff2541d355b6..1db05b47e11ba6a 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2341,7 +2341,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D,
 
 unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D,
                                            const llvm::opt::ArgList &Args) {
-  unsigned CodeObjVer = 5; // default
+  unsigned CodeObjVer = 4; // default
   if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args))
     StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer);
   return CodeObjVer;
diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
index 62ccc2bd4d05db3..16505b34c4a6e09 100644
--- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
@@ -1,7 +1,7 @@
 // Create module flag for code object version.
 
 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
-// RUN:   -o - %s | FileCheck %s -check-prefix=V5
+// RUN:   -o - %s | FileCheck %s -check-prefix=V4
 
 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
 // RUN:   -mcode-object-version=2 -o - %s | FileCheck -check-prefix=V2 %s
diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
index b885b9991a9bd31..c661b06d57b78d7 100644
--- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
@@ -1,10 +1,10 @@
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
-// RUN:     -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \
+// RUN:     -fcuda-is-device -emit-llvm -o - -x hip %s \
 // RUN:     | FileCheck -check-prefix=PRECOV5 %s
 
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
-// RUN:     -fcuda-is-device -emit-llvm -o - -x hip %s \
+// RUN:     -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \
 // RUN:     | FileCheck -check-prefix=COV5 %s
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip
index 9c9ea521271b99b..80aa1ee0700628f 100644
--- a/clang/test/CodeGenHIP/default-attributes.hip
+++ b/clang/test/CodeGenHIP/default-attributes.hip
@@ -46,11 +46,11 @@ __global__ void kernel() {
 // OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
 // OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
 //.
-// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 // OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
 // OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4}
 //.
-// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 // OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
 // OPT: !2 = !{i32 1, !"wchar_size", i32 4}
 //.
diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index 2cf1286e2b54e8e..e574b1f64c499bd 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -703,7 +703,7 @@ kernel void test_target_features_kernel(global int *i) {
 // GFX900: attributes #8 = { nounwind }
 // GFX900: attributes #9 = { convergent nounwind }
 //.
-// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 // NOCPU: !1 = !{i32 1, !"wchar_size", i32 4}
 // NOCPU: !2 = !{i32 2, i32 0}
 // NOCPU: !3 = !{i32 1, i32 0, i32 1, i32 0}
@@ -721,7 +721,7 @@ kernel void test_target_features_kernel(global int *i) {
 // NOCPU: !15 = !{i32 1}
 // NOCPU: !16 = !{!"int*"}
 //.
-// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 // GFX900: !1 = !{i32 1, !"wchar_size", i32 4}
 // GFX900: !2 = !{i32 2, i32 0}
 // GFX900: !3 = !{!4, !4, i64 0}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index 74b5ef03b52f21e..8938642e3b19f8c 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -599,13 +599,13 @@ void test_get_local_id(int d, global int *out)
 }
 
 // CHECK-LABEL: @test_get_workgroup_size(
-// CHECK: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 12
+// CHECK: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 4
 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 14
+// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 6
 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 16
-// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 8, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
+// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 8
+// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
 void test_get_workgroup_size(int d, global int *out)
 {
 	switch (d) {
diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip
index f049bf3467c611c..71d9554da696b42 100644
--- a/clang/test/Driver/hip-device-libs.hip
+++ b/clang/test/Driver/hip-device-libs.hip
@@ -160,13 +160,13 @@
 // Test default code object version.
 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI4
 
-// Test default code object version with old device library without abi_version_500.bc
-// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
+// Test default code object version with old device library without abi_version_400.bc
+// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
 // RUN:   --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver   \
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5
+// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4
 
 // Test -mcode-object-version=3
 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
@@ -193,12 +193,12 @@
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5
 
-// Test -mcode-object-version=4 with old device library without abi_version_400.bc
-// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
-// RUN:   -mcode-object-version=4 \
+// Test -mcode-object-version=5 with old device library without abi_version_400.bc
+// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
+// RUN:   -mcode-object-version=5 \
 // RUN:   --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver   \
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4
+// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5
 
 // ALL-NOT: error:
 // ALL: {{"[^"]*clang[^"]*"}}
diff --git a/lld/test/ELF/emulation-amdgpu.s b/lld/test/ELF/emulation-amdgpu.s
index 329fb1c69b16665..707f0aeb909efae 100644
--- a/lld/test/ELF/emulation-amdgpu.s
+++ b/lld/test/ELF/emulation-amdgpu.s
@@ -13,7 +13,7 @@
 # CHECK-NEXT:     DataEncoding: LittleEndian (0x1)
 # CHECK-NEXT:     FileVersion: 1
 # CHECK-NEXT:     OS/ABI: AMDGPU_HSA (0x40)
-# CHECK-NEXT:     ABIVersion: 3
+# CHECK-NEXT:     ABIVersion: 2
 # CHECK-NEXT:     Unused: (00 00 00 00 00 00 00)
 # CHECK-NEXT:   }
 # CHECK-NEXT:   Type: Executable (0x2)
diff --git a/lld/test/ELF/lto/amdgcn-oses.ll b/lld/test/ELF/lto/amdgcn-oses.ll
index a70b678ac25141c..a2f25cdd57d87b5 100644
--- a/lld/test/ELF/lto/amdgcn-oses.ll
+++ b/lld/test/ELF/lto/amdgcn-oses.ll
@@ -15,7 +15,7 @@
 ; RUN: llvm-readobj --file-headers %t/mesa3d.so | FileCheck %s --check-prefixes=GCN,NON-AMDHSA,MESA3D
 
 ; AMDHSA: OS/ABI: AMDGPU_HSA (0x40)
-; AMDHSA: ABIVersion: 3
+; AMDHSA: ABIVersion: 2
 
 ; AMDPAL: OS/ABI: AMDGPU_PAL (0x41)
 ; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42)
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 923b8e2e62ab9d3..f733c514ffbee47 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1418,12 +1418,12 @@ The AMDGPU backend uses the following ELF header:
 
   * ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA
     runtime ABI for code object V4. Specify using the Clang option
-    ``-mcode-object-version=4``.
+    ``-mcode-object-version=4``. This is the default code object
+    version if not specified.
 
   * ``ELFABIVERSION_AMDGPU_HSA_V5`` is used to specify the version of AMD HSA
     runtime ABI for code object V5. Specify using the Clang option
-    ``-mcode-object-version=5``. This is the default code object
-    version if not specified.
+    ``-mcode-object-version=5``.
 
   * ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL
     runtime ABI.
@@ -3852,10 +3852,6 @@ same *vendor-name*.
 Code Object V4 Metadata
 +++++++++++++++++++++++
 
-. warning::
-  Code object V4 is not the default code object version emitted by this version
-  of LLVM.
-
 Code object V4 metadata is the same as
 :ref:`amdgpu-amdhsa-code-object-metadata-v3` with the changes and additions
 defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`.
@@ -3886,6 +3882,11 @@ defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`.
 Code Object V5 Metadata
 +++++++++++++++++++++++
 
+.. warning::
+  Code object V5 is not the default code object version emitted by this version
+  of LLVM.
+
+
 Code object V5 metadata is the same as
 :ref:`amdgpu-amdhsa-code-object-metadata-v4` with the changes defined in table
 :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v5`, table
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index d0abd08cc78b8ec..63da86391e5c650 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -34,7 +34,7 @@
 static llvm::cl::opt
     AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidde...

llvmbot · 2023-09-12T09:42:57Z

@llvm/pr-subscribers-backend-amdgpu

Changes

This reverts commit `0a8d17e`.

Patch is 1.92 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66060.diff

109 Files Affected:

(modified) clang/include/clang/Driver/Options.td (+2-2)
(modified) clang/lib/Driver/ToolChains/CommonArgs.cpp (+1-1)
(modified) clang/test/CodeGenCUDA/amdgpu-code-object-version.cu (+1-1)
(modified) clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu (+2-2)
(modified) clang/test/CodeGenHIP/default-attributes.hip (+2-2)
(modified) clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (+2-2)
(modified) clang/test/CodeGenOpenCL/builtins-amdgcn.cl (+5-5)
(modified) clang/test/Driver/hip-device-libs.hip (+8-8)
(modified) lld/test/ELF/emulation-amdgpu.s (+1-1)
(modified) lld/test/ELF/lto/amdgcn-oses.ll (+1-1)
(modified) llvm/docs/AMDGPUUsage.rst (+8-7)
(modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll (+29-28)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll (+8-8)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll (+55-53)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll (+484-468)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll (+1623-1578)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll (+32-31)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll (+2077-2013)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll (+32-31)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll (+51-51)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll (+18-18)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir (+39-28)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll (+6-5)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll (+6-5)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll (+4-7)
(modified) llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll (+35-75)
(modified) llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll (+20-46)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll (+30-30)
(modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll (+12-12)
(modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll (+7-8)
(modified) llvm/test/CodeGen/AMDGPU/attributor-noopt.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll (+24-22)
(modified) llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll (+366-384)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/call-argument-types.ll (+1463-1221)
(modified) llvm/test/CodeGen/AMDGPU/call-waitcnt.ll (+47-42)
(modified) llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll (+12-8)
(modified) llvm/test/CodeGen/AMDGPU/cc-update.ll (+160-144)
(modified) llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll (+45-45)
(modified) llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/collapse-endcf.ll (+10-10)
(modified) llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll (+32-28)
(modified) llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll (+20-20)
(modified) llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll (+4-2)
(modified) llvm/test/CodeGen/AMDGPU/ds_read2.ll (+32-28)
(modified) llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll (+6-6)
(modified) llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll (+19-17)
(modified) llvm/test/CodeGen/AMDGPU/fneg-fabs.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll (+12-19)
(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll (+605-550)
(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll (+363-330)
(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll (+363-330)
(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll (+605-550)
(modified) llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll (+19-19)
(modified) llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll (+59-57)
(modified) llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll (+32-25)
(modified) llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll (+112-124)
(modified) llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll (+22-48)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll (+7-8)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll (+7-8)
(modified) llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/lower-kernargs.ll (+118-118)
(modified) llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll (+4-2)
(modified) llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll (+9-1)
(modified) llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll (+44-48)
(modified) llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll (+61-33)
(modified) llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll (+8-8)
(modified) llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll (+72-72)
(modified) llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll (+24-24)
(modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll (+23-10)
(modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/spill-m0.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll (+30-30)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll (+167-157)
(modified) llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll (+15-15)
(modified) llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll (+36-25)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s (-4)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s (-3)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s (-3)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s (-3)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s (-1)
(modified) openmp/libomptarget/test/offloading/thread_limit.c (-1)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 0484575ca482717..a5f5ca29053b43b 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4616,12 +4616,12 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
   NegFlag>, Group;
 
 def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group,
-  HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">,
+  HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
   Visibility<[ClangOption, CC1Option]>,
   Values<"none,2,3,4,5">,
   NormalizedValuesScope<"TargetOptions">,
   NormalizedValues<["COV_None", "COV_2", "COV_3", "COV_4", "COV_5"]>,
-  MarshallingInfoEnum, "COV_5">;
+  MarshallingInfoEnum, "COV_4">;
 
 defm cumode : SimpleMFlag<"cumode",
   "Specify CU wavefront", "Specify WGP wavefront",
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 471ff2541d355b6..1db05b47e11ba6a 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2341,7 +2341,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D,
 
 unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D,
                                            const llvm::opt::ArgList &Args) {
-  unsigned CodeObjVer = 5; // default
+  unsigned CodeObjVer = 4; // default
   if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args))
     StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer);
   return CodeObjVer;
diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
index 62ccc2bd4d05db3..16505b34c4a6e09 100644
--- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
@@ -1,7 +1,7 @@
 // Create module flag for code object version.
 
 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
-// RUN:   -o - %s | FileCheck %s -check-prefix=V5
+// RUN:   -o - %s | FileCheck %s -check-prefix=V4
 
 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
 // RUN:   -mcode-object-version=2 -o - %s | FileCheck -check-prefix=V2 %s
diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
index b885b9991a9bd31..c661b06d57b78d7 100644
--- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
@@ -1,10 +1,10 @@
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
-// RUN:     -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \
+// RUN:     -fcuda-is-device -emit-llvm -o - -x hip %s \
 // RUN:     | FileCheck -check-prefix=PRECOV5 %s
 
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
-// RUN:     -fcuda-is-device -emit-llvm -o - -x hip %s \
+// RUN:     -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \
 // RUN:     | FileCheck -check-prefix=COV5 %s
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip
index 9c9ea521271b99b..80aa1ee0700628f 100644
--- a/clang/test/CodeGenHIP/default-attributes.hip
+++ b/clang/test/CodeGenHIP/default-attributes.hip
@@ -46,11 +46,11 @@ __global__ void kernel() {
 // OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
 // OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
 //.
-// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 // OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
 // OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4}
 //.
-// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 // OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
 // OPT: !2 = !{i32 1, !"wchar_size", i32 4}
 //.
diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index 2cf1286e2b54e8e..e574b1f64c499bd 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -703,7 +703,7 @@ kernel void test_target_features_kernel(global int *i) {
 // GFX900: attributes #8 = { nounwind }
 // GFX900: attributes #9 = { convergent nounwind }
 //.
-// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 // NOCPU: !1 = !{i32 1, !"wchar_size", i32 4}
 // NOCPU: !2 = !{i32 2, i32 0}
 // NOCPU: !3 = !{i32 1, i32 0, i32 1, i32 0}
@@ -721,7 +721,7 @@ kernel void test_target_features_kernel(global int *i) {
 // NOCPU: !15 = !{i32 1}
 // NOCPU: !16 = !{!"int*"}
 //.
-// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 // GFX900: !1 = !{i32 1, !"wchar_size", i32 4}
 // GFX900: !2 = !{i32 2, i32 0}
 // GFX900: !3 = !{!4, !4, i64 0}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index 74b5ef03b52f21e..8938642e3b19f8c 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -599,13 +599,13 @@ void test_get_local_id(int d, global int *out)
 }
 
 // CHECK-LABEL: @test_get_workgroup_size(
-// CHECK: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 12
+// CHECK: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 4
 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 14
+// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 6
 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 16
-// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 8, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
+// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 8
+// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
 void test_get_workgroup_size(int d, global int *out)
 {
 	switch (d) {
diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip
index f049bf3467c611c..71d9554da696b42 100644
--- a/clang/test/Driver/hip-device-libs.hip
+++ b/clang/test/Driver/hip-device-libs.hip
@@ -160,13 +160,13 @@
 // Test default code object version.
 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI4
 
-// Test default code object version with old device library without abi_version_500.bc
-// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
+// Test default code object version with old device library without abi_version_400.bc
+// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
 // RUN:   --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver   \
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5
+// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4
 
 // Test -mcode-object-version=3
 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
@@ -193,12 +193,12 @@
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5
 
-// Test -mcode-object-version=4 with old device library without abi_version_400.bc
-// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
-// RUN:   -mcode-object-version=4 \
+// Test -mcode-object-version=5 with old device library without abi_version_400.bc
+// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
+// RUN:   -mcode-object-version=5 \
 // RUN:   --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver   \
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4
+// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5
 
 // ALL-NOT: error:
 // ALL: {{"[^"]*clang[^"]*"}}
diff --git a/lld/test/ELF/emulation-amdgpu.s b/lld/test/ELF/emulation-amdgpu.s
index 329fb1c69b16665..707f0aeb909efae 100644
--- a/lld/test/ELF/emulation-amdgpu.s
+++ b/lld/test/ELF/emulation-amdgpu.s
@@ -13,7 +13,7 @@
 # CHECK-NEXT:     DataEncoding: LittleEndian (0x1)
 # CHECK-NEXT:     FileVersion: 1
 # CHECK-NEXT:     OS/ABI: AMDGPU_HSA (0x40)
-# CHECK-NEXT:     ABIVersion: 3
+# CHECK-NEXT:     ABIVersion: 2
 # CHECK-NEXT:     Unused: (00 00 00 00 00 00 00)
 # CHECK-NEXT:   }
 # CHECK-NEXT:   Type: Executable (0x2)
diff --git a/lld/test/ELF/lto/amdgcn-oses.ll b/lld/test/ELF/lto/amdgcn-oses.ll
index a70b678ac25141c..a2f25cdd57d87b5 100644
--- a/lld/test/ELF/lto/amdgcn-oses.ll
+++ b/lld/test/ELF/lto/amdgcn-oses.ll
@@ -15,7 +15,7 @@
 ; RUN: llvm-readobj --file-headers %t/mesa3d.so | FileCheck %s --check-prefixes=GCN,NON-AMDHSA,MESA3D
 
 ; AMDHSA: OS/ABI: AMDGPU_HSA (0x40)
-; AMDHSA: ABIVersion: 3
+; AMDHSA: ABIVersion: 2
 
 ; AMDPAL: OS/ABI: AMDGPU_PAL (0x41)
 ; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42)
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 923b8e2e62ab9d3..f733c514ffbee47 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1418,12 +1418,12 @@ The AMDGPU backend uses the following ELF header:
 
   * ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA
     runtime ABI for code object V4. Specify using the Clang option
-    ``-mcode-object-version=4``.
+    ``-mcode-object-version=4``. This is the default code object
+    version if not specified.
 
   * ``ELFABIVERSION_AMDGPU_HSA_V5`` is used to specify the version of AMD HSA
     runtime ABI for code object V5. Specify using the Clang option
-    ``-mcode-object-version=5``. This is the default code object
-    version if not specified.
+    ``-mcode-object-version=5``.
 
   * ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL
     runtime ABI.
@@ -3852,10 +3852,6 @@ same *vendor-name*.
 Code Object V4 Metadata
 +++++++++++++++++++++++
 
-. warning::
-  Code object V4 is not the default code object version emitted by this version
-  of LLVM.
-
 Code object V4 metadata is the same as
 :ref:`amdgpu-amdhsa-code-object-metadata-v3` with the changes and additions
 defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`.
@@ -3886,6 +3882,11 @@ defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`.
 Code Object V5 Metadata
 +++++++++++++++++++++++
 
+.. warning::
+  Code object V5 is not the default code object version emitted by this version
+  of LLVM.
+
+
 Code object V5 metadata is the same as
 :ref:`amdgpu-amdhsa-code-object-metadata-v4` with the changes defined in table
 :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v5`, table
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index d0abd08cc78b8ec..63da86391e5c650 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -34,7 +34,7 @@
 static llvm::cl::opt
     AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidde...

llvmbot · 2023-09-12T09:42:59Z

@llvm/pr-subscribers-lld-elf

Changes

This reverts commit `0a8d17e`.

Patch is 1.92 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66060.diff

109 Files Affected:

(modified) clang/include/clang/Driver/Options.td (+2-2)
(modified) clang/lib/Driver/ToolChains/CommonArgs.cpp (+1-1)
(modified) clang/test/CodeGenCUDA/amdgpu-code-object-version.cu (+1-1)
(modified) clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu (+2-2)
(modified) clang/test/CodeGenHIP/default-attributes.hip (+2-2)
(modified) clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (+2-2)
(modified) clang/test/CodeGenOpenCL/builtins-amdgcn.cl (+5-5)
(modified) clang/test/Driver/hip-device-libs.hip (+8-8)
(modified) lld/test/ELF/emulation-amdgpu.s (+1-1)
(modified) lld/test/ELF/lto/amdgcn-oses.ll (+1-1)
(modified) llvm/docs/AMDGPUUsage.rst (+8-7)
(modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll (+29-28)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll (+8-8)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll (+55-53)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll (+484-468)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll (+1623-1578)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll (+32-31)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll (+2077-2013)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll (+32-31)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll (+51-51)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll (+18-18)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir (+39-28)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll (+6-5)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll (+6-5)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll (+4-7)
(modified) llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll (+35-75)
(modified) llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll (+20-46)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll (+30-30)
(modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll (+12-12)
(modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll (+7-8)
(modified) llvm/test/CodeGen/AMDGPU/attributor-noopt.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll (+24-22)
(modified) llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll (+366-384)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/call-argument-types.ll (+1463-1221)
(modified) llvm/test/CodeGen/AMDGPU/call-waitcnt.ll (+47-42)
(modified) llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll (+12-8)
(modified) llvm/test/CodeGen/AMDGPU/cc-update.ll (+160-144)
(modified) llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll (+45-45)
(modified) llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/collapse-endcf.ll (+10-10)
(modified) llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll (+32-28)
(modified) llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll (+20-20)
(modified) llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll (+4-2)
(modified) llvm/test/CodeGen/AMDGPU/ds_read2.ll (+32-28)
(modified) llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll (+6-6)
(modified) llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll (+19-17)
(modified) llvm/test/CodeGen/AMDGPU/fneg-fabs.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll (+12-19)
(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll (+605-550)
(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll (+363-330)
(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll (+363-330)
(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll (+605-550)
(modified) llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll (+19-19)
(modified) llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll (+59-57)
(modified) llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll (+32-25)
(modified) llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll (+112-124)
(modified) llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll (+22-48)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll (+7-8)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll (+7-8)
(modified) llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/lower-kernargs.ll (+118-118)
(modified) llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll (+4-2)
(modified) llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll (+9-1)
(modified) llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll (+44-48)
(modified) llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll (+61-33)
(modified) llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll (+8-8)
(modified) llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll (+72-72)
(modified) llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll (+24-24)
(modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll (+23-10)
(modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/spill-m0.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll (+30-30)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll (+167-157)
(modified) llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll (+15-15)
(modified) llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll (+36-25)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s (-4)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s (-3)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s (-3)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s (-3)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s (-1)
(modified) openmp/libomptarget/test/offloading/thread_limit.c (-1)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 0484575ca482717..a5f5ca29053b43b 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4616,12 +4616,12 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
   NegFlag>, Group;
 
 def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group,
-  HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">,
+  HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
   Visibility<[ClangOption, CC1Option]>,
   Values<"none,2,3,4,5">,
   NormalizedValuesScope<"TargetOptions">,
   NormalizedValues<["COV_None", "COV_2", "COV_3", "COV_4", "COV_5"]>,
-  MarshallingInfoEnum, "COV_5">;
+  MarshallingInfoEnum, "COV_4">;
 
 defm cumode : SimpleMFlag<"cumode",
   "Specify CU wavefront", "Specify WGP wavefront",
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 471ff2541d355b6..1db05b47e11ba6a 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2341,7 +2341,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D,
 
 unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D,
                                            const llvm::opt::ArgList &Args) {
-  unsigned CodeObjVer = 5; // default
+  unsigned CodeObjVer = 4; // default
   if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args))
     StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer);
   return CodeObjVer;
diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
index 62ccc2bd4d05db3..16505b34c4a6e09 100644
--- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
@@ -1,7 +1,7 @@
 // Create module flag for code object version.
 
 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
-// RUN:   -o - %s | FileCheck %s -check-prefix=V5
+// RUN:   -o - %s | FileCheck %s -check-prefix=V4
 
 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
 // RUN:   -mcode-object-version=2 -o - %s | FileCheck -check-prefix=V2 %s
diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
index b885b9991a9bd31..c661b06d57b78d7 100644
--- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
@@ -1,10 +1,10 @@
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
-// RUN:     -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \
+// RUN:     -fcuda-is-device -emit-llvm -o - -x hip %s \
 // RUN:     | FileCheck -check-prefix=PRECOV5 %s
 
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
-// RUN:     -fcuda-is-device -emit-llvm -o - -x hip %s \
+// RUN:     -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \
 // RUN:     | FileCheck -check-prefix=COV5 %s
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip
index 9c9ea521271b99b..80aa1ee0700628f 100644
--- a/clang/test/CodeGenHIP/default-attributes.hip
+++ b/clang/test/CodeGenHIP/default-attributes.hip
@@ -46,11 +46,11 @@ __global__ void kernel() {
 // OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
 // OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
 //.
-// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 // OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
 // OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4}
 //.
-// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 // OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
 // OPT: !2 = !{i32 1, !"wchar_size", i32 4}
 //.
diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index 2cf1286e2b54e8e..e574b1f64c499bd 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -703,7 +703,7 @@ kernel void test_target_features_kernel(global int *i) {
 // GFX900: attributes #8 = { nounwind }
 // GFX900: attributes #9 = { convergent nounwind }
 //.
-// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 // NOCPU: !1 = !{i32 1, !"wchar_size", i32 4}
 // NOCPU: !2 = !{i32 2, i32 0}
 // NOCPU: !3 = !{i32 1, i32 0, i32 1, i32 0}
@@ -721,7 +721,7 @@ kernel void test_target_features_kernel(global int *i) {
 // NOCPU: !15 = !{i32 1}
 // NOCPU: !16 = !{!"int*"}
 //.
-// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 // GFX900: !1 = !{i32 1, !"wchar_size", i32 4}
 // GFX900: !2 = !{i32 2, i32 0}
 // GFX900: !3 = !{!4, !4, i64 0}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index 74b5ef03b52f21e..8938642e3b19f8c 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -599,13 +599,13 @@ void test_get_local_id(int d, global int *out)
 }
 
 // CHECK-LABEL: @test_get_workgroup_size(
-// CHECK: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 12
+// CHECK: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 4
 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 14
+// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 6
 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 16
-// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 8, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
+// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 8
+// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
 void test_get_workgroup_size(int d, global int *out)
 {
 	switch (d) {
diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip
index f049bf3467c611c..71d9554da696b42 100644
--- a/clang/test/Driver/hip-device-libs.hip
+++ b/clang/test/Driver/hip-device-libs.hip
@@ -160,13 +160,13 @@
 // Test default code object version.
 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI4
 
-// Test default code object version with old device library without abi_version_500.bc
-// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
+// Test default code object version with old device library without abi_version_400.bc
+// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
 // RUN:   --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver   \
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5
+// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4
 
 // Test -mcode-object-version=3
 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
@@ -193,12 +193,12 @@
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5
 
-// Test -mcode-object-version=4 with old device library without abi_version_400.bc
-// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
-// RUN:   -mcode-object-version=4 \
+// Test -mcode-object-version=5 with old device library without abi_version_400.bc
+// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
+// RUN:   -mcode-object-version=5 \
 // RUN:   --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver   \
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4
+// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5
 
 // ALL-NOT: error:
 // ALL: {{"[^"]*clang[^"]*"}}
diff --git a/lld/test/ELF/emulation-amdgpu.s b/lld/test/ELF/emulation-amdgpu.s
index 329fb1c69b16665..707f0aeb909efae 100644
--- a/lld/test/ELF/emulation-amdgpu.s
+++ b/lld/test/ELF/emulation-amdgpu.s
@@ -13,7 +13,7 @@
 # CHECK-NEXT:     DataEncoding: LittleEndian (0x1)
 # CHECK-NEXT:     FileVersion: 1
 # CHECK-NEXT:     OS/ABI: AMDGPU_HSA (0x40)
-# CHECK-NEXT:     ABIVersion: 3
+# CHECK-NEXT:     ABIVersion: 2
 # CHECK-NEXT:     Unused: (00 00 00 00 00 00 00)
 # CHECK-NEXT:   }
 # CHECK-NEXT:   Type: Executable (0x2)
diff --git a/lld/test/ELF/lto/amdgcn-oses.ll b/lld/test/ELF/lto/amdgcn-oses.ll
index a70b678ac25141c..a2f25cdd57d87b5 100644
--- a/lld/test/ELF/lto/amdgcn-oses.ll
+++ b/lld/test/ELF/lto/amdgcn-oses.ll
@@ -15,7 +15,7 @@
 ; RUN: llvm-readobj --file-headers %t/mesa3d.so | FileCheck %s --check-prefixes=GCN,NON-AMDHSA,MESA3D
 
 ; AMDHSA: OS/ABI: AMDGPU_HSA (0x40)
-; AMDHSA: ABIVersion: 3
+; AMDHSA: ABIVersion: 2
 
 ; AMDPAL: OS/ABI: AMDGPU_PAL (0x41)
 ; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42)
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 923b8e2e62ab9d3..f733c514ffbee47 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1418,12 +1418,12 @@ The AMDGPU backend uses the following ELF header:
 
   * ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA
     runtime ABI for code object V4. Specify using the Clang option
-    ``-mcode-object-version=4``.
+    ``-mcode-object-version=4``. This is the default code object
+    version if not specified.
 
   * ``ELFABIVERSION_AMDGPU_HSA_V5`` is used to specify the version of AMD HSA
     runtime ABI for code object V5. Specify using the Clang option
-    ``-mcode-object-version=5``. This is the default code object
-    version if not specified.
+    ``-mcode-object-version=5``.
 
   * ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL
     runtime ABI.
@@ -3852,10 +3852,6 @@ same *vendor-name*.
 Code Object V4 Metadata
 +++++++++++++++++++++++
 
-. warning::
-  Code object V4 is not the default code object version emitted by this version
-  of LLVM.
-
 Code object V4 metadata is the same as
 :ref:`amdgpu-amdhsa-code-object-metadata-v3` with the changes and additions
 defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`.
@@ -3886,6 +3882,11 @@ defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`.
 Code Object V5 Metadata
 +++++++++++++++++++++++
 
+.. warning::
+  Code object V5 is not the default code object version emitted by this version
+  of LLVM.
+
+
 Code object V5 metadata is the same as
 :ref:`amdgpu-amdhsa-code-object-metadata-v4` with the changes defined in table
 :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v5`, table
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index d0abd08cc78b8ec..63da86391e5c650 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -34,7 +34,7 @@
 static llvm::cl::opt
     AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidde...

llvmbot · 2023-09-12T09:42:59Z

@llvm/pr-subscribers-llvm-globalisel

Changes

This reverts commit `0a8d17e`.

Patch is 1.92 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66060.diff

109 Files Affected:

(modified) clang/include/clang/Driver/Options.td (+2-2)
(modified) clang/lib/Driver/ToolChains/CommonArgs.cpp (+1-1)
(modified) clang/test/CodeGenCUDA/amdgpu-code-object-version.cu (+1-1)
(modified) clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu (+2-2)
(modified) clang/test/CodeGenHIP/default-attributes.hip (+2-2)
(modified) clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (+2-2)
(modified) clang/test/CodeGenOpenCL/builtins-amdgcn.cl (+5-5)
(modified) clang/test/Driver/hip-device-libs.hip (+8-8)
(modified) lld/test/ELF/emulation-amdgpu.s (+1-1)
(modified) lld/test/ELF/lto/amdgcn-oses.ll (+1-1)
(modified) llvm/docs/AMDGPUUsage.rst (+8-7)
(modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll (+29-28)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll (+8-8)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll (+55-53)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll (+484-468)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll (+1623-1578)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll (+32-31)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll (+2077-2013)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll (+32-31)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll (+51-51)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll (+18-18)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir (+39-28)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll (+6-5)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll (+6-5)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll (+4-7)
(modified) llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll (+35-75)
(modified) llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll (+20-46)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll (+30-30)
(modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll (+12-12)
(modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll (+7-8)
(modified) llvm/test/CodeGen/AMDGPU/attributor-noopt.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll (+24-22)
(modified) llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll (+366-384)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/call-argument-types.ll (+1463-1221)
(modified) llvm/test/CodeGen/AMDGPU/call-waitcnt.ll (+47-42)
(modified) llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll (+12-8)
(modified) llvm/test/CodeGen/AMDGPU/cc-update.ll (+160-144)
(modified) llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll (+45-45)
(modified) llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/collapse-endcf.ll (+10-10)
(modified) llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll (+32-28)
(modified) llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll (+20-20)
(modified) llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll (+4-2)
(modified) llvm/test/CodeGen/AMDGPU/ds_read2.ll (+32-28)
(modified) llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll (+6-6)
(modified) llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll (+19-17)
(modified) llvm/test/CodeGen/AMDGPU/fneg-fabs.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll (+12-19)
(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll (+605-550)
(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll (+363-330)
(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll (+363-330)
(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll (+605-550)
(modified) llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll (+19-19)
(modified) llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll (+59-57)
(modified) llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll (+32-25)
(modified) llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll (+112-124)
(modified) llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll (+22-48)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll (+7-8)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll (+7-8)
(modified) llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/lower-kernargs.ll (+118-118)
(modified) llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll (+4-2)
(modified) llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll (+9-1)
(modified) llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll (+44-48)
(modified) llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll (+61-33)
(modified) llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll (+8-8)
(modified) llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll (+72-72)
(modified) llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll (+24-24)
(modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll (+23-10)
(modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/spill-m0.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll (+30-30)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll (+167-157)
(modified) llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll (+15-15)
(modified) llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll (+36-25)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s (-4)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s (-3)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s (-3)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s (-3)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s (-1)
(modified) openmp/libomptarget/test/offloading/thread_limit.c (-1)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 0484575ca482717..a5f5ca29053b43b 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4616,12 +4616,12 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
   NegFlag>, Group;
 
 def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group,
-  HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">,
+  HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
   Visibility<[ClangOption, CC1Option]>,
   Values<"none,2,3,4,5">,
   NormalizedValuesScope<"TargetOptions">,
   NormalizedValues<["COV_None", "COV_2", "COV_3", "COV_4", "COV_5"]>,
-  MarshallingInfoEnum, "COV_5">;
+  MarshallingInfoEnum, "COV_4">;
 
 defm cumode : SimpleMFlag<"cumode",
   "Specify CU wavefront", "Specify WGP wavefront",
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 471ff2541d355b6..1db05b47e11ba6a 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2341,7 +2341,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D,
 
 unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D,
                                            const llvm::opt::ArgList &Args) {
-  unsigned CodeObjVer = 5; // default
+  unsigned CodeObjVer = 4; // default
   if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args))
     StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer);
   return CodeObjVer;
diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
index 62ccc2bd4d05db3..16505b34c4a6e09 100644
--- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
@@ -1,7 +1,7 @@
 // Create module flag for code object version.
 
 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
-// RUN:   -o - %s | FileCheck %s -check-prefix=V5
+// RUN:   -o - %s | FileCheck %s -check-prefix=V4
 
 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
 // RUN:   -mcode-object-version=2 -o - %s | FileCheck -check-prefix=V2 %s
diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
index b885b9991a9bd31..c661b06d57b78d7 100644
--- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
@@ -1,10 +1,10 @@
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
-// RUN:     -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \
+// RUN:     -fcuda-is-device -emit-llvm -o - -x hip %s \
 // RUN:     | FileCheck -check-prefix=PRECOV5 %s
 
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
-// RUN:     -fcuda-is-device -emit-llvm -o - -x hip %s \
+// RUN:     -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \
 // RUN:     | FileCheck -check-prefix=COV5 %s
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip
index 9c9ea521271b99b..80aa1ee0700628f 100644
--- a/clang/test/CodeGenHIP/default-attributes.hip
+++ b/clang/test/CodeGenHIP/default-attributes.hip
@@ -46,11 +46,11 @@ __global__ void kernel() {
 // OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
 // OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
 //.
-// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 // OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
 // OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4}
 //.
-// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 // OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
 // OPT: !2 = !{i32 1, !"wchar_size", i32 4}
 //.
diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index 2cf1286e2b54e8e..e574b1f64c499bd 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -703,7 +703,7 @@ kernel void test_target_features_kernel(global int *i) {
 // GFX900: attributes #8 = { nounwind }
 // GFX900: attributes #9 = { convergent nounwind }
 //.
-// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 // NOCPU: !1 = !{i32 1, !"wchar_size", i32 4}
 // NOCPU: !2 = !{i32 2, i32 0}
 // NOCPU: !3 = !{i32 1, i32 0, i32 1, i32 0}
@@ -721,7 +721,7 @@ kernel void test_target_features_kernel(global int *i) {
 // NOCPU: !15 = !{i32 1}
 // NOCPU: !16 = !{!"int*"}
 //.
-// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 // GFX900: !1 = !{i32 1, !"wchar_size", i32 4}
 // GFX900: !2 = !{i32 2, i32 0}
 // GFX900: !3 = !{!4, !4, i64 0}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index 74b5ef03b52f21e..8938642e3b19f8c 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -599,13 +599,13 @@ void test_get_local_id(int d, global int *out)
 }
 
 // CHECK-LABEL: @test_get_workgroup_size(
-// CHECK: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 12
+// CHECK: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 4
 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 14
+// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 6
 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 16
-// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 8, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
+// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 8
+// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
 void test_get_workgroup_size(int d, global int *out)
 {
 	switch (d) {
diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip
index f049bf3467c611c..71d9554da696b42 100644
--- a/clang/test/Driver/hip-device-libs.hip
+++ b/clang/test/Driver/hip-device-libs.hip
@@ -160,13 +160,13 @@
 // Test default code object version.
 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI4
 
-// Test default code object version with old device library without abi_version_500.bc
-// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
+// Test default code object version with old device library without abi_version_400.bc
+// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
 // RUN:   --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver   \
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5
+// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4
 
 // Test -mcode-object-version=3
 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
@@ -193,12 +193,12 @@
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5
 
-// Test -mcode-object-version=4 with old device library without abi_version_400.bc
-// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
-// RUN:   -mcode-object-version=4 \
+// Test -mcode-object-version=5 with old device library without abi_version_400.bc
+// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
+// RUN:   -mcode-object-version=5 \
 // RUN:   --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver   \
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4
+// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5
 
 // ALL-NOT: error:
 // ALL: {{"[^"]*clang[^"]*"}}
diff --git a/lld/test/ELF/emulation-amdgpu.s b/lld/test/ELF/emulation-amdgpu.s
index 329fb1c69b16665..707f0aeb909efae 100644
--- a/lld/test/ELF/emulation-amdgpu.s
+++ b/lld/test/ELF/emulation-amdgpu.s
@@ -13,7 +13,7 @@
 # CHECK-NEXT:     DataEncoding: LittleEndian (0x1)
 # CHECK-NEXT:     FileVersion: 1
 # CHECK-NEXT:     OS/ABI: AMDGPU_HSA (0x40)
-# CHECK-NEXT:     ABIVersion: 3
+# CHECK-NEXT:     ABIVersion: 2
 # CHECK-NEXT:     Unused: (00 00 00 00 00 00 00)
 # CHECK-NEXT:   }
 # CHECK-NEXT:   Type: Executable (0x2)
diff --git a/lld/test/ELF/lto/amdgcn-oses.ll b/lld/test/ELF/lto/amdgcn-oses.ll
index a70b678ac25141c..a2f25cdd57d87b5 100644
--- a/lld/test/ELF/lto/amdgcn-oses.ll
+++ b/lld/test/ELF/lto/amdgcn-oses.ll
@@ -15,7 +15,7 @@
 ; RUN: llvm-readobj --file-headers %t/mesa3d.so | FileCheck %s --check-prefixes=GCN,NON-AMDHSA,MESA3D
 
 ; AMDHSA: OS/ABI: AMDGPU_HSA (0x40)
-; AMDHSA: ABIVersion: 3
+; AMDHSA: ABIVersion: 2
 
 ; AMDPAL: OS/ABI: AMDGPU_PAL (0x41)
 ; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42)
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 923b8e2e62ab9d3..f733c514ffbee47 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1418,12 +1418,12 @@ The AMDGPU backend uses the following ELF header:
 
   * ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA
     runtime ABI for code object V4. Specify using the Clang option
-    ``-mcode-object-version=4``.
+    ``-mcode-object-version=4``. This is the default code object
+    version if not specified.
 
   * ``ELFABIVERSION_AMDGPU_HSA_V5`` is used to specify the version of AMD HSA
     runtime ABI for code object V5. Specify using the Clang option
-    ``-mcode-object-version=5``. This is the default code object
-    version if not specified.
+    ``-mcode-object-version=5``.
 
   * ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL
     runtime ABI.
@@ -3852,10 +3852,6 @@ same *vendor-name*.
 Code Object V4 Metadata
 +++++++++++++++++++++++
 
-. warning::
-  Code object V4 is not the default code object version emitted by this version
-  of LLVM.
-
 Code object V4 metadata is the same as
 :ref:`amdgpu-amdhsa-code-object-metadata-v3` with the changes and additions
 defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`.
@@ -3886,6 +3882,11 @@ defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`.
 Code Object V5 Metadata
 +++++++++++++++++++++++
 
+.. warning::
+  Code object V5 is not the default code object version emitted by this version
+  of LLVM.
+
+
 Code object V5 metadata is the same as
 :ref:`amdgpu-amdhsa-code-object-metadata-v4` with the changes defined in table
 :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v5`, table
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index d0abd08cc78b8ec..63da86391e5c650 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -34,7 +34,7 @@
 static llvm::cl::opt
     AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidde...

llvmbot · 2023-09-12T09:42:59Z

@llvm/pr-subscribers-clang-driver

Changes

This reverts commit `0a8d17e`.

Patch is 1.92 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66060.diff

109 Files Affected:

(modified) clang/include/clang/Driver/Options.td (+2-2)
(modified) clang/lib/Driver/ToolChains/CommonArgs.cpp (+1-1)
(modified) clang/test/CodeGenCUDA/amdgpu-code-object-version.cu (+1-1)
(modified) clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu (+2-2)
(modified) clang/test/CodeGenHIP/default-attributes.hip (+2-2)
(modified) clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (+2-2)
(modified) clang/test/CodeGenOpenCL/builtins-amdgcn.cl (+5-5)
(modified) clang/test/Driver/hip-device-libs.hip (+8-8)
(modified) lld/test/ELF/emulation-amdgpu.s (+1-1)
(modified) lld/test/ELF/lto/amdgcn-oses.ll (+1-1)
(modified) llvm/docs/AMDGPUUsage.rst (+8-7)
(modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll (+29-28)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll (+8-8)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll (+55-53)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll (+484-468)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll (+1623-1578)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll (+32-31)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll (+2077-2013)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll (+32-31)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll (+51-51)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll (+18-18)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir (+39-28)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll (+6-5)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll (+6-5)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll (+4-7)
(modified) llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll (+35-75)
(modified) llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll (+20-46)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll (+30-30)
(modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll (+12-12)
(modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll (+7-8)
(modified) llvm/test/CodeGen/AMDGPU/attributor-noopt.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll (+24-22)
(modified) llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll (+366-384)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/call-argument-types.ll (+1463-1221)
(modified) llvm/test/CodeGen/AMDGPU/call-waitcnt.ll (+47-42)
(modified) llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll (+12-8)
(modified) llvm/test/CodeGen/AMDGPU/cc-update.ll (+160-144)
(modified) llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll (+45-45)
(modified) llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/collapse-endcf.ll (+10-10)
(modified) llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll (+32-28)
(modified) llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll (+20-20)
(modified) llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll (+4-2)
(modified) llvm/test/CodeGen/AMDGPU/ds_read2.ll (+32-28)
(modified) llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll (+6-6)
(modified) llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll (+19-17)
(modified) llvm/test/CodeGen/AMDGPU/fneg-fabs.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll (+12-19)
(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll (+605-550)
(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll (+363-330)
(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll (+363-330)
(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll (+605-550)
(modified) llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll (+19-19)
(modified) llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll (+59-57)
(modified) llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll (+32-25)
(modified) llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll (+112-124)
(modified) llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll (+22-48)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll (+7-8)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll (+7-8)
(modified) llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/lower-kernargs.ll (+118-118)
(modified) llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll (+4-2)
(modified) llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll (+9-1)
(modified) llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll (+44-48)
(modified) llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll (+61-33)
(modified) llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll (+8-8)
(modified) llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll (+72-72)
(modified) llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll (+24-24)
(modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll (+23-10)
(modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/spill-m0.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll (+30-30)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll (+3-3)
(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll (+167-157)
(modified) llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll (+15-15)
(modified) llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll (+36-25)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s (-4)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s (-3)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s (-3)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s (-3)
(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s (-1)
(modified) openmp/libomptarget/test/offloading/thread_limit.c (-1)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 0484575ca482717..a5f5ca29053b43b 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4616,12 +4616,12 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
   NegFlag>, Group;
 
 def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group,
-  HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">,
+  HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
   Visibility<[ClangOption, CC1Option]>,
   Values<"none,2,3,4,5">,
   NormalizedValuesScope<"TargetOptions">,
   NormalizedValues<["COV_None", "COV_2", "COV_3", "COV_4", "COV_5"]>,
-  MarshallingInfoEnum, "COV_5">;
+  MarshallingInfoEnum, "COV_4">;
 
 defm cumode : SimpleMFlag<"cumode",
   "Specify CU wavefront", "Specify WGP wavefront",
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 471ff2541d355b6..1db05b47e11ba6a 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2341,7 +2341,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D,
 
 unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D,
                                            const llvm::opt::ArgList &Args) {
-  unsigned CodeObjVer = 5; // default
+  unsigned CodeObjVer = 4; // default
   if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args))
     StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer);
   return CodeObjVer;
diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
index 62ccc2bd4d05db3..16505b34c4a6e09 100644
--- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
@@ -1,7 +1,7 @@
 // Create module flag for code object version.
 
 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
-// RUN:   -o - %s | FileCheck %s -check-prefix=V5
+// RUN:   -o - %s | FileCheck %s -check-prefix=V4
 
 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
 // RUN:   -mcode-object-version=2 -o - %s | FileCheck -check-prefix=V2 %s
diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
index b885b9991a9bd31..c661b06d57b78d7 100644
--- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
@@ -1,10 +1,10 @@
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
-// RUN:     -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \
+// RUN:     -fcuda-is-device -emit-llvm -o - -x hip %s \
 // RUN:     | FileCheck -check-prefix=PRECOV5 %s
 
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
-// RUN:     -fcuda-is-device -emit-llvm -o - -x hip %s \
+// RUN:     -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \
 // RUN:     | FileCheck -check-prefix=COV5 %s
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip
index 9c9ea521271b99b..80aa1ee0700628f 100644
--- a/clang/test/CodeGenHIP/default-attributes.hip
+++ b/clang/test/CodeGenHIP/default-attributes.hip
@@ -46,11 +46,11 @@ __global__ void kernel() {
 // OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
 // OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
 //.
-// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 // OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
 // OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4}
 //.
-// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 // OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
 // OPT: !2 = !{i32 1, !"wchar_size", i32 4}
 //.
diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index 2cf1286e2b54e8e..e574b1f64c499bd 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -703,7 +703,7 @@ kernel void test_target_features_kernel(global int *i) {
 // GFX900: attributes #8 = { nounwind }
 // GFX900: attributes #9 = { convergent nounwind }
 //.
-// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 // NOCPU: !1 = !{i32 1, !"wchar_size", i32 4}
 // NOCPU: !2 = !{i32 2, i32 0}
 // NOCPU: !3 = !{i32 1, i32 0, i32 1, i32 0}
@@ -721,7 +721,7 @@ kernel void test_target_features_kernel(global int *i) {
 // NOCPU: !15 = !{i32 1}
 // NOCPU: !16 = !{!"int*"}
 //.
-// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 // GFX900: !1 = !{i32 1, !"wchar_size", i32 4}
 // GFX900: !2 = !{i32 2, i32 0}
 // GFX900: !3 = !{!4, !4, i64 0}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index 74b5ef03b52f21e..8938642e3b19f8c 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -599,13 +599,13 @@ void test_get_local_id(int d, global int *out)
 }
 
 // CHECK-LABEL: @test_get_workgroup_size(
-// CHECK: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 12
+// CHECK: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 4
 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 14
+// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 6
 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 16
-// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 8, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
+// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 8
+// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
 void test_get_workgroup_size(int d, global int *out)
 {
 	switch (d) {
diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip
index f049bf3467c611c..71d9554da696b42 100644
--- a/clang/test/Driver/hip-device-libs.hip
+++ b/clang/test/Driver/hip-device-libs.hip
@@ -160,13 +160,13 @@
 // Test default code object version.
 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI4
 
-// Test default code object version with old device library without abi_version_500.bc
-// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
+// Test default code object version with old device library without abi_version_400.bc
+// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
 // RUN:   --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver   \
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5
+// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4
 
 // Test -mcode-object-version=3
 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
@@ -193,12 +193,12 @@
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5
 
-// Test -mcode-object-version=4 with old device library without abi_version_400.bc
-// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
-// RUN:   -mcode-object-version=4 \
+// Test -mcode-object-version=5 with old device library without abi_version_400.bc
+// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
+// RUN:   -mcode-object-version=5 \
 // RUN:   --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver   \
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4
+// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5
 
 // ALL-NOT: error:
 // ALL: {{"[^"]*clang[^"]*"}}
diff --git a/lld/test/ELF/emulation-amdgpu.s b/lld/test/ELF/emulation-amdgpu.s
index 329fb1c69b16665..707f0aeb909efae 100644
--- a/lld/test/ELF/emulation-amdgpu.s
+++ b/lld/test/ELF/emulation-amdgpu.s
@@ -13,7 +13,7 @@
 # CHECK-NEXT:     DataEncoding: LittleEndian (0x1)
 # CHECK-NEXT:     FileVersion: 1
 # CHECK-NEXT:     OS/ABI: AMDGPU_HSA (0x40)
-# CHECK-NEXT:     ABIVersion: 3
+# CHECK-NEXT:     ABIVersion: 2
 # CHECK-NEXT:     Unused: (00 00 00 00 00 00 00)
 # CHECK-NEXT:   }
 # CHECK-NEXT:   Type: Executable (0x2)
diff --git a/lld/test/ELF/lto/amdgcn-oses.ll b/lld/test/ELF/lto/amdgcn-oses.ll
index a70b678ac25141c..a2f25cdd57d87b5 100644
--- a/lld/test/ELF/lto/amdgcn-oses.ll
+++ b/lld/test/ELF/lto/amdgcn-oses.ll
@@ -15,7 +15,7 @@
 ; RUN: llvm-readobj --file-headers %t/mesa3d.so | FileCheck %s --check-prefixes=GCN,NON-AMDHSA,MESA3D
 
 ; AMDHSA: OS/ABI: AMDGPU_HSA (0x40)
-; AMDHSA: ABIVersion: 3
+; AMDHSA: ABIVersion: 2
 
 ; AMDPAL: OS/ABI: AMDGPU_PAL (0x41)
 ; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42)
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 923b8e2e62ab9d3..f733c514ffbee47 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1418,12 +1418,12 @@ The AMDGPU backend uses the following ELF header:
 
   * ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA
     runtime ABI for code object V4. Specify using the Clang option
-    ``-mcode-object-version=4``.
+    ``-mcode-object-version=4``. This is the default code object
+    version if not specified.
 
   * ``ELFABIVERSION_AMDGPU_HSA_V5`` is used to specify the version of AMD HSA
     runtime ABI for code object V5. Specify using the Clang option
-    ``-mcode-object-version=5``. This is the default code object
-    version if not specified.
+    ``-mcode-object-version=5``.
 
   * ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL
     runtime ABI.
@@ -3852,10 +3852,6 @@ same *vendor-name*.
 Code Object V4 Metadata
 +++++++++++++++++++++++
 
-. warning::
-  Code object V4 is not the default code object version emitted by this version
-  of LLVM.
-
 Code object V4 metadata is the same as
 :ref:`amdgpu-amdhsa-code-object-metadata-v3` with the changes and additions
 defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`.
@@ -3886,6 +3882,11 @@ defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`.
 Code Object V5 Metadata
 +++++++++++++++++++++++
 
+.. warning::
+  Code object V5 is not the default code object version emitted by this version
+  of LLVM.
+
+
 Code object V5 metadata is the same as
 :ref:`amdgpu-amdhsa-code-object-metadata-v4` with the changes defined in table
 :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v5`, table
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index d0abd08cc78b8ec..63da86391e5c650 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -34,7 +34,7 @@
 static llvm::cl::opt
     AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidde...

llvmbot · 2023-09-12T09:44:22Z

@llvm/pr-subscribers-llvm-globalisel
Changes

This reverts commit 0a8d17e.

Patch is 1.92 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66060.diff

109 Files Affected:

(modified) clang/include/clang/Driver/Options.td (+2-2)

(modified) clang/lib/Driver/ToolChains/CommonArgs.cpp (+1-1)

(modified) clang/test/CodeGenCUDA/amdgpu-code-object-version.cu (+1-1)

(modified) clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu (+2-2)

(modified) clang/test/CodeGenHIP/default-attributes.hip (+2-2)

(modified) clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (+2-2)

(modified) clang/test/CodeGenOpenCL/builtins-amdgcn.cl (+5-5)

(modified) clang/test/Driver/hip-device-libs.hip (+8-8)

(modified) lld/test/ELF/emulation-amdgpu.s (+1-1)

(modified) lld/test/ELF/lto/amdgcn-oses.ll (+1-1)

(modified) llvm/docs/AMDGPUUsage.rst (+8-7)

(modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+2-2)

(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll (+1-1)

(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll (+29-28)

(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll (+8-8)

(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll (+2-2)

(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll (+55-53)

(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll (+484-468)

(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll (+1623-1578)

(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll (+32-31)

(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll (+2077-2013)

(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll (+2-2)

(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll (+32-31)

(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll (+51-51)

(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll (+18-18)

(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll (+2-2)

(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir (+39-28)

(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll (+6-5)

(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll (+6-5)

(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll (+4-7)

(modified) llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll (+35-75)

(modified) llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll (+2-2)

(modified) llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll (+20-46)

(modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll (+30-30)

(modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll (+12-12)

(modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll (+7-8)

(modified) llvm/test/CodeGen/AMDGPU/attributor-noopt.ll (+1-1)

(modified) llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll (+24-22)

(modified) llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll (+366-384)

(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll (+3-3)

(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll (+1-1)

(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll (+1-1)

(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll (+1-1)

(modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll (+1-1)

(modified) llvm/test/CodeGen/AMDGPU/call-argument-types.ll (+1463-1221)

(modified) llvm/test/CodeGen/AMDGPU/call-waitcnt.ll (+47-42)

(modified) llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll (+12-8)

(modified) llvm/test/CodeGen/AMDGPU/cc-update.ll (+160-144)

(modified) llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll (+45-45)

(modified) llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll (+1-1)

(modified) llvm/test/CodeGen/AMDGPU/collapse-endcf.ll (+10-10)

(modified) llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll (+32-28)

(modified) llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll (+20-20)

(modified) llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll (+4-2)

(modified) llvm/test/CodeGen/AMDGPU/ds_read2.ll (+32-28)

(modified) llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll (+6-6)

(modified) llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll (+1-1)

(modified) llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll (+19-17)

(modified) llvm/test/CodeGen/AMDGPU/fneg-fabs.ll (+1-1)

(modified) llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll (+12-19)

(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll (+605-550)

(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll (+363-330)

(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll (+363-330)

(modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll (+605-550)

(modified) llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll (+19-19)

(modified) llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll (+59-57)

(modified) llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll (+32-25)

(modified) llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll (+112-124)

(modified) llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll (+22-48)

(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll (+7-8)

(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll (+7-8)

(modified) llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll (+1-1)

(modified) llvm/test/CodeGen/AMDGPU/lower-kernargs.ll (+118-118)

(modified) llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll (+4-2)

(modified) llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll (+9-1)

(modified) llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll (+44-48)

(modified) llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll (+61-33)

(modified) llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll (+8-8)

(modified) llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll (+72-72)

(modified) llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll (+1-1)

(modified) llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll (+24-24)

(modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll (+2-2)

(modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll (+23-10)

(modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll (+3-3)

(modified) llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll (+1-1)

(modified) llvm/test/CodeGen/AMDGPU/spill-m0.ll (+1-1)

(modified) llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll (+30-30)

(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll (+3-3)

(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll (+3-3)

(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll (+3-3)

(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll (+3-3)

(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll (+3-3)

(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll (+3-3)

(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll (+3-3)

(modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll (+3-3)

(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll (+3-3)

(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll (+3-3)

(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll (+3-3)

(modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll (+1-1)

(modified) llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll (+167-157)

(modified) llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll (+15-15)

(modified) llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll (+1-1)

(modified) llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll (+36-25)

(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s (-4)

(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s (-3)

(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s (-3)

(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s (-3)

(modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s (-1)

(modified) openmp/libomptarget/test/offloading/thread_limit.c (-1)
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 0484575ca482717..a5f5ca29053b43b 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4616,12 +4616,12 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
  NegFlag>, Group;

def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group,
-  HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">,
+  HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
  Visibility<[ClangOption, CC1Option]>,
  Values<"none,2,3,4,5">,
  NormalizedValuesScope<"TargetOptions">,
  NormalizedValues<["COV_None", "COV_2", "COV_3", "COV_4", "COV_5"]>,
-  MarshallingInfoEnum, "COV_5">;
+  MarshallingInfoEnum, "COV_4">;

defm cumode : SimpleMFlag<"cumode",
  "Specify CU wavefront", "Specify WGP wavefront",
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 471ff2541d355b6..1db05b47e11ba6a 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2341,7 +2341,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D,

unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D,
                                           const llvm::opt::ArgList &Args) {
-  unsigned CodeObjVer = 5; // default
+  unsigned CodeObjVer = 4; // default
  if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args))
    StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer);
  return CodeObjVer;
diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
index 62ccc2bd4d05db3..16505b34c4a6e09 100644
--- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
@@ -1,7 +1,7 @@
// Create module flag for code object version.

// RUN: �lang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
-// RUN:   -o -  | FileCheck  -check-prefix=V5
+// RUN:   -o -  | FileCheck  -check-prefix=V4

// RUN: �lang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN:   -mcode-object-version=2 -o -  | FileCheck -check-prefix=V2 
diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
index b885b9991a9bd31..c661b06d57b78d7 100644
--- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
@@ -1,10 +1,10 @@
// RUN: �lang_cc1 -triple amdgcn-amd-amdhsa \
-// RUN:     -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip  \
+// RUN:     -fcuda-is-device -emit-llvm -o - -x hip  \
// RUN:     | FileCheck -check-prefix=PRECOV5 


// RUN: �lang_cc1 -triple amdgcn-amd-amdhsa \
-// RUN:     -fcuda-is-device -emit-llvm -o - -x hip  \
+// RUN:     -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip  \
// RUN:     | FileCheck -check-prefix=COV5 

// RUN: �lang_cc1 -triple amdgcn-amd-amdhsa \
diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip
index 9c9ea521271b99b..80aa1ee0700628f 100644
--- a/clang/test/CodeGenHIP/default-attributes.hip
+++ b/clang/test/CodeGenHIP/default-attributes.hip
@@ -46,11 +46,11 @@ __global__ void kernel() {
// OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
//.
-// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
// OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
// OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4}
//.
-// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
// OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
// OPT: !2 = !{i32 1, !"wchar_size", i32 4}
//.
diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index 2cf1286e2b54e8e..e574b1f64c499bd 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -703,7 +703,7 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900: attributes #8 = { nounwind }
// GFX900: attributes #9 = { convergent nounwind }
//.
-// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
// NOCPU: !1 = !{i32 1, !"wchar_size", i32 4}
// NOCPU: !2 = !{i32 2, i32 0}
// NOCPU: !3 = !{i32 1, i32 0, i32 1, i32 0}
@@ -721,7 +721,7 @@ kernel void test_target_features_kernel(global int *i) {
// NOCPU: !15 = !{i32 1}
// NOCPU: !16 = !{!"int*"}
//.
-// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
// GFX900: !1 = !{i32 1, !"wchar_size", i32 4}
// GFX900: !2 = !{i32 2, i32 0}
// GFX900: !3 = !{!4, !4, i64 0}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index 74b5ef03b52f21e..8938642e3b19f8c 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -599,13 +599,13 @@ void test_get_local_id(int d, global int *out)
}

// CHECK-LABEL: @test_get_workgroup_size(
-// CHECK: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// CHECK: getelementptr inbounds i8, ptr addrspace(4)