From 076953d250624b8fdf094f8bce260380e28595ad Mon Sep 17 00:00:00 2001 From: Kai Sasaki Date: Fri, 2 Feb 2024 17:07:44 +0900 Subject: [PATCH 01/54] [mlir] Skip invalid test on big endian platform (s390x) (#80246) The buildbot test running on s390x platform keeps failing since [this time](https://lab.llvm.org/buildbot/#/builders/199/builds/31136). This is because of the dependency on the endianness of the platform. It expects the format invalid in the big endian platform (s390x). We can simply skip it. See: https://discourse.llvm.org/t/mlir-s390x-linux-failure/76695 (cherry picked from commit 65ac8c16e028b23b49fd6b03817faa1ab6c0229d) --- .../Target/LLVMIR/llvmir-le-specific.mlir | 27 +++++++++++++++++++ mlir/test/Target/LLVMIR/llvmir.mlir | 23 ---------------- 2 files changed, 27 insertions(+), 23 deletions(-) create mode 100644 mlir/test/Target/LLVMIR/llvmir-le-specific.mlir diff --git a/mlir/test/Target/LLVMIR/llvmir-le-specific.mlir b/mlir/test/Target/LLVMIR/llvmir-le-specific.mlir new file mode 100644 index 0000000000000..f8d082082117c --- /dev/null +++ b/mlir/test/Target/LLVMIR/llvmir-le-specific.mlir @@ -0,0 +1,27 @@ +// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s + +// Decoding the attribute does not work on big-endian platforms currently +// XFAIL: target=s390x-{{.*}} + +// CHECK{LITERAL}: @dense_resource_tensor_constant = internal constant [5 x float] [float 0x3FCA034080000000, float 0xBFD0466300000000, float 0xBFD75DDF80000000, float 0xBFDE074F40000000, float 0x3FDDD3A1C0000000] +llvm.mlir.global internal constant @dense_resource_tensor_constant(dense_resource : tensor<5xf32>) : !llvm.array<5 x f32> + +// CHECK{LITERAL}: @dense_resource_vector_constant = internal constant <5 x float> +llvm.mlir.global internal constant @dense_resource_vector_constant(dense_resource : vector<5xf32>) : vector<5xf32> + + +// CHECK{LITERAL}: @dense_resource_multidim_tensor_constant = internal constant [1 x [2 x [2 x float]]] [[2 x [2 x float]] [[2 x 
float] [float 0x3FD6B46A80000000, float 0x3FD6781AC0000000], [2 x float] [float 0xBFB45A2AA0000000, float 0x3FD77A5CA0000000]]] +llvm.mlir.global internal constant @dense_resource_multidim_tensor_constant(dense_resource : tensor<1x2x2xf32>) : !llvm.array<1 x !llvm.array<2 x !llvm.array<2 x f32>>> + +// CHECK{LITERAL}: @dense_resource_multidim_vector_constant = internal constant [1 x [2 x <2 x float>]] [[2 x <2 x float>] [<2 x float> , <2 x float> ]] +llvm.mlir.global internal constant @dense_resource_multidim_vector_constant(dense_resource : vector<1x2x2xf32>) : !llvm.array<1 x !llvm.array<2 x vector<2 x f32>>> + +// Resources are kept at end of file. New tests should be added above this. +{-# + dialect_resources: { + builtin: { + dense_resource_test_5xf32: "0x08000000041A503E183382BEFCEEBABE7A3AF0BE0E9DEE3E", + dense_resource_test_2x2xf32: "0x0800000054A3B53ED6C0B33E55D1A2BDE5D2BB3E" + } + } +#-} \ No newline at end of file diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir index 448aa3a5d85d7..961c948444684 100644 --- a/mlir/test/Target/LLVMIR/llvmir.mlir +++ b/mlir/test/Target/LLVMIR/llvmir.mlir @@ -101,19 +101,6 @@ llvm.mlir.global internal @dense_float_vector_3d(dense<[[[1.0, 2.0], [3.0, 4.0]] // CHECK{LITERAL}: @splat_float_vector_3d = internal global [2 x [2 x <2 x float>]] [[2 x <2 x float>] [<2 x float> , <2 x float> ], [2 x <2 x float>] [<2 x float> , <2 x float> ]] llvm.mlir.global internal @splat_float_vector_3d(dense<42.0> : vector<2x2x2xf32>) : !llvm.array<2 x !llvm.array<2 x vector<2xf32>>> -// CHECK{LITERAL}: @dense_resource_tensor_constant = internal constant [5 x float] [float 0x3FCA034080000000, float 0xBFD0466300000000, float 0xBFD75DDF80000000, float 0xBFDE074F40000000, float 0x3FDDD3A1C0000000] -llvm.mlir.global internal constant @dense_resource_tensor_constant(dense_resource : tensor<5xf32>) : !llvm.array<5 x f32> - -// CHECK{LITERAL}: @dense_resource_vector_constant = internal constant <5 x float> 
-llvm.mlir.global internal constant @dense_resource_vector_constant(dense_resource : vector<5xf32>) : vector<5xf32> - - -// CHECK{LITERAL}: @dense_resource_multidim_tensor_constant = internal constant [1 x [2 x [2 x float]]] [[2 x [2 x float]] [[2 x float] [float 0x3FD6B46A80000000, float 0x3FD6781AC0000000], [2 x float] [float 0xBFB45A2AA0000000, float 0x3FD77A5CA0000000]]] -llvm.mlir.global internal constant @dense_resource_multidim_tensor_constant(dense_resource : tensor<1x2x2xf32>) : !llvm.array<1 x !llvm.array<2 x !llvm.array<2 x f32>>> - -// CHECK{LITERAL}: @dense_resource_multidim_vector_constant = internal constant [1 x [2 x <2 x float>]] [[2 x <2 x float>] [<2 x float> , <2 x float> ]] -llvm.mlir.global internal constant @dense_resource_multidim_vector_constant(dense_resource : vector<1x2x2xf32>) : !llvm.array<1 x !llvm.array<2 x vector<2 x f32>>> - // // Linkage attribute. // @@ -1590,16 +1577,6 @@ llvm.func @invokeLandingpad() -> i32 attributes { personality = @__gxx_personali llvm.invoke %9(%6, %0) to ^bb2 unwind ^bb1 vararg(!llvm.func) : !llvm.ptr, (!llvm.ptr, i32) -> () } -// Resources are kept at end of file. New tests should be added above this. -{-# - dialect_resources: { - builtin: { - dense_resource_test_5xf32: "0x08000000041A503E183382BEFCEEBABE7A3AF0BE0E9DEE3E", - dense_resource_test_2x2xf32: "0x0800000054A3B53ED6C0B33E55D1A2BDE5D2BB3E" - } - } -#-} - // ----- llvm.func @foo() -> i8 From 50f8284ceadb56d8bb08d989b4563b9443e45b5f Mon Sep 17 00:00:00 2001 From: Koakuma Date: Sun, 11 Feb 2024 14:04:18 +0700 Subject: [PATCH 02/54] [SPARC] Support reserving arbitrary general purpose registers (#74927) This adds support for marking arbitrary general purpose registers - except for those with special purpose (G0, I6-I7, O6-O7) - as reserved, as needed by some software like the Linux kernel. 
(cherry picked from commit c2f9885a8aa3a820eefdacccf3fcc6b9d87e3284) --- clang/include/clang/Driver/Options.td | 12 ++ clang/lib/Driver/ToolChains/Arch/Sparc.cpp | 81 ++++++++ clang/test/Driver/sparc-fixed-register.c | 181 ++++++++++++++++++ llvm/lib/Target/Sparc/Sparc.td | 14 ++ llvm/lib/Target/Sparc/SparcISelLowering.cpp | 43 +++++ llvm/lib/Target/Sparc/SparcRegisterInfo.cpp | 14 +- llvm/lib/Target/Sparc/SparcRegisterInfo.h | 1 + llvm/lib/Target/Sparc/SparcRegisterInfo.td | 4 + llvm/lib/Target/Sparc/SparcSubtarget.cpp | 1 + llvm/lib/Target/Sparc/SparcSubtarget.h | 10 + llvm/test/CodeGen/SPARC/reserved-arg-regs.ll | 25 +++ .../test/CodeGen/SPARC/reserved-regs-named.ll | 13 ++ .../SPARC/reserved-regs-unavailable.ll | 14 ++ llvm/test/CodeGen/SPARC/reserved-regs.ll | 17 ++ 14 files changed, 428 insertions(+), 2 deletions(-) create mode 100644 clang/test/Driver/sparc-fixed-register.c create mode 100644 llvm/test/CodeGen/SPARC/reserved-arg-regs.ll create mode 100644 llvm/test/CodeGen/SPARC/reserved-regs-named.ll create mode 100644 llvm/test/CodeGen/SPARC/reserved-regs-unavailable.ll diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index e8d03fc269023..175bedbfb4d01 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5815,6 +5815,18 @@ def mvis3 : Flag<["-"], "mvis3">, Group; def mno_vis3 : Flag<["-"], "mno-vis3">, Group; def mhard_quad_float : Flag<["-"], "mhard-quad-float">, Group; def msoft_quad_float : Flag<["-"], "msoft-quad-float">, Group; +foreach i = 1 ... 7 in + def ffixed_g#i : Flag<["-"], "ffixed-g"#i>, Group, + HelpText<"Reserve the G"#i#" register (SPARC only)">; +foreach i = 0 ... 5 in + def ffixed_o#i : Flag<["-"], "ffixed-o"#i>, Group, + HelpText<"Reserve the O"#i#" register (SPARC only)">; +foreach i = 0 ... 7 in + def ffixed_l#i : Flag<["-"], "ffixed-l"#i>, Group, + HelpText<"Reserve the L"#i#" register (SPARC only)">; +foreach i = 0 ... 
5 in + def ffixed_i#i : Flag<["-"], "ffixed-i"#i>, Group, + HelpText<"Reserve the I"#i#" register (SPARC only)">; } // let Flags = [TargetSpecific] // M68k features flags diff --git a/clang/lib/Driver/ToolChains/Arch/Sparc.cpp b/clang/lib/Driver/ToolChains/Arch/Sparc.cpp index 22e583021515e..ae1a4ba788262 100644 --- a/clang/lib/Driver/ToolChains/Arch/Sparc.cpp +++ b/clang/lib/Driver/ToolChains/Arch/Sparc.cpp @@ -178,4 +178,85 @@ void sparc::getSparcTargetFeatures(const Driver &D, const ArgList &Args, else Features.push_back("-hard-quad-float"); } + + if (Args.hasArg(options::OPT_ffixed_g1)) + Features.push_back("+reserve-g1"); + + if (Args.hasArg(options::OPT_ffixed_g2)) + Features.push_back("+reserve-g2"); + + if (Args.hasArg(options::OPT_ffixed_g3)) + Features.push_back("+reserve-g3"); + + if (Args.hasArg(options::OPT_ffixed_g4)) + Features.push_back("+reserve-g4"); + + if (Args.hasArg(options::OPT_ffixed_g5)) + Features.push_back("+reserve-g5"); + + if (Args.hasArg(options::OPT_ffixed_g6)) + Features.push_back("+reserve-g6"); + + if (Args.hasArg(options::OPT_ffixed_g7)) + Features.push_back("+reserve-g7"); + + if (Args.hasArg(options::OPT_ffixed_o0)) + Features.push_back("+reserve-o0"); + + if (Args.hasArg(options::OPT_ffixed_o1)) + Features.push_back("+reserve-o1"); + + if (Args.hasArg(options::OPT_ffixed_o2)) + Features.push_back("+reserve-o2"); + + if (Args.hasArg(options::OPT_ffixed_o3)) + Features.push_back("+reserve-o3"); + + if (Args.hasArg(options::OPT_ffixed_o4)) + Features.push_back("+reserve-o4"); + + if (Args.hasArg(options::OPT_ffixed_o5)) + Features.push_back("+reserve-o5"); + + if (Args.hasArg(options::OPT_ffixed_l0)) + Features.push_back("+reserve-l0"); + + if (Args.hasArg(options::OPT_ffixed_l1)) + Features.push_back("+reserve-l1"); + + if (Args.hasArg(options::OPT_ffixed_l2)) + Features.push_back("+reserve-l2"); + + if (Args.hasArg(options::OPT_ffixed_l3)) + Features.push_back("+reserve-l3"); + + if (Args.hasArg(options::OPT_ffixed_l4)) + 
Features.push_back("+reserve-l4"); + + if (Args.hasArg(options::OPT_ffixed_l5)) + Features.push_back("+reserve-l5"); + + if (Args.hasArg(options::OPT_ffixed_l6)) + Features.push_back("+reserve-l6"); + + if (Args.hasArg(options::OPT_ffixed_l7)) + Features.push_back("+reserve-l7"); + + if (Args.hasArg(options::OPT_ffixed_i0)) + Features.push_back("+reserve-i0"); + + if (Args.hasArg(options::OPT_ffixed_i1)) + Features.push_back("+reserve-i1"); + + if (Args.hasArg(options::OPT_ffixed_i2)) + Features.push_back("+reserve-i2"); + + if (Args.hasArg(options::OPT_ffixed_i3)) + Features.push_back("+reserve-i3"); + + if (Args.hasArg(options::OPT_ffixed_i4)) + Features.push_back("+reserve-i4"); + + if (Args.hasArg(options::OPT_ffixed_i5)) + Features.push_back("+reserve-i5"); } diff --git a/clang/test/Driver/sparc-fixed-register.c b/clang/test/Driver/sparc-fixed-register.c new file mode 100644 index 0000000000000..24880b9c9d86f --- /dev/null +++ b/clang/test/Driver/sparc-fixed-register.c @@ -0,0 +1,181 @@ +// RUN: %clang --target=sparc-none-gnu -ffixed-g1 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-G1 < %t %s +// CHECK-FIXED-G1: "-target-feature" "+reserve-g1" + +// RUN: %clang --target=sparc-none-gnu -ffixed-g2 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-G2 < %t %s +// CHECK-FIXED-G2: "-target-feature" "+reserve-g2" + +// RUN: %clang --target=sparc-none-gnu -ffixed-g3 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-G3 < %t %s +// CHECK-FIXED-G3: "-target-feature" "+reserve-g3" + +// RUN: %clang --target=sparc-none-gnu -ffixed-g4 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-G4 < %t %s +// CHECK-FIXED-G4: "-target-feature" "+reserve-g4" + +// RUN: %clang --target=sparc-none-gnu -ffixed-g5 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-G5 < %t %s +// CHECK-FIXED-G5: "-target-feature" "+reserve-g5" + +// RUN: %clang --target=sparc-none-gnu -ffixed-g6 -### %s 2> %t +// RUN: FileCheck 
--check-prefix=CHECK-FIXED-G6 < %t %s +// CHECK-FIXED-G6: "-target-feature" "+reserve-g6" + +// RUN: %clang --target=sparc-none-gnu -ffixed-g7 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-G7 < %t %s +// CHECK-FIXED-G7: "-target-feature" "+reserve-g7" + +// RUN: %clang --target=sparc-none-gnu -ffixed-o0 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-O0 < %t %s +// CHECK-FIXED-O0: "-target-feature" "+reserve-o0" + +// RUN: %clang --target=sparc-none-gnu -ffixed-o1 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-O1 < %t %s +// CHECK-FIXED-O1: "-target-feature" "+reserve-o1" + +// RUN: %clang --target=sparc-none-gnu -ffixed-o2 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-O2 < %t %s +// CHECK-FIXED-O2: "-target-feature" "+reserve-o2" + +// RUN: %clang --target=sparc-none-gnu -ffixed-o3 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-O3 < %t %s +// CHECK-FIXED-O3: "-target-feature" "+reserve-o3" + +// RUN: %clang --target=sparc-none-gnu -ffixed-o4 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-O4 < %t %s +// CHECK-FIXED-O4: "-target-feature" "+reserve-o4" + +// RUN: %clang --target=sparc-none-gnu -ffixed-o5 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-O5 < %t %s +// CHECK-FIXED-O5: "-target-feature" "+reserve-o5" + +// RUN: %clang --target=sparc-none-gnu -ffixed-l0 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-L0 < %t %s +// CHECK-FIXED-L0: "-target-feature" "+reserve-l0" + +// RUN: %clang --target=sparc-none-gnu -ffixed-l1 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-L1 < %t %s +// CHECK-FIXED-L1: "-target-feature" "+reserve-l1" + +// RUN: %clang --target=sparc-none-gnu -ffixed-l2 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-L2 < %t %s +// CHECK-FIXED-L2: "-target-feature" "+reserve-l2" + +// RUN: %clang --target=sparc-none-gnu -ffixed-l3 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-L3 < %t %s +// CHECK-FIXED-L3: 
"-target-feature" "+reserve-l3" + +// RUN: %clang --target=sparc-none-gnu -ffixed-l4 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-L4 < %t %s +// CHECK-FIXED-L4: "-target-feature" "+reserve-l4" + +// RUN: %clang --target=sparc-none-gnu -ffixed-l5 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-L5 < %t %s +// CHECK-FIXED-L5: "-target-feature" "+reserve-l5" + +// RUN: %clang --target=sparc-none-gnu -ffixed-l6 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-L6 < %t %s +// CHECK-FIXED-L6: "-target-feature" "+reserve-l6" + +// RUN: %clang --target=sparc-none-gnu -ffixed-l7 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-L7 < %t %s +// CHECK-FIXED-L7: "-target-feature" "+reserve-l7" + +// RUN: %clang --target=sparc-none-gnu -ffixed-i0 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-I0 < %t %s +// CHECK-FIXED-I0: "-target-feature" "+reserve-i0" + +// RUN: %clang --target=sparc-none-gnu -ffixed-i1 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-I1 < %t %s +// CHECK-FIXED-I1: "-target-feature" "+reserve-i1" + +// RUN: %clang --target=sparc-none-gnu -ffixed-i2 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-I2 < %t %s +// CHECK-FIXED-I2: "-target-feature" "+reserve-i2" + +// RUN: %clang --target=sparc-none-gnu -ffixed-i3 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-I3 < %t %s +// CHECK-FIXED-I3: "-target-feature" "+reserve-i3" + +// RUN: %clang --target=sparc-none-gnu -ffixed-i4 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-I4 < %t %s +// CHECK-FIXED-I4: "-target-feature" "+reserve-i4" + +// RUN: %clang --target=sparc-none-gnu -ffixed-i5 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-I5 < %t %s +// CHECK-FIXED-I5: "-target-feature" "+reserve-i5" + +// Test multiple of reserve-* options together. 
+// RUN: %clang --target=sparc-none-gnu \ +// RUN: -ffixed-g1 \ +// RUN: -ffixed-o2 \ +// RUN: -ffixed-l3 \ +// RUN: -ffixed-i4 \ +// RUN: -### %s 2> %t +// RUN: FileCheck \ +// RUN: --check-prefix=CHECK-FIXED-G1 \ +// RUN: --check-prefix=CHECK-FIXED-O2 \ +// RUN: --check-prefix=CHECK-FIXED-L3 \ +// RUN: --check-prefix=CHECK-FIXED-I4 \ +// RUN: < %t %s + +// Test all reserve-* options together. +// RUN: %clang --target=sparc-none-gnu \ +// RUN: -ffixed-g1 \ +// RUN: -ffixed-g2 \ +// RUN: -ffixed-g3 \ +// RUN: -ffixed-g4 \ +// RUN: -ffixed-g5 \ +// RUN: -ffixed-g6 \ +// RUN: -ffixed-g7 \ +// RUN: -ffixed-o0 \ +// RUN: -ffixed-o1 \ +// RUN: -ffixed-o2 \ +// RUN: -ffixed-o3 \ +// RUN: -ffixed-o4 \ +// RUN: -ffixed-o5 \ +// RUN: -ffixed-l0 \ +// RUN: -ffixed-l1 \ +// RUN: -ffixed-l2 \ +// RUN: -ffixed-l3 \ +// RUN: -ffixed-l4 \ +// RUN: -ffixed-l5 \ +// RUN: -ffixed-l6 \ +// RUN: -ffixed-l7 \ +// RUN: -ffixed-i0 \ +// RUN: -ffixed-i1 \ +// RUN: -ffixed-i2 \ +// RUN: -ffixed-i3 \ +// RUN: -ffixed-i4 \ +// RUN: -ffixed-i5 \ +// RUN: -### %s 2> %t +// RUN: FileCheck \ +// RUN: --check-prefix=CHECK-FIXED-G1 \ +// RUN: --check-prefix=CHECK-FIXED-G2 \ +// RUN: --check-prefix=CHECK-FIXED-G3 \ +// RUN: --check-prefix=CHECK-FIXED-G4 \ +// RUN: --check-prefix=CHECK-FIXED-G5 \ +// RUN: --check-prefix=CHECK-FIXED-G6 \ +// RUN: --check-prefix=CHECK-FIXED-G7 \ +// RUN: --check-prefix=CHECK-FIXED-O0 \ +// RUN: --check-prefix=CHECK-FIXED-O1 \ +// RUN: --check-prefix=CHECK-FIXED-O2 \ +// RUN: --check-prefix=CHECK-FIXED-O3 \ +// RUN: --check-prefix=CHECK-FIXED-O4 \ +// RUN: --check-prefix=CHECK-FIXED-O5 \ +// RUN: --check-prefix=CHECK-FIXED-L0 \ +// RUN: --check-prefix=CHECK-FIXED-L1 \ +// RUN: --check-prefix=CHECK-FIXED-L2 \ +// RUN: --check-prefix=CHECK-FIXED-L3 \ +// RUN: --check-prefix=CHECK-FIXED-L4 \ +// RUN: --check-prefix=CHECK-FIXED-L5 \ +// RUN: --check-prefix=CHECK-FIXED-L6 \ +// RUN: --check-prefix=CHECK-FIXED-L7 \ +// RUN: --check-prefix=CHECK-FIXED-I0 \ +// RUN: 
--check-prefix=CHECK-FIXED-I1 \ +// RUN: --check-prefix=CHECK-FIXED-I2 \ +// RUN: --check-prefix=CHECK-FIXED-I3 \ +// RUN: --check-prefix=CHECK-FIXED-I4 \ +// RUN: --check-prefix=CHECK-FIXED-I5 \ +// RUN: < %t %s diff --git a/llvm/lib/Target/Sparc/Sparc.td b/llvm/lib/Target/Sparc/Sparc.td index 7b10339565243..38a59e650f33c 100644 --- a/llvm/lib/Target/Sparc/Sparc.td +++ b/llvm/lib/Target/Sparc/Sparc.td @@ -72,6 +72,20 @@ def TuneSlowRDPC : SubtargetFeature<"slow-rdpc", "HasSlowRDPC", "true", //==== Features added predmoninantly for LEON subtarget support include "LeonFeatures.td" +//==== Register allocation tweaks needed by some low-level software +foreach i = 1 ... 7 in + def FeatureReserveG#i : SubtargetFeature<"reserve-g"#i, "ReserveRegister["#i#" + SP::G0]", "true", + "Reserve G"#i#", making it unavailable as a GPR">; +foreach i = 0 ... 5 in + def FeatureReserveO#i : SubtargetFeature<"reserve-o"#i, "ReserveRegister["#i#" + SP::O0]", "true", + "Reserve O"#i#", making it unavailable as a GPR">; +foreach i = 0 ... 7 in + def FeatureReserveL#i : SubtargetFeature<"reserve-l"#i, "ReserveRegister["#i#" + SP::L0]", "true", + "Reserve L"#i#", making it unavailable as a GPR">; +foreach i = 0 ... 
5 in + def FeatureReserveI#i : SubtargetFeature<"reserve-i"#i, "ReserveRegister["#i#" + SP::I0]", "true", + "Reserve I"#i#", making it unavailable as a GPR">; + //===----------------------------------------------------------------------===// // Register File, Calling Conv, Instruction Descriptions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 78bdf3ae9a84b..bdefb0841a124 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -13,6 +13,7 @@ #include "SparcISelLowering.h" #include "MCTargetDesc/SparcMCExpr.h" +#include "MCTargetDesc/SparcMCTargetDesc.h" #include "SparcMachineFunctionInfo.h" #include "SparcRegisterInfo.h" #include "SparcTargetMachine.h" @@ -28,6 +29,7 @@ #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" @@ -729,6 +731,30 @@ SDValue SparcTargetLowering::LowerFormalArguments_64( return Chain; } +// Check whether any of the argument registers are reserved +static bool isAnyArgRegReserved(const SparcRegisterInfo *TRI, + const MachineFunction &MF) { + // The register window design means that outgoing parameters at O* + // will appear in the callee as I*. + // Be conservative and check both sides of the register names. 
+ bool Outgoing = + llvm::any_of(SP::GPROutgoingArgRegClass, [TRI, &MF](MCPhysReg r) { + return TRI->isReservedReg(MF, r); + }); + bool Incoming = + llvm::any_of(SP::GPRIncomingArgRegClass, [TRI, &MF](MCPhysReg r) { + return TRI->isReservedReg(MF, r); + }); + return Outgoing || Incoming; +} + +static void emitReservedArgRegCallError(const MachineFunction &MF) { + const Function &F = MF.getFunction(); + F.getContext().diagnose(DiagnosticInfoUnsupported{ + F, ("SPARC doesn't support" + " function calls if any of the argument registers is reserved.")}); +} + SDValue SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { @@ -805,6 +831,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, bool &isTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool isVarArg = CLI.IsVarArg; + MachineFunction &MF = DAG.getMachineFunction(); // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; @@ -1055,6 +1082,10 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, ((hasReturnsTwice) ? TRI->getRTCallPreservedMask(CallConv) : TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv)); + + if (isAnyArgRegReserved(TRI, MF)) + emitReservedArgRegCallError(MF); + assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -1125,6 +1156,13 @@ Register SparcTargetLowering::getRegisterByName(const char* RegName, LLT VT, .Case("g4", SP::G4).Case("g5", SP::G5).Case("g6", SP::G6).Case("g7", SP::G7) .Default(0); + // If we're directly referencing register names + // (e.g in GCC C extension `register int r asm("g1");`), + // make sure that said register is in the reserve list. 
+ const SparcRegisterInfo *TRI = Subtarget->getRegisterInfo(); + if (!TRI->isReservedReg(MF, Reg)) + Reg = 0; + if (Reg) return Reg; @@ -1189,6 +1227,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, SDLoc DL = CLI.DL; SDValue Chain = CLI.Chain; auto PtrVT = getPointerTy(DAG.getDataLayout()); + MachineFunction &MF = DAG.getMachineFunction(); // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; @@ -1372,6 +1411,10 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, ((hasReturnsTwice) ? TRI->getRTCallPreservedMask(CLI.CallConv) : TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv)); + + if (isAnyArgRegReserved(TRI, MF)) + emitReservedArgRegCallError(MF); + assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); diff --git a/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp index f97bf57627d1a..71a27f77d2c6b 100644 --- a/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -12,10 +12,8 @@ #include "SparcRegisterInfo.h" #include "Sparc.h" -#include "SparcMachineFunctionInfo.h" #include "SparcSubtarget.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -98,9 +96,21 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const { for (unsigned n = 0; n < 31; n++) Reserved.set(SP::ASR1 + n); + for (TargetRegisterClass::iterator i = SP::IntRegsRegClass.begin(); + i != SP::IntRegsRegClass.end(); ++i) { + if (MF.getSubtarget().isRegisterReserved(*i)) + markSuperRegs(Reserved, *i); + } + + assert(checkAllSuperRegsMarked(Reserved)); return Reserved; } +bool SparcRegisterInfo::isReservedReg(const MachineFunction &MF, + MCRegister Reg) const { + return 
getReservedRegs(MF)[Reg]; +} + const TargetRegisterClass* SparcRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const { diff --git a/llvm/lib/Target/Sparc/SparcRegisterInfo.h b/llvm/lib/Target/Sparc/SparcRegisterInfo.h index 5b3c1a7ad07dd..58c85f33635f2 100644 --- a/llvm/lib/Target/Sparc/SparcRegisterInfo.h +++ b/llvm/lib/Target/Sparc/SparcRegisterInfo.h @@ -30,6 +30,7 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { const uint32_t* getRTCallPreservedMask(CallingConv::ID CC) const; BitVector getReservedRegs(const MachineFunction &MF) const override; + bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const; const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, unsigned Kind) const override; diff --git a/llvm/lib/Target/Sparc/SparcRegisterInfo.td b/llvm/lib/Target/Sparc/SparcRegisterInfo.td index d5ba7464695c5..d8319a8d41dda 100644 --- a/llvm/lib/Target/Sparc/SparcRegisterInfo.td +++ b/llvm/lib/Target/Sparc/SparcRegisterInfo.td @@ -370,6 +370,10 @@ def LowQFPRegs : RegisterClass<"SP", [f128], 128, (sequence "Q%u", 0, 7)>; // Floating point control register classes. def FCCRegs : RegisterClass<"SP", [i1], 1, (sequence "FCC%u", 0, 3)>; +// GPR argument registers. 
+def GPROutgoingArg : RegisterClass<"SP", [i32, i64], 32, (sequence "O%u", 0, 5)>; +def GPRIncomingArg : RegisterClass<"SP", [i32, i64], 32, (sequence "I%u", 0, 5)>; + let isAllocatable = 0 in { // Ancillary state registers // FIXME: TICK is special-cased here as it can be accessed diff --git a/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/llvm/lib/Target/Sparc/SparcSubtarget.cpp index 6b09904ca5e8e..5b65e34e0f8a3 100644 --- a/llvm/lib/Target/Sparc/SparcSubtarget.cpp +++ b/llvm/lib/Target/Sparc/SparcSubtarget.cpp @@ -50,6 +50,7 @@ SparcSubtarget::SparcSubtarget(const StringRef &CPU, const StringRef &TuneCPU, const StringRef &FS, const TargetMachine &TM, bool is64Bit) : SparcGenSubtargetInfo(TM.getTargetTriple(), CPU, TuneCPU, FS), + ReserveRegister(TM.getMCRegisterInfo()->getNumRegs()), TargetTriple(TM.getTargetTriple()), Is64Bit(is64Bit), InstrInfo(initializeSubtargetDependencies(CPU, TuneCPU, FS)), TLInfo(TM, *this), FrameLowering(*this) {} diff --git a/llvm/lib/Target/Sparc/SparcSubtarget.h b/llvm/lib/Target/Sparc/SparcSubtarget.h index cdb210f67482c..fe4aca5195306 100644 --- a/llvm/lib/Target/Sparc/SparcSubtarget.h +++ b/llvm/lib/Target/Sparc/SparcSubtarget.h @@ -13,12 +13,14 @@ #ifndef LLVM_LIB_TARGET_SPARC_SPARCSUBTARGET_H #define LLVM_LIB_TARGET_SPARC_SPARCSUBTARGET_H +#include "MCTargetDesc/SparcMCTargetDesc.h" #include "SparcFrameLowering.h" #include "SparcISelLowering.h" #include "SparcInstrInfo.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/TargetParser/Triple.h" #include @@ -29,6 +31,10 @@ namespace llvm { class StringRef; class SparcSubtarget : public SparcGenSubtargetInfo { + // ReserveRegister[i] - Register #i is not available as a general purpose + // register. 
+ BitVector ReserveRegister; + Triple TargetTriple; virtual void anchor(); @@ -82,6 +88,10 @@ class SparcSubtarget : public SparcGenSubtargetInfo { return is64Bit() ? 2047 : 0; } + bool isRegisterReserved(MCPhysReg PhysReg) const { + return ReserveRegister[PhysReg]; + } + /// Given a actual stack size as determined by FrameInfo, this function /// returns adjusted framesize which includes space for register window /// spills and arguments. diff --git a/llvm/test/CodeGen/SPARC/reserved-arg-regs.ll b/llvm/test/CodeGen/SPARC/reserved-arg-regs.ll new file mode 100644 index 0000000000000..3587ecb7f3c94 --- /dev/null +++ b/llvm/test/CodeGen/SPARC/reserved-arg-regs.ll @@ -0,0 +1,25 @@ +;; Test reserving argument registers. +; RUN: not llc < %s -mtriple=sparc-linux-gnu -mattr=+reserve-o0 2>&1 | FileCheck %s --check-prefixes=CHECK-RESERVED-O0 +; RUN: not llc < %s -mtriple=sparc64-linux-gnu -mattr=+reserve-o0 2>&1 | FileCheck %s --check-prefixes=CHECK-RESERVED-O0 +; RUN: not llc < %s -mtriple=sparc-linux-gnu -mattr=+reserve-i0 2>&1 | FileCheck %s --check-prefixes=CHECK-RESERVED-I0 +; RUN: not llc < %s -mtriple=sparc64-linux-gnu -mattr=+reserve-i0 2>&1 | FileCheck %s --check-prefixes=CHECK-RESERVED-I0 + +; CHECK-RESERVED-O0: error: +; CHECK-RESERVED-O0-SAME: SPARC doesn't support function calls if any of the argument registers is reserved. +; CHECK-RESERVED-I0: error: +; CHECK-RESERVED-I0-SAME: SPARC doesn't support function calls if any of the argument registers is reserved. +define void @call_function() { + call void @foo() + ret void +} +declare void @foo() + +; CHECK-RESERVED-O0: error: +; CHECK-RESERVED-O0-SAME: SPARC doesn't support function calls if any of the argument registers is reserved. +; CHECK-RESERVED-I0: error: +; CHECK-RESERVED-I0-SAME: SPARC doesn't support function calls if any of the argument registers is reserved. 
+define void @call_function_with_arg(i8 %in) { + call void @bar(i8 %in) + ret void +} +declare void @bar(i8) diff --git a/llvm/test/CodeGen/SPARC/reserved-regs-named.ll b/llvm/test/CodeGen/SPARC/reserved-regs-named.ll new file mode 100644 index 0000000000000..91808be156c55 --- /dev/null +++ b/llvm/test/CodeGen/SPARC/reserved-regs-named.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple=sparc64-linux-gnu -mattr=+reserve-l0 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVED-L0 + +;; Ensure explicit register references are caught as well. + +; CHECK-RESERVED-L0: %l0 +define void @set_reg(i32 zeroext %x) { +entry: + tail call void @llvm.write_register.i32(metadata !0, i32 %x) + ret void +} + +declare void @llvm.write_register.i32(metadata, i32) +!0 = !{!"l0"} diff --git a/llvm/test/CodeGen/SPARC/reserved-regs-unavailable.ll b/llvm/test/CodeGen/SPARC/reserved-regs-unavailable.ll new file mode 100644 index 0000000000000..53ca045f10044 --- /dev/null +++ b/llvm/test/CodeGen/SPARC/reserved-regs-unavailable.ll @@ -0,0 +1,14 @@ +; RUN: not --crash llc -mtriple=sparc64-linux-gnu -o - %s 2>&1 | FileCheck %s --check-prefixes=CHECK-RESERVED-L0 + +;; Ensure explicit register references for non-reserved registers +;; are caught properly. + +; CHECK-RESERVED-L0: LLVM ERROR: Invalid register name global variable +define void @set_reg(i32 zeroext %x) { +entry: + tail call void @llvm.write_register.i32(metadata !0, i32 %x) + ret void +} + +declare void @llvm.write_register.i32(metadata, i32) +!0 = !{!"l0"} diff --git a/llvm/test/CodeGen/SPARC/reserved-regs.ll b/llvm/test/CodeGen/SPARC/reserved-regs.ll index ec6290586eeef..7dea1f31538b8 100644 --- a/llvm/test/CodeGen/SPARC/reserved-regs.ll +++ b/llvm/test/CodeGen/SPARC/reserved-regs.ll @@ -1,5 +1,14 @@ ; RUN: llc -march=sparc -verify-machineinstrs < %s | FileCheck %s +;; Test reserve-* options. 
+; RUN: llc -mtriple=sparc64-linux-gnu -mattr=+reserve-g1 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVED-G1 +; RUN: llc -mtriple=sparc64-linux-gnu -mattr=+reserve-o1 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVED-O1 +; RUN: llc -mtriple=sparc64-linux-gnu -mattr=+reserve-l1 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVED-L1 +; RUN: llc -mtriple=sparc64-linux-gnu -mattr=+reserve-i1 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVED-I1 + +;; Test multiple reserve-* options together. +; RUN: llc -mtriple=sparc64-linux-gnu -mattr=+reserve-g1 -mattr=+reserve-o1 -mattr=+reserve-l1 -mattr=+reserve-i1 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVED-G1,CHECK-RESERVED-O1,CHECK-RESERVED-L1,CHECK-RESERVED-I1 + @g = common global [32 x i32] zeroinitializer, align 16 @h = common global [16 x i64] zeroinitializer, align 16 @@ -16,6 +25,10 @@ ; CHECK-NOT: %o6 ; CHECK-NOT: %i6 ; CHECK-NOT: %i7 +; CHECK-RESERVED-G1-NOT: %g1 +; CHECK-RESERVED-O1-NOT: %o1 +; CHECK-RESERVED-L1-NOT: %l1 +; CHECK-RESERVED-I1-NOT: %i1 ; CHECK: ret define void @use_all_i32_regs() { entry: @@ -100,6 +113,10 @@ entry: ; CHECK-NOT: %o7 ; CHECK-NOT: %i6 ; CHECK-NOT: %i7 +; CHECK-RESERVED-G1-NOT: %g1 +; CHECK-RESERVED-O1-NOT: %o1 +; CHECK-RESERVED-L1-NOT: %l1 +; CHECK-RESERVED-I1-NOT: %i1 ; CHECK: ret define void @use_all_i64_regs() { entry: From 58b2a6d3bcd0696e5014958d6e2fae967a1627f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Mon, 12 Feb 2024 13:22:45 +0200 Subject: [PATCH 03/54] [LLD] [test] Avoid printing timestamps past INT32_MAX with llvm-readobj (#81463) If llvm-readobj is built with a 32 bit time_t, it can't print such timestamps correctly. 
(cherry picked from commit 0bf4ff29816c0eead99ba576a2df2e3c4d214b1f) --- lld/test/COFF/timestamp.test | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/lld/test/COFF/timestamp.test b/lld/test/COFF/timestamp.test index c0658d6109811..cc73af13c38ca 100644 --- a/lld/test/COFF/timestamp.test +++ b/lld/test/COFF/timestamp.test @@ -4,19 +4,28 @@ RUN: lld-link %t.obj /debug /Brepro /entry:main /nodefaultlib /out:%t.1.exe RUN: lld-link %t.obj /debug /Brepro /entry:main /nodefaultlib /out:%t.2.exe RUN: lld-link %t.obj /debug /timestamp:0 /entry:main /nodefaultlib /out:%t.3.exe RUN: env SOURCE_DATE_EPOCH=0 lld-link %t.obj /debug /entry:main /nodefaultlib /out:%t.4.exe -RUN: lld-link %t.obj /debug /timestamp:4294967295 /entry:main /nodefaultlib /out:%t.5.exe -RUN: env SOURCE_DATE_EPOCH=4294967295 lld-link %t.obj /debug /entry:main /nodefaultlib /out:%t.6.exe +# Test timestamps corresponding to INT32_MAX +RUN: lld-link %t.obj /debug /timestamp:2147483647 /entry:main /nodefaultlib /out:%t.5.exe +RUN: env SOURCE_DATE_EPOCH=2147483647 lld-link %t.obj /debug /entry:main /nodefaultlib /out:%t.6.exe +# Test that the command line option /timestamp has precedence over SOURCE_DATE_EPOCH RUN: env SOURCE_DATE_EPOCH=12345 lld-link %t.obj /debug /timestamp:0 /entry:main /nodefaultlib /out:%t.7.exe -RUN: env LLD_IN_TEST=1 not lld-link %t.obj /debug /timestamp:4294967296 /entry:main /nodefaultlib /out:%t.8.exe 2>&1 | FileCheck %s --check-prefix=ERROR -RUN: env SOURCE_DATE_EPOCH=4294967296 env LLD_IN_TEST=1 not lld-link %t.obj /debug /entry:main /nodefaultlib /out:%t.9.exe 2>&1 | FileCheck %s --check-prefix=ERROR2 +# Test timestamps corresponding to UINT32_MAX +RUN: lld-link %t.obj /debug /timestamp:4294967295 /entry:main /nodefaultlib /out:%t.8.exe +RUN: env SOURCE_DATE_EPOCH=4294967295 lld-link %t.obj /debug /entry:main /nodefaultlib /out:%t.9.exe +# Test that setting UINT32_MAX+1 as timestamp fails. 
+RUN: env LLD_IN_TEST=1 not lld-link %t.obj /debug /timestamp:4294967296 /entry:main /nodefaultlib /out:%t.10.exe 2>&1 | FileCheck %s --check-prefix=ERROR +RUN: env SOURCE_DATE_EPOCH=4294967296 env LLD_IN_TEST=1 not lld-link %t.obj /debug /entry:main /nodefaultlib /out:%t.11.exe 2>&1 | FileCheck %s --check-prefix=ERROR2 RUN: llvm-readobj --file-headers --coff-debug-directory %t.1.exe | FileCheck %s --check-prefix=HASH RUN: llvm-readobj --file-headers --coff-debug-directory %t.2.exe | FileCheck %s --check-prefix=HASH RUN: llvm-readobj --file-headers --coff-debug-directory %t.3.exe | FileCheck %s --check-prefix=ZERO RUN: llvm-readobj --file-headers --coff-debug-directory %t.4.exe | FileCheck %s --check-prefix=ZERO -RUN: llvm-readobj --file-headers --coff-debug-directory %t.5.exe | FileCheck %s --check-prefix=MAX -RUN: llvm-readobj --file-headers --coff-debug-directory %t.6.exe | FileCheck %s --check-prefix=MAX +RUN: llvm-readobj --file-headers --coff-debug-directory %t.5.exe | FileCheck %s --check-prefix=LARGE +RUN: llvm-readobj --file-headers --coff-debug-directory %t.6.exe | FileCheck %s --check-prefix=LARGE RUN: llvm-readobj --file-headers --coff-debug-directory %t.7.exe | FileCheck %s --check-prefix=ZERO +# Not inspecting %t.8.exe and %t.9.exe; llvm-readobj with a 32 bit time_t fails to print dates +# past INT32_MAX correctly. + HASH: ImageFileHeader { HASH: TimeDateStamp: [[STAMP:.*]] HASH: DebugDirectory [ @@ -27,10 +36,10 @@ ZERO: TimeDateStamp: 1970-01-01 00:00:00 (0x0) ZERO: DebugDirectory [ ZERO: TimeDateStamp: 1970-01-01 00:00:00 (0x0) -MAX: ImageFileHeader { -MAX: TimeDateStamp: 2106-02-07 06:28:15 (0xFFFFFFFF) -MAX: DebugDirectory [ -MAX: TimeDateStamp: 2106-02-07 06:28:15 (0xFFFFFFFF) +LARGE: ImageFileHeader { +LARGE: TimeDateStamp: 2038-01-19 03:14:07 (0x7FFFFFFF) +LARGE: DebugDirectory [ +LARGE: TimeDateStamp: 2038-01-19 03:14:07 (0x7FFFFFFF) ERROR: error: invalid timestamp: 4294967296. 
Expected 32-bit integer ERROR2: error: invalid SOURCE_DATE_EPOCH timestamp: 4294967296. Expected 32-bit integer From 831b9a5db2b7be590dcb09d0bf909ba37765a70b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 13 Feb 2024 09:29:56 +0100 Subject: [PATCH 04/54] [AArch64][GISel] Don't pointlessly lower G_TRUNC (#81479) If we have something like G_TRUNC from v2s32 to v2s16, then lowering this to a concat of two G_TRUNC s32 to s16 followed by G_TRUNC from v2s16 to v2s8 does not bring us any closer to legality. In fact, the first part of that is a G_BUILD_VECTOR whose legalization will produce a new G_TRUNC from v2s32 to v2s16, and both G_TRUNCs will then get combined to the original, causing a legalization cycle. Make the lowering condition more precise, by requiring that the original vector is >128 bits, which is I believe the only case where this specific splitting approach is useful. Note that this doesn't actually produce a legal result (the alwaysLegal is a lie, as before), but it will cause a proper globalisel abort instead of an infinite legalization loop. Fixes https://github.com/llvm/llvm-project/issues/81244. 
(cherry picked from commit 070848c17c2944afa494d42d3ad42929f3379842) --- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 5 ++-- .../AArch64/GlobalISel/legalize-xtn.mir | 24 +++++++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index fd69a7d6c33d0..4b9d549e79114 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -622,9 +622,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .lowerIf([=](const LegalityQuery &Query) { LLT DstTy = Query.Types[0]; LLT SrcTy = Query.Types[1]; - return DstTy.isVector() && (SrcTy.getSizeInBits() > 128 || - (DstTy.getScalarSizeInBits() * 2 < - SrcTy.getScalarSizeInBits())); + return DstTy.isVector() && SrcTy.getSizeInBits() > 128 && + DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits(); }) .alwaysLegal(); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir index 16b780a839734..661265173ae82 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir @@ -529,3 +529,27 @@ body: | RET_ReallyLR implicit $q0 ... 
+ +--- +name: pr81244 +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0 + ; CHECK-LABEL: name: pr81244 + ; CHECK: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s8>), [[TRUNC]](<2 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[CONCAT_VECTORS]](<4 x s8>) + ; CHECK-NEXT: $d0 = COPY [[ANYEXT]](<4 x s16>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %0:_(<2 x s32>) = COPY $d0 + %1:_(<2 x s8>) = G_TRUNC %0(<2 x s32>) + %2:_(<4 x s8>) = G_CONCAT_VECTORS %1(<2 x s8>), %1(<2 x s8>) + %3:_(<4 x s16>) = G_ANYEXT %2(<4 x s8>) + $d0 = COPY %3(<4 x s16>) + RET_ReallyLR implicit $d0 + +... From 83914215322e6db125ee5621d2fb18d97bab82c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 16 Feb 2024 07:35:12 +0200 Subject: [PATCH 05/54] [LLD] [docs] Add a release note for the SOURCE_DATE_EPOCH support (#81388) --- lld/docs/ReleaseNotes.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index fa0e7f2bc0b3e..82f9d93b8e86a 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -86,6 +86,11 @@ COFF Improvements * LLD now prefers library paths specified with ``-libpath:`` over the implicitly detected toolchain paths. +* Use the ``SOURCE_DATE_EPOCH`` environment variable for the PE header and + debug directory timestamps, if neither the ``/Brepro`` nor ``/timestamp:`` + options have been specified. This makes the linker output reproducible by + setting this environment variable. + MinGW Improvements ------------------ From 872fe20dddfaa6e41d92f90c3a1b716dc9e32107 Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Sat, 10 Feb 2024 17:09:53 +0100 Subject: [PATCH 06/54] [libc++][print] Moves is_terminal to the dylib. 
(#80464) Having the test in the header requires including unistd.h on POSIX platforms. This header has other declarations which may conflict with code that uses named declarations provided by this header. For example code using "int pipe;" would conflict with the function pipe in this header. Moving the code to the dylib means std::print would not be available on Apple backdeployment targets. On POSIX platforms there is no transcoding required, so a non-Standard-conforming implementation is still useful and the observable differences are minimal. This behaviour has been done for print before https://github.com/llvm/llvm-project/pull/76293. Note questions have been raised in LWG4044 "Confusing requirements for std::print on POSIX platforms", whether or not the isatty check on POSIX platforms is required. When this LWG issue is resolved the backdeployment targets could become Standard compliant. This patch is intended to be backported to the LLVM-18 branch. Fixes: https://github.com/llvm/llvm-project/issues/79782 (cherry picked from commit 4fb7b3301bfbd439eb3d30d6a36c7cdb26941a0d) --- libcxx/include/print | 14 +++++------ libcxx/lib/abi/CHANGELOG.TXT | 8 ++++++ ...bcxxabi.v1.stable.exceptions.nonew.abilist | 1 + ...bcxxabi.v1.stable.exceptions.nonew.abilist | 1 + ...bcxxabi.v1.stable.exceptions.nonew.abilist | 1 + ...bcxxabi.v1.stable.exceptions.nonew.abilist | 1 + ...bcxxabi.v1.stable.exceptions.nonew.abilist | 1 + ...bcxxabi.v1.stable.exceptions.nonew.abilist | 1 + ...bcxxabi.v1.stable.exceptions.nonew.abilist | 1 + ...bcxxabi.v1.stable.exceptions.nonew.abilist | 1 + ...xxabi.v1.stable.noexceptions.nonew.abilist | 1 + libcxx/src/print.cpp | 25 ++++++++++++------- 12 files changed, 40 insertions(+), 16 deletions(-) diff --git a/libcxx/include/print b/libcxx/include/print index 7f2b5bac3dcf6..543a540ee4f27 100644 --- a/libcxx/include/print +++ b/libcxx/include/print @@ -32,6 +32,7 @@ namespace std { */ #include <__assert> // all public C++ headers provide the 
assertion handler +#include <__availability> #include <__concepts/same_as.h> #include <__config> #include <__system_error/system_error.h> @@ -43,10 +44,6 @@ namespace std { #include #include -#if __has_include() -# include -#endif - #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif @@ -68,7 +65,8 @@ _LIBCPP_EXPORTED_FROM_ABI bool __is_windows_terminal(FILE* __stream); // Note the function is only implemented on the Windows platform. _LIBCPP_EXPORTED_FROM_ABI void __write_to_windows_console(FILE* __stream, wstring_view __view); # endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS - +#elif __has_include() +_LIBCPP_EXPORTED_FROM_ABI bool __is_posix_terminal(FILE* __stream); #endif // _LIBCPP_WIN32API #if _LIBCPP_STD_VER >= 23 @@ -195,15 +193,17 @@ inline constexpr bool __use_unicode_execution_charset = _MSVC_EXECUTION_CHARACTE inline constexpr bool __use_unicode_execution_charset = true; # endif -_LIBCPP_HIDE_FROM_ABI inline bool __is_terminal(FILE* __stream) { +_LIBCPP_HIDE_FROM_ABI inline bool __is_terminal([[maybe_unused]] FILE* __stream) { // The macro _LIBCPP_TESTING_PRINT_IS_TERMINAL is used to change // the behavior in the test. This is not part of the public API. # ifdef _LIBCPP_TESTING_PRINT_IS_TERMINAL return _LIBCPP_TESTING_PRINT_IS_TERMINAL(__stream); +# elif _LIBCPP_AVAILABILITY_HAS_PRINT == 0 + return false; # elif defined(_LIBCPP_WIN32API) return std::__is_windows_terminal(__stream); # elif __has_include() - return isatty(fileno(__stream)); + return std::__is_posix_terminal(__stream); # else # error "Provide a way to determine whether a FILE* is a terminal" # endif diff --git a/libcxx/lib/abi/CHANGELOG.TXT b/libcxx/lib/abi/CHANGELOG.TXT index 1179c253f18c8..7ff604959f4d5 100644 --- a/libcxx/lib/abi/CHANGELOG.TXT +++ b/libcxx/lib/abi/CHANGELOG.TXT @@ -16,6 +16,14 @@ New entries should be added directly below the "Version" header. 
Version 18.0 ------------ +* [libc++] Moves is_terminal to the dylib + + The patch moves the POSIX implementation of is_terminal to the dylib. This is + needed to avoid using <unistd.h> in public headers. + + All platforms + Symbol added: _ZNSt6__ndk119__is_posix_terminalEP7__sFILE + * [libc++abi] Implement __cxa_init_primary_exception and use it to optimize std::make_exception_ptr (#65534) This patch implements __cxa_init_primary_exception, an extension to the Itanium C++ ABI. diff --git a/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist index c2fea4d8adb42..2064f45bf8c08 100644 --- a/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -1495,6 +1495,7 @@ {'is_defined': True, 'name': '__ZNSt3__118shared_timed_mutex8try_lockEv', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__118shared_timed_mutexC1Ev', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__118shared_timed_mutexC2Ev', 'type': 'FUNC'} +{'is_defined': True, 'name': '__ZNSt3__119__is_posix_terminalEP7__sFILE', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__119__shared_mutex_base11lock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__119__shared_mutex_base13unlock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__119__shared_mutex_base15try_lock_sharedEv', 'type': 'FUNC'} diff --git a/libcxx/lib/abi/i686-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/i686-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist index a60f099b53205..fec3a4505a0c6 100644 --- a/libcxx/lib/abi/i686-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/i686-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -1176,6 +1176,7 @@ {'is_defined': True, 'name': 
'_ZNSt6__ndk118shared_timed_mutex8try_lockEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk118shared_timed_mutexC1Ev', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk118shared_timed_mutexC2Ev', 'type': 'FUNC'} +{'is_defined': True, 'name': '_ZNSt6__ndk119__is_posix_terminalEP7__sFILE', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk119__shared_mutex_base11lock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk119__shared_mutex_base13unlock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk119__shared_mutex_base15try_lock_sharedEv', 'type': 'FUNC'} diff --git a/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist index a159ff5221866..e52cf98dd4c4f 100644 --- a/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -534,6 +534,7 @@ {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutex8try_lockEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC1Ev', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC2Ev', 'storage_mapping_class': 'DS', 'type': 'FUNC'} +{'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__is_posix_terminalEP4FILE', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base11lock_sharedEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base13unlock_sharedEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base15try_lock_sharedEv', 
'storage_mapping_class': 'DS', 'type': 'FUNC'} diff --git a/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist index 5749a7520f9ba..52a04706ddf20 100644 --- a/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -534,6 +534,7 @@ {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutex8try_lockEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC1Ev', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC2Ev', 'storage_mapping_class': 'DS', 'type': 'FUNC'} +{'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__is_posix_terminalEP4FILE', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base11lock_sharedEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base13unlock_sharedEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base15try_lock_sharedEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} diff --git a/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist index e827114f16919..bced6b2ea81ba 100644 --- a/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -1495,6 +1495,7 @@ {'is_defined': True, 'name': '__ZNSt3__118shared_timed_mutex8try_lockEv', 'type': 'FUNC'} {'is_defined': True, 'name': 
'__ZNSt3__118shared_timed_mutexC1Ev', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__118shared_timed_mutexC2Ev', 'type': 'FUNC'} +{'is_defined': True, 'name': '__ZNSt3__119__is_posix_terminalEP7__sFILE', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__119__shared_mutex_base11lock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__119__shared_mutex_base13unlock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__119__shared_mutex_base15try_lock_sharedEv', 'type': 'FUNC'} diff --git a/libcxx/lib/abi/x86_64-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist index f4077adc074e0..efa2189e9c928 100644 --- a/libcxx/lib/abi/x86_64-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/x86_64-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -1176,6 +1176,7 @@ {'is_defined': True, 'name': '_ZNSt6__ndk118shared_timed_mutex8try_lockEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk118shared_timed_mutexC1Ev', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk118shared_timed_mutexC2Ev', 'type': 'FUNC'} +{'is_defined': True, 'name': '_ZNSt6__ndk119__is_posix_terminalEP7__sFILE', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk119__shared_mutex_base11lock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk119__shared_mutex_base13unlock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk119__shared_mutex_base15try_lock_sharedEv', 'type': 'FUNC'} diff --git a/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist index e3d3fcb35d840..ebda5b0dfba57 100644 --- a/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist @@ 
-1190,6 +1190,7 @@ {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutex8try_lockEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC1Ev', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC2Ev', 'type': 'FUNC'} +{'is_defined': True, 'name': '_ZNSt3__119__is_posix_terminalEP7__sFILE', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base11lock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base13unlock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base15try_lock_sharedEv', 'type': 'FUNC'} diff --git a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist index 16923301d2548..6432ad3be3585 100644 --- a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -1188,6 +1188,7 @@ {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutex8try_lockEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC1Ev', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC2Ev', 'type': 'FUNC'} +{'is_defined': True, 'name': '_ZNSt3__119__is_posix_terminalEP8_IO_FILE', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base11lock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base13unlock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base15try_lock_sharedEv', 'type': 'FUNC'} diff --git a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist index 2380ffb100de9..1fe84e17b3f7f 100644 --- 
a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist +++ b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist @@ -1159,6 +1159,7 @@ {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutex8try_lockEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC1Ev', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC2Ev', 'type': 'FUNC'} +{'is_defined': True, 'name': '_ZNSt3__119__is_posix_terminalEP8_IO_FILE', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base11lock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base13unlock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base15try_lock_sharedEv', 'type': 'FUNC'} diff --git a/libcxx/src/print.cpp b/libcxx/src/print.cpp index 3692187a5954a..8fa59fdd097bc 100644 --- a/libcxx/src/print.cpp +++ b/libcxx/src/print.cpp @@ -8,22 +8,26 @@ #include <__config> -#if defined(_LIBCPP_WIN32API) +#include +#include + +#include <__system_error/system_error.h> -# include -# include +#include "filesystem/error.h" +#if defined(_LIBCPP_WIN32API) # define WIN32_LEAN_AND_MEAN # define NOMINMAX # include # include - -# include <__system_error/system_error.h> - -# include "filesystem/error.h" +#elif __has_include() +# include +#endif _LIBCPP_BEGIN_NAMESPACE_STD +#if defined(_LIBCPP_WIN32API) + _LIBCPP_EXPORTED_FROM_ABI bool __is_windows_terminal(FILE* __stream) { // Note the Standard does this in one call, but it's unclear whether // an invalid handle is allowed when calling GetConsoleMode. 
@@ -52,6 +56,9 @@ __write_to_windows_console([[maybe_unused]] FILE* __stream, [[maybe_unused]] wst } # endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS -_LIBCPP_END_NAMESPACE_STD +#elif __has_include() // !_LIBCPP_WIN32API -#endif // !_LIBCPP_WIN32API +_LIBCPP_EXPORTED_FROM_ABI bool __is_posix_terminal(FILE* __stream) { return isatty(fileno(__stream)); } +#endif + +_LIBCPP_END_NAMESPACE_STD From ab57f6ced6909c202446b265a7acb2e945e4f52b Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Mon, 12 Feb 2024 19:20:26 -0800 Subject: [PATCH 07/54] [clang-format] Don't remove parentheses in macro definitions (#81444) Closes #81399. (cherry picked from commit 4af24d4ab76539706bfbceec4b3923426fb1b9e7) --- clang/lib/Format/UnwrappedLineParser.cpp | 2 +- clang/unittests/Format/FormatTest.cpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index b904e0e56d9eb..5739197988707 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -2515,7 +2515,7 @@ bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { parseChildBlock(); break; case tok::r_paren: - if (!MightBeStmtExpr && + if (!MightBeStmtExpr && !Line->InMacroBody && Style.RemoveParentheses > FormatStyle::RPS_Leave) { const auto *Prev = LeftParen->Previous; const auto *Next = Tokens->peekNextToken(); diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index a471e36f8d682..0beba12dda62a 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -26856,6 +26856,7 @@ TEST_F(FormatTest, RemoveParentheses) { EXPECT_EQ(Style.RemoveParentheses, FormatStyle::RPS_Leave); Style.RemoveParentheses = FormatStyle::RPS_MultipleParentheses; + verifyFormat("#define Foo(...) 
foo((__VA_ARGS__))", Style); verifyFormat("int x __attribute__((aligned(16))) = 0;", Style); verifyFormat("decltype((foo->bar)) baz;", Style); verifyFormat("class __declspec(dllimport) X {};", @@ -26890,6 +26891,7 @@ TEST_F(FormatTest, RemoveParentheses) { verifyFormat("return (({ 0; }));", "return ((({ 0; })));", Style); Style.RemoveParentheses = FormatStyle::RPS_ReturnStatement; + verifyFormat("#define Return0 return (0);", Style); verifyFormat("return 0;", "return (0);", Style); verifyFormat("co_return 0;", "co_return ((0));", Style); verifyFormat("return 0;", "return (((0)));", Style); From 325d4a1985d2fc2ea1851b0c27d56cc691445a0e Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 15 Feb 2024 21:56:56 -0800 Subject: [PATCH 08/54] Revert "[RISCV] Recurse on first operand of two operand shuffles (#79180)" (#80238) This reverts commit bdc41106ee48dce59c500c9a3957af947f30c8c3 on the release/18.x branch. This change was the first in a mini-series and while I'm not aware of any particular problem from having it on its own in the branch, it seems safer to ship with the previous known good state. 
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 92 ++--- .../RISCV/rvv/fixed-vectors-fp-interleave.ll | 41 +- .../RISCV/rvv/fixed-vectors-int-interleave.ll | 63 +-- .../RISCV/rvv/fixed-vectors-int-shuffles.ll | 43 +- .../rvv/fixed-vectors-interleaved-access.ll | 387 +++++++++--------- .../rvv/fixed-vectors-shuffle-transpose.ll | 128 +++--- 6 files changed, 407 insertions(+), 347 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 7895d74f06d12..dba4df77663b0 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -5033,60 +5033,56 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, MVT IndexContainerVT = ContainerVT.changeVectorElementType(IndexVT.getScalarType()); - // Base case for the recursion just below - handle the worst case - // single source permutation. Note that all the splat variants - // are handled above. - if (V2.isUndef()) { + SDValue Gather; + // TODO: This doesn't trigger for i64 vectors on RV32, since there we + // encounter a bitcasted BUILD_VECTOR with low/high i32 values. + if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) { + Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG, + Subtarget); + } else { V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); - SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS); - LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG, - Subtarget); - SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices, - DAG.getUNDEF(ContainerVT), TrueMask, VL); - return convertFromScalableVector(VT, Gather, DAG, Subtarget); - } - - // Translate the gather index we computed above (and possibly swapped) - // back to a shuffle mask. This step should disappear once we complete - // the migration to recursive design. 
- SmallVector ShuffleMaskLHS; - ShuffleMaskLHS.reserve(GatherIndicesLHS.size()); - for (SDValue GatherIndex : GatherIndicesLHS) { - if (GatherIndex.isUndef()) { - ShuffleMaskLHS.push_back(-1); - continue; + // If only one index is used, we can use a "splat" vrgather. + // TODO: We can splat the most-common index and fix-up any stragglers, if + // that's beneficial. + if (LHSIndexCounts.size() == 1) { + int SplatIndex = LHSIndexCounts.begin()->getFirst(); + Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1, + DAG.getConstant(SplatIndex, DL, XLenVT), + DAG.getUNDEF(ContainerVT), TrueMask, VL); + } else { + SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS); + LHSIndices = + convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget); + + Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices, + DAG.getUNDEF(ContainerVT), TrueMask, VL); } - auto *IdxC = cast(GatherIndex); - ShuffleMaskLHS.push_back(IdxC->getZExtValue()); } - // Recursively invoke lowering for the LHS as if there were no RHS. - // This allows us to leverage all of our single source permute tricks. - SDValue Gather = - DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS); - Gather = convertToScalableVector(ContainerVT, Gather, DAG, Subtarget); + // If a second vector operand is used by this shuffle, blend it in with an + // additional vrgather. + if (!V2.isUndef()) { + V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget); - // Blend in second vector source with an additional vrgather. - V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget); + MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); + SelectMask = + convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget); - MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); - SelectMask = - convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget); - - // If only one index is used, we can use a "splat" vrgather. 
- // TODO: We can splat the most-common index and fix-up any stragglers, if - // that's beneficial. - if (RHSIndexCounts.size() == 1) { - int SplatIndex = RHSIndexCounts.begin()->getFirst(); - Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2, - DAG.getConstant(SplatIndex, DL, XLenVT), Gather, - SelectMask, VL); - } else { - SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS); - RHSIndices = - convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget); - Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather, - SelectMask, VL); + // If only one index is used, we can use a "splat" vrgather. + // TODO: We can splat the most-common index and fix-up any stragglers, if + // that's beneficial. + if (RHSIndexCounts.size() == 1) { + int SplatIndex = RHSIndexCounts.begin()->getFirst(); + Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2, + DAG.getConstant(SplatIndex, DL, XLenVT), Gather, + SelectMask, VL); + } else { + SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS); + RHSIndices = + convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget); + Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather, + SelectMask, VL); + } } return convertFromScalableVector(VT, Gather, DAG, Subtarget); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll index dab530751ef96..799aebcaa6302 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -238,26 +238,39 @@ define <64 x half> @interleave_v32f16(<32 x half> %x, <32 x half> %y) { define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) { ; V128-LABEL: interleave_v32f32: ; V128: # %bb.0: -; V128-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; V128-NEXT: vslidedown.vi v0, v8, 16 -; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; V128-NEXT: vwaddu.vv v24, v0, 
v8 -; V128-NEXT: li a0, -1 -; V128-NEXT: vwmaccu.vx v24, a0, v8 -; V128-NEXT: lui a1, %hi(.LCPI10_0) -; V128-NEXT: addi a1, a1, %lo(.LCPI10_0) -; V128-NEXT: li a2, 32 -; V128-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; V128-NEXT: vle16.v v12, (a1) -; V128-NEXT: lui a1, 699051 -; V128-NEXT: addi a1, a1, -1366 -; V128-NEXT: vmv.s.x v0, a1 +; V128-NEXT: addi sp, sp, -16 +; V128-NEXT: .cfi_def_cfa_offset 16 +; V128-NEXT: csrr a0, vlenb +; V128-NEXT: slli a0, a0, 2 +; V128-NEXT: sub sp, sp, a0 +; V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; V128-NEXT: lui a0, %hi(.LCPI10_0) +; V128-NEXT: addi a0, a0, %lo(.LCPI10_0) +; V128-NEXT: li a1, 32 +; V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; V128-NEXT: vle16.v v4, (a0) +; V128-NEXT: lui a0, %hi(.LCPI10_1) +; V128-NEXT: addi a0, a0, %lo(.LCPI10_1) +; V128-NEXT: vle16.v v24, (a0) +; V128-NEXT: addi a0, sp, 16 +; V128-NEXT: vs4r.v v24, (a0) # Unknown-size Folded Spill +; V128-NEXT: lui a0, 699051 +; V128-NEXT: addi a0, a0, -1366 +; V128-NEXT: vmv.s.x v0, a0 +; V128-NEXT: vrgatherei16.vv v24, v8, v4 +; V128-NEXT: addi a0, sp, 16 +; V128-NEXT: vl4r.v v12, (a0) # Unknown-size Folded Reload ; V128-NEXT: vrgatherei16.vv v24, v16, v12, v0.t ; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; V128-NEXT: vwaddu.vv v0, v8, v16 +; V128-NEXT: li a0, -1 ; V128-NEXT: vwmaccu.vx v0, a0, v16 ; V128-NEXT: vmv8r.v v8, v0 ; V128-NEXT: vmv8r.v v16, v24 +; V128-NEXT: csrr a0, vlenb +; V128-NEXT: slli a0, a0, 2 +; V128-NEXT: add sp, sp, a0 +; V128-NEXT: addi sp, sp, 16 ; V128-NEXT: ret ; ; V512-LABEL: interleave_v32f32: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll index 9e21cc9e3d624..e1bd16649eede 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -188,30 +188,24 @@ define <4 x 
i32> @interleave_v4i32_offset_2(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) { ; V128-LABEL: interleave_v4i32_offset_1: ; V128: # %bb.0: -; V128-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; V128-NEXT: vwaddu.vv v10, v8, v8 -; V128-NEXT: li a0, -1 -; V128-NEXT: vwmaccu.vx v10, a0, v8 ; V128-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; V128-NEXT: vid.v v8 -; V128-NEXT: vsrl.vi v8, v8, 1 +; V128-NEXT: vid.v v10 +; V128-NEXT: vsrl.vi v11, v10, 1 +; V128-NEXT: vrgather.vv v10, v8, v11 ; V128-NEXT: vmv.v.i v0, 10 -; V128-NEXT: vadd.vi v8, v8, 1 +; V128-NEXT: vadd.vi v8, v11, 1 ; V128-NEXT: vrgather.vv v10, v9, v8, v0.t ; V128-NEXT: vmv.v.v v8, v10 ; V128-NEXT: ret ; ; V512-LABEL: interleave_v4i32_offset_1: ; V512: # %bb.0: -; V512-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; V512-NEXT: vwaddu.vv v10, v8, v8 -; V512-NEXT: li a0, -1 -; V512-NEXT: vwmaccu.vx v10, a0, v8 ; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, mu -; V512-NEXT: vid.v v8 -; V512-NEXT: vsrl.vi v8, v8, 1 +; V512-NEXT: vid.v v10 +; V512-NEXT: vsrl.vi v11, v10, 1 +; V512-NEXT: vrgather.vv v10, v8, v11 ; V512-NEXT: vmv.v.i v0, 10 -; V512-NEXT: vadd.vi v8, v8, 1 +; V512-NEXT: vadd.vi v8, v11, 1 ; V512-NEXT: vrgather.vv v10, v9, v8, v0.t ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret @@ -403,26 +397,39 @@ define <64 x i16> @interleave_v32i16(<32 x i16> %x, <32 x i16> %y) { define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) { ; V128-LABEL: interleave_v32i32: ; V128: # %bb.0: -; V128-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; V128-NEXT: vslidedown.vi v0, v8, 16 -; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; V128-NEXT: vwaddu.vv v24, v0, v8 -; V128-NEXT: li a0, -1 -; V128-NEXT: vwmaccu.vx v24, a0, v8 -; V128-NEXT: lui a1, %hi(.LCPI17_0) -; V128-NEXT: addi a1, a1, %lo(.LCPI17_0) -; V128-NEXT: li a2, 32 -; V128-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; V128-NEXT: vle16.v v12, (a1) -; V128-NEXT: lui a1, 699051 -; V128-NEXT: addi a1, a1, -1366 
-; V128-NEXT: vmv.s.x v0, a1 +; V128-NEXT: addi sp, sp, -16 +; V128-NEXT: .cfi_def_cfa_offset 16 +; V128-NEXT: csrr a0, vlenb +; V128-NEXT: slli a0, a0, 2 +; V128-NEXT: sub sp, sp, a0 +; V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; V128-NEXT: lui a0, %hi(.LCPI17_0) +; V128-NEXT: addi a0, a0, %lo(.LCPI17_0) +; V128-NEXT: li a1, 32 +; V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; V128-NEXT: vle16.v v4, (a0) +; V128-NEXT: lui a0, %hi(.LCPI17_1) +; V128-NEXT: addi a0, a0, %lo(.LCPI17_1) +; V128-NEXT: vle16.v v24, (a0) +; V128-NEXT: addi a0, sp, 16 +; V128-NEXT: vs4r.v v24, (a0) # Unknown-size Folded Spill +; V128-NEXT: lui a0, 699051 +; V128-NEXT: addi a0, a0, -1366 +; V128-NEXT: vmv.s.x v0, a0 +; V128-NEXT: vrgatherei16.vv v24, v8, v4 +; V128-NEXT: addi a0, sp, 16 +; V128-NEXT: vl4r.v v12, (a0) # Unknown-size Folded Reload ; V128-NEXT: vrgatherei16.vv v24, v16, v12, v0.t ; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; V128-NEXT: vwaddu.vv v0, v8, v16 +; V128-NEXT: li a0, -1 ; V128-NEXT: vwmaccu.vx v0, a0, v16 ; V128-NEXT: vmv8r.v v8, v0 ; V128-NEXT: vmv8r.v v16, v24 +; V128-NEXT: csrr a0, vlenb +; V128-NEXT: slli a0, a0, 2 +; V128-NEXT: add sp, sp, a0 +; V128-NEXT: addi sp, sp, 16 ; V128-NEXT: ret ; ; V512-LABEL: interleave_v32i32: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index a26a87a1f3c13..a56a81f5f793b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -612,11 +612,13 @@ define <8 x i8> @concat_4xi8_start_undef_at_start(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: concat_4xi8_start_undef_at_start: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 224 ; CHECK-NEXT: vmv.s.x 
v0, a0 -; CHECK-NEXT: vadd.vi v10, v10, -4 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vadd.vi v8, v11, -4 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> ret <8 x i8> %res @@ -626,11 +628,13 @@ define <8 x i8> @merge_start_into_end_non_contiguous(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: merge_start_into_end_non_contiguous: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 144 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vadd.vi v10, v10, -4 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vadd.vi v8, v11, -4 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> ret <8 x i8> %res @@ -671,11 +675,13 @@ define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: merge_slidedown: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 1 ; CHECK-NEXT: li a0, 195 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> ret <8 x i8> %res @@ -686,12 +692,14 @@ define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w ; CHECK-LABEL: merge_non_contiguous_slideup_slidedown: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vadd.vi v10, v10, -1 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 2 +; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: li a0, 234 ; CHECK-NEXT: vmv.s.x v0, a0 -; 
CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vadd.vi v8, v11, -1 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> ret <8 x i8> %res @@ -702,13 +710,16 @@ define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: unmergable: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vadd.vi v11, v10, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI46_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0) -; CHECK-NEXT: vle8.v v10, (a0) +; CHECK-NEXT: vle8.v v12, (a0) ; CHECK-NEXT: li a0, 234 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: vrgather.vv v10, v9, v12, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> ret <8 x i8> %res diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index f889041647b23..eeb8e517d01d2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -8,51 +8,23 @@ ; FIXME: This should be widened to a vlseg2 of <4 x i32> with VL set to 3 define {<3 x i32>, <3 x i32>} @load_factor2_v3(ptr %ptr) { -; RV32-LABEL: load_factor2_v3: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV32-NEXT: vle32.v v10, (a0) -; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v10, 2 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vwaddu.vv v8, v10, v9 -; RV32-NEXT: li a0, -1 -; RV32-NEXT: vwmaccu.vx v8, a0, v9 -; RV32-NEXT: vmv.v.i v0, 4 -; RV32-NEXT: vsetivli zero, 4, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v12, v10, 4 -; RV32-NEXT: vsetivli zero, 4, e32, 
m1, ta, mu -; RV32-NEXT: vrgather.vi v8, v12, 0, v0.t -; RV32-NEXT: vid.v v9 -; RV32-NEXT: vadd.vv v9, v9, v9 -; RV32-NEXT: vadd.vi v11, v9, 1 -; RV32-NEXT: vrgather.vv v9, v10, v11 -; RV32-NEXT: vrgather.vi v9, v12, 1, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: load_factor2_v3: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV64-NEXT: vle32.v v10, (a0) -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vid.v v8 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vadd.vi v8, v8, 1 -; RV64-NEXT: vrgather.vv v9, v10, v8 -; RV64-NEXT: vmv.v.i v0, 4 -; RV64-NEXT: vsetivli zero, 4, e32, m2, ta, ma -; RV64-NEXT: vslidedown.vi v12, v10, 4 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; RV64-NEXT: vrgather.vi v9, v12, 1, v0.t -; RV64-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v11, v10, 2 -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV64-NEXT: vwaddu.vv v8, v10, v11 -; RV64-NEXT: li a0, -1 -; RV64-NEXT: vwmaccu.vx v8, a0, v11 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; RV64-NEXT: vrgather.vi v8, v12, 0, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: load_factor2_v3: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: vadd.vv v9, v8, v8 +; CHECK-NEXT: vrgather.vv v8, v10, v9 +; CHECK-NEXT: vmv.v.i v0, 4 +; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma +; CHECK-NEXT: vslidedown.vi v12, v10, 4 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vrgather.vi v8, v12, 0, v0.t +; CHECK-NEXT: vadd.vi v11, v9, 1 +; CHECK-NEXT: vrgather.vv v9, v10, v11 +; CHECK-NEXT: vrgather.vi v9, v12, 1, v0.t +; CHECK-NEXT: ret %interleaved.vec = load <6 x i32>, ptr %ptr %v0 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> %v1 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> @@ -159,142 +131,163 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x 
i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 58 +; RV32-NEXT: li a3, 62 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x3a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 58 * vlenb -; RV32-NEXT: addi a3, a1, 256 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x3e, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 62 * vlenb +; RV32-NEXT: addi a3, a1, 128 +; RV32-NEXT: addi a4, a1, 256 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vle32.v v8, (a3) -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 25 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, a1, 128 -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vslideup.vi v16, v8, 4 +; RV32-NEXT: vle32.v v16, (a4) ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 12 +; RV32-NEXT: li a5, 29 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs4r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV32-NEXT: vid.v v20 -; RV32-NEXT: vadd.vi v4, v20, -10 -; RV32-NEXT: vmv.v.v v2, v20 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV32-NEXT: vid.v v10 ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a5, a4, 4 +; RV32-NEXT: slli a5, a4, 3 ; RV32-NEXT: add a4, a5, a4 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs2r.v v20, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vs2r.v v10, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vadd.vi v8, v10, -4 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 13 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, 
a4, 16 +; RV32-NEXT: vs2r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32-NEXT: vrgatherei16.vv v12, v16, v8 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 21 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs4r.v v12, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vadd.vi v8, v10, -10 ; RV32-NEXT: lui a4, 12 -; RV32-NEXT: vmv.s.x v1, a4 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 16 +; RV32-NEXT: vmv.s.x v0, a4 ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a5, a4, 5 -; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: slli a4, a4, 3 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v16, v16, 16 ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: li a5, 45 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs1r.v v1, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vrgatherei16.vv v16, v8, v4, v0.t +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 21 +; RV32-NEXT: li a5, 25 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs4r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v12, (a4) # Unknown-size Folded Spill ; RV32-NEXT: lui a4, %hi(.LCPI6_0) ; RV32-NEXT: addi a4, a4, %lo(.LCPI6_0) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; RV32-NEXT: lui a5, %hi(.LCPI6_1) +; RV32-NEXT: addi a5, a5, %lo(.LCPI6_1) +; RV32-NEXT: lui a6, 
1 ; RV32-NEXT: vle16.v v8, (a4) +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vle16.v v8, (a5) ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 2 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, %hi(.LCPI6_1) -; RV32-NEXT: addi a4, a4, %lo(.LCPI6_1) -; RV32-NEXT: lui a5, 1 -; RV32-NEXT: vle16.v v8, (a4) -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vle32.v v16, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 49 +; RV32-NEXT: li a4, 37 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vle32.v v24, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 41 +; RV32-NEXT: li a3, 53 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, a5, -64 +; RV32-NEXT: addi a1, a6, -64 ; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: slli a3, a1, 4 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v16, v4 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v4 -; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v8, v24, v16, v0.t ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 21 +; RV32-NEXT: li a3, 25 ; RV32-NEXT: mul a1, a1, a3 ; 
RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vmv.v.v v12, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 21 +; RV32-NEXT: li a3, 25 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: slli a3, a1, 3 +; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl2r.v v10, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vi v8, v10, -2 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 29 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v12, v8, 2 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v12, v16, v8 ; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV32-NEXT: vadd.vi v8, v2, -8 +; RV32-NEXT: vadd.vi v8, v10, -8 +; RV32-NEXT: vmv2r.v v30, v10 ; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl1r.v v28, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv1r.v v0, v28 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 45 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv.v.v v20, v12 +; RV32-NEXT: vmv.v.v v24, v12 ; RV32-NEXT: lui a1, %hi(.LCPI6_2) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_2) ; RV32-NEXT: 
vsetvli zero, a2, e32, m8, ta, mu @@ -308,165 +301,166 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 37 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v8, v0, v16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: slli a3, a1, 4 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 41 +; RV32-NEXT: li a3, 53 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v24, v16, v0.t +; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v16, v4, v0.t ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v20, v8 +; RV32-NEXT: vmv.v.v v24, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: slli a3, a1, 4 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_4) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_4) ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vle16.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: li a3, 29 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add 
a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v24, v16, v8 +; RV32-NEXT: vrgatherei16.vv v4, v16, v8 ; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vadd.vi v8, v30, -6 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 4 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl2r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vi v8, v8, -6 +; RV32-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vmv1r.v v0, v28 +; RV32-NEXT: vmv1r.v v2, v28 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 45 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v24, v16, v8, v0.t +; RV32-NEXT: vrgatherei16.vv v4, v16, v8, v0.t ; RV32-NEXT: lui a1, %hi(.LCPI6_5) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_5) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: lui a3, %hi(.LCPI6_6) ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_6) -; RV32-NEXT: vle16.v v16, (a1) -; RV32-NEXT: vle16.v v28, (a3) +; RV32-NEXT: vle16.v v20, (a1) +; RV32-NEXT: vle16.v v8, (a3) +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a3, a1, 3 +; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a1, 960 -; RV32-NEXT: vmv.s.x v20, a1 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs1r.v v20, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv.s.x v1, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 
37 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v0, v16 -; RV32-NEXT: vmv1r.v v0, v20 +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v24, v8, v20 +; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 41 +; RV32-NEXT: li a3, 53 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v28, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a3, a1, 3 +; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v24, v16, v8, v0.t ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v24, v8 +; RV32-NEXT: vmv.v.v v4, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: slli a3, a1, 3 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_7) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_7) -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vle16.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: li a3, 29 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v12, v16, v8 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 4 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 13 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi 
a1, a1, 16 ; RV32-NEXT: vl2r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vi v8, v8, -4 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 45 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv.v.v v24, v12 +; RV32-NEXT: vmv.v.v v4, v12 ; RV32-NEXT: lui a1, %hi(.LCPI6_8) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_8) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: lui a3, %hi(.LCPI6_9) ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_9) ; RV32-NEXT: vle16.v v16, (a1) -; RV32-NEXT: vle16.v v28, (a3) +; RV32-NEXT: vle16.v v20, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 37 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v0, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v24, v16 +; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 41 +; RV32-NEXT: li a3, 53 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v28, v0.t +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v24, v20, v0.t ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v24, v8 +; RV32-NEXT: vmv.v.v v4, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli 
a3, a1, 4 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 13 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: lui a1, %hi(.LCPI6_10) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_10) @@ -474,20 +468,25 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: lui a1, 15 ; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: li a3, 29 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v20, v16, 6 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl2r.v v10, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v20, v16, v10 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 45 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload @@ -502,13 +501,13 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: li a1, 1008 ; RV32-NEXT: vmv.s.x v28, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: li a3, 29 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 37 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; 
RV32-NEXT: addi a1, a1, 16 @@ -516,7 +515,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vrgatherei16.vv v8, v0, v24 ; RV32-NEXT: vmv1r.v v0, v28 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 41 +; RV32-NEXT: li a3, 53 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -529,19 +528,19 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vle16.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 12 +; RV32-NEXT: li a3, 21 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 45 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload @@ -554,33 +553,33 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vle16.v v24, (a1) ; RV32-NEXT: vle16.v v8, (a2) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a2, a1, 5 -; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: li a2, 45 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 49 +; RV32-NEXT: li a2, 37 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v8, v0, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 25 +; RV32-NEXT: li a2, 29 ; RV32-NEXT: mul a1, a1, a2 ; 
RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 41 +; RV32-NEXT: li a2, 53 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a2, a1, 5 -; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: li a2, 45 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload @@ -594,35 +593,37 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vse32.v v20, (a1) ; RV32-NEXT: addi a1, a0, 192 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a3, a2, 4 -; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: li a3, 13 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: addi a1, a0, 128 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 2 +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: addi a1, a0, 64 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: slli a3, a2, 4 +; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 21 +; RV32-NEXT: li a2, 25 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 58 +; RV32-NEXT: li a1, 62 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; 
RV32-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll index a34fa9502d93b..d0777962a7565 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll @@ -8,11 +8,13 @@ define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) { ; CHECK-LABEL: trn1.v8i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 170 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vadd.vi v10, v10, -1 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vadd.vi v8, v11, -1 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ret <8 x i8> %tmp0 @@ -22,11 +24,13 @@ define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) { ; CHECK-LABEL: trn2.v8i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 1 ; CHECK-NEXT: li a0, 170 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ret <8 x i8> %tmp0 @@ -36,14 +40,16 @@ define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) { ; CHECK-LABEL: trn1.v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vadd.vi v10, v10, -1 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: vadd.vi v8, v11, -1 ; CHECK-NEXT: lui a0, 11 ; CHECK-NEXT: addi a0, a0, -1366 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; 
CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ret <16 x i8> %tmp0 @@ -53,14 +59,16 @@ define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) { ; CHECK-LABEL: trn2.v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 1 +; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: lui a0, 11 ; CHECK-NEXT: addi a0, a0, -1366 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ret <16 x i8> %tmp0 @@ -70,10 +78,12 @@ define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) { ; CHECK-LABEL: trn1.v4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vmv.v.i v0, 10 -; CHECK-NEXT: vadd.vi v10, v10, -1 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vadd.vi v8, v11, -1 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> ret <4 x i16> %tmp0 @@ -83,10 +93,12 @@ define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) { ; CHECK-LABEL: trn2.v4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 1 ; CHECK-NEXT: vmv.v.i v0, 10 -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrgather.vv v8, v9, v10, 
v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> ret <4 x i16> %tmp0 @@ -96,11 +108,13 @@ define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) { ; CHECK-LABEL: trn1.v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 170 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vadd.vi v10, v10, -1 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vadd.vi v8, v11, -1 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> ret <8 x i16> %tmp0 @@ -110,11 +124,13 @@ define <8 x i16> @trn2.v8i16(<8 x i16> %v0, <8 x i16> %v1) { ; CHECK-LABEL: trn2.v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 1 ; CHECK-NEXT: li a0, 170 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> ret <8 x i16> %tmp0 @@ -147,10 +163,12 @@ define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: trn1.v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vmv.v.i v0, 10 -; CHECK-NEXT: vadd.vi v10, v10, -1 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vadd.vi v8, v11, -1 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x i32> %v0, <4 x 
i32> %v1, <4 x i32> ret <4 x i32> %tmp0 @@ -160,10 +178,12 @@ define <4 x i32> @trn2.v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: trn2.v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 1 ; CHECK-NEXT: vmv.v.i v0, 10 -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> ret <4 x i32> %tmp0 @@ -219,10 +239,12 @@ define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) { ; CHECK-LABEL: trn1.v4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vmv.v.i v0, 10 -; CHECK-NEXT: vadd.vi v10, v10, -1 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vadd.vi v8, v11, -1 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> ret <4 x float> %tmp0 @@ -232,10 +254,12 @@ define <4 x float> @trn2.v4f32(<4 x float> %v0, <4 x float> %v1) { ; CHECK-LABEL: trn2.v4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 1 ; CHECK-NEXT: vmv.v.i v0, 10 -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> ret <4 x float> %tmp0 @@ -268,10 +292,12 @@ define <4 x half> @trn1.v4f16(<4 x half> %v0, <4 x half> %v1) { ; CHECK-LABEL: trn1.v4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, 
e16, mf2, ta, mu -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vmv.v.i v0, 10 -; CHECK-NEXT: vadd.vi v10, v10, -1 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vadd.vi v8, v11, -1 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> ret <4 x half> %tmp0 @@ -281,10 +307,12 @@ define <4 x half> @trn2.v4f16(<4 x half> %v0, <4 x half> %v1) { ; CHECK-LABEL: trn2.v4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 1 ; CHECK-NEXT: vmv.v.i v0, 10 -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> ret <4 x half> %tmp0 @@ -294,11 +322,13 @@ define <8 x half> @trn1.v8f16(<8 x half> %v0, <8 x half> %v1) { ; CHECK-LABEL: trn1.v8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 170 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vadd.vi v10, v10, -1 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vadd.vi v8, v11, -1 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> ret <8 x half> %tmp0 @@ -308,11 +338,13 @@ define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) { ; CHECK-LABEL: trn2.v8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 1 ; CHECK-NEXT: li a0, 170 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vid.v 
v10 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> ret <8 x half> %tmp0 From 8b7b3fbe29051f5456334a9c6990e053fd3e59dc Mon Sep 17 00:00:00 2001 From: Matthew Devereau Date: Fri, 2 Feb 2024 08:12:05 +0000 Subject: [PATCH 09/54] [AArch64][SME] Implement inline-asm clobbers for za/zt0 (#79276) This enables specifing "za" or "zt0" to the clobber list for inline asm. This complies with the acle SME addition to the asm extension here: https://github.com/ARM-software/acle/pull/276 (cherry picked from commit d9c20e437fe110fb79b5ca73a52762e5b930b361) --- clang/lib/Basic/Targets/AArch64.cpp | 9 ++++++++- clang/test/CodeGen/aarch64-inline-asm.c | 8 ++++++++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 8 ++++++++ llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp | 4 ++++ llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll | 16 ++++++++++++++++ 5 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 336b7a5e3d727..3036f461c1ded 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -1187,6 +1187,8 @@ TargetInfo::BuiltinVaListKind AArch64TargetInfo::getBuiltinVaListKind() const { } const char *const AArch64TargetInfo::GCCRegNames[] = { + // clang-format off + // 32-bit Integer registers "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", "w16", "w17", "w18", "w19", "w20", "w21", "w22", @@ -1223,7 +1225,12 @@ const char *const AArch64TargetInfo::GCCRegNames[] = { // SVE predicate-as-counter registers "pn0", "pn1", "pn2", "pn3", "pn4", "pn5", "pn6", "pn7", "pn8", - "pn9", "pn10", "pn11", "pn12", "pn13", "pn14", "pn15" + "pn9", "pn10", "pn11", "pn12", 
"pn13", "pn14", "pn15", + + // SME registers + "za", "zt0", + + // clang-format on }; ArrayRef AArch64TargetInfo::getGCCRegNames() const { diff --git a/clang/test/CodeGen/aarch64-inline-asm.c b/clang/test/CodeGen/aarch64-inline-asm.c index 75e9a8c46b876..8ddee560b11da 100644 --- a/clang/test/CodeGen/aarch64-inline-asm.c +++ b/clang/test/CodeGen/aarch64-inline-asm.c @@ -95,3 +95,11 @@ void test_reduced_gpr_constraints(int var32, long var64) { // CHECK: [[ARG2:%.+]] = load i64, ptr // CHECK: call void asm sideeffect "add x0, x0, $0", "@3Ucj,~{x0}"(i64 [[ARG2]]) } + +void test_sme_constraints(){ + asm("movt zt0[3, mul vl], z0" : : : "za"); +// CHECK: call void asm sideeffect "movt zt0[3, mul vl], z0", "~{za}"() + + asm("movt zt0[3, mul vl], z0" : : : "zt0"); +// CHECK: call void asm sideeffect "movt zt0[3, mul vl], z0", "~{zt0}"() +} \ No newline at end of file diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e97f5e3220148..bfce5bc92a9ad 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10718,6 +10718,14 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( parseConstraintCode(Constraint) != AArch64CC::Invalid) return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass); + if (Constraint == "{za}") { + return std::make_pair(unsigned(AArch64::ZA), &AArch64::MPRRegClass); + } + + if (Constraint == "{zt0}") { + return std::make_pair(unsigned(AArch64::ZT0), &AArch64::ZTRRegClass); + } + // Use the default implementation in TargetLowering to convert the register // constraint into a member of a register class. 
std::pair Res; diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index f86e6947c9cdb..48e1c1bc73022 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -507,6 +507,10 @@ bool AArch64RegisterInfo::isAsmClobberable(const MachineFunction &MF, MCRegisterInfo::regsOverlap(PhysReg, AArch64::X16)) return true; + // ZA/ZT0 registers are reserved but may be permitted in the clobber list. + if (PhysReg == AArch64::ZA || PhysReg == AArch64::ZT0) + return true; + return !isReservedReg(MF, PhysReg); } diff --git a/llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll b/llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll new file mode 100644 index 0000000000000..a8cba7dc9a91e --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll @@ -0,0 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-none-linux-gnu -stop-after=aarch64-isel < %s -o - | FileCheck %s + +define void @alpha( %x) local_unnamed_addr { +entry: +; CHECK: INLINEASM &"movt zt0[3, mul vl], z0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $za + tail call void asm sideeffect "movt zt0[3, mul vl], z0", "~{za}"() + ret void +} + +define void @beta( %x) local_unnamed_addr { +entry: +; CHECK: INLINEASM &"movt zt0[3, mul vl], z0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $zt0 + tail call void asm sideeffect "movt zt0[3, mul vl], z0", "~{zt0}"() + ret void +} From e3c6d5abb1a8a4bfb40fdccc5ceda8e5377167b6 Mon Sep 17 00:00:00 2001 From: Maryam Moghadas Date: Fri, 16 Feb 2024 01:01:26 -0500 Subject: [PATCH 10/54] [PowerPC] Update V18.1.0 release notes (#81631) Adding PowerPC updates for clang and llvm into the V18.1.0 release notes. 
--------- Co-authored-by: Maryam Moghadas --- clang/docs/ReleaseNotes.rst | 26 +++++++++++++++++++++++++ llvm/docs/ReleaseNotes.rst | 38 +++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 95d44951ae7ee..22eceea5d265e 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -304,6 +304,10 @@ Non-comprehensive list of changes in this release * The version of Unicode used by Clang (primarily to parse identifiers) has been updated to 15.1. +* Clang now defines macro ``__LLVM_INSTR_PROFILE_GENERATE`` when compiling with + PGO instrumentation profile generation, and ``__LLVM_INSTR_PROFILE_USE`` when + compiling with PGO profile use. + New Compiler Flags ------------------ @@ -344,6 +348,8 @@ New Compiler Flags attribute the replaceable global new and delete operators behave normally (like other functions) with respect to visibility attributes, pragmas and options (e.g ``--fvisibility=``). +* Full register names can be used when printing assembly via ``-mregnames``. + This option now matches the one used by GCC. Deprecated Compiler Flags ------------------------- @@ -363,6 +369,7 @@ Modified Compiler Flags * ``-fvisibility-global-new-delete-hidden`` is now a deprecated spelling of ``-fvisibility-global-new-delete=force-hidden`` (``-fvisibility-global-new-delete=`` is new in this release). +* ``-fprofile-update`` is enabled for ``-fprofile-generate``. Removed Compiler Flags ------------------------- @@ -860,6 +867,9 @@ Bug Fixes in This Version Fixes (`#78290 `_) - Fixed assertion failure with deleted overloaded unary operators. Fixes (`#78314 `_) +- The XCOFF object file format does not support aliases to symbols having common + linkage. Clang now diagnoses the use of an alias for a common symbol when + compiling for AIX. - Clang now doesn't produce false-positive warning `-Wconstant-logical-operand` for logical operators in C23. 
@@ -1261,6 +1271,16 @@ CUDA Support - Clang now supports CUDA SDK up to 12.3 - Added support for sm_90a +PowerPC Support +^^^^^^^^^^^^^^^ + +- Added ``nmmintrin.h`` to intrinsics headers. +- Added ``__builtin_ppc_fence`` as barrier of code motion, and + ``__builtin_ppc_mffsl`` for corresponding instruction. +- Supported ``__attribute__((target("tune=cpu")))``. +- Emit ``float-abi`` module flag on 64-bit ELFv2 PowerPC targets if + ``long double`` type is used in current module. + AIX Support ^^^^^^^^^^^ @@ -1269,6 +1289,10 @@ AIX Support base is encoded as an immediate operand. This access sequence is not used for TLS variables larger than 32KB, and is currently only supported on 64-bit mode. +- Inline assembler supports VSR register in pure digits. +- Enabled ThinLTO support. Requires AIX 7.2 TL5 SP7 or newer, or AIX 7.3 TL2 + or newer. Similar to the LTO support on AIX, ThinLTO is implemented with + the libLTO.so plugin. WebAssembly Support ^^^^^^^^^^^^^^^^^^^ @@ -1332,6 +1356,8 @@ libclang - Exposed arguments of ``clang::annotate``. - ``clang::getCursorKindForDecl`` now recognizes linkage specifications such as ``extern "C"`` and reports them as ``CXCursor_LinkageSpec``. +- Changed the libclang library on AIX to export only the necessary symbols to + prevent issues of resolving to the wrong duplicate symbol. Static Analyzer --------------- diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 82f4a7a15c9c1..5b3210138f2f8 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -164,6 +164,30 @@ Changes to the MIPS Backend Changes to the PowerPC Backend ------------------------------ +* LLJIT's JIT linker now defaults to JITLink on 64-bit ELFv2 targets. +* Initial-exec TLS model is supported on AIX. +* Implemented new resource based scheduling model of POWER7 and POWER8. +* ``frexp`` libcall now references correct symbol name for ``fp128``. 
+* Optimized materialization of 64-bit immediates, code generation of + ``vec_promote`` and atomics. +* Global constant strings are pooled in the TOC under one entry to reduce the + number of entries in the TOC. +* Added a number of missing Power10 extended mnemonics. +* Added the SCV instruction. +* Fixed register class for the paddi instruction. +* Optimize VPERM and fix code order for swapping vector operands on LE. +* Added various bug fixes and code gen improvements. + +AIX Support/improvements: + +* Support for a non-TOC-based access sequence for the local-exec TLS model (called small local-exec). +* XCOFF toc-data peephole optimization and bug fixes. +* Move less often used __ehinfo TOC entries to the end of the TOC section. +* Fixed problems when the AIX libunwind unwinds starting from a signal handler + and the function that raised the signal happens to be a leaf function that + shares the stack frame with its caller or a leaf function that does not store + the stack frame backchain. + Changes to the RISC-V Backend ----------------------------- @@ -317,6 +341,7 @@ Changes to the LLVM tools * llvm-symbolizer now treats invalid input as an address for which source information is not found. +* Fixed big-endian support in llvm-symbolizer's DWARF location parser. * llvm-readelf now supports ``--extra-sym-info`` (``-X``) to display extra information (section name) when showing symbols. @@ -328,6 +353,13 @@ Changes to the LLVM tools * llvm-objcopy now supports ``--gap-fill`` and ``--pad-to`` options, for ELF input and binary output files only. +* Supported parsing XCOFF auxiliary symbols in obj2yaml. + +* ``llvm-ranlib`` now supports ``-X`` on AIX to specify the type of object file + ranlib should examine. + +* ``llvm-nm`` now supports ``--export-symbol`` to ignore the import symbol file. + * llvm-rc and llvm-windres now accept file path references in ``.rc`` files concatenated from multiple string literals. 
@@ -387,6 +419,12 @@ Changes to Sanitizers --------------------- * HWASan now defaults to detecting use-after-scope bugs. +Changes to the Profile Runtime +------------------------------ + +* Public header ``profile/instr_prof_interface.h`` is added to declare four + API functions to fine tune profile collection. + Other Changes ------------- From 9cf0c2962fd26b6fc9a665b75732b44b1603e1ee Mon Sep 17 00:00:00 2001 From: YunQiang Su Date: Thu, 15 Feb 2024 04:48:55 +0800 Subject: [PATCH 11/54] MipsAsmParser/O32: Don't add redundant $ to $-prefixed symbol in the la macro (#80644) When parsing the `la` macro, we add a duplicate `$` prefix in `getOrCreateSymbol`, leading to `error: Undefined temporary symbol $$yy` for code like: ``` xx: la $2,$yy $yy: nop ``` Remove the duplicate prefix. In addition, recognize `.L`-prefixed symbols as local for O32. See: #65020. --------- Co-authored-by: Fangrui Song (cherry picked from commit c007fbb19879f9b597b47ae772c53e53cdc65f29) --- .../Target/Mips/AsmParser/MipsAsmParser.cpp | 7 +++++- llvm/test/CodeGen/Mips/hf1_body.ll | 4 ++-- llvm/test/MC/Mips/macro-la-pic.s | 22 +++++++++++++++++++ 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 3c673ae938fde..36aab383da68d 100644 --- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -2920,6 +2920,11 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr, (Res.getSymA()->getSymbol().isELF() && cast(Res.getSymA()->getSymbol()).getBinding() == ELF::STB_LOCAL); + // For O32, "$"-prefixed symbols are recognized as temporary while + // .L-prefixed symbols are not (PrivateGlobalPrefix is "$"). Recognize ".L" + // manually. 
+ if (ABI.IsO32() && Res.getSymA()->getSymbol().getName().starts_with(".L")) + IsLocalSym = true; bool UseXGOT = STI->hasFeature(Mips::FeatureXGOT) && !IsLocalSym; // The case where the result register is $25 is somewhat special. If the @@ -6359,7 +6364,7 @@ bool MipsAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { return true; SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - MCSymbol *Sym = getContext().getOrCreateSymbol("$" + Identifier); + MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); // Otherwise create a symbol reference. const MCExpr *SymRef = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); diff --git a/llvm/test/CodeGen/Mips/hf1_body.ll b/llvm/test/CodeGen/Mips/hf1_body.ll index 184ea31bddc9d..c3dea67896210 100644 --- a/llvm/test/CodeGen/Mips/hf1_body.ll +++ b/llvm/test/CodeGen/Mips/hf1_body.ll @@ -23,8 +23,8 @@ entry: ; ALL: .set reorder ; ALL: .reloc 0, R_MIPS_NONE, v_sf ; GAS: la $25, $__fn_local_v_sf -; IAS: lw $25, %got($$__fn_local_v_sf)($gp) -; IAS: addiu $25, $25, %lo($$__fn_local_v_sf) +; IAS: lw $25, %got($__fn_local_v_sf)($gp) +; IAS: addiu $25, $25, %lo($__fn_local_v_sf) ; ALL: mfc1 $4, $f12 ; ALL: jr $25 ; ALL: .end __fn_stub_v_sf diff --git a/llvm/test/MC/Mips/macro-la-pic.s b/llvm/test/MC/Mips/macro-la-pic.s index 2303f34c35bcf..1875952d80c4e 100644 --- a/llvm/test/MC/Mips/macro-la-pic.s +++ b/llvm/test/MC/Mips/macro-la-pic.s @@ -255,3 +255,25 @@ la $25, 2f # XN32: lw $25, %got_disp(.Ltmp1)($gp) # encoding: [0x8f,0x99,A,A] # XN32: # fixup A - offset: 0, value: %got_disp(.Ltmp1), kind: fixup_Mips_GOT_DISP 2: + +la $2,.Lstr +# O32: lw $2, %got(.Lstr)($gp) # encoding: [0x8f,0x82,A,A] +# O32-NEXT: # fixup A - offset: 0, value: %got(.Lstr), kind: fixup_Mips_GOT +# O32-NEXT: addiu $2, $2, %lo(.Lstr) # encoding: [0x24,0x42,A,A] +# O32-NEXT: # fixup A - offset: 0, value: %lo(.Lstr), kind: fixup_Mips_LO16 + +# N32: lw $2, %got_disp(.Lstr)($gp) # encoding: 
[0x8f,0x82,A,A] +# N32-NEXT: # fixup A - offset: 0, value: %got_disp(.Lstr), kind: fixup_Mips_GOT_DISP + +la $2,$str2 +# O32: lw $2, %got($str2)($gp) # encoding: [0x8f,0x82,A,A] +# O32-NEXT: # fixup A - offset: 0, value: %got($str2), kind: fixup_Mips_GOT +# O32-NEXT: addiu $2, $2, %lo($str2) # encoding: [0x24,0x42,A,A] +# O32-NEXT: # fixup A - offset: 0, value: %lo($str2), kind: fixup_Mips_LO16 + +# N32: lw $2, %got_disp($str2)($gp) # encoding: [0x8f,0x82,A,A] +# N32-NEXT: # fixup A - offset: 0, value: %got_disp($str2), kind: fixup_Mips_GOT_DISP + +.rodata +.Lstr: .4byte 0 +$str2: .4byte 0 From c90f200b1e6d116ae8e566aae8fd86f6aa0c3808 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Fri, 16 Feb 2024 07:16:19 +0100 Subject: [PATCH 12/54] [analyzer][docs] Admit that the cleanup attribute is not supported (#81834) In fact, the cleanup attribute is only added to the CFG, but still unhandled by CSA. I propose dropping this false "support" statement from the docs. --- clang/docs/ReleaseNotes.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 22eceea5d265e..9edbfbfbbac02 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1369,9 +1369,6 @@ New features of static analysis tools, such as the Clang Static Analyzer. `Documentation `__. -- Added support for the ``cleanup`` attribute. - `Documentation `__. - - Support "Deducing this" (P0847R7). (Worked out of the box) (`af4751738db8 `__) From 023925bcdfbc06941edaa64ba789dbad2bca2ce1 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 15 Feb 2024 10:48:52 -0800 Subject: [PATCH 13/54] [RISCV] Use APInt in useInversedSetcc to prevent crashes when mask is larger than UINT64_MAX. (#81888) There are no checks that the type is legal so we need to handle any type. 
(cherry picked from commit b57ba8ec514190b38eced26d541e8e25af66c485) --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 +- llvm/test/CodeGen/RISCV/condops.ll | 51 +++++++++++++++++++++ 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index dba4df77663b0..37d94be5316ee 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -14654,8 +14654,8 @@ static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, ISD::CondCode CC = cast(Cond.getOperand(2))->get(); if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND && isa(LHS.getOperand(1)) && isNullConstant(RHS)) { - uint64_t MaskVal = LHS.getConstantOperandVal(1); - if (isPowerOf2_64(MaskVal) && !isInt<12>(MaskVal)) + const APInt &MaskVal = LHS.getConstantOperandAPInt(1); + if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12)) return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE), False, True); diff --git a/llvm/test/CodeGen/RISCV/condops.ll b/llvm/test/CodeGen/RISCV/condops.ll index 8e53782b5dcd7..101cb5aeeb094 100644 --- a/llvm/test/CodeGen/RISCV/condops.ll +++ b/llvm/test/CodeGen/RISCV/condops.ll @@ -3719,3 +3719,54 @@ entry: %cond = select i1 %tobool.not, i64 0, i64 %x ret i64 %cond } + +; Test that we don't crash on types larger than 64 bits. 
+define i64 @single_bit3(i80 %x, i64 %y) { +; RV32I-LABEL: single_bit3: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lw a0, 8(a0) +; RV32I-NEXT: slli a0, a0, 31 +; RV32I-NEXT: srai a3, a0, 31 +; RV32I-NEXT: and a0, a3, a1 +; RV32I-NEXT: and a1, a3, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: single_bit3: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a1, a1, 63 +; RV64I-NEXT: srai a0, a1, 63 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: ret +; +; RV64XVENTANACONDOPS-LABEL: single_bit3: +; RV64XVENTANACONDOPS: # %bb.0: # %entry +; RV64XVENTANACONDOPS-NEXT: andi a1, a1, 1 +; RV64XVENTANACONDOPS-NEXT: vt.maskc a0, a2, a1 +; RV64XVENTANACONDOPS-NEXT: ret +; +; RV64XTHEADCONDMOV-LABEL: single_bit3: +; RV64XTHEADCONDMOV: # %bb.0: # %entry +; RV64XTHEADCONDMOV-NEXT: slli a1, a1, 63 +; RV64XTHEADCONDMOV-NEXT: srai a0, a1, 63 +; RV64XTHEADCONDMOV-NEXT: and a0, a0, a2 +; RV64XTHEADCONDMOV-NEXT: ret +; +; RV32ZICOND-LABEL: single_bit3: +; RV32ZICOND: # %bb.0: # %entry +; RV32ZICOND-NEXT: lw a0, 8(a0) +; RV32ZICOND-NEXT: andi a3, a0, 1 +; RV32ZICOND-NEXT: czero.eqz a0, a1, a3 +; RV32ZICOND-NEXT: czero.eqz a1, a2, a3 +; RV32ZICOND-NEXT: ret +; +; RV64ZICOND-LABEL: single_bit3: +; RV64ZICOND: # %bb.0: # %entry +; RV64ZICOND-NEXT: andi a1, a1, 1 +; RV64ZICOND-NEXT: czero.eqz a0, a2, a1 +; RV64ZICOND-NEXT: ret +entry: + %and = and i80 %x, 18446744073709551616 ; 1 << 64 + %tobool.not = icmp eq i80 %and, 0 + %cond = select i1 %tobool.not, i64 0, i64 %y + ret i64 %cond +} From 38c5b352c6f3b26632f40faa17d07c2bfab88a2d Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 15 Feb 2024 16:34:40 -0800 Subject: [PATCH 14/54] [RISCV] Make sure ADDI replacement in optimizeCondBranch has a virtual reg destination. (#81938) If it isn't virtual, we may extend the live range of the physical register past were it is valid. For example, across a call. 
Found while trying to enable -riscv-enable-sink-fold which enables some copy propagation in machine sink that led to ADDIs with physical register destinations. (cherry picked from commit feee627974df81e4cbf15537e4c4688aed66b12f) --- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 3 +- llvm/test/CodeGen/RISCV/branch-opt.mir | 68 ++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/RISCV/branch-opt.mir diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 592962cebe897..d5b1ddfbeb3dc 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1229,7 +1229,8 @@ bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const { MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend(); auto DefC1 = std::find_if(++II, E, [&](const MachineInstr &I) -> bool { int64_t Imm; - return isLoadImm(&I, Imm) && Imm == C1; + return isLoadImm(&I, Imm) && Imm == C1 && + I.getOperand(0).getReg().isVirtual(); }); if (DefC1 != E) return DefC1->getOperand(0).getReg(); diff --git a/llvm/test/CodeGen/RISCV/branch-opt.mir b/llvm/test/CodeGen/RISCV/branch-opt.mir new file mode 100644 index 0000000000000..ba3a20f2fbfcd --- /dev/null +++ b/llvm/test/CodeGen/RISCV/branch-opt.mir @@ -0,0 +1,68 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +# RUN: llc %s -mtriple=riscv64 -run-pass=peephole-opt -o - | FileCheck %s + +# Make sure we shouldn't replace the %2 ADDI with the $x10 ADDI since it has a +# physical register destination. + +--- | + define void @foo(i32 signext %0) { + tail call void @bar(i32 1) + %2 = icmp ugt i32 %0, 1 + br i1 %2, label %3, label %4 + + 3: ; preds = %1 + tail call void @bar(i32 3) + ret void + + 4: ; preds = %1 + ret void + } + + declare void @bar(...) + +... 
+--- +name: foo +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: foo + ; CHECK: bb.0 (%ir-block.1): + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 + ; CHECK-NEXT: $x10 = ADDI $x0, 1 + ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) @bar, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit-def $x2 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2 + ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 2 + ; CHECK-NEXT: BLTU [[COPY]], killed [[ADDI]], %bb.2 + ; CHECK-NEXT: PseudoBR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1 (%ir-block.3): + ; CHECK-NEXT: $x10 = ADDI $x0, 3 + ; CHECK-NEXT: PseudoTAIL target-flags(riscv-call) @bar, implicit $x2, implicit $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2 (%ir-block.4): + ; CHECK-NEXT: PseudoRET + bb.0 (%ir-block.1): + successors: %bb.1, %bb.2 + liveins: $x10 + + %0:gpr = COPY $x10 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 + $x10 = ADDI $x0, 1 + PseudoCALL target-flags(riscv-call) @bar, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit-def $x2 + ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2 + %2:gpr = ADDI $x0, 2 + BLTU %0, killed %2, %bb.2 + PseudoBR %bb.1 + + bb.1 (%ir-block.3): + $x10 = ADDI $x0, 3 + PseudoTAIL target-flags(riscv-call) @bar, implicit $x2, implicit $x10 + + bb.2 (%ir-block.4): + PseudoRET + +... From d7c6794aff6625c420a719d64402827cbae55292 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Thu, 8 Feb 2024 15:28:46 +0200 Subject: [PATCH 15/54] [OpenMP] [cmake] Don't use -fno-semantic-interposition on Windows (#81113) This was added in 4b7beab4187ab0766c3d7b272511d5751431a8da. 
When the flag was added implicitly elsewhere, it was added via llvm/cmake/modules/HandleLLVMOptions.cmake, where it wasn't added on Windows/Cygwin targets. This avoids one warning per object file in OpenMP. (cherry picked from commit 72f04fa0734f8559ad515f507a4a3ce3f461f196) --- openmp/cmake/HandleOpenMPOptions.cmake | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/openmp/cmake/HandleOpenMPOptions.cmake b/openmp/cmake/HandleOpenMPOptions.cmake index 71346201129b6..9387d9b3b0ff7 100644 --- a/openmp/cmake/HandleOpenMPOptions.cmake +++ b/openmp/cmake/HandleOpenMPOptions.cmake @@ -46,7 +46,11 @@ append_if(OPENMP_HAVE_WEXTRA_FLAG "-Wno-extra" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) append_if(OPENMP_HAVE_WPEDANTIC_FLAG "-Wno-pedantic" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) append_if(OPENMP_HAVE_WMAYBE_UNINITIALIZED_FLAG "-Wno-maybe-uninitialized" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) -append_if(OPENMP_HAVE_NO_SEMANTIC_INTERPOSITION "-fno-semantic-interposition" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) +if (NOT (WIN32 OR CYGWIN)) + # This flag is not relevant on Windows; the flag is accepted, but produces warnings + # about argument unused during compilation. + append_if(OPENMP_HAVE_NO_SEMANTIC_INTERPOSITION "-fno-semantic-interposition" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) +endif() append_if(OPENMP_HAVE_FUNCTION_SECTIONS "-ffunction-section" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) append_if(OPENMP_HAVE_DATA_SECTIONS "-fdata-sections" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) From e098f6c4aaccec326a2fc4b45323b3822e02c270 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 12 Feb 2024 10:00:34 +0100 Subject: [PATCH 16/54] [AArch64] Only apply bool vector bitcast opt if result is scalar (#81256) This optimization tries to optimize bitcasts from `<N x i1>` to iN, but currently also triggers for `<N x i1>` to `<M x iK>` bitcasts, if custom lowering has been requested for these for an unrelated reason. Fix this by explicitly checking that the result type is scalar. Fixes https://github.com/llvm/llvm-project/issues/81216.
(cherry picked from commit 92d79922051f732560acf3791b543df1e6580689) --- .../Target/AArch64/AArch64ISelLowering.cpp | 3 +- .../AArch64/vec-combine-compare-to-bitmask.ll | 28 +++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index bfce5bc92a9ad..0287856560e91 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -24427,7 +24427,8 @@ void AArch64TargetLowering::ReplaceBITCASTResults( return; } - if (SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1) + if (SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1 && + !VT.isVector()) return replaceBoolVectorBitcast(N, Results, DAG); if (VT != MVT::i16 || (SrcVT != MVT::f16 && SrcVT != MVT::bf16)) diff --git a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll index 1b22e2f900ddb..557aa010b3a7d 100644 --- a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll +++ b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll @@ -489,3 +489,31 @@ define i6 @no_combine_illegal_num_elements(<6 x i32> %vec) { %bitmask = bitcast <6 x i1> %cmp_result to i6 ret i6 %bitmask } + +; Only apply the combine when casting a vector to a scalar. 
+define <2 x i8> @vector_to_vector_cast(<16 x i1> %arg) nounwind { +; CHECK-LABEL: vector_to_vector_cast: +; CHECK: ; %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: shl.16b v0, v0, #7 +; CHECK-NEXT: Lloh36: +; CHECK-NEXT: adrp x8, lCPI20_0@PAGE +; CHECK-NEXT: Lloh37: +; CHECK-NEXT: ldr q1, [x8, lCPI20_0@PAGEOFF] +; CHECK-NEXT: add x8, sp, #14 +; CHECK-NEXT: cmlt.16b v0, v0, #0 +; CHECK-NEXT: and.16b v0, v0, v1 +; CHECK-NEXT: ext.16b v1, v0, v0, #8 +; CHECK-NEXT: zip1.16b v0, v0, v1 +; CHECK-NEXT: addv.8h h0, v0 +; CHECK-NEXT: str h0, [sp, #14] +; CHECK-NEXT: ld1.b { v0 }[0], [x8] +; CHECK-NEXT: orr x8, x8, #0x1 +; CHECK-NEXT: ld1.b { v0 }[4], [x8] +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh36, Lloh37 + %bc = bitcast <16 x i1> %arg to <2 x i8> + ret <2 x i8> %bc +} From 5750be5fc5d130c62f3f7703926ac2c8c4992586 Mon Sep 17 00:00:00 2001 From: yozhu <101743168+yozhu@users.noreply.github.com> Date: Fri, 16 Feb 2024 04:53:41 -0800 Subject: [PATCH 17/54] [CFI][annotation] Leave alone function pointers in function annotations (#81673) Function annotation, as part of llvm.metadata, is for the function itself and doesn't apply to its corresponding jump table entry, so with CFI we shouldn't replace function pointer in function annotation with pointer to its corresponding jump table entry. 
(cherry picked from commit c7a0db1e20251f436e3d500eac03bd9be1d88b45) --- llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 28 +++++++- .../LowerTypeTests/cfi-annotation.ll | 68 +++++++++++++++++++ 2 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/LowerTypeTests/cfi-annotation.ll diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index 733f290b1bc93..633fcb3314c42 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -470,6 +470,9 @@ class LowerTypeTestsModule { Function *WeakInitializerFn = nullptr; + GlobalVariable *GlobalAnnotation; + DenseSet FunctionAnnotations; + bool shouldExportConstantsAsAbsoluteSymbols(); uint8_t *exportTypeId(StringRef TypeId, const TypeIdLowering &TIL); TypeIdLowering importTypeId(StringRef TypeId); @@ -531,6 +534,10 @@ class LowerTypeTestsModule { /// replace each use, which is a direct function call. void replaceDirectCalls(Value *Old, Value *New); + bool isFunctionAnnotation(Value *V) const { + return FunctionAnnotations.contains(V); + } + public: LowerTypeTestsModule(Module &M, ModuleAnalysisManager &AM, ModuleSummaryIndex *ExportSummary, @@ -1377,8 +1384,11 @@ void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr( // (all?) targets. Switch to a runtime initializer. SmallSetVector GlobalVarUsers; findGlobalVariableUsersOf(F, GlobalVarUsers); - for (auto *GV : GlobalVarUsers) + for (auto *GV : GlobalVarUsers) { + if (GV == GlobalAnnotation) + continue; moveInitializerToModuleConstructor(GV); + } // Can not RAUW F with an expression that uses F. Replace with a temporary // placeholder first. @@ -1837,6 +1847,16 @@ LowerTypeTestsModule::LowerTypeTestsModule( } OS = TargetTriple.getOS(); ObjectFormat = TargetTriple.getObjectFormat(); + + // Function annotation describes or applies to function itself, and + // shouldn't be associated with jump table thunk generated for CFI. 
+ GlobalAnnotation = M.getGlobalVariable("llvm.global.annotations"); + if (GlobalAnnotation && GlobalAnnotation->hasInitializer()) { + const ConstantArray *CA = + cast(GlobalAnnotation->getInitializer()); + for (Value *Op : CA->operands()) + FunctionAnnotations.insert(Op); + } } bool LowerTypeTestsModule::runForTesting(Module &M, ModuleAnalysisManager &AM) { @@ -1896,10 +1916,14 @@ void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, if (isa(U.getUser())) continue; - // Skip direct calls to externally defined or non-dso_local functions + // Skip direct calls to externally defined or non-dso_local functions. if (isDirectCall(U) && (Old->isDSOLocal() || !IsJumpTableCanonical)) continue; + // Skip function annotation. + if (isFunctionAnnotation(U.getUser())) + continue; + // Must handle Constants specially, we cannot call replaceUsesOfWith on a // constant because they are uniqued. if (auto *C = dyn_cast(U.getUser())) { diff --git a/llvm/test/Transforms/LowerTypeTests/cfi-annotation.ll b/llvm/test/Transforms/LowerTypeTests/cfi-annotation.ll new file mode 100644 index 0000000000000..034af89112cb6 --- /dev/null +++ b/llvm/test/Transforms/LowerTypeTests/cfi-annotation.ll @@ -0,0 +1,68 @@ +; REQUIRES: aarch64-registered-target + +; RUN: opt -passes=lowertypetests %s -o %t.o +; RUN: llvm-dis %t.o -o - | FileCheck %s --check-prefix=CHECK-foobar +; CHECK-foobar: {{llvm.global.annotations = .*[foo|bar], .*[foo|bar],}} +; RUN: llvm-dis %t.o -o - | FileCheck %s --check-prefix=CHECK-cfi +; CHECK-cfi-NOT: {{llvm.global.annotations = .*cfi.*}} + +target triple = "aarch64-none-linux-gnu" + +@.src = private unnamed_addr constant [7 x i8] c"test.c\00", align 1 +@.str = private unnamed_addr constant [30 x i8] c"annotation_string_literal_bar\00", section "llvm.metadata" +@.str.1 = private unnamed_addr constant [7 x i8] c"test.c\00", section "llvm.metadata" +@.str.2 = private unnamed_addr constant [30 x i8] c"annotation_string_literal_foo\00", section "llvm.metadata" 
+@llvm.global.annotations = appending global [2 x { ptr, ptr, ptr, i32, ptr }] [{ ptr, ptr, ptr, i32, ptr } { ptr @bar, ptr @.str, ptr @.str.1, i32 2, ptr null }, { ptr, ptr, ptr, i32, ptr } { ptr @foo, ptr @.str.2, ptr @.str.1, i32 1, ptr null }], section "llvm.metadata" + +define i32 @bar(i32 noundef %0) #0 !type !8 !type !9 { + %2 = alloca i32, align 4 + store i32 %0, ptr %2, align 4 + %3 = load i32, ptr %2, align 4 + %4 = call i32 @foo(i32 noundef %3) + ret i32 %4 +} + +declare !type !8 !type !9 i32 @foo(i32 noundef) #1 + +define i32 @test(i32 noundef %0) #0 !type !8 !type !9 { + %2 = alloca i32, align 4 + %3 = alloca ptr, align 8 + store i32 %0, ptr %2, align 4 + %4 = load i32, ptr %2, align 4 + %5 = icmp sgt i32 %4, 0 + %6 = zext i1 %5 to i64 + %7 = select i1 %5, ptr @foo, ptr @bar + store ptr %7, ptr %3, align 8 + %8 = load ptr, ptr %3, align 8 + %9 = call i1 @llvm.type.test(ptr %8, metadata !"_ZTSFiiE"), !nosanitize !10 + br i1 %9, label %11, label %10, !nosanitize !10 + +10: + call void @llvm.ubsantrap(i8 2) #4, !nosanitize !10 + unreachable, !nosanitize !10 + +11: + %12 = load i32, ptr %2, align 4 + %13 = call i32 %8(i32 noundef %12) + ret i32 %13 +} + +declare i1 @llvm.type.test(ptr, metadata) +declare void @llvm.ubsantrap(i8 immarg) + +attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" } +attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" } +attributes #4 = { noreturn nounwind } + +!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 4, !"CFI Canonical Jump Tables", i32 0} +!2 = !{i32 8, !"PIC Level", i32 2} +!3 = !{i32 7, !"uwtable", i32 2} +!4 = !{i32 7, !"frame-pointer", i32 1} +!5 = !{i32 1, !"ThinLTO", i32 0} +!6 = !{i32 1, 
!"EnableSplitLTOUnit", i32 1} +!8 = !{i64 0, !"_ZTSFiiE"} +!9 = !{i64 0, !"_ZTSFiiE.generalized"} +!10 = !{} From 28be6f670fabe068e02d59670c26571efad1be4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 13 Feb 2024 09:32:40 +0200 Subject: [PATCH 18/54] [LLD] [MinGW] Implement the --lto-emit-asm and -plugin-opt=emit-llvm options (#81475) These were implemented in the COFF linker in 3923e61b96cf90123762f0e0381504efaba2d77a and d12b99a4313816cf99e97cb5f579e2d51ba72b0b. This matches the corresponding options in the ELF linker. (cherry picked from commit d033366bd2189e33343ca93d276b40341dc39770) --- lld/MinGW/Driver.cpp | 4 ++++ lld/MinGW/Options.td | 5 +++++ lld/test/MinGW/driver.test | 7 +++++++ 3 files changed, 16 insertions(+) diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp index 4752d92e3b1d7..7b16764dd2c7c 100644 --- a/lld/MinGW/Driver.cpp +++ b/lld/MinGW/Driver.cpp @@ -448,6 +448,10 @@ bool link(ArrayRef argsArr, llvm::raw_ostream &stdoutOS, add("-lto-cs-profile-generate"); if (auto *arg = args.getLastArg(OPT_lto_cs_profile_file)) add("-lto-cs-profile-file:" + StringRef(arg->getValue())); + if (args.hasArg(OPT_plugin_opt_emit_llvm)) + add("-lldemit:llvm"); + if (args.hasArg(OPT_lto_emit_asm)) + add("-lldemit:asm"); if (auto *a = args.getLastArg(OPT_thinlto_cache_dir)) add("-lldltocache:" + StringRef(a->getValue())); diff --git a/lld/MinGW/Options.td b/lld/MinGW/Options.td index 02f00f27406c0..9a0a96aac7f1c 100644 --- a/lld/MinGW/Options.td +++ b/lld/MinGW/Options.td @@ -158,6 +158,8 @@ def lto_cs_profile_generate: FF<"lto-cs-profile-generate">, HelpText<"Perform context sensitive PGO instrumentation">; def lto_cs_profile_file: JJ<"lto-cs-profile-file=">, HelpText<"Context sensitive profile file path">; +def lto_emit_asm: FF<"lto-emit-asm">, + HelpText<"Emit assembly code">; def thinlto_cache_dir: JJ<"thinlto-cache-dir=">, HelpText<"Path to ThinLTO cached object file directory">; @@ -181,6 +183,9 @@ def: 
J<"plugin-opt=cs-profile-path=">, Alias, HelpText<"Alias for --lto-cs-profile-file">; def plugin_opt_dwo_dir_eq: J<"plugin-opt=dwo_dir=">, HelpText<"Directory to store .dwo files when LTO and debug fission are used">; +def plugin_opt_emit_asm: F<"plugin-opt=emit-asm">, + Alias, HelpText<"Alias for --lto-emit-asm">; +def plugin_opt_emit_llvm: F<"plugin-opt=emit-llvm">; def: J<"plugin-opt=jobs=">, Alias, HelpText<"Alias for --thinlto-jobs=">; def plugin_opt_mcpu_eq: J<"plugin-opt=mcpu=">; diff --git a/lld/test/MinGW/driver.test b/lld/test/MinGW/driver.test index 559a32bfa242f..057de2a22f6a0 100644 --- a/lld/test/MinGW/driver.test +++ b/lld/test/MinGW/driver.test @@ -409,6 +409,13 @@ LTO_OPTS: -mllvm:-mcpu=x86-64 -opt:lldlto=2 -dwodir:foo -lto-cs-profile-generate RUN: ld.lld -### foo.o -m i386pep --lto-O2 --lto-CGO1 --lto-cs-profile-generate --lto-cs-profile-file=foo 2>&1 | FileCheck -check-prefix=LTO_OPTS2 %s LTO_OPTS2:-opt:lldlto=2 -opt:lldltocgo=1 -lto-cs-profile-generate -lto-cs-profile-file:foo +RUN: ld.lld -### foo.o -m i386pe -plugin-opt=emit-asm 2>&1 | FileCheck -check-prefix=LTO_EMIT_ASM %s +RUN: ld.lld -### foo.o -m i386pe --lto-emit-asm 2>&1 | FileCheck -check-prefix=LTO_EMIT_ASM %s +LTO_EMIT_ASM: -lldemit:asm + +RUN: ld.lld -### foo.o -m i386pe -plugin-opt=emit-llvm 2>&1 | FileCheck -check-prefix=LTO_EMIT_LLVM %s +LTO_EMIT_LLVM: -lldemit:llvm + Test GCC specific LTO options that GCC passes unconditionally, that we ignore. 
RUN: ld.lld -### foo.o -m i386pep -plugin /usr/lib/gcc/x86_64-w64-mingw32/10-posix/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-w64-mingw32/10-posix/lto-wrapper -plugin-opt=-fresolution=/tmp/ccM9d4fP.res -plugin-opt=-pass-through=-lmingw32 2> /dev/null From cf130269fade1c08e3f83a7f34bc450a27287852 Mon Sep 17 00:00:00 2001 From: Xing Xue Date: Wed, 7 Feb 2024 15:24:52 -0500 Subject: [PATCH 19/54] [OpenMP][test]Flip bit-fields in 'struct flags' for big-endian in test cases (#79895) This patch flips bit-fields in `struct flags` for big-endian in test cases to be consistent with the definition of the structure in libomp `kmp.h`. (cherry picked from commit 7a9b0e4acb3b5ee15f8eb138aad937cfa4763fb8) --- openmp/runtime/src/kmp.h | 3 ++- .../test/tasking/bug_nested_proxy_task.c | 21 +++++++++++++------ .../test/tasking/bug_proxy_task_dep_waiting.c | 21 +++++++++++++------ .../test/tasking/hidden_helper_task/common.h | 18 +++++++++++++--- 4 files changed, 47 insertions(+), 16 deletions(-) diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index c287a31e0b1b5..b147063d22826 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -2494,7 +2494,8 @@ typedef struct kmp_dephash_entry kmp_dephash_entry_t; #define KMP_DEP_MTX 0x4 #define KMP_DEP_SET 0x8 #define KMP_DEP_ALL 0x80 -// Compiler sends us this info: +// Compiler sends us this info. Note: some test cases contain an explicit copy +// of this struct and should be in sync with any changes here. 
typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; diff --git a/openmp/runtime/test/tasking/bug_nested_proxy_task.c b/openmp/runtime/test/tasking/bug_nested_proxy_task.c index 43502bdcd1abd..24fe1f3fe7607 100644 --- a/openmp/runtime/test/tasking/bug_nested_proxy_task.c +++ b/openmp/runtime/test/tasking/bug_nested_proxy_task.c @@ -50,12 +50,21 @@ typedef struct kmp_depend_info { union { kmp_uint8 flag; // flag as an unsigned char struct { // flag as a set of 8 bits - unsigned in : 1; - unsigned out : 1; - unsigned mtx : 1; - unsigned set : 1; - unsigned unused : 3; - unsigned all : 1; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + unsigned all : 1; + unsigned unused : 3; + unsigned set : 1; + unsigned mtx : 1; + unsigned out : 1; + unsigned in : 1; +#else + unsigned in : 1; + unsigned out : 1; + unsigned mtx : 1; + unsigned set : 1; + unsigned unused : 3; + unsigned all : 1; +#endif } flags; }; } kmp_depend_info_t; diff --git a/openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c b/openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c index ff75df51aff07..688860c035728 100644 --- a/openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c +++ b/openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c @@ -47,12 +47,21 @@ typedef struct kmp_depend_info { union { kmp_uint8 flag; // flag as an unsigned char struct { // flag as a set of 8 bits - unsigned in : 1; - unsigned out : 1; - unsigned mtx : 1; - unsigned set : 1; - unsigned unused : 3; - unsigned all : 1; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + unsigned all : 1; + unsigned unused : 3; + unsigned set : 1; + unsigned mtx : 1; + unsigned out : 1; + unsigned in : 1; +#else + unsigned in : 1; + unsigned out : 1; + unsigned mtx : 1; + unsigned set : 1; + unsigned unused : 3; + unsigned all : 1; +#endif } flags; }; } kmp_depend_info_t; diff --git a/openmp/runtime/test/tasking/hidden_helper_task/common.h b/openmp/runtime/test/tasking/hidden_helper_task/common.h index 
402ecf3ed553c..ba57656cbac41 100644 --- a/openmp/runtime/test/tasking/hidden_helper_task/common.h +++ b/openmp/runtime/test/tasking/hidden_helper_task/common.h @@ -17,9 +17,21 @@ typedef struct kmp_depend_info { union { unsigned char flag; struct { - bool in : 1; - bool out : 1; - bool mtx : 1; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + unsigned all : 1; + unsigned unused : 3; + unsigned set : 1; + unsigned mtx : 1; + unsigned out : 1; + unsigned in : 1; +#else + unsigned in : 1; + unsigned out : 1; + unsigned mtx : 1; + unsigned set : 1; + unsigned unused : 3; + unsigned all : 1; +#endif } flags; }; } kmp_depend_info_t; From 34fdf52cce678cb4fd3714c31f1a798bece84303 Mon Sep 17 00:00:00 2001 From: Xing Xue Date: Tue, 13 Feb 2024 15:11:24 -0500 Subject: [PATCH 20/54] [OpenMP][AIX]Define struct kmp_base_tas_lock with the order of two members swapped for big-endian (#79188) The direct lock data structure has bit `0` (the least significant bit) of the first 32-bit word set to `1` to indicate it is a direct lock. On the other hand, the first word (in 32-bit mode) or first two words (in 64-bit mode) of an indirect lock are the address of the entry allocated from the indirect lock table. The runtime checks bit `0` of the first 32-bit word to tell if this is a direct or an indirect lock. This works fine for 32-bit and 64-bit little-endian because its memory layout of a 64-bit address is (`low word`, `high word`). However, this causes problems for big-endian where the memory layout of a 64-bit address is (`high word`, `low word`). If an address of the indirect lock table entry is something like `0x110035300`, i.e., (`0x1`, `0x10035300`), it is treated as a direct lock. This patch defines `struct kmp_base_tas_lock` with the ordering of the two 32-bit members flipped for big-endian PPC64 so that when checking/setting tags in member `poll`, the second word (the low word) is used. 
This patch also changes places where `poll` is not already explicitly specified for checking/setting tags. (cherry picked from commit ac97562c99c3ae97f063048ccaf08ebdae60ac30) --- openmp/runtime/src/kmp_csupport.cpp | 5 +++-- openmp/runtime/src/kmp_gsupport.cpp | 2 +- openmp/runtime/src/kmp_lock.cpp | 6 +++--- openmp/runtime/src/kmp_lock.h | 17 +++++++++++++---- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp index 9eeaeb88fb9ec..878e78b5c7ad2 100644 --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -1533,8 +1533,9 @@ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint); if (*lk == 0) { if (KMP_IS_D_LOCK(lockseq)) { - KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, - KMP_GET_D_TAG(lockseq)); + KMP_COMPARE_AND_STORE_ACQ32( + (volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0, + KMP_GET_D_TAG(lockseq)); } else { __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq)); } diff --git a/openmp/runtime/src/kmp_gsupport.cpp b/openmp/runtime/src/kmp_gsupport.cpp index 88189659a2341..4dc8a90f83b4e 100644 --- a/openmp/runtime/src/kmp_gsupport.cpp +++ b/openmp/runtime/src/kmp_gsupport.cpp @@ -144,7 +144,7 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_BARRIER)(void) { // Mutual exclusion -// The symbol that icc/ifort generates for unnamed for unnamed critical sections +// The symbol that icc/ifort generates for unnamed critical sections // - .gomp_critical_user_ - is defined using .comm in any objects reference it. // We can't reference it directly here in C code, as the symbol contains a ".". 
// diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp index 85c54f4cdc7e9..0ad14f862bcb9 100644 --- a/openmp/runtime/src/kmp_lock.cpp +++ b/openmp/runtime/src/kmp_lock.cpp @@ -2689,7 +2689,7 @@ void __kmp_spin_backoff(kmp_backoff_t *boff) { // lock word. static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) { - TCW_4(*lck, KMP_GET_D_TAG(seq)); + TCW_4(((kmp_base_tas_lock_t *)lck)->poll, KMP_GET_D_TAG(seq)); KA_TRACE( 20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq)); @@ -3180,8 +3180,8 @@ kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock, lck->type = tag; if (OMP_LOCK_T_SIZE < sizeof(void *)) { - *((kmp_lock_index_t *)user_lock) = idx - << 1; // indirect lock word must be even + *(kmp_lock_index_t *)&(((kmp_base_tas_lock_t *)user_lock)->poll) = + idx << 1; // indirect lock word must be even } else { *((kmp_indirect_lock_t **)user_lock) = lck; } diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h index f21179b4eb68a..e2a0cda01a971 100644 --- a/openmp/runtime/src/kmp_lock.h +++ b/openmp/runtime/src/kmp_lock.h @@ -50,7 +50,7 @@ typedef struct ident ident_t; // recent versions), but we are bounded by the pointer-sized chunks that // the Intel compiler allocates. -#if KMP_OS_LINUX && defined(KMP_GOMP_COMPAT) +#if (KMP_OS_LINUX || KMP_OS_AIX) && defined(KMP_GOMP_COMPAT) #define OMP_LOCK_T_SIZE sizeof(int) #define OMP_NEST_LOCK_T_SIZE sizeof(void *) #else @@ -120,8 +120,15 @@ extern void __kmp_validate_locks(void); struct kmp_base_tas_lock { // KMP_LOCK_FREE(tas) => unlocked; locked: (gtid+1) of owning thread +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __LP64__ + // Flip the ordering of the high and low 32-bit member to be consistent + // with the memory layout of the address in 64-bit big-endian. 
+ kmp_int32 depth_locked; // depth locked, for nested locks only + std::atomic poll; +#else std::atomic poll; kmp_int32 depth_locked; // depth locked, for nested locks only +#endif }; typedef struct kmp_base_tas_lock kmp_base_tas_lock_t; @@ -1138,11 +1145,13 @@ extern int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32); // Extracts direct lock tag from a user lock pointer #define KMP_EXTRACT_D_TAG(l) \ - (*((kmp_dyna_lock_t *)(l)) & ((1 << KMP_LOCK_SHIFT) - 1) & \ - -(*((kmp_dyna_lock_t *)(l)) & 1)) + ((kmp_dyna_lock_t)((kmp_base_tas_lock_t *)(l))->poll & \ + ((1 << KMP_LOCK_SHIFT) - 1) & \ + -((kmp_dyna_lock_t)((kmp_tas_lock_t *)(l))->lk.poll & 1)) // Extracts indirect lock index from a user lock pointer -#define KMP_EXTRACT_I_INDEX(l) (*(kmp_lock_index_t *)(l) >> 1) +#define KMP_EXTRACT_I_INDEX(l) \ + ((kmp_lock_index_t)((kmp_base_tas_lock_t *)(l))->poll >> 1) // Returns function pointer to the direct lock function with l (kmp_dyna_lock_t // *) and op (operation type). From 0a44c3792a6ff799df5f100670d7e19d1bc49f03 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Tue, 13 Feb 2024 11:29:21 +0100 Subject: [PATCH 21/54] [lld] Add target support for SystemZ (s390x) (#75643) This patch adds full support for linking SystemZ (ELF s390x) object files. Support should be generally complete: - All relocation types are supported. - Full shared library support (DYNAMIC, GOT, PLT, ifunc). - Relaxation of TLS and GOT relocations where appropriate. - Platform-specific test cases. In addition to new platform code and the obvious changes, there were a few additional changes to common code: - Add three new RelExpr members (R_GOTPLT_OFF, R_GOTPLT_PC, and R_PLT_GOTREL) needed to support certain s390x relocations. I chose not to use a platform-specific name since nothing in the definition of these relocs is actually platform-specific; it is well possible that other platforms will need the same. 
- A couple of tweaks to TLS relocation handling, as the particular semantics of the s390x versions differ slightly. See comments in the code. This was tested by building and testing >1500 Fedora packages, with only a handful of failures; as these also have issues when building with LLD on other architectures, they seem unrelated. Co-authored-by: Tulio Magno Quites Machado Filho (cherry picked from commit fe3406e349884e4ef61480dd0607f1e237102c74) --- lld/ELF/Arch/SystemZ.cpp | 607 ++++++++++++++++++++ lld/ELF/CMakeLists.txt | 1 + lld/ELF/Driver.cpp | 3 +- lld/ELF/InputFiles.cpp | 2 + lld/ELF/InputSection.cpp | 7 + lld/ELF/Relocations.cpp | 25 +- lld/ELF/Relocations.h | 3 + lld/ELF/ScriptParser.cpp | 1 + lld/ELF/SyntheticSections.cpp | 3 + lld/ELF/Target.cpp | 2 + lld/ELF/Target.h | 1 + lld/test/ELF/Inputs/systemz-init.s | 5 + lld/test/ELF/basic-systemz.s | 63 ++ lld/test/ELF/emulation-systemz.s | 29 + lld/test/ELF/lto/systemz.ll | 18 + lld/test/ELF/systemz-got.s | 16 + lld/test/ELF/systemz-gotent-relax-align.s | 48 ++ lld/test/ELF/systemz-gotent-relax-und-dso.s | 68 +++ lld/test/ELF/systemz-gotent-relax.s | 91 +++ lld/test/ELF/systemz-ifunc-nonpreemptible.s | 75 +++ lld/test/ELF/systemz-init-padding.s | 27 + lld/test/ELF/systemz-pie.s | 38 ++ lld/test/ELF/systemz-plt.s | 83 +++ lld/test/ELF/systemz-reloc-abs.s | 32 ++ lld/test/ELF/systemz-reloc-disp12.s | 21 + lld/test/ELF/systemz-reloc-disp20.s | 21 + lld/test/ELF/systemz-reloc-got.s | 92 +++ lld/test/ELF/systemz-reloc-gotrel.s | 36 ++ lld/test/ELF/systemz-reloc-pc16.s | 39 ++ lld/test/ELF/systemz-reloc-pc32.s | 39 ++ lld/test/ELF/systemz-reloc-pcdbl.s | 68 +++ lld/test/ELF/systemz-tls-gd.s | 142 +++++ lld/test/ELF/systemz-tls-ie.s | 87 +++ lld/test/ELF/systemz-tls-ld.s | 114 ++++ lld/test/ELF/systemz-tls-le.s | 61 ++ lld/test/lit.cfg.py | 1 + 36 files changed, 1959 insertions(+), 10 deletions(-) create mode 100644 lld/ELF/Arch/SystemZ.cpp create mode 100644 lld/test/ELF/Inputs/systemz-init.s create mode 100644 
lld/test/ELF/basic-systemz.s create mode 100644 lld/test/ELF/emulation-systemz.s create mode 100644 lld/test/ELF/lto/systemz.ll create mode 100644 lld/test/ELF/systemz-got.s create mode 100644 lld/test/ELF/systemz-gotent-relax-align.s create mode 100644 lld/test/ELF/systemz-gotent-relax-und-dso.s create mode 100644 lld/test/ELF/systemz-gotent-relax.s create mode 100644 lld/test/ELF/systemz-ifunc-nonpreemptible.s create mode 100644 lld/test/ELF/systemz-init-padding.s create mode 100644 lld/test/ELF/systemz-pie.s create mode 100644 lld/test/ELF/systemz-plt.s create mode 100644 lld/test/ELF/systemz-reloc-abs.s create mode 100644 lld/test/ELF/systemz-reloc-disp12.s create mode 100644 lld/test/ELF/systemz-reloc-disp20.s create mode 100644 lld/test/ELF/systemz-reloc-got.s create mode 100644 lld/test/ELF/systemz-reloc-gotrel.s create mode 100644 lld/test/ELF/systemz-reloc-pc16.s create mode 100644 lld/test/ELF/systemz-reloc-pc32.s create mode 100644 lld/test/ELF/systemz-reloc-pcdbl.s create mode 100644 lld/test/ELF/systemz-tls-gd.s create mode 100644 lld/test/ELF/systemz-tls-ie.s create mode 100644 lld/test/ELF/systemz-tls-ld.s create mode 100644 lld/test/ELF/systemz-tls-le.s diff --git a/lld/ELF/Arch/SystemZ.cpp b/lld/ELF/Arch/SystemZ.cpp new file mode 100644 index 0000000000000..d37db6877559d --- /dev/null +++ b/lld/ELF/Arch/SystemZ.cpp @@ -0,0 +1,607 @@ +//===- SystemZ.cpp --------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "OutputSections.h" +#include "Symbols.h" +#include "SyntheticSections.h" +#include "Target.h" +#include "lld/Common/ErrorHandler.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::support::endian; +using namespace llvm::ELF; +using namespace lld; +using namespace lld::elf; + +namespace { +class SystemZ : public TargetInfo { +public: + SystemZ(); + int getTlsGdRelaxSkip(RelType type) const override; + RelExpr getRelExpr(RelType type, const Symbol &s, + const uint8_t *loc) const override; + RelType getDynRel(RelType type) const override; + void writeGotHeader(uint8_t *buf) const override; + void writeGotPlt(uint8_t *buf, const Symbol &s) const override; + void writeIgotPlt(uint8_t *buf, const Symbol &s) const override; + void writePltHeader(uint8_t *buf) const override; + void addPltHeaderSymbols(InputSection &isd) const override; + void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; + RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override; + RelExpr adjustGotPcExpr(RelType type, int64_t addend, + const uint8_t *loc) const override; + bool relaxOnce(int pass) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override; + +private: + void relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) const; + void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const; + void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; + void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; +}; +} // namespace + +SystemZ::SystemZ() { + copyRel = R_390_COPY; + gotRel = R_390_GLOB_DAT; + pltRel = R_390_JMP_SLOT; + relativeRel = 
R_390_RELATIVE; + iRelativeRel = R_390_IRELATIVE; + symbolicRel = R_390_64; + tlsGotRel = R_390_TLS_TPOFF; + tlsModuleIndexRel = R_390_TLS_DTPMOD; + tlsOffsetRel = R_390_TLS_DTPOFF; + gotHeaderEntriesNum = 3; + gotPltHeaderEntriesNum = 0; + gotEntrySize = 8; + pltHeaderSize = 32; + pltEntrySize = 32; + ipltEntrySize = 32; + + // This "trap instruction" is used to fill gaps between sections. + // On SystemZ, the behavior of the GNU ld is to fill those gaps + // with nop instructions instead - and unfortunately the default + // glibc crt object files (used to) rely on that behavior since + // they use an alignment on the .init section fragments that causes + // gaps which must be filled with nops as they are being executed. + // Therefore, we provide a nop instruction as "trapInstr" here. + trapInstr = {0x07, 0x07, 0x07, 0x07}; + + defaultImageBase = 0x1000000; +} + +RelExpr SystemZ::getRelExpr(RelType type, const Symbol &s, + const uint8_t *loc) const { + switch (type) { + case R_390_NONE: + return R_NONE; + // Relocations targeting the symbol value. + case R_390_8: + case R_390_12: + case R_390_16: + case R_390_20: + case R_390_32: + case R_390_64: + return R_ABS; + case R_390_PC16: + case R_390_PC32: + case R_390_PC64: + case R_390_PC12DBL: + case R_390_PC16DBL: + case R_390_PC24DBL: + case R_390_PC32DBL: + return R_PC; + case R_390_GOTOFF16: + case R_390_GOTOFF: // a.k.a. R_390_GOTOFF32 + case R_390_GOTOFF64: + return R_GOTREL; + // Relocations targeting the PLT associated with the symbol. + case R_390_PLT32: + case R_390_PLT64: + case R_390_PLT12DBL: + case R_390_PLT16DBL: + case R_390_PLT24DBL: + case R_390_PLT32DBL: + return R_PLT_PC; + case R_390_PLTOFF16: + case R_390_PLTOFF32: + case R_390_PLTOFF64: + return R_PLT_GOTREL; + // Relocations targeting the GOT entry associated with the symbol. 
+ case R_390_GOTENT: + return R_GOT_PC; + case R_390_GOT12: + case R_390_GOT16: + case R_390_GOT20: + case R_390_GOT32: + case R_390_GOT64: + return R_GOT_OFF; + // Relocations targeting the GOTPLT entry associated with the symbol. + case R_390_GOTPLTENT: + return R_GOTPLT_PC; + case R_390_GOTPLT12: + case R_390_GOTPLT16: + case R_390_GOTPLT20: + case R_390_GOTPLT32: + case R_390_GOTPLT64: + return R_GOTPLT_GOTREL; + // Relocations targeting _GLOBAL_OFFSET_TABLE_. + case R_390_GOTPC: + case R_390_GOTPCDBL: + return R_GOTONLY_PC; + // TLS-related relocations. + case R_390_TLS_LOAD: + return R_NONE; + case R_390_TLS_GDCALL: + return R_TLSGD_PC; + case R_390_TLS_LDCALL: + return R_TLSLD_PC; + case R_390_TLS_GD32: + case R_390_TLS_GD64: + return R_TLSGD_GOT; + case R_390_TLS_LDM32: + case R_390_TLS_LDM64: + return R_TLSLD_GOT; + case R_390_TLS_LDO32: + case R_390_TLS_LDO64: + return R_DTPREL; + case R_390_TLS_LE32: + case R_390_TLS_LE64: + return R_TPREL; + case R_390_TLS_IE32: + case R_390_TLS_IE64: + return R_GOT; + case R_390_TLS_GOTIE12: + case R_390_TLS_GOTIE20: + case R_390_TLS_GOTIE32: + case R_390_TLS_GOTIE64: + return R_GOT_OFF; + case R_390_TLS_IEENT: + return R_GOT_PC; + + default: + error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + + ") against symbol " + toString(s)); + return R_NONE; + } +} + +void SystemZ::writeGotHeader(uint8_t *buf) const { + // _GLOBAL_OFFSET_TABLE_[0] holds the value of _DYNAMIC. + // _GLOBAL_OFFSET_TABLE_[1] and [2] are reserved. 
+ write64be(buf, mainPart->dynamic->getVA()); +} + +void SystemZ::writeGotPlt(uint8_t *buf, const Symbol &s) const { + write64be(buf, s.getPltVA() + 14); +} + +void SystemZ::writeIgotPlt(uint8_t *buf, const Symbol &s) const { + if (config->writeAddends) + write64be(buf, s.getVA()); +} + +void SystemZ::writePltHeader(uint8_t *buf) const { + const uint8_t pltData[] = { + 0xe3, 0x10, 0xf0, 0x38, 0x00, 0x24, // stg %r1,56(%r15) + 0xc0, 0x10, 0x00, 0x00, 0x00, 0x00, // larl %r1,_GLOBAL_OFFSET_TABLE_ + 0xd2, 0x07, 0xf0, 0x30, 0x10, 0x08, // mvc 48(8,%r15),8(%r1) + 0xe3, 0x10, 0x10, 0x10, 0x00, 0x04, // lg %r1,16(%r1) + 0x07, 0xf1, // br %r1 + 0x07, 0x00, // nopr + 0x07, 0x00, // nopr + 0x07, 0x00, // nopr + }; + memcpy(buf, pltData, sizeof(pltData)); + uint64_t got = in.got->getVA(); + uint64_t plt = in.plt->getVA(); + write32be(buf + 8, (got - plt - 6) >> 1); +} + +void SystemZ::addPltHeaderSymbols(InputSection &isec) const { + // The PLT header needs a reference to _GLOBAL_OFFSET_TABLE_, so we + // must ensure the .got section is created even if otherwise unused. 
+ in.got->hasGotOffRel.store(true, std::memory_order_relaxed); +} + +void SystemZ::writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const { + const uint8_t inst[] = { + 0xc0, 0x10, 0x00, 0x00, 0x00, 0x00, // larl %r1,<.got.plt slot> + 0xe3, 0x10, 0x10, 0x00, 0x00, 0x04, // lg %r1,0(%r1) + 0x07, 0xf1, // br %r1 + 0x0d, 0x10, // basr %r1,%r0 + 0xe3, 0x10, 0x10, 0x0c, 0x00, 0x14, // lgf %r1,12(%r1) + 0xc0, 0xf4, 0x00, 0x00, 0x00, 0x00, // jg + 0x00, 0x00, 0x00, 0x00, // + }; + memcpy(buf, inst, sizeof(inst)); + + write32be(buf + 2, (sym.getGotPltVA() - pltEntryAddr) >> 1); + write32be(buf + 24, (in.plt->getVA() - pltEntryAddr - 22) >> 1); + write32be(buf + 28, in.relaPlt->entsize * sym.getPltIdx()); +} + +int64_t SystemZ::getImplicitAddend(const uint8_t *buf, RelType type) const { + switch (type) { + case R_390_8: + return SignExtend64<8>(*buf); + case R_390_16: + case R_390_PC16: + return SignExtend64<16>(read16be(buf)); + case R_390_PC16DBL: + return SignExtend64<16>(read16be(buf)) << 1; + case R_390_32: + case R_390_PC32: + return SignExtend64<32>(read32be(buf)); + case R_390_PC32DBL: + return SignExtend64<32>(read32be(buf)) << 1; + case R_390_64: + case R_390_PC64: + case R_390_TLS_DTPMOD: + case R_390_TLS_DTPOFF: + case R_390_TLS_TPOFF: + case R_390_GLOB_DAT: + case R_390_RELATIVE: + case R_390_IRELATIVE: + return read64be(buf); + case R_390_COPY: + case R_390_JMP_SLOT: + case R_390_NONE: + // These relocations are defined as not having an implicit addend. 
+ return 0; + default: + internalLinkerError(getErrorLocation(buf), + "cannot read addend for relocation " + toString(type)); + return 0; + } +} + +RelType SystemZ::getDynRel(RelType type) const { + if (type == R_390_64 || type == R_390_PC64) + return type; + return R_390_NONE; +} + +RelExpr SystemZ::adjustTlsExpr(RelType type, RelExpr expr) const { + if (expr == R_RELAX_TLS_GD_TO_IE) + return R_RELAX_TLS_GD_TO_IE_GOT_OFF; + return expr; +} + +int SystemZ::getTlsGdRelaxSkip(RelType type) const { + // A __tls_get_offset call instruction is marked with 2 relocations: + // + // R_390_TLS_GDCALL / R_390_TLS_LDCALL: marker relocation + // R_390_PLT32DBL: __tls_get_offset + // + // After the relaxation we no longer call __tls_get_offset and should skip + // both relocations to not create a false dependence on __tls_get_offset + // being defined. + // + // Note that this mechanism only works correctly if the R_390_TLS_[GL]DCALL + // is seen immediately *before* the R_390_PLT32DBL. Unfortunately, current + // compilers on the platform will typically generate the inverse sequence. + // To fix this, we sort relocations by offset in RelocationScanner::scan; + // this ensures the correct sequence as the R_390_TLS_[GL]DCALL applies to + // the first byte of the brasl instruction, while the R_390_PLT32DBL applies + // to its third byte (the relative displacement). 
+ + if (type == R_390_TLS_GDCALL || type == R_390_TLS_LDCALL) + return 2; + return 1; +} + +void SystemZ::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + // The general-dynamic code sequence for a global `x`: + // + // Instruction Relocation Symbol + // ear %rX,%a0 + // sllg %rX,%rX,32 + // ear %rX,%a1 + // larl %r12,_GLOBAL_OFFSET_TABLE_ R_390_GOTPCDBL _GLOBAL_OFFSET_TABLE_ + // lgrl %r2,.LC0 R_390_PC32DBL .LC0 + // brasl %r14,__tls_get_offset@plt R_390_TLS_GDCALL x + // :tls_gdcall:x R_390_PLT32DBL __tls_get_offset + // la %r2,0(%r2,%rX) + // + // .LC0: + // .quad x@TLSGD R_390_TLS_GD64 x + // + // Relaxing to initial-exec entails: + // 1) Replacing the call by a load from the GOT. + // 2) Replacing the relocation on the constant LC0 by R_390_TLS_GOTIE64. + + switch (rel.type) { + case R_390_TLS_GDCALL: + // brasl %r14,__tls_get_offset@plt -> lg %r2,0(%r2,%r12) + write16be(loc, 0xe322); + write32be(loc + 2, 0xc0000004); + break; + case R_390_TLS_GD64: + relocateNoSym(loc, R_390_TLS_GOTIE64, val); + break; + default: + llvm_unreachable("unsupported relocation for TLS GD to IE relaxation"); + } +} + +void SystemZ::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + // The general-dynamic code sequence for a global `x`: + // + // Instruction Relocation Symbol + // ear %rX,%a0 + // sllg %rX,%rX,32 + // ear %rX,%a1 + // larl %r12,_GLOBAL_OFFSET_TABLE_ R_390_GOTPCDBL _GLOBAL_OFFSET_TABLE_ + // lgrl %r2,.LC0 R_390_PC32DBL .LC0 + // brasl %r14,__tls_get_offset@plt R_390_TLS_GDCALL x + // :tls_gdcall:x R_390_PLT32DBL __tls_get_offset + // la %r2,0(%r2,%rX) + // + // .LC0: + // .quad x@tlsgd R_390_TLS_GD64 x + // + // Relaxing to local-exec entails: + // 1) Replacing the call by a nop. + // 2) Replacing the relocation on the constant LC0 by R_390_TLS_LE64. + + switch (rel.type) { + case R_390_TLS_GDCALL: + // brasl %r14,__tls_get_offset@plt -> brcl 0,. 
+ write16be(loc, 0xc004); + write32be(loc + 2, 0x00000000); + break; + case R_390_TLS_GD64: + relocateNoSym(loc, R_390_TLS_LE64, val); + break; + default: + llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); + } +} + +void SystemZ::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + // The local-dynamic code sequence for a global `x`: + // + // Instruction Relocation Symbol + // ear %rX,%a0 + // sllg %rX,%rX,32 + // ear %rX,%a1 + // larl %r12,_GLOBAL_OFFSET_TABLE_ R_390_GOTPCDBL _GLOBAL_OFFSET_TABLE_ + // lgrl %r2,.LC0 R_390_PC32DBL .LC0 + // brasl %r14,__tls_get_offset@plt R_390_TLS_LDCALL + // :tls_ldcall: R_390_PLT32DBL __tls_get_offset + // la %r2,0(%r2,%rX) + // lgrl %rY,.LC1 R_390_PC32DBL .LC1 + // la %r2,0(%r2,%rY) + // + // .LC0: + // .quad @tlsldm R_390_TLS_LDM64 + // .LC1: + // .quad x@dtpoff R_390_TLS_LDO64 x + // + // Relaxing to local-exec entails: + // 1) Replacing the call by a nop. + // 2) Replacing the constant LC0 by 0 (i.e. ignoring the relocation). + // 3) Replacing the relocation on the constant LC1 by R_390_TLS_LE64. + + switch (rel.type) { + case R_390_TLS_LDCALL: + // brasl %r14,__tls_get_offset@plt -> brcl 0,. + write16be(loc, 0xc004); + write32be(loc + 2, 0x00000000); + break; + case R_390_TLS_LDM64: + break; + case R_390_TLS_LDO64: + relocateNoSym(loc, R_390_TLS_LE64, val); + break; + default: + llvm_unreachable("unsupported relocation for TLS LD to LE relaxation"); + } +} + +RelExpr SystemZ::adjustGotPcExpr(RelType type, int64_t addend, + const uint8_t *loc) const { + // Only R_390_GOTENT with addend 2 can be relaxed. + if (!config->relax || addend != 2 || type != R_390_GOTENT) + return R_GOT_PC; + const uint16_t op = read16be(loc - 2); + + // lgrl rx,sym@GOTENT -> larl rx, sym + // This relaxation is legal if "sym" binds locally (which was already + // verified by our caller) and is in-range and properly aligned for a + // LARL instruction. 
We cannot verify the latter constraint here, so + // we assume it is true and revert the decision later on in relaxOnce + // if necessary. + if ((op & 0xff0f) == 0xc408) + return R_RELAX_GOT_PC; + + return R_GOT_PC; +} + +bool SystemZ::relaxOnce(int pass) const { + // If we decided in adjustGotPcExpr to relax a R_390_GOTENT, + // we need to validate the target symbol is in-range and aligned. + SmallVector storage; + bool changed = false; + for (OutputSection *osec : outputSections) { + if (!(osec->flags & SHF_EXECINSTR)) + continue; + for (InputSection *sec : getInputSections(*osec, storage)) { + for (Relocation &rel : sec->relocs()) { + if (rel.expr != R_RELAX_GOT_PC) + continue; + + uint64_t v = sec->getRelocTargetVA( + sec->file, rel.type, rel.addend, + sec->getOutputSection()->addr + rel.offset, *rel.sym, rel.expr); + if (isInt<33>(v) && !(v & 1)) + continue; + if (rel.sym->auxIdx == 0) { + rel.sym->allocateAux(); + addGotEntry(*rel.sym); + changed = true; + } + rel.expr = R_GOT_PC; + } + } + } + return changed; +} + +void SystemZ::relaxGot(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + assert(isInt<33>(val) && + "R_390_GOTENT should not have been relaxed if it overflows"); + assert(!(val & 1) && + "R_390_GOTENT should not have been relaxed if it is misaligned"); + const uint16_t op = read16be(loc - 2); + + // lgrl rx,sym@GOTENT -> larl rx, sym + if ((op & 0xff0f) == 0xc408) { + write16be(loc - 2, 0xc000 | (op & 0x00f0)); + write32be(loc, val >> 1); + } +} + +void SystemZ::relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.expr) { + case R_RELAX_GOT_PC: + return relaxGot(loc, rel, val); + case R_RELAX_TLS_GD_TO_IE_GOT_OFF: + return relaxTlsGdToIe(loc, rel, val); + case R_RELAX_TLS_GD_TO_LE: + return relaxTlsGdToLe(loc, rel, val); + case R_RELAX_TLS_LD_TO_LE: + return relaxTlsLdToLe(loc, rel, val); + default: + break; + } + switch (rel.type) { + case R_390_8: + checkIntUInt(loc, val, 8, rel); + *loc = val; + 
break; + case R_390_12: + case R_390_GOT12: + case R_390_GOTPLT12: + case R_390_TLS_GOTIE12: + checkUInt(loc, val, 12, rel); + write16be(loc, (read16be(loc) & 0xF000) | val); + break; + case R_390_PC12DBL: + case R_390_PLT12DBL: + checkInt(loc, val, 13, rel); + checkAlignment(loc, val, 2, rel); + write16be(loc, (read16be(loc) & 0xF000) | ((val >> 1) & 0x0FFF)); + break; + case R_390_16: + case R_390_GOT16: + case R_390_GOTPLT16: + case R_390_GOTOFF16: + case R_390_PLTOFF16: + checkIntUInt(loc, val, 16, rel); + write16be(loc, val); + break; + case R_390_PC16: + checkInt(loc, val, 16, rel); + write16be(loc, val); + break; + case R_390_PC16DBL: + case R_390_PLT16DBL: + checkInt(loc, val, 17, rel); + checkAlignment(loc, val, 2, rel); + write16be(loc, val >> 1); + break; + case R_390_20: + case R_390_GOT20: + case R_390_GOTPLT20: + case R_390_TLS_GOTIE20: + checkInt(loc, val, 20, rel); + write32be(loc, (read32be(loc) & 0xF00000FF) | ((val & 0xFFF) << 16) | + ((val & 0xFF000) >> 4)); + break; + case R_390_PC24DBL: + case R_390_PLT24DBL: + checkInt(loc, val, 25, rel); + checkAlignment(loc, val, 2, rel); + loc[0] = val >> 17; + loc[1] = val >> 9; + loc[2] = val >> 1; + break; + case R_390_32: + case R_390_GOT32: + case R_390_GOTPLT32: + case R_390_GOTOFF: + case R_390_PLTOFF32: + case R_390_TLS_IE32: + case R_390_TLS_GOTIE32: + case R_390_TLS_GD32: + case R_390_TLS_LDM32: + case R_390_TLS_LDO32: + case R_390_TLS_LE32: + checkIntUInt(loc, val, 32, rel); + write32be(loc, val); + break; + case R_390_PC32: + case R_390_PLT32: + checkInt(loc, val, 32, rel); + write32be(loc, val); + break; + case R_390_PC32DBL: + case R_390_PLT32DBL: + case R_390_GOTPCDBL: + case R_390_GOTENT: + case R_390_GOTPLTENT: + case R_390_TLS_IEENT: + checkInt(loc, val, 33, rel); + checkAlignment(loc, val, 2, rel); + write32be(loc, val >> 1); + break; + case R_390_64: + case R_390_PC64: + case R_390_PLT64: + case R_390_GOT64: + case R_390_GOTPLT64: + case R_390_GOTOFF64: + case R_390_PLTOFF64: + case 
R_390_GOTPC: + case R_390_TLS_IE64: + case R_390_TLS_GOTIE64: + case R_390_TLS_GD64: + case R_390_TLS_LDM64: + case R_390_TLS_LDO64: + case R_390_TLS_LE64: + case R_390_TLS_DTPMOD: + case R_390_TLS_DTPOFF: + case R_390_TLS_TPOFF: + write64be(loc, val); + break; + case R_390_TLS_LOAD: + case R_390_TLS_GDCALL: + case R_390_TLS_LDCALL: + break; + default: + llvm_unreachable("unknown relocation"); + } +} + +TargetInfo *elf::getSystemZTargetInfo() { + static SystemZ t; + return &t; +} diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt index 475f7dea1dd7e..83d816ddb0601 100644 --- a/lld/ELF/CMakeLists.txt +++ b/lld/ELF/CMakeLists.txt @@ -33,6 +33,7 @@ add_lld_library(lldELF Arch/PPC64.cpp Arch/RISCV.cpp Arch/SPARCV9.cpp + Arch/SystemZ.cpp Arch/X86.cpp Arch/X86_64.cpp ARMErrataFix.cpp diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index f4b7d1c9d5b97..8b2c32b153482 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -200,6 +200,7 @@ static std::tuple parseEmulation(StringRef emul) { .Case("msp430elf", {ELF32LEKind, EM_MSP430}) .Case("elf64_amdgpu", {ELF64LEKind, EM_AMDGPU}) .Case("elf64loongarch", {ELF64LEKind, EM_LOONGARCH}) + .Case("elf64_s390", {ELF64BEKind, EM_S390}) .Default({ELFNoneKind, EM_NONE}); if (ret.first == ELFNoneKind) @@ -1137,7 +1138,7 @@ static SmallVector getSymbolOrderingFile(MemoryBufferRef mb) { static bool getIsRela(opt::InputArgList &args) { // The psABI specifies the default relocation entry format. bool rela = is_contained({EM_AARCH64, EM_AMDGPU, EM_HEXAGON, EM_LOONGARCH, - EM_PPC, EM_PPC64, EM_RISCV, EM_X86_64}, + EM_PPC, EM_PPC64, EM_RISCV, EM_S390, EM_X86_64}, config->emachine); // If -z rel or -z rela is specified, use the last option. 
for (auto *arg : args.filtered(OPT_z)) { diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index a292e873e72f7..6c7ef27cbd494 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1614,6 +1614,8 @@ static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) { return EM_RISCV; case Triple::sparcv9: return EM_SPARCV9; + case Triple::systemz: + return EM_S390; case Triple::x86: return t.isOSIAMCU() ? EM_IAMCU : EM_386; case Triple::x86_64: diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 0e0b9783bd88a..71870539d531c 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -654,6 +654,7 @@ static int64_t getTlsTpOffset(const Symbol &s) { // Variant 2. case EM_HEXAGON: + case EM_S390: case EM_SPARCV9: case EM_386: case EM_X86_64: @@ -716,6 +717,10 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, case R_GOT_PC: case R_RELAX_TLS_GD_TO_IE: return sym.getGotVA() + a - p; + case R_GOTPLT_GOTREL: + return sym.getGotPltVA() + a - in.got->getVA(); + case R_GOTPLT_PC: + return sym.getGotPltVA() + a - p; case R_LOONGARCH_GOT_PAGE_PC: if (sym.hasFlag(NEEDS_TLSGD)) return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p, type); @@ -807,6 +812,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, return getLoongArchPageDelta(sym.getPltVA() + a, p, type); case R_PLT_GOTPLT: return sym.getPltVA() + a - in.gotPlt->getVA(); + case R_PLT_GOTREL: + return sym.getPltVA() + a - in.got->getVA(); case R_PPC32_PLTREL: // R_PPC_PLTREL24 uses the addend (usually 0 or 0x8000) to indicate r30 // stores _GLOBAL_OFFSET_TABLE_ or .got2+0x8000. The addend is ignored for diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 79c8230724ade..f64b4219e0acc 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -203,8 +203,9 @@ static bool isAbsoluteValue(const Symbol &sym) { // Returns true if Expr refers a PLT entry. 
static bool needsPlt(RelExpr expr) { - return oneof(expr); + return oneof(expr); } bool lld::elf::needsGot(RelExpr expr) { @@ -233,6 +234,8 @@ static RelExpr toPlt(RelExpr expr) { return R_PLT_PC; case R_ABS: return R_PLT; + case R_GOTREL: + return R_PLT_GOTREL; default: return expr; } @@ -253,6 +256,8 @@ static RelExpr fromPlt(RelExpr expr) { return R_ABS; case R_PLT_GOTPLT: return R_GOTPLTREL; + case R_PLT_GOTREL: + return R_GOTREL; default: return expr; } @@ -979,10 +984,10 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type, if (oneof( - e)) + R_PLT_PC, R_PLT_GOTREL, R_PLT_GOTPLT, R_GOTPLT_GOTREL, R_GOTPLT_PC, + R_PPC32_PLTREL, R_PPC64_CALL_PLT, R_PPC64_RELAX_TOC, R_RISCV_ADD, + R_AARCH64_GOT_PAGE, R_LOONGARCH_PLT_PAGE_PC, R_LOONGARCH_GOT, + R_LOONGARCH_GOT_PAGE_PC>(e)) return true; // These never do, except if the entire file is position dependent or if @@ -1374,8 +1379,8 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, R_LOONGARCH_GOT_PAGE_PC, R_GOT_OFF, R_TLSIE_HINT>(expr)) { ctx.hasTlsIe.store(true, std::memory_order_relaxed); // Initial-Exec relocs can be optimized to Local-Exec if the symbol is - // locally defined. - if (execOptimize && isLocalInExecutable) { + // locally defined. This is not supported on SystemZ. + if (execOptimize && isLocalInExecutable && config->emachine != EM_S390) { c.addReloc({R_RELAX_TLS_IE_TO_LE, type, offset, addend, &sym}); } else if (expr != R_TLSIE_HINT) { sym.setFlags(NEEDS_TLSIE); @@ -1534,8 +1539,10 @@ void RelocationScanner::scan(ArrayRef rels) { // For EhInputSection, OffsetGetter expects the relocations to be sorted by // r_offset. In rare cases (.eh_frame pieces are reordered by a linker // script), the relocations may be unordered. + // On SystemZ, all sections need to be sorted by r_offset, to allow TLS + // relaxation to be handled correctly - see SystemZ::getTlsGdRelaxSkip. 
SmallVector storage; - if (isa(sec)) + if (isa(sec) || config->emachine == EM_S390) rels = sortRels(rels, storage); end = static_cast(rels.end()); diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index cfb9092149f3e..7eb8a811e6934 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -40,11 +40,14 @@ enum RelExpr { R_GOTPLT, R_GOTPLTREL, R_GOTREL, + R_GOTPLT_GOTREL, + R_GOTPLT_PC, R_NONE, R_PC, R_PLT, R_PLT_PC, R_PLT_GOTPLT, + R_PLT_GOTREL, R_RELAX_HINT, R_RELAX_GOT_PC, R_RELAX_GOT_PC_NOPIC, diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index dd69916d6b05e..f0ede1f43bbdb 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -445,6 +445,7 @@ static std::pair parseBfdName(StringRef s) { .Case("elf32-msp430", {ELF32LEKind, EM_MSP430}) .Case("elf32-loongarch", {ELF32LEKind, EM_LOONGARCH}) .Case("elf64-loongarch", {ELF64LEKind, EM_LOONGARCH}) + .Case("elf64-s390", {ELF64BEKind, EM_S390}) .Default({ELFNoneKind, EM_NONE}); } diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 4b413163314b2..bada394aa30d7 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -1419,6 +1419,9 @@ DynamicSection::computeContents() { case EM_MIPS: addInSec(DT_MIPS_PLTGOT, *in.gotPlt); break; + case EM_S390: + addInSec(DT_PLTGOT, *in.got); + break; case EM_SPARCV9: addInSec(DT_PLTGOT, *in.plt); break; diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp index 671d22cc66a0e..b7922425a34e4 100644 --- a/lld/ELF/Target.cpp +++ b/lld/ELF/Target.cpp @@ -87,6 +87,8 @@ TargetInfo *elf::getTarget() { return getRISCVTargetInfo(); case EM_SPARCV9: return getSPARCV9TargetInfo(); + case EM_S390: + return getSystemZTargetInfo(); case EM_X86_64: return getX86_64TargetInfo(); } diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index ab6b6b9c013ba..3c06789cdbd36 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -186,6 +186,7 @@ TargetInfo *getPPC64TargetInfo(); TargetInfo *getPPCTargetInfo(); 
TargetInfo *getRISCVTargetInfo(); TargetInfo *getSPARCV9TargetInfo(); +TargetInfo *getSystemZTargetInfo(); TargetInfo *getX86TargetInfo(); TargetInfo *getX86_64TargetInfo(); template TargetInfo *getMipsTargetInfo(); diff --git a/lld/test/ELF/Inputs/systemz-init.s b/lld/test/ELF/Inputs/systemz-init.s new file mode 100644 index 0000000000000..1611b69b4419e --- /dev/null +++ b/lld/test/ELF/Inputs/systemz-init.s @@ -0,0 +1,5 @@ +// glibc < 2.39 used to align .init and .fini code at a 4-byte boundary. +// This file aims to recreate that behavior. + .section .init,"ax",@progbits + .align 4 + lg %r4, 272(%r15) diff --git a/lld/test/ELF/basic-systemz.s b/lld/test/ELF/basic-systemz.s new file mode 100644 index 0000000000000..f7bb0e8cbd020 --- /dev/null +++ b/lld/test/ELF/basic-systemz.s @@ -0,0 +1,63 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o +# RUN: ld.lld --hash-style=sysv -discard-all -shared %t.o -o %t.so +# RUN: llvm-readelf --file-header --program-headers --section-headers --dynamic-table %t.so | FileCheck %s + +# Exits with return code 55 on linux. 
+.text + lghi 2,55 + svc 1 + +# CHECK: ELF Header: +# CHECK-NEXT: Magic: 7f 45 4c 46 02 02 01 00 00 00 00 00 00 00 00 00 +# CHECK-NEXT: Class: ELF64 +# CHECK-NEXT: Data: 2's complement, big endian +# CHECK-NEXT: Version: 1 (current) +# CHECK-NEXT: OS/ABI: UNIX - System V +# CHECK-NEXT: ABI Version: 0 +# CHECK-NEXT: Type: DYN (Shared object file) +# CHECK-NEXT: Machine: IBM S/390 +# CHECK-NEXT: Version: 0x1 +# CHECK-NEXT: Entry point address: 0x0 +# CHECK-NEXT: Start of program headers: 64 (bytes into file) +# CHECK-NEXT: Start of section headers: 768 (bytes into file) +# CHECK-NEXT: Flags: 0x0 +# CHECK-NEXT: Size of this header: 64 (bytes) +# CHECK-NEXT: Size of program headers: 56 (bytes) +# CHECK-NEXT: Number of program headers: 7 +# CHECK-NEXT: Size of section headers: 64 (bytes) +# CHECK-NEXT: Number of section headers: 11 +# CHECK-NEXT: Section header string table index: 9 + +# CHECK: Section Headers: +# CHECK-NEXT: [Nr] Name Type Address Off Size ES Flg Lk Inf Al +# CHECK-NEXT: [ 0] NULL 0000000000000000 000000 000000 00 0 0 0 +# CHECK-NEXT: [ 1] .dynsym DYNSYM 00000000000001c8 0001c8 000018 18 A 3 1 8 +# CHECK-NEXT: [ 2] .hash HASH 00000000000001e0 0001e0 000010 04 A 1 0 4 +# CHECK-NEXT: [ 3] .dynstr STRTAB 00000000000001f0 0001f0 000001 00 A 0 0 1 +# CHECK-NEXT: [ 4] .text PROGBITS 00000000000011f4 0001f4 000006 00 AX 0 0 4 +# CHECK-NEXT: [ 5] .dynamic DYNAMIC 0000000000002200 000200 000060 10 WA 3 0 8 +# CHECK-NEXT: [ 6] .relro_padding NOBITS 0000000000002260 000260 000da0 00 WA 0 0 1 +# CHECK-NEXT: [ 7] .comment PROGBITS 0000000000000000 000260 000008 01 MS 0 0 1 +# CHECK-NEXT: [ 8] .symtab SYMTAB 0000000000000000 000268 000030 18 10 2 8 +# CHECK-NEXT: [ 9] .shstrtab STRTAB 0000000000000000 000298 000058 00 0 0 1 +# CHECK-NEXT: [10] .strtab STRTAB 0000000000000000 0002f0 00000a 00 0 0 1 + +# CHECK: Program Headers: +# CHECK-NEXT: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align +# CHECK-NEXT: PHDR 0x000040 0x0000000000000040 0x0000000000000040 
0x000188 0x000188 R 0x8 +# CHECK-NEXT: LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x0001f1 0x0001f1 R 0x1000 +# CHECK-NEXT: LOAD 0x0001f4 0x00000000000011f4 0x00000000000011f4 0x000006 0x000006 R E 0x1000 +# CHECK-NEXT: LOAD 0x000200 0x0000000000002200 0x0000000000002200 0x000060 0x000e00 RW 0x1000 +# CHECK-NEXT: DYNAMIC 0x000200 0x0000000000002200 0x0000000000002200 0x000060 0x000060 RW 0x8 +# CHECK-NEXT: GNU_RELRO 0x000200 0x0000000000002200 0x0000000000002200 0x000060 0x000e00 R 0x1 +# CHECK-NEXT: GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW 0x0 + +# CHECK: Dynamic section at offset 0x200 contains 6 entries: +# CHECK-NEXT: Tag Type Name/Value +# CHECK-NEXT: 0x0000000000000006 (SYMTAB) 0x1c8 +# CHECK-NEXT: 0x000000000000000b (SYMENT) 24 (bytes) +# CHECK-NEXT: 0x0000000000000005 (STRTAB) 0x1f0 +# CHECK-NEXT: 0x000000000000000a (STRSZ) 1 (bytes) +# CHECK-NEXT: 0x0000000000000004 (HASH) 0x1e0 +# CHECK-NEXT: 0x0000000000000000 (NULL) 0x0 diff --git a/lld/test/ELF/emulation-systemz.s b/lld/test/ELF/emulation-systemz.s new file mode 100644 index 0000000000000..dfdb4620954c8 --- /dev/null +++ b/lld/test/ELF/emulation-systemz.s @@ -0,0 +1,29 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o +# RUN: ld.lld -m elf64_s390 %t.o -o %t1 +# RUN: llvm-readelf --file-header %t1 | FileCheck %s +# RUN: ld.lld %t.o -o %t2 +# RUN: llvm-readelf --file-header %t2 | FileCheck %s +# RUN: echo 'OUTPUT_FORMAT(elf64-s390)' > %t.script +# RUN: ld.lld %t.script %t.o -o %t3 +# RUN: llvm-readelf --file-header %t3 | FileCheck %s + +# CHECK: ELF Header: +# CHECK-NEXT: Magic: 7f 45 4c 46 02 02 01 00 00 00 00 00 00 00 00 00 +# CHECK-NEXT: Class: ELF64 +# CHECK-NEXT: Data: 2's complement, big endian +# CHECK-NEXT: Version: 1 (current) +# CHECK-NEXT: OS/ABI: UNIX - System V +# CHECK-NEXT: ABI Version: 0 +# CHECK-NEXT: Type: EXEC (Executable file) +# CHECK-NEXT: Machine: IBM S/390 +# CHECK-NEXT: Version: 0x1 +# 
CHECK-NEXT: Entry point address: +# CHECK-NEXT: Start of program headers: 64 (bytes into file) +# CHECK-NEXT: Start of section headers: +# CHECK-NEXT: Flags: 0x0 +# CHECK-NEXT: Size of this header: 64 (bytes) +# CHECK-NEXT: Size of program headers: 56 (bytes) + +.globl _start +_start: diff --git a/lld/test/ELF/lto/systemz.ll b/lld/test/ELF/lto/systemz.ll new file mode 100644 index 0000000000000..42bf4e32fb6d7 --- /dev/null +++ b/lld/test/ELF/lto/systemz.ll @@ -0,0 +1,18 @@ +; REQUIRES: systemz +;; Test we can infer the e_machine value EM_S390 from a bitcode file. + +; RUN: llvm-as %s -o %t.o +; RUN: ld.lld %t.o -o %t +; RUN: llvm-readobj -h %t | FileCheck %s + +; CHECK: Class: 64-bit +; CHECK: DataEncoding: BigEndian +; CHECK: Machine: EM_S390 + +target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" +target triple = "s390x-unknown-linux-gnu" + +define void @_start() { +entry: + ret void +} diff --git a/lld/test/ELF/systemz-got.s b/lld/test/ELF/systemz-got.s new file mode 100644 index 0000000000000..1d558aa3b0290 --- /dev/null +++ b/lld/test/ELF/systemz-got.s @@ -0,0 +1,16 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %p/Inputs/shared.s -o %t2.o +# RUN: ld.lld -shared %t2.o -soname=%t2.so -o %t2.so + +# RUN: ld.lld -dynamic-linker /lib/ld64.so.1 %t.o %t2.so -o %t +# RUN: llvm-readelf -S -r %t | FileCheck %s + +# CHECK: .got PROGBITS {{.*}} {{.*}} 000020 00 WA 0 0 8 + +# CHECK: Relocation section '.rela.dyn' at offset {{.*}} contains 1 entries: +# CHECK: {{.*}} 000000010000000a R_390_GLOB_DAT 0000000000000000 bar + 0 + +.global _start +_start: + lgrl %r1,bar@GOT diff --git a/lld/test/ELF/systemz-gotent-relax-align.s b/lld/test/ELF/systemz-gotent-relax-align.s new file mode 100644 index 0000000000000..c6326086f56db --- /dev/null +++ b/lld/test/ELF/systemz-gotent-relax-align.s @@ -0,0 +1,48 @@ +# REQUIRES: systemz +## Verify that 
R_390_GOTENT optimization is not performed on misaligned symbols. + +# RUN: llvm-mc -filetype=obj -relax-relocations -triple=s390x-unknown-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t1 +# RUN: llvm-readelf -S -r -x .got -x .got.plt %t1 | FileCheck --check-prefixes=CHECK %s +# RUN: llvm-objdump --no-print-imm-hex -d %t1 | FileCheck --check-prefix=DISASM %s + +## We retain one .got entry for the unaligned symbol. +# CHECK: Name Type Address Off Size ES Flg Lk Inf Al +# CHECK: .got PROGBITS 00000000010021e0 0001e0 000020 00 WA 0 0 8 +# CHECK-NEXT: .relro_padding NOBITS 0000000001002200 000200 000e00 00 WA 0 0 1 +# CHECK-NEXT: .data PROGBITS 0000000001003200 000200 000006 00 WA 0 0 2 + +# CHECK-LABEL: Hex dump of section '.got': +# CHECK-NEXT: 0x010021e0 00000000 00000000 00000000 00000000 +# CHECK-NEXT: 0x010021f0 00000000 00000000 00000000 01003205 + +# DISASM: Disassembly of section .text: +# DISASM: <_start>: +# DISASM-NEXT: larl %r1, 0x1003200 +# DISASM-NEXT: larl %r1, 0x1003200 +# DISASM-NEXT: lgrl %r1, 0x10021f8 +# DISASM-NEXT: lgrl %r1, 0x10021f8 + +.data +.globl var_align +.hidden var_align + .align 2 +var_align: + .long 0 + +.data +.globl var_unalign +.hidden var_unalign + .align 2 + .byte 0 +var_unalign: + .byte 0 + +.text +.globl _start +.type _start, @function +_start: + lgrl %r1, var_align@GOT + lgrl %r1, var_align@GOT + lgrl %r1, var_unalign@GOT + lgrl %r1, var_unalign@GOT diff --git a/lld/test/ELF/systemz-gotent-relax-und-dso.s b/lld/test/ELF/systemz-gotent-relax-und-dso.s new file mode 100644 index 0000000000000..57369a417fd44 --- /dev/null +++ b/lld/test/ELF/systemz-gotent-relax-und-dso.s @@ -0,0 +1,68 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -relax-relocations -triple=s390x-unknown-linux %s -o %t.o +# RUN: llvm-mc -filetype=obj -relax-relocations -triple=s390x-unknown-linux %S/Inputs/gotpc-relax-und-dso.s -o %tdso.o +# RUN: ld.lld -shared %tdso.o -soname=t.so -o %t.so +# RUN: ld.lld --hash-style=sysv -shared %t.o %t.so -o %t +# RUN: 
llvm-readelf -r %t | FileCheck --check-prefix=RELOC %s +# RUN: llvm-objdump --no-print-imm-hex -d %t | FileCheck --check-prefix=DISASM %s + +# RELOC-LABEL: Relocation section '.rela.dyn' at offset {{.*}} contains 3 entries: +# RELOC: 00000000000023f8 000000010000000a R_390_GLOB_DAT 00000000000012d8 foo + 0 +# RELOC: 0000000000002400 000000030000000a R_390_GLOB_DAT 0000000000000000 und + 0 +# RELOC: 0000000000002408 000000040000000a R_390_GLOB_DAT 0000000000000000 dsofoo + 0 + +# DISASM: Disassembly of section .text: +# DISASM-EMPTY: +# DISASM-NEXT: <foo>: +# DISASM-NEXT: bc 0, 0 +# DISASM: <hid>: +# DISASM-NEXT: bc 0, 0 +# DISASM: <_start>: +# DISASM-NEXT: lgrl %r1, 0x2400 +# DISASM-NEXT: lgrl %r1, 0x2400 +# DISASM-NEXT: lgrl %r1, 0x2408 +# DISASM-NEXT: lgrl %r1, 0x2408 +# DISASM-NEXT: larl %r1, 0x12dc +# DISASM-NEXT: larl %r1, 0x12dc +# DISASM-NEXT: lgrl %r1, 0x23f8 +# DISASM-NEXT: lgrl %r1, 0x23f8 +# DISASM-NEXT: lgrl %r1, 0x2400 +# DISASM-NEXT: lgrl %r1, 0x2400 +# DISASM-NEXT: lgrl %r1, 0x2408 +# DISASM-NEXT: lgrl %r1, 0x2408 +# DISASM-NEXT: larl %r1, 0x12dc +# DISASM-NEXT: larl %r1, 0x12dc +# DISASM-NEXT: lgrl %r1, 0x23f8 +# DISASM-NEXT: lgrl %r1, 0x23f8 + +.text +.globl foo +.type foo, @function +foo: + nop + +.globl hid +.hidden hid +.type hid, @function +hid: + nop + +.globl _start +.type _start, @function +_start: + lgrl %r1, und@GOT + lgrl %r1, und@GOT + lgrl %r1, dsofoo@GOT + lgrl %r1, dsofoo@GOT + lgrl %r1, hid@GOT + lgrl %r1, hid@GOT + lgrl %r1, foo@GOT + lgrl %r1, foo@GOT + lgrl %r1, und@GOT + lgrl %r1, und@GOT + lgrl %r1, dsofoo@GOT + lgrl %r1, dsofoo@GOT + lgrl %r1, hid@GOT + lgrl %r1, hid@GOT + lgrl %r1, foo@GOT + lgrl %r1, foo@GOT diff --git a/lld/test/ELF/systemz-gotent-relax.s b/lld/test/ELF/systemz-gotent-relax.s new file mode 100644 index 0000000000000..f665e1af9e53d --- /dev/null +++ b/lld/test/ELF/systemz-gotent-relax.s @@ -0,0 +1,91 @@ +# REQUIRES: systemz +## Test R_390_GOTENT optimization.
+ +# RUN: llvm-mc -filetype=obj -relax-relocations -triple=s390x-unknown-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t1 --no-apply-dynamic-relocs +# RUN: llvm-readelf -S -r -x .got.plt %t1 | FileCheck --check-prefixes=CHECK,NOAPPLY %s +# RUN: ld.lld %t.o -o %t1 --apply-dynamic-relocs +# RUN: llvm-readelf -S -r -x .got.plt %t1 | FileCheck --check-prefixes=CHECK,APPLY %s +# RUN: ld.lld %t.o -o %t1 +# RUN: llvm-objdump --no-print-imm-hex -d %t1 | FileCheck --check-prefix=DISASM %s + +## --no-relax disables GOT optimization. +# RUN: ld.lld --no-relax %t.o -o %t2 +# RUN: llvm-objdump --no-print-imm-hex -d %t2 | FileCheck --check-prefix=NORELAX %s + +## In our implementation, .got is retained even if all GOT-generating relocations are optimized. +# CHECK: Name Type Address Off Size ES Flg Lk Inf Al +# CHECK: .iplt PROGBITS 0000000001001240 000240 000020 00 AX 0 0 16 +# CHECK-NEXT: .got PROGBITS 0000000001002260 000260 000018 00 WA 0 0 8 +# CHECK-NEXT: .relro_padding NOBITS 0000000001002278 000278 000d88 00 WA 0 0 1 +# CHECK-NEXT: .got.plt PROGBITS 0000000001003278 000278 000008 00 WA 0 0 8 + +## There is one R_390_IRELATIVE relocation.
+# CHECK-LABEL: Relocation section '.rela.dyn' at offset {{.*}} contains 1 entries: +# CHECK: 0000000001003278 000000000000003d R_390_IRELATIVE 10011e8 + +# CHECK-LABEL: Hex dump of section '.got.plt': +# NOAPPLY-NEXT: 0x01003278 00000000 00000000 +# APPLY-NEXT: 0x01003278 00000000 010011e8 + +# DISASM: Disassembly of section .text: +# DISASM: 00000000010011e0 : +# DISASM-NEXT: bc 0, 0 +# DISASM: 00000000010011e4 : +# DISASM-NEXT: bc 0, 0 +# DISASM: 00000000010011e8 : +# DISASM-NEXT: br %r14 +# DISASM: 00000000010011ea <_start>: +# DISASM-NEXT: larl %r1, 0x10011e0 +# DISASM-NEXT: larl %r1, 0x10011e0 +# DISASM-NEXT: larl %r1, 0x10011e4 +# DISASM-NEXT: larl %r1, 0x10011e4 +# DISASM-NEXT: lgrl %r1, 0x1003278 +# DISASM-NEXT: lgrl %r1, 0x1003278 +# DISASM-NEXT: larl %r1, 0x10011e0 +# DISASM-NEXT: larl %r1, 0x10011e0 +# DISASM-NEXT: larl %r1, 0x10011e4 +# DISASM-NEXT: larl %r1, 0x10011e4 +# DISASM-NEXT: lgrl %r1, 0x1003278 +# DISASM-NEXT: lgrl %r1, 0x1003278 + +# NORELAX-LABEL: <_start>: +# NORELAX-COUNT-12: lgrl + +.text +.globl foo + +.text +.globl foo +.type foo, @function +foo: + nop + +.globl hid +.hidden hid +.type hid, @function +hid: + nop + +.text +.type ifunc STT_GNU_IFUNC +.globl ifunc +.type ifunc, @function +ifunc: + br %r14 + +.globl _start +.type _start, @function +_start: + lgrl %r1, foo@GOT + lgrl %r1, foo@GOT + lgrl %r1, hid@GOT + lgrl %r1, hid@GOT + lgrl %r1, ifunc@GOT + lgrl %r1, ifunc@GOT + lgrl %r1, foo@GOT + lgrl %r1, foo@GOT + lgrl %r1, hid@GOT + lgrl %r1, hid@GOT + lgrl %r1, ifunc@GOT + lgrl %r1, ifunc@GOT diff --git a/lld/test/ELF/systemz-ifunc-nonpreemptible.s b/lld/test/ELF/systemz-ifunc-nonpreemptible.s new file mode 100644 index 0000000000000..5056db302ca1c --- /dev/null +++ b/lld/test/ELF/systemz-ifunc-nonpreemptible.s @@ -0,0 +1,75 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x-none-linux-gnu %s -o %t.o +# RUN: ld.lld -static %t.o -o %t +# RUN: ld.lld -static %t.o -o %t.apply --apply-dynamic-relocs +# RUN: llvm-readelf 
--section-headers --relocations --symbols %t | FileCheck %s +# RUN: llvm-readelf -x .got.plt %t | FileCheck %s --check-prefix=NO-APPLY-RELOC +# RUN: llvm-readelf -x .got.plt %t.apply | FileCheck %s --check-prefix=APPLY-RELOC +# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t | FileCheck %s --check-prefix=DISASM + +# CHECK: Section Headers: +# CHECK-NEXT: [Nr] Name Type Address Off Size ES Flg Lk Inf Al +# CHECK-NEXT: [ 0] NULL 0000000000000000 000000 000000 00 0 0 0 +# CHECK-NEXT: [ 1] .rela.dyn RELA 0000000001000158 000158 000030 18 AI 0 4 8 +# CHECK-NEXT: [ 2] .text PROGBITS 0000000001001188 000188 00001c 00 AX 0 0 4 +# CHECK-NEXT: [ 3] .iplt PROGBITS 00000000010011b0 0001b0 000040 00 AX 0 0 16 +# CHECK-NEXT: [ 4] .got.plt PROGBITS 00000000010021f0 0001f0 000010 00 WA 0 0 8 + +# CHECK: Relocation section '.rela.dyn' at offset 0x158 contains 2 entries: +# CHECK-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend +# CHECK-NEXT: 00000000010021f0 000000000000003d R_390_IRELATIVE 1001188 +# CHECK-NEXT: 00000000010021f8 000000000000003d R_390_IRELATIVE 100118a + +# CHECK: Symbol table '.symtab' contains 6 entries: +# CHECK-NEXT: Num: Value Size Type Bind Vis Ndx Name +# CHECK-NEXT: 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND +# CHECK-NEXT: 1: 0000000001000158 0 NOTYPE LOCAL HIDDEN 1 __rela_iplt_start +# CHECK-NEXT: 2: 0000000001000188 0 NOTYPE LOCAL HIDDEN 1 __rela_iplt_end +# CHECK-NEXT: 3: 0000000001001188 0 IFUNC GLOBAL DEFAULT 2 foo +# CHECK-NEXT: 4: 000000000100118a 0 IFUNC GLOBAL DEFAULT 2 bar +# CHECK-NEXT: 5: 000000000100118c 0 NOTYPE GLOBAL DEFAULT 2 _start + +# NO-APPLY-RELOC-LABEL: Hex dump of section '.got.plt': +# NO-APPLY-RELOC-NEXT: 0x010021f0 00000000 00000000 00000000 00000000 +# NO-APPLY-RELOC-EMPTY: + +# APPLY-RELOC-LABEL: Hex dump of section '.got.plt': +# APPLY-RELOC-NEXT: 0x010021f0 00000000 01001188 00000000 0100118a +# APPLY-RELOC-EMPTY: + +# DISASM: Disassembly of section .text: +# DISASM: 0000000001001188 : +# 
DISASM-NEXT: br %r14 +# DISASM: 000000000100118a : +# DISASM-NEXT: br %r14 +# DISASM: 000000000100118c <_start>: +# DISASM-NEXT: brasl %r14, 0x10011b0 +# DISASM-NEXT: brasl %r14, 0x10011d0 +# DISASM-NEXT: larl %r2, 0x1000158 +# DISASM-NEXT: larl %r2, 0x1000188 +# DISASM: Disassembly of section .iplt: +# DISASM: <.iplt>: +# DISASM: 10011b0: larl %r1, 0x10021f0 +# DISASM-NEXT: 10011b6: lg %r1, 0(%r1) +# DISASM-NEXT: 10011bc: br %r1 +# DISASM: 10011d0: larl %r1, 0x10021f8 +# DISASM-NEXT: 10011d6: lg %r1, 0(%r1) +# DISASM-NEXT: 10011dc: br %r1 + +.text +.type foo STT_GNU_IFUNC +.globl foo +foo: + br %r14 + +.type bar STT_GNU_IFUNC +.globl bar +bar: + br %r14 + +.globl _start +_start: + brasl %r14, foo@plt + brasl %r14, bar@plt + larl %r2, __rela_iplt_start + larl %r2, __rela_iplt_end diff --git a/lld/test/ELF/systemz-init-padding.s b/lld/test/ELF/systemz-init-padding.s new file mode 100644 index 0000000000000..c56b98d43f1b0 --- /dev/null +++ b/lld/test/ELF/systemz-init-padding.s @@ -0,0 +1,27 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %p/Inputs/systemz-init.s -o systemz-init.o +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o +# RUN: ld.lld -dynamic-linker /lib/ld64.so.1 %t.o systemz-init.o -o %t +# RUN: llvm-objdump -d --no-show-raw-insn -j .init %t | FileCheck %s + +# glibc < 2.39 used to align .init and .fini code at a 4-byte boundary. +# When that happens, the linker must not pad the code with invalid +# instructions, e.g. null bytes. 
+ .section .init,"ax",@progbits + brasl %r14, startup + +# CHECK: <.init>: +# CHECK-NEXT: brasl %r14, +# CHECK-NEXT: bcr 0, %r7 +# CHECK-NEXT: lg %r4, 272(%r15) + + .text + .globl startup + .p2align 4 +startup: + br %r14 + + .globl main + .p2align 4 +main: + br %r14 diff --git a/lld/test/ELF/systemz-pie.s b/lld/test/ELF/systemz-pie.s new file mode 100644 index 0000000000000..bb971a82fb8ce --- /dev/null +++ b/lld/test/ELF/systemz-pie.s @@ -0,0 +1,38 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t1.o + +## Check -pie. +# RUN: ld.lld -pie %t1.o -o %t +# RUN: llvm-readelf --file-headers --program-headers --dynamic %t | FileCheck %s + +# CHECK: ELF Header: +# CHECK-NEXT: Magic: 7f 45 4c 46 02 02 01 00 00 00 00 00 00 00 00 00 +# CHECK-NEXT: Class: ELF64 +# CHECK-NEXT: Data: 2's complement, big endian +# CHECK-NEXT: Version: 1 (current) +# CHECK-NEXT: OS/ABI: UNIX - System V +# CHECK-NEXT: ABI Version: 0 +# CHECK-NEXT: Type: DYN (Shared object file) +# CHECK-NEXT: Machine: IBM S/390 +# CHECK-NEXT: Version: 0x1 + +# CHECK: Program Headers: +# CHECK-NEXT: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align +# CHECK-NEXT: PHDR 0x000040 0x0000000000000040 0x0000000000000040 0x000188 0x000188 R 0x8 +# CHECK-NEXT: LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x00020d 0x00020d R 0x1000 +# CHECK-NEXT: LOAD 0x000210 0x0000000000002210 0x0000000000002210 0x000090 0x000df0 RW 0x1000 +# CHECK-NEXT: DYNAMIC 0x000210 0x0000000000002210 0x0000000000002210 0x000090 0x000090 RW 0x8 +# CHECK-NEXT: GNU_RELRO 0x000210 0x0000000000002210 0x0000000000002210 0x000090 0x000df0 R 0x1 +# CHECK-NEXT: GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW 0x0 + +# CHECK: Dynamic section at offset 0x210 contains 9 entries: +# CHECK-NEXT: Tag Type Name/Value +# CHECK-NEXT: 0x000000006ffffffb (FLAGS_1) PIE + +## Check -nopie +# RUN: ld.lld -no-pie %t1.o -o %t2 +# RUN: llvm-readelf --file-headers %t2 | FileCheck %s 
--check-prefix=NOPIE +# NOPIE-NOT: Type: DYN + +.globl _start +_start: diff --git a/lld/test/ELF/systemz-plt.s b/lld/test/ELF/systemz-plt.s new file mode 100644 index 0000000000000..4669f01f58812 --- /dev/null +++ b/lld/test/ELF/systemz-plt.s @@ -0,0 +1,83 @@ +# REQUIRES: systemz +# RUN: echo '.globl bar, weak; .type bar,@function; .type weak,@function; bar: weak:' > %t1.s + +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %t1.s -o %t1.o +# RUN: ld.lld -shared %t1.o -soname=t1.so -o %t1.so +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o +# RUN: ld.lld %t.o %t1.so -z separate-code -o %t +# RUN: llvm-readelf -S -s -r -x .got.plt %t | FileCheck %s +# RUN: llvm-objdump -d %t | FileCheck --check-prefixes=DIS %s + +# CHECK: Section Headers: +# CHECK: .plt PROGBITS 0000000001001020 001020 000060 00 AX 0 0 16 +# CHECK: .got PROGBITS 00000000010020d0 0020d0 000018 00 WA 0 0 8 +# CHECK: .got.plt PROGBITS 00000000010030e8 0020e8 000010 00 WA 0 0 8 + +# CHECK: Relocation section '.rela.plt' at offset {{.*}} contains 2 entries: +# CHECK: 00000000010030e8 000000010000000b R_390_JMP_SLOT 0000000000000000 bar + 0 +# CHECK: 00000000010030f0 000000020000000b R_390_JMP_SLOT 0000000000000000 weak + 0 + +## A canonical PLT has a non-zero st_value. bar and weak are called but their +## addresses are not taken, so a canonical PLT is not necessary. +# CHECK: Symbol table '.dynsym' contains 3 entries: +# CHECK-NEXT: Num: Value Size Type Bind Vis Ndx Name +# CHECK-NEXT: 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND +# CHECK-NEXT: 1: 0000000000000000 0 FUNC GLOBAL DEFAULT UND bar +# CHECK-NEXT: 2: 0000000000000000 0 FUNC WEAK DEFAULT UND weak + +## The .got.plt slots relocated by .rela.plt point to .plt +## This is required by glibc. 
+# CHECK: Hex dump of section '.got.plt': +# CHECK-NEXT: 0x010030e8 00000000 0100104e 00000000 0100106e + +# DIS: Disassembly of section .text: + +# DIS: 0000000001001000 <_start>: +# DIS-NEXT: brasl %r14, 0x1001012 +# DIS-NEXT: brasl %r14, 0x1001040 +# DIS-NEXT: brasl %r14, 0x1001060 + +# DIS: 0000000001001012 : +# DIS-NEXT: br %r14 + +# DIS: Disassembly of section .plt: + +# DIS: 0000000001001020 <.plt>: +# DIS-NEXT: 1001020: e3 10 f0 38 00 24 stg %r1, 56(%r15) +# DIS-NEXT: 1001026: c0 10 00 00 08 55 larl %r1, 0x10020d0 +# DIS-NEXT: 100102c: d2 07 f0 30 10 08 mvc 48(8,%r15), 8(%r1) +# DIS-NEXT: 1001032: e3 10 10 10 00 04 lg %r1, 16(%r1) +# DIS-NEXT: 1001038: 07 f1 br %r1 +# DIS-NEXT: 100103a: 07 00 bcr 0, %r0 +# DIS-NEXT: 100103c: 07 00 bcr 0, %r0 +# DIS-NEXT: 100103e: 07 00 bcr 0, %r0 +# DIS-NEXT: 1001040: c0 10 00 00 10 54 larl %r1, 0x10030e8 +# DIS-NEXT: 1001046: e3 10 10 00 00 04 lg %r1, 0(%r1) +# DIS-NEXT: 100104c: 07 f1 br %r1 +# DIS-NEXT: 100104e: 0d 10 basr %r1, 0 +# DIS-NEXT: 1001050: e3 10 10 0c 00 14 lgf %r1, 12(%r1) +# DIS-NEXT: 1001056: c0 f4 ff ff ff e5 jg 0x1001020 +# DIS-NEXT: 100105c: 00 00 +# DIS-NEXT: 100105e: 00 00 +# DIS-NEXT: 1001060: c0 10 00 00 10 48 larl %r1, 0x10030f0 +# DIS-NEXT: 1001066: e3 10 10 00 00 04 lg %r1, 0(%r1) +# DIS-NEXT: 100106c: 07 f1 br %r1 +# DIS-NEXT: 100106e: 0d 10 basr %r1, 0 +# DIS-NEXT: 1001070: e3 10 10 0c 00 14 lgf %r1, 12(%r1) +# DIS-NEXT: 1001076: c0 f4 ff ff ff d5 jg 0x1001020 +# DIS-NEXT: 100107c: 00 00 +# DIS-NEXT: 100107e: 00 18 + +.global _start, foo, bar +.weak weak + +_start: + ## Use @plt to avoid generating direct references that would force + ## allocation of a canonical PLT entry. + brasl %r14, foo@plt + brasl %r14, bar@plt + brasl %r14, weak@plt + +## foo is local and non-preemptible, no PLT is generated.
+foo: + br %r14 diff --git a/lld/test/ELF/systemz-reloc-abs.s b/lld/test/ELF/systemz-reloc-abs.s new file mode 100644 index 0000000000000..b5ad94d90d3a9 --- /dev/null +++ b/lld/test/ELF/systemz-reloc-abs.s @@ -0,0 +1,32 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=s390x %S/Inputs/abs255.s -o %t255.o +# RUN: llvm-mc -filetype=obj -triple=s390x %S/Inputs/abs256.s -o %t256.o +# RUN: llvm-mc -filetype=obj -triple=s390x %S/Inputs/abs257.s -o %t257.o + +# RUN: ld.lld %t.o %t256.o -o %t +# RUN: llvm-readelf -x .data %t | FileCheck %s +# CHECK: 0x{{[0-9a-f]+}} ff80ffff 8000ffff ffff8000 0000ffff +# CHECK-NEXT: ffffffff ffff8000 00000000 0000 + +# RUN: not ld.lld %t.o %t255.o -o /dev/null 2>&1 | FileCheck --check-prefix=OVERFLOW1 %s +# OVERFLOW1: relocation R_390_8 out of range: -129 is not in [-128, 255] +# OVERFLOW1: relocation R_390_16 out of range: -32769 is not in [-32768, 65535] +# OVERFLOW1: relocation R_390_32 out of range: -2147483649 is not in [-2147483648, 4294967295] + +# RUN: not ld.lld %t.o %t257.o -o /dev/null 2>&1 | FileCheck --check-prefix=OVERFLOW2 %s +# OVERFLOW2: relocation R_390_8 out of range: 256 is not in [-128, 255] +# OVERFLOW2: relocation R_390_16 out of range: 65536 is not in [-32768, 65535] +# OVERFLOW2: relocation R_390_32 out of range: 4294967296 is not in [-2147483648, 4294967295] + +.globl _start +_start: +.data +.byte foo - 1 +.byte foo - 384 +.word foo + 0xfeff +.word foo - 0x8100 +.long foo + 0xfffffeff +.long foo - 0x80000100 +.quad foo + 0xfffffffffffffeff +.quad foo - 0x8000000000000100 diff --git a/lld/test/ELF/systemz-reloc-disp12.s b/lld/test/ELF/systemz-reloc-disp12.s new file mode 100644 index 0000000000000..3d32707d149fe --- /dev/null +++ b/lld/test/ELF/systemz-reloc-disp12.s @@ -0,0 +1,21 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym DISP=291 %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym DISP=4095 %s -o %t2.o 
+# RUN: llvm-mc -filetype=obj -triple=s390x -defsym DISP=4096 %s -o %t3.o + +# RUN: ld.lld --section-start=.text=0x0 %t1.o -o %t1out +# RUN: ld.lld --section-start=.text=0x0 %t2.o -o %t2out +# RUN: not ld.lld --section-start=.text=0x0 %t3.o -o /dev/null 2>&1 | FileCheck %s --check-prefix RANGE + +# RANGE: relocation R_390_12 out of range: 4096 is not in [0, 4095] + +# RUN: llvm-readelf --hex-dump=.text %t1out | FileCheck %s -DINSN=58678123 --check-prefix DUMP +# RUN: llvm-readelf --hex-dump=.text %t2out | FileCheck %s -DINSN=58678fff --check-prefix DUMP + +# DUMP: 0x00000000 [[INSN]] + +.text +.globl _start +_start: + .reloc .+2, R_390_12, DISP + l %r6, 0(%r7,%r8) diff --git a/lld/test/ELF/systemz-reloc-disp20.s b/lld/test/ELF/systemz-reloc-disp20.s new file mode 100644 index 0000000000000..88cd657c6ae3c --- /dev/null +++ b/lld/test/ELF/systemz-reloc-disp20.s @@ -0,0 +1,21 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym DISP=74565 %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym DISP=524287 %s -o %t2.o +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym DISP=524288 %s -o %t3.o + +# RUN: ld.lld --section-start=.text=0x0 %t1.o -o %t1out +# RUN: ld.lld --section-start=.text=0x0 %t2.o -o %t2out +# RUN: not ld.lld --section-start=.text=0x0 %t3.o -o /dev/null 2>&1 | FileCheck %s --check-prefix RANGE + +# RANGE: relocation R_390_20 out of range: 524288 is not in [-524288, 524287] + +# RUN: llvm-readelf --hex-dump=.text %t1out | FileCheck %s -DINSN="e3678345 1204" --check-prefix DUMP +# RUN: llvm-readelf --hex-dump=.text %t2out | FileCheck %s -DINSN="e3678fff 7f04" --check-prefix DUMP + +# DUMP: 0x00000000 [[INSN]] + +.text +.globl _start +_start: + .reloc .+2, R_390_20, DISP + lg %r6, 0(%r7,%r8) diff --git a/lld/test/ELF/systemz-reloc-got.s b/lld/test/ELF/systemz-reloc-got.s new file mode 100644 index 0000000000000..4b9ac16481f4c --- /dev/null +++ b/lld/test/ELF/systemz-reloc-got.s @@ -0,0 +1,92 @@ +# REQUIRES: systemz +# RUN: 
llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o +# RUN: ld.lld -z norelro -shared %t.o -soname=t.so -o %t.so +## Note: Without norelro the distance between .got and .got.plt causes +## R_390_GOTPLT12 relocations to always overflow. + +# RUN: llvm-readelf -S -x .data %t.so | FileCheck %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck %s --check-prefix=DISASM + +# CHECK: Section Headers: +# CHECK: .got PROGBITS 0000000000002458 +# CHECK: .got.plt PROGBITS 0000000000002480 + +## Note: _GLOBAL_OFFSET_TABLE is at .got +## GOT (foo) is at .got + 24 == 0x2470 +## GOT (bar) is at .got + 32 == 0x2478 +## GOTPLT (foo) is at .got.plt + 0 == .got + 40 == 0x2480 +## GOTPLT (bar) is at .got.plt + 8 == .got + 48 == 0x2488 + +# DISASM: larl %r12, 0x2458 +# DISASM-NEXT: larl %r1, 0x2470 +# DISASM-NEXT: larl %r1, 0x2478 +# DISASM-NEXT: larl %r1, 0x2480 +# DISASM-NEXT: larl %r1, 0x2488 + +# DISASM-NEXT: l %r1, 24(%r12) +# DISASM-NEXT: l %r1, 32(%r12) +# DISASM-NEXT: l %r1, 40(%r12) +# DISASM-NEXT: l %r1, 48(%r12) +# DISASM-NEXT: lg %r1, 24(%r12) +# DISASM-NEXT: lg %r1, 32(%r12) +# DISASM-NEXT: lg %r1, 40(%r12) +# DISASM-NEXT: lg %r1, 48(%r12) + +# CHECK: Hex dump of section '.data': +# CHECK-NEXT: 00180020 00280030 00000018 00000020 +# CHECK-NEXT: 00000028 00000030 00000000 00000018 +# CHECK-NEXT: 00000000 00000020 00000000 00000028 +# CHECK-NEXT: 00000000 00000030 + +.text +larl %r12, _GLOBAL_OFFSET_TABLE_ +.reloc .+2, R_390_GOTENT, foo+2 +larl %r1, 0 +.reloc .+2, R_390_GOTENT, bar+2 +larl %r1, 0 +.reloc .+2, R_390_GOTPLTENT, foo+2 +larl %r1, 0 +.reloc .+2, R_390_GOTPLTENT, bar+2 +larl %r1, 0 +.reloc .+2, R_390_GOT12, foo +l %r1, 0(%r12) +.reloc .+2, R_390_GOT12, bar +l %r1, 0(%r12) +.reloc .+2, R_390_GOTPLT12, foo +l %r1, 0(%r12) +.reloc .+2, R_390_GOTPLT12, bar +l %r1, 0(%r12) +.reloc .+2, R_390_GOT20, foo +lg %r1, 0(%r12) +.reloc .+2, R_390_GOT20, bar +lg %r1, 0(%r12) +.reloc .+2, R_390_GOTPLT20, foo +lg %r1, 0(%r12) +.reloc .+2, R_390_GOTPLT20, bar +lg 
%r1, 0(%r12) + +.data +.reloc ., R_390_GOT16, foo +.space 2 +.reloc ., R_390_GOT16, bar +.space 2 +.reloc ., R_390_GOTPLT16, foo +.space 2 +.reloc ., R_390_GOTPLT16, bar +.space 2 +.reloc ., R_390_GOT32, foo +.space 4 +.reloc ., R_390_GOT32, bar +.space 4 +.reloc ., R_390_GOTPLT32, foo +.space 4 +.reloc ., R_390_GOTPLT32, bar +.space 4 +.reloc ., R_390_GOT64, foo +.space 8 +.reloc ., R_390_GOT64, bar +.space 8 +.reloc ., R_390_GOTPLT64, foo +.space 8 +.reloc ., R_390_GOTPLT64, bar +.space 8 diff --git a/lld/test/ELF/systemz-reloc-gotrel.s b/lld/test/ELF/systemz-reloc-gotrel.s new file mode 100644 index 0000000000000..46669ecfa7fd0 --- /dev/null +++ b/lld/test/ELF/systemz-reloc-gotrel.s @@ -0,0 +1,36 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o +# RUN: ld.lld -shared %t.o -soname=t.so -o %t.so + +# RUN: llvm-readelf -S -s -x .data %t.so | FileCheck %s + +# CHECK: Section Headers: +# CHECK: .plt PROGBITS 0000000000001290 +# CHECK: .got PROGBITS 0000000000002390 + +# CHECK: Symbol table '.symtab' +# CHECK: 0000000000001288 {{.*}} bar + +## Note: foo is the first (and only) PLT entry, which resides at .plt + 32 +## PLTOFF (foo) is (.plt + 32) - .got == 0x12b0 - 0x2390 == 0xffffef20 +## GOTOFF (bar) is bar - .got == 0x1288 - 0x2390 == 0xffffeef8 +# CHECK: Hex dump of section '.data': +# CHECK-NEXT: eef8ef20 ffffeef8 ffffef20 ffffffff +# CHECK-NEXT: ffffeef8 ffffffff ffffef20 + +bar: + br %r14 + +.data +.reloc ., R_390_GOTOFF16, bar +.space 2 +.reloc ., R_390_PLTOFF16, foo +.space 2 +.reloc ., R_390_GOTOFF, bar +.space 4 +.reloc ., R_390_PLTOFF32, foo +.space 4 +.reloc ., R_390_GOTOFF64, bar +.space 8 +.reloc ., R_390_PLTOFF64, foo +.space 8 diff --git a/lld/test/ELF/systemz-reloc-pc16.s b/lld/test/ELF/systemz-reloc-pc16.s new file mode 100644 index 0000000000000..e1dad5af239d4 --- /dev/null +++ b/lld/test/ELF/systemz-reloc-pc16.s @@ -0,0 +1,39 @@ +# REQUIRES: systemz +# RUN: rm -rf %t && split-file %s %t + +## Check 
recompile with -fPIC error message +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %t/shared.s -o %t/shared.o +# RUN: not ld.lld -shared %t/shared.o -o /dev/null 2>&1 | FileCheck %s + +# CHECK: error: relocation R_390_PC16 cannot be used against symbol '_shared'; recompile with -fPIC +# CHECK: >>> defined in {{.*}} +# CHECK: >>> referenced by {{.*}}:(.data+0x1) + +## Check patching of negative addends + +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym ADDEND=1 %t/addend.s -o %t/1.o +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym ADDEND=32768 %t/addend.s -o %t/2.o +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym ADDEND=32769 %t/addend.s -o %t/3.o + +# RUN: ld.lld --section-start=.text=0x0 %t/1.o -o %t/1out +# RUN: ld.lld --section-start=.text=0x0 %t/2.o -o %t/2out +# RUN: not ld.lld --section-start=.text=0x0 %t/3.o -o /dev/null 2>&1 | FileCheck %s -DFILE=%t/3.o --check-prefix RANGE + +# RANGE: error: [[FILE]]:(.text+0x0): relocation R_390_PC16 out of range + +# RUN: llvm-readelf --hex-dump=.text %t/1out | FileCheck %s -DADDEND=ffff --check-prefix DUMP +# RUN: llvm-readelf --hex-dump=.text %t/2out | FileCheck %s -DADDEND=8000 --check-prefix DUMP + +# DUMP: 0x00000000 [[ADDEND]] + +#--- shared.s +.data + .byte 0xe8 + .word _shared - . 
+ +#--- addend.s +.text +.globl _start +_start: + .reloc ., R_390_PC16, .text-ADDEND + .space 2 diff --git a/lld/test/ELF/systemz-reloc-pc32.s b/lld/test/ELF/systemz-reloc-pc32.s new file mode 100644 index 0000000000000..0cb9322eb1c1b --- /dev/null +++ b/lld/test/ELF/systemz-reloc-pc32.s @@ -0,0 +1,39 @@ +# REQUIRES: systemz +# RUN: rm -rf %t && split-file %s %t + +## Check recompile with -fPIC error message +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %t/shared.s -o %t/shared.o +# RUN: not ld.lld -shared %t/shared.o -o /dev/null 2>&1 | FileCheck %s + +# CHECK: error: relocation R_390_PC32 cannot be used against symbol '_shared'; recompile with -fPIC +# CHECK: >>> defined in {{.*}} +# CHECK: >>> referenced by {{.*}}:(.data+0x1) + +## Check patching of negative addends + +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym ADDEND=1 %t/addend.s -o %t/1.o +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym ADDEND=2147483648 %t/addend.s -o %t/2.o +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym ADDEND=2147483649 %t/addend.s -o %t/3.o + +# RUN: ld.lld --section-start=.text=0x0 %t/1.o -o %t/1out +# RUN: ld.lld --section-start=.text=0x0 %t/2.o -o %t/2out +# RUN: not ld.lld --section-start=.text=0x0 %t/3.o -o /dev/null 2>&1 | FileCheck %s -DFILE=%t/3.o --check-prefix RANGE + +# RANGE: error: [[FILE]]:(.text+0x0): relocation R_390_PC32 out of range + +# RUN: llvm-readelf --hex-dump=.text %t/1out | FileCheck %s -DADDEND=ffffffff --check-prefix DUMP +# RUN: llvm-readelf --hex-dump=.text %t/2out | FileCheck %s -DADDEND=80000000 --check-prefix DUMP + +# DUMP: 0x00000000 [[ADDEND]] + +#--- shared.s +.data + .byte 0xe8 + .long _shared - . 
+ +#--- addend.s +.text +.globl _start +_start: + .reloc ., R_390_PC32, .text-ADDEND + .space 4 diff --git a/lld/test/ELF/systemz-reloc-pcdbl.s b/lld/test/ELF/systemz-reloc-pcdbl.s new file mode 100644 index 0000000000000..faee756f5e95b --- /dev/null +++ b/lld/test/ELF/systemz-reloc-pcdbl.s @@ -0,0 +1,68 @@ +# REQUIRES: systemz + +# RUN: llvm-mc --filetype=obj --triple=s390x-unknown-linux -mcpu=z13 %s -o %t.o + +# RUN: ld.lld %t.o --defsym foo16=pc16dbl+4 --defsym bar16=pc16dbl --defsym foo32=pc32dbl+6 --defsym bar32=pc32dbl --defsym foo12=pc12dbl+6 --defsym bar12=pc12dbl --defsym foo24=pc24dbl+6 --defsym bar24=pc24dbl -o %t +# RUN: llvm-objdump --no-show-raw-insn --mcpu=z13 -d %t | FileCheck %s --check-prefix=CHECK +# CHECK: 0000000001001120 : +# CHECK: je 0x1001124 +# CHECK: jne 0x1001120 +# CHECK: 0000000001001128 : +# CHECK: jge 0x100112e +# CHECK: jgne 0x1001128 +# CHECK: 0000000001001134 : +# CHECK: bprp 5, 0x100113a, 0x1001134 +# CHECK: bprp 6, 0x1001134, 0x100113a +# CHECK: 0000000001001140 : +# CHECK: bprp 5, 0x1001140, 0x1001146 +# CHECK: bprp 6, 0x1001146, 0x1001140 + +# RUN: ld.lld %t.o --defsym foo16=pc16dbl+0xfffe --defsym bar16=pc16dbl+4-0x10000 --defsym foo32=pc32dbl+0xfffffffe --defsym bar32=pc32dbl+6-0x100000000 --defsym foo12=pc12dbl+0xffe --defsym bar12=pc12dbl+6-0x1000 --defsym foo24=pc24dbl+0xfffffe --defsym bar24=pc24dbl+6-0x1000000 -o %t.limits +# RUN: llvm-objdump --no-show-raw-insn --mcpu=z13 -d %t.limits | FileCheck %s --check-prefix=LIMITS +# LIMITS: je 0x101111e +# LIMITS-NEXT: jne 0xff1124 +# LIMITS: jge 0x101001126 +# LIMITS-NEXT: jgne 0xffffffff0100112e +# LIMITS: bprp 5, 0x1002132, 0x1001134 +# LIMITS-NEXT: bprp 6, 0x100013a, 0x100113a +# LIMITS: bprp 5, 0x1001140, 0x200113e +# LIMITS-NEXT: bprp 6, 0x1001146, 0x1146 + +# RUN: not ld.lld %t.o --defsym foo16=pc16dbl+0x10000 --defsym bar16=pc16dbl+4-0x10002 --defsym foo32=pc32dbl+0x100000000 --defsym bar32=pc32dbl+6-0x100000002 --defsym foo12=pc12dbl+0x1000 --defsym 
bar12=pc12dbl+6-0x1002 --defsym foo24=pc24dbl+0x1000000 --defsym bar24=pc24dbl+6-0x1000002 -o /dev/null 2>&1 | FileCheck -DFILE=%t.o --check-prefix=ERROR-RANGE %s +# ERROR-RANGE: error: [[FILE]]:(.text+0x2): relocation R_390_PC16DBL out of range: 65536 is not in [-65536, 65535]; references 'foo16' +# ERROR-RANGE: error: [[FILE]]:(.text+0x6): relocation R_390_PC16DBL out of range: -65538 is not in [-65536, 65535]; references 'bar16' +# ERROR-RANGE: error: [[FILE]]:(.text+0xa): relocation R_390_PC32DBL out of range: 4294967296 is not in [-4294967296, 4294967295]; references 'foo32' +# ERROR-RANGE: error: [[FILE]]:(.text+0x10): relocation R_390_PC32DBL out of range: -4294967298 is not in [-4294967296, 4294967295]; references 'bar32' +# ERROR-RANGE: error: [[FILE]]:(.text+0x15): relocation R_390_PC12DBL out of range: 4096 is not in [-4096, 4095]; references 'foo12' +# ERROR-RANGE: error: [[FILE]]:(.text+0x1b): relocation R_390_PC12DBL out of range: -4098 is not in [-4096, 4095]; references 'bar12' +# ERROR-RANGE: error: [[FILE]]:(.text+0x23): relocation R_390_PC24DBL out of range: 16777216 is not in [-16777216, 16777215]; references 'foo24' +# ERROR-RANGE: error: [[FILE]]:(.text+0x29): relocation R_390_PC24DBL out of range: -16777218 is not in [-16777216, 16777215]; references 'bar24' + +# RUN: not ld.lld %t.o --defsym foo16=pc16dbl+1 --defsym bar16=pc16dbl-1 --defsym foo32=pc32dbl+1 --defsym bar32=pc32dbl-1 --defsym foo12=pc12dbl+1 --defsym bar12=pc12dbl-1 --defsym foo24=pc24dbl+1 --defsym bar24=pc24dbl-1 -o /dev/null 2>&1 | FileCheck -DFILE=%t.o --check-prefix=ERROR-ALIGN %s +# ERROR-ALIGN: error: [[FILE]]:(.text+0x2): improper alignment for relocation R_390_PC16DBL: 0x1 is not aligned to 2 bytes +# ERROR-ALIGN-NEXT: error: [[FILE]]:(.text+0x6): improper alignment for relocation R_390_PC16DBL: 0xFFFFFFFFFFFFFFFB is not aligned to 2 bytes +# ERROR-ALIGN-NEXT: error: [[FILE]]:(.text+0xa): improper alignment for relocation R_390_PC32DBL: 0x1 is not aligned to 2 bytes +# 
ERROR-ALIGN-NEXT: error: [[FILE]]:(.text+0x10): improper alignment for relocation R_390_PC32DBL: 0xFFFFFFFFFFFFFFF9 is not aligned to 2 bytes +# ERROR-ALIGN-NEXT: error: [[FILE]]:(.text+0x15): improper alignment for relocation R_390_PC12DBL: 0x1 is not aligned to 2 bytes +# ERROR-ALIGN-NEXT: error: [[FILE]]:(.text+0x1b): improper alignment for relocation R_390_PC12DBL: 0xFFFFFFFFFFFFFFF9 is not aligned to 2 bytes +# ERROR-ALIGN-NEXT: error: [[FILE]]:(.text+0x23): improper alignment for relocation R_390_PC24DBL: 0x1 is not aligned to 2 bytes +# ERROR-ALIGN-NEXT: error: [[FILE]]:(.text+0x29): improper alignment for relocation R_390_PC24DBL: 0xFFFFFFFFFFFFFFF9 is not aligned to 2 bytes + +.global _start +.global pc16dbl +.global pc32dbl +.global pc12dbl +.global pc24dbl +_start: +pc16dbl: + je foo16 + jne bar16 +pc32dbl: + jge foo32 + jgne bar32 +pc12dbl: + bprp 5,foo12,0 + bprp 6,bar12,0 +pc24dbl: + bprp 5,0,foo24 + bprp 6,0,bar24 diff --git a/lld/test/ELF/systemz-tls-gd.s b/lld/test/ELF/systemz-tls-gd.s new file mode 100644 index 0000000000000..3976f55a6ae39 --- /dev/null +++ b/lld/test/ELF/systemz-tls-gd.s @@ -0,0 +1,142 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o +# RUN: echo '.tbss; .globl b, c; b: .zero 4; c:' | llvm-mc -filetype=obj -triple=s390x-unknown-linux - -o %t1.o +# RUN: ld.lld -shared -soname=t1.so %t1.o -o %t1.so + +# RUN: ld.lld -shared %t.o %t1.o -o %t.so +# RUN: llvm-readelf -r %t.so | FileCheck --check-prefix=GD-REL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck --check-prefix=GD %s +# RUN: llvm-objdump --section .data.rel.ro --full-contents %t.so | FileCheck --check-prefix=GD-DATA %s + +# RUN: ld.lld %t.o %t1.o -o %t.le +# RUN: llvm-readelf -r %t.le | FileCheck --check-prefix=NOREL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.le | FileCheck --check-prefix=LE %s +# RUN: llvm-objdump --section .data.rel.ro --full-contents %t.le | FileCheck --check-prefix=LE-DATA %s + +# RUN: 
ld.lld %t.o %t1.so -o %t.ie +# RUN: llvm-readelf -r %t.ie | FileCheck --check-prefix=IE-REL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.ie | FileCheck --check-prefix=IE %s +# RUN: llvm-objdump --section .data.rel.ro --full-contents %t.ie | FileCheck --check-prefix=IE-DATA %s + +# GD-REL: Relocation section '.rela.dyn' at offset {{.*}} contains 6 entries: +# GD-REL: 0000000000002570 0000000200000036 R_390_TLS_DTPMOD 0000000000000008 a + 0 +# GD-REL-NEXT: 0000000000002578 0000000200000037 R_390_TLS_DTPOFF 0000000000000008 a + 0 +# GD-REL-NEXT: 0000000000002580 0000000300000036 R_390_TLS_DTPMOD 000000000000000c b + 0 +# GD-REL-NEXT: 0000000000002588 0000000300000037 R_390_TLS_DTPOFF 000000000000000c b + 0 +# GD-REL-NEXT: 0000000000002590 0000000400000036 R_390_TLS_DTPMOD 0000000000000010 c + 0 +# GD-REL-NEXT: 0000000000002598 0000000400000037 R_390_TLS_DTPOFF 0000000000000010 c + 0 + +## _GLOBAL_OFFSET_TABLE is at 0x2558 +# GD: larl %r12, 0x2558 + +## GOT offset of the TLS module ID / offset pair for a is at 0x2460 +# GD-NEXT: lgrl %r2, 0x2460 +# GD-NEXT: brasl %r14, 0x1440 +# GD-NEXT: lgf %r2, 0(%r2,%r7) + +## GOT offset of the TLS module ID / offset pair for b is at 0x2468 +# GD-NEXT: lgrl %r2, 0x2468 +# GD-NEXT: brasl %r14, 0x1440 +# GD-NEXT: lgf %r2, 0(%r2,%r7) + +## GOT offset of the TLS module ID / offset pair for c is at 0x2470 +# GD-NEXT: lgrl %r2, 0x2470 +# GD-NEXT: brasl %r14, 0x1440 +# GD-NEXT: lgf %r2, 0(%r2,%r7) + +## Constant pool holding GOT offsets of TLS module ID / offset pairs: +# a: 0x2570 / 0x18 +# b: 0x2580 / 0x28 +# c: 0x2590 / 0x38 +# GD-DATA: 2460 00000000 00000018 00000000 00000028 +# GD-DATA-NEXT: 2470 00000000 00000038 + +# NOREL: no relocations + +## _GLOBAL_OFFSET_TABLE is at 0x1002230 +# LE: larl %r12, 0x1002230 + +## TP offset of a is at 0x1002218 +# LE-NEXT: lgrl %r2, 0x1002218 +# LE-NEXT: brcl 0, +# LE-NEXT: lgf %r2, 0(%r2,%r7) + +## TP offset of b is at 0x1002220 +# LE-NEXT: lgrl %r2, 0x1002220 +# LE-NEXT: brcl 0, +# LE-NEXT: 
lgf %r2, 0(%r2,%r7) + +## TP offset of c is at 0x1002228 +# LE-NEXT: lgrl %r2, 0x1002228 +# LE-NEXT: brcl 0, +# LE-NEXT: lgf %r2, 0(%r2,%r7) + +## TP offsets +# a: -8 +# b: -4 +# c: 0 +# LE-DATA: 1002218 ffffffff fffffff8 ffffffff fffffffc +# LE-DATA-NEXT: 1002228 00000000 00000000 + + +# IE-REL: Relocation section '.rela.dyn' at offset {{.*}} contains 2 entries: +# IE-REL: 0000000001002430 0000000200000038 R_390_TLS_TPOFF 0000000000000000 b + 0 +# IE-REL-NEXT: 0000000001002438 0000000300000038 R_390_TLS_TPOFF 0000000000000000 c + 0 + +## _GLOBAL_OFFSET_TABLE is at 0x1002418 +# IE: larl %r12, 0x1002418 + +## TP offset of a is at 0x1002340 +# IE-NEXT: lgrl %r2, 0x1002340 +# IE-NEXT: brcl 0, +# IE-NEXT: lgf %r2, 0(%r2,%r7) + +## GOT offset of the TP offset for b is at 0x1002348 +# IE-NEXT: lgrl %r2, 0x1002348 +# IE-NEXT: lg %r2, 0(%r2,%r12) +# IE-NEXT: lgf %r2, 0(%r2,%r7) + +## GOT offset of the TP offset for c is at 0x1002350 +# IE-NEXT: lgrl %r2, 0x1002350 +# IE-NEXT: lg %r2, 0(%r2,%r12) +# IE-NEXT: lgf %r2, 0(%r2,%r7) + +## TP offsets (a) / GOT offset of TP offsets (b, c) +# a: -4 +# b: 0x1002430 / 0x18 +# c: 0x1002438 / 0x20 +# IE-DATA: 1002340 ffffffff fffffffc 00000000 00000018 +# IE-DATA-NEXT: 1002350 00000000 00000020 + + +ear %r7,%a0 +sllg %r7,%r1,32 +ear %r7,%a1 +larl %r12,_GLOBAL_OFFSET_TABLE_ + +lgrl %r2,.LC0 +brasl %r14,__tls_get_offset@PLT:tls_gdcall:a +lgf %r2,0(%r2,%r7) + +lgrl %r2,.LC1 +brasl %r14,__tls_get_offset@PLT:tls_gdcall:b +lgf %r2,0(%r2,%r7) + +lgrl %r2,.LC2 +brasl %r14,__tls_get_offset@PLT:tls_gdcall:c +lgf %r2,0(%r2,%r7) + + .section .data.rel.ro,"aw" + .align 8 +.LC0: + .quad a@TLSGD +.LC1: + .quad b@TLSGD +.LC2: + .quad c@TLSGD + + .section .tbss + .globl a + .zero 8 +a: + .zero 4 diff --git a/lld/test/ELF/systemz-tls-ie.s b/lld/test/ELF/systemz-tls-ie.s new file mode 100644 index 0000000000000..27b642ed2dfc5 --- /dev/null +++ b/lld/test/ELF/systemz-tls-ie.s @@ -0,0 +1,87 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj 
-triple=s390x-unknown-linux %s -o %t.o + +# RUN: ld.lld -shared %t.o -o %t.so +# RUN: llvm-readelf -r %t.so | FileCheck --check-prefix=IE-REL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck --check-prefix=IE %s +# RUN: llvm-objdump --section .data --full-contents %t.so | FileCheck --check-prefix=IE-DATA %s + +# RUN: ld.lld %t.o -o %t +# RUN: llvm-readelf -r %t | FileCheck --check-prefix=NOREL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=LE %s +# RUN: llvm-objdump --section .data --full-contents %t | FileCheck --check-prefix=LE-DATA %s +# RUN: llvm-objdump --section .got --full-contents %t | FileCheck --check-prefix=LE-GOT %s + +# IE-REL: Relocation section '.rela.dyn' at offset {{.*}} contains 4 entries: +# IE-REL: 0000000000003478 000000000000000c R_390_RELATIVE 2460 +# IE-REL: 0000000000002460 0000000100000038 R_390_TLS_TPOFF 0000000000000008 a + 0 +# IE-REL: 0000000000002468 0000000200000038 R_390_TLS_TPOFF 000000000000000c b + 0 +# IE-REL: 0000000000002470 0000000300000038 R_390_TLS_TPOFF 0000000000000010 c + 0 + +## TP offset for a is at 0x2460 +# IE: lgrl %r1, 0x2460 +# IE-NEXT: lgf %r1, 0(%r1,%r7) + +## TP offset for b is at 0x2468 +# IE-NEXT: lgrl %r1, 0x2468 +# IE-NEXT: lgf %r1, 0(%r1,%r7) + +## TP offset for c is at 0x2470 +# IE-NEXT: lgrl %r1, 0x2470 +# IE-NEXT: lgf %r1, 0(%r1,%r7) + +## Data element: TP offset for a is at 0x2460 (relocated via R_390_RELATIVE above) +# IE-DATA: 3478 00000000 00000000 + +# NOREL: no relocations + +## TP offset for a is at 0x1002250 +# LE: lgrl %r1, 0x1002250 +# LE-NEXT: lgf %r1, 0(%r1,%r7) + +## TP offset for b is at 0x1002258 +# LE-NEXT: lgrl %r1, 0x1002258 +# LE-NEXT: lgf %r1, 0(%r1,%r7) + +## TP offset for c is at 0x1002260 +# LE-NEXT: lgrl %r1, 0x1002260 +# LE-NEXT: lgf %r1, 0(%r1,%r7) + +## Data element: TP offset for a is at 0x1002250 +# LE-DATA: 00000000 01002250 + +## TP offsets in GOT: +# a: -8 +# b: -4 +# c: 0 +# LE-GOT: 1002238 00000000 00000000 00000000 00000000 
+# LE-GOT: 1002248 00000000 00000000 ffffffff fffffff8 +# LE-GOT: 1002258 ffffffff fffffffc 00000000 00000000 + +ear %r7,%a0 +sllg %r7,%r1,32 +ear %r7,%a1 + +lgrl %r1, a@indntpoff +lgf %r1,0(%r1,%r7) + +lgrl %r1, b@indntpoff +lgf %r1,0(%r1,%r7) + +lgrl %r1, c@indntpoff +lgf %r1,0(%r1,%r7) + + .data + .reloc .,R_390_TLS_IE64,a + .space 8 + + .section .tbss + .globl a + .globl b + .globl c + .zero 8 +a: + .zero 4 +b: + .zero 4 +c: diff --git a/lld/test/ELF/systemz-tls-ld.s b/lld/test/ELF/systemz-tls-ld.s new file mode 100644 index 0000000000000..2cb36d7294f2b --- /dev/null +++ b/lld/test/ELF/systemz-tls-ld.s @@ -0,0 +1,114 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o + +# RUN: ld.lld -shared %t.o -o %t.so +# RUN: llvm-readelf -r %t.so | FileCheck --check-prefix=LD-REL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck --check-prefix=LD %s +# RUN: llvm-objdump --section .data.rel.ro --full-contents %t.so | FileCheck --check-prefix=LD-DATA %s + +# RUN: ld.lld %t.o -o %t +# RUN: llvm-readelf -r %t | FileCheck --check-prefix=NOREL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=LE %s +# RUN: llvm-objdump --section .data.rel.ro --full-contents %t | FileCheck --check-prefix=LE-DATA %s + +# LD-REL: Relocation section '.rela.dyn' at offset {{.*}} contains 1 entries: +# LD-REL: 00000000000024f8 0000000000000036 R_390_TLS_DTPMOD 0 + +## _GLOBAL_OFFSET_TABLE is at 0x24e0 +# LD: larl %r12, 0x24e0 + +## GOT offset of the LDM TLS module ID is at 0x23e0 +# LD-NEXT: lgrl %r2, 0x23e0 +# LD-NEXT: brasl %r14, 0x13c0 +# LD-NEXT: la %r2, 0(%r2,%r7) + +## DTP offset for a is at 0x23e8 +# LD-NEXT: lgrl %r1, 0x23e8 +# LD-NEXT: lgf %r1, 0(%r1,%r2) + +## DTP offset for b is at 0x23f0 +# LD-NEXT: lgrl %r1, 0x23f0 +# LD-NEXT: lgf %r1, 0(%r1,%r2) + +## DTP offset for c is at 0x23f8 +# LD-NEXT: lgrl %r1, 0x23f8 +# LD-NEXT: lgf %r1, 0(%r1,%r2) + +## Constant pool holding GOT offsets of TLS module ID and DTP 
offsets: +# TLS module ID: 0x24f8 / 0x18 +# a: 8 +# b: 12 +# c: 16 +# LD-DATA: 23e0 00000000 00000018 00000000 00000008 +# LD-DATA: 23f0 00000000 0000000c 00000000 00000010 + +# NOREL: no relocations + +## _GLOBAL_OFFSET_TABLE is at 0x1002230 +# LE: larl %r12, 0x1002230 + +## GOT offset of the LDM TLS module ID is at 0x1002210 +# LE-NEXT: lgrl %r2, 0x1002210 +# LE-NEXT: brcl 0, +# LE-NEXT: la %r2, 0(%r2,%r7) + +## TP offset for a is at 0x1002218 +# LE-NEXT: lgrl %r1, 0x1002218 +# LE-NEXT: lgf %r1, 0(%r1,%r2) + +## TP offset for b is at 0x1002220 +# LE-NEXT: lgrl %r1, 0x1002220 +# LE-NEXT: lgf %r1, 0(%r1,%r2) + +## TP offset for c is at 0x1002228 +# LE-NEXT: lgrl %r1, 0x1002228 +# LE-NEXT: lgf %r1, 0(%r1,%r2) + +## zeroed LDM / TP offsets: +# LDM TLS: 0 +# a: -8 +# b: -4 +# c: 0 +# LE-DATA: 1002210 00000000 00000000 ffffffff fffffff8 +# LE-DATA: 1002220 ffffffff fffffffc 00000000 00000000 + + +ear %r7,%a0 +sllg %r7,%r1,32 +ear %r7,%a1 +larl %r12,_GLOBAL_OFFSET_TABLE_ + +lgrl %r2,.LC0 +brasl %r14,__tls_get_offset@PLT:tls_ldcall:a +la %r2,0(%r2,%r7) + +lgrl %r1, .LC1 +lgf %r1,0(%r1,%r2) + +lgrl %r1, .LC2 +lgf %r1,0(%r1,%r2) + +lgrl %r1, .LC3 +lgf %r1,0(%r1,%r2) + + .section .data.rel.ro,"aw" + .align 8 +.LC0: + .quad a@TLSLDM +.LC1: + .quad a@DTPOFF +.LC2: + .quad b@DTPOFF +.LC3: + .quad c@DTPOFF + + .section .tbss + .globl a + .globl b + .globl c + .zero 8 +a: + .zero 4 +b: + .zero 4 +c: diff --git a/lld/test/ELF/systemz-tls-le.s b/lld/test/ELF/systemz-tls-le.s new file mode 100644 index 0000000000000..9e41fc768da39 --- /dev/null +++ b/lld/test/ELF/systemz-tls-le.s @@ -0,0 +1,61 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o + +# RUN: ld.lld %t.o -o %t +# RUN: llvm-readelf -r %t | FileCheck --check-prefix=NOREL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=LE %s +# RUN: llvm-objdump --section .data.rel.ro --full-contents %t | FileCheck --check-prefix=LE-DATA %s + +# NOREL: no relocations + 
+## TP offset for a is at 0x1002200 +# LE: lgrl %r1, 0x1002200 +# LE-NEXT: lgf %r1, 0(%r1,%r7) + +## TP offset for b is at 0x1002208 +# LE-NEXT: lgrl %r1, 0x1002208 +# LE-NEXT: lgf %r1, 0(%r1,%r7) + +## TP offset for c is at 0x1002210 +# LE-NEXT: lgrl %r1, 0x1002210 +# LE-NEXT: lgf %r1, 0(%r1,%r7) + +## TP offsets: +# a: -8 +# b: -4 +# c: 0 +# LE-DATA: 1002200 ffffffff fffffff8 ffffffff fffffffc +# LE-DATA: 1002210 00000000 00000000 + +ear %r7,%a0 +sllg %r7,%r1,32 +ear %r7,%a1 + +lgrl %r1, .LC0 +lgf %r1,0(%r1,%r7) + +lgrl %r1, .LC1 +lgf %r1,0(%r1,%r7) + +lgrl %r1, .LC2 +lgf %r1,0(%r1,%r7) + + .section .data.rel.ro,"aw" + .align 8 +.LC0: + .quad a@ntpoff +.LC1: + .quad b@ntpoff +.LC2: + .quad c@ntpoff + + .section .tbss + .globl a + .globl b + .globl c + .zero 8 +a: + .zero 4 +b: + .zero 4 +c: diff --git a/lld/test/lit.cfg.py b/lld/test/lit.cfg.py index b3e07f1f823cc..d309c2ad4ee28 100644 --- a/lld/test/lit.cfg.py +++ b/lld/test/lit.cfg.py @@ -83,6 +83,7 @@ "PowerPC": "ppc", "RISCV": "riscv", "Sparc": "sparc", + "SystemZ": "systemz", "WebAssembly": "wasm", "X86": "x86", }, From ddc2a5ff4e149d07fcda735c1d860be95006fe2a Mon Sep 17 00:00:00 2001 From: Zixu Wang <9819235+zixu-w@users.noreply.github.com> Date: Fri, 16 Feb 2024 05:36:18 -0800 Subject: [PATCH 22/54] [18.x][Docs] Add release note about Clang-defined target OS macros (#80044) The change is included in the 18.x release. Move the release note to the release branch and reformat. 
(cherry picked from commit b40d5b1b08564d23d5e0769892ebbc32447b2987) --- clang/docs/ReleaseNotes.rst | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 9edbfbfbbac02..93a67e7a89559 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -171,6 +171,22 @@ AST Dumping Potentially Breaking Changes "qualType": "foo" } +Clang Frontend Potentially Breaking Changes +------------------------------------------- +- Target OS macros extension + A new Clang extension (see :ref:`here <target_os_detail>`) is enabled for + Darwin (Apple platform) targets. Clang now defines ``TARGET_OS_*`` macros for + these targets, which could break existing code bases with improper checks for + the ``TARGET_OS_`` macros. For example, existing checks might fail to include + the ``TargetConditionals.h`` header from Apple SDKs and therefore leaving the + macros undefined and guarded code unexercised. + + Affected code should be checked to see if it's still intended for the specific + target and fixed accordingly. + + The extension can be turned off by the option ``-fno-define-target-os-macros`` + as a workaround. + What's New in Clang |release|? ============================== Some of the major new features and improvements to Clang are listed @@ -351,6 +367,15 @@ New Compiler Flags * Full register names can be used when printing assembly via ``-mregnames``. This option now matches the one used by GCC. +.. _target_os_detail: + +* ``-fdefine-target-os-macros`` and its complement + ``-fno-define-target-os-macros``. Enables or disables the Clang extension to + provide built-in definitions of a list of ``TARGET_OS_*`` macros based on the + target triple. + + The extension is enabled by default for Darwin (Apple platform) targets. 
+ Deprecated Compiler Flags ------------------------- From 60a8ec3a35c722a9eb8298c215321b89d0faf5b5 Mon Sep 17 00:00:00 2001 From: Jinyang He Date: Tue, 6 Feb 2024 09:09:13 +0800 Subject: [PATCH 23/54] [lld][ELF] Support relax R_LARCH_ALIGN (#78692) Refer to commit 6611d58f5bbc ("Relax R_RISCV_ALIGN"), we can relax R_LARCH_ALIGN by same way. Reuse `SymbolAnchor`, `RISCVRelaxAux` and `initSymbolAnchors` to simplify codes. As `riscvFinalizeRelax` is an arch-specific function, put it override on `TargetInfo::finalizeRelax`, so that LoongArch can override it, too. The flow of relax R_LARCH_ALIGN is almost consistent with RISCV. The difference is that LoongArch only has 4-bytes NOP and all executable insn is 4-bytes aligned. So LoongArch not need rewrite NOP sequence. Alignment maxBytesEmit parameter is supported in psABI v2.30. (cherry picked from commit 06a728f3feab876f9195738b5774e82dadc0f3a7) --- lld/ELF/Arch/LoongArch.cpp | 156 ++++++++++++++++++++- lld/ELF/Arch/RISCV.cpp | 29 +--- lld/ELF/InputSection.cpp | 7 +- lld/ELF/InputSection.h | 24 +++- lld/ELF/Target.h | 3 + lld/ELF/Writer.cpp | 4 +- lld/test/ELF/loongarch-relax-align.s | 126 +++++++++++++++++ lld/test/ELF/loongarch-relax-emit-relocs.s | 49 +++++++ 8 files changed, 363 insertions(+), 35 deletions(-) create mode 100644 lld/test/ELF/loongarch-relax-align.s create mode 100644 lld/test/ELF/loongarch-relax-emit-relocs.s diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index ab2ec5b447d00..05fd38fb753fd 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -36,6 +36,8 @@ class LoongArch final : public TargetInfo { bool usesOnlyLowPageBits(RelType type) const override; void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const override; + bool relaxOnce(int pass) const override; + void finalizeRelax(int passes) const override; }; } // end anonymous namespace @@ -465,8 +467,9 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, case 
R_LARCH_TLS_GD_HI20: return R_TLSGD_GOT; case R_LARCH_RELAX: - // LoongArch linker relaxation is not implemented yet. - return R_NONE; + return config->relax ? R_RELAX_HINT : R_NONE; + case R_LARCH_ALIGN: + return R_RELAX_HINT; // Other known relocs that are explicitly unimplemented: // @@ -659,6 +662,155 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, } } +static bool relax(InputSection &sec) { + const uint64_t secAddr = sec.getVA(); + const MutableArrayRef relocs = sec.relocs(); + auto &aux = *sec.relaxAux; + bool changed = false; + ArrayRef sa = ArrayRef(aux.anchors); + uint64_t delta = 0; + + std::fill_n(aux.relocTypes.get(), relocs.size(), R_LARCH_NONE); + aux.writes.clear(); + for (auto [i, r] : llvm::enumerate(relocs)) { + const uint64_t loc = secAddr + r.offset - delta; + uint32_t &cur = aux.relocDeltas[i], remove = 0; + switch (r.type) { + case R_LARCH_ALIGN: { + const uint64_t addend = + r.sym->isUndefined() ? Log2_64(r.addend) + 1 : r.addend; + const uint64_t allBytes = (1 << (addend & 0xff)) - 4; + const uint64_t align = 1 << (addend & 0xff); + const uint64_t maxBytes = addend >> 8; + const uint64_t off = loc & (align - 1); + const uint64_t curBytes = off == 0 ? 0 : align - off; + // All bytes beyond the alignment boundary should be removed. + // If emit bytes more than max bytes to emit, remove all. + if (maxBytes != 0 && curBytes > maxBytes) + remove = allBytes; + else + remove = allBytes - curBytes; + // If we can't satisfy this alignment, we've found a bad input. + if (LLVM_UNLIKELY(static_cast(remove) < 0)) { + errorOrWarn(getErrorLocation((const uint8_t *)loc) + + "insufficient padding bytes for " + lld::toString(r.type) + + ": " + Twine(allBytes) + " bytes available for " + + "requested alignment of " + Twine(align) + " bytes"); + remove = 0; + } + break; + } + } + + // For all anchors whose offsets are <= r.offset, they are preceded by + // the previous relocation whose `relocDeltas` value equals `delta`. 
+ // Decrease their st_value and update their st_size. + for (; sa.size() && sa[0].offset <= r.offset; sa = sa.slice(1)) { + if (sa[0].end) + sa[0].d->size = sa[0].offset - delta - sa[0].d->value; + else + sa[0].d->value = sa[0].offset - delta; + } + delta += remove; + if (delta != cur) { + cur = delta; + changed = true; + } + } + + for (const SymbolAnchor &a : sa) { + if (a.end) + a.d->size = a.offset - delta - a.d->value; + else + a.d->value = a.offset - delta; + } + // Inform assignAddresses that the size has changed. + if (!isUInt<32>(delta)) + fatal("section size decrease is too large: " + Twine(delta)); + sec.bytesDropped = delta; + return changed; +} + +// When relaxing just R_LARCH_ALIGN, relocDeltas is usually changed only once in +// the absence of a linker script. For call and load/store R_LARCH_RELAX, code +// shrinkage may reduce displacement and make more relocations eligible for +// relaxation. Code shrinkage may increase displacement to a call/load/store +// target at a higher fixed address, invalidating an earlier relaxation. Any +// change in section sizes can have cascading effect and require another +// relaxation pass. 
+bool LoongArch::relaxOnce(int pass) const { + if (config->relocatable) + return false; + + if (pass == 0) + initSymbolAnchors(); + + SmallVector storage; + bool changed = false; + for (OutputSection *osec : outputSections) { + if (!(osec->flags & SHF_EXECINSTR)) + continue; + for (InputSection *sec : getInputSections(*osec, storage)) + changed |= relax(*sec); + } + return changed; +} + +void LoongArch::finalizeRelax(int passes) const { + log("relaxation passes: " + Twine(passes)); + SmallVector storage; + for (OutputSection *osec : outputSections) { + if (!(osec->flags & SHF_EXECINSTR)) + continue; + for (InputSection *sec : getInputSections(*osec, storage)) { + RelaxAux &aux = *sec->relaxAux; + if (!aux.relocDeltas) + continue; + + MutableArrayRef rels = sec->relocs(); + ArrayRef old = sec->content(); + size_t newSize = old.size() - aux.relocDeltas[rels.size() - 1]; + uint8_t *p = context().bAlloc.Allocate(newSize); + uint64_t offset = 0; + int64_t delta = 0; + sec->content_ = p; + sec->size = newSize; + sec->bytesDropped = 0; + + // Update section content: remove NOPs for R_LARCH_ALIGN and rewrite + // instructions for relaxed relocations. + for (size_t i = 0, e = rels.size(); i != e; ++i) { + uint32_t remove = aux.relocDeltas[i] - delta; + delta = aux.relocDeltas[i]; + if (remove == 0 && aux.relocTypes[i] == R_LARCH_NONE) + continue; + + // Copy from last location to the current relocated location. + const Relocation &r = rels[i]; + uint64_t size = r.offset - offset; + memcpy(p, old.data() + offset, size); + p += size; + offset = r.offset + remove; + } + memcpy(p, old.data() + offset, old.size() - offset); + + // Subtract the previous relocDeltas value from the relocation offset. + // For a pair of R_LARCH_XXX/R_LARCH_RELAX with the same offset, decrease + // their r_offset by the same delta. 
+ delta = 0; + for (size_t i = 0, e = rels.size(); i != e;) { + uint64_t cur = rels[i].offset; + do { + rels[i].offset -= delta; + if (aux.relocTypes[i] != R_LARCH_NONE) + rels[i].type = aux.relocTypes[i]; + } while (++i != e && rels[i].offset == cur); + delta = aux.relocDeltas[i - 1]; + } + } + } +} + TargetInfo *elf::getLoongArchTargetInfo() { static LoongArch target; return &target; diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index 8ce92b4badfbd..5fcab4d39d43a 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -45,6 +45,7 @@ class RISCV final : public TargetInfo { uint64_t val) const override; void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override; bool relaxOnce(int pass) const override; + void finalizeRelax(int passes) const override; }; } // end anonymous namespace @@ -104,26 +105,6 @@ static uint32_t setLO12_S(uint32_t insn, uint32_t imm) { (extractBits(imm, 4, 0) << 7); } -namespace { -struct SymbolAnchor { - uint64_t offset; - Defined *d; - bool end; // true for the anchor of st_value+st_size -}; -} // namespace - -struct elf::RISCVRelaxAux { - // This records symbol start and end offsets which will be adjusted according - // to the nearest relocDeltas element. - SmallVector anchors; - // For relocations[i], the actual offset is - // r_offset - (i ? relocDeltas[i-1] : 0). - std::unique_ptr relocDeltas; - // For relocations[i], the actual type is relocTypes[i]. 
- std::unique_ptr relocTypes; - SmallVector writes; -}; - RISCV::RISCV() { copyRel = R_RISCV_COPY; pltRel = R_RISCV_JUMP_SLOT; @@ -695,13 +676,13 @@ void RISCV::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { } } -static void initSymbolAnchors() { +void elf::initSymbolAnchors() { SmallVector storage; for (OutputSection *osec : outputSections) { if (!(osec->flags & SHF_EXECINSTR)) continue; for (InputSection *sec : getInputSections(*osec, storage)) { - sec->relaxAux = make(); + sec->relaxAux = make(); if (sec->relocs().size()) { sec->relaxAux->relocDeltas = std::make_unique(sec->relocs().size()); @@ -948,7 +929,7 @@ bool RISCV::relaxOnce(int pass) const { return changed; } -void elf::riscvFinalizeRelax(int passes) { +void RISCV::finalizeRelax(int passes) const { llvm::TimeTraceScope timeScope("Finalize RISC-V relaxation"); log("relaxation passes: " + Twine(passes)); SmallVector storage; @@ -956,7 +937,7 @@ void elf::riscvFinalizeRelax(int passes) { if (!(osec->flags & SHF_EXECINSTR)) continue; for (InputSection *sec : getInputSections(*osec, storage)) { - RISCVRelaxAux &aux = *sec->relaxAux; + RelaxAux &aux = *sec->relaxAux; if (!aux.relocDeltas) continue; diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 71870539d531c..e033a715b5921 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -354,9 +354,10 @@ InputSectionBase *InputSection::getRelocatedSection() const { template void InputSection::copyRelocations(uint8_t *buf) { - if (config->relax && !config->relocatable && config->emachine == EM_RISCV) { - // On RISC-V, relaxation might change relocations: copy from internal ones - // that are updated by relaxation. + if (config->relax && !config->relocatable && + (config->emachine == EM_RISCV || config->emachine == EM_LOONGARCH)) { + // On LoongArch and RISC-V, relaxation might change relocations: copy + // from internal ones that are updated by relaxation. 
InputSectionBase *sec = getRelocatedSection(); copyRelocations(buf, llvm::make_range(sec->relocations.begin(), sec->relocations.end())); diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index dda4242d8be1c..243b28d90bb4c 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -102,7 +102,23 @@ class SectionBase { link(link), info(info) {} }; -struct RISCVRelaxAux; +struct SymbolAnchor { + uint64_t offset; + Defined *d; + bool end; // true for the anchor of st_value+st_size +}; + +struct RelaxAux { + // This records symbol start and end offsets which will be adjusted according + // to the nearest relocDeltas element. + SmallVector anchors; + // For relocations[i], the actual offset is + // r_offset - (i ? relocDeltas[i-1] : 0). + std::unique_ptr relocDeltas; + // For relocations[i], the actual type is relocTypes[i]. + std::unique_ptr relocTypes; + SmallVector writes; +}; // This corresponds to a section of an input file. class InputSectionBase : public SectionBase { @@ -226,9 +242,9 @@ class InputSectionBase : public SectionBase { // basic blocks. JumpInstrMod *jumpInstrMod = nullptr; - // Auxiliary information for RISC-V linker relaxation. RISC-V does not use - // jumpInstrMod. - RISCVRelaxAux *relaxAux; + // Auxiliary information for RISC-V and LoongArch linker relaxation. + // They do not use jumpInstrMod. + RelaxAux *relaxAux; // The compressed content size when `compressed` is true. size_t compressedSize; diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index 3c06789cdbd36..0cefa31813566 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -95,6 +95,8 @@ class TargetInfo { // Do a linker relaxation pass and return true if we changed something. virtual bool relaxOnce(int pass) const { return false; } + // Do finalize relaxation after collecting relaxation infos. 
+ virtual void finalizeRelax(int passes) const {} virtual void applyJumpInstrMod(uint8_t *loc, JumpModType type, JumpModType val) const {} @@ -237,6 +239,7 @@ void addArmSyntheticSectionMappingSymbol(Defined *); void sortArmMappingSymbols(); void convertArmInstructionstoBE8(InputSection *sec, uint8_t *buf); void createTaggedSymbols(const SmallVector &files); +void initSymbolAnchors(); LLVM_LIBRARY_VISIBILITY extern const TargetInfo *target; TargetInfo *getTarget(); diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 501c10f358497..6df43a34be013 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1752,8 +1752,8 @@ template void Writer::finalizeAddressDependentContent() { } } } - if (!config->relocatable && config->emachine == EM_RISCV) - riscvFinalizeRelax(pass); + if (!config->relocatable) + target->finalizeRelax(pass); if (config->relocatable) for (OutputSection *sec : outputSections) diff --git a/lld/test/ELF/loongarch-relax-align.s b/lld/test/ELF/loongarch-relax-align.s new file mode 100644 index 0000000000000..ab61e15d5caca --- /dev/null +++ b/lld/test/ELF/loongarch-relax-align.s @@ -0,0 +1,126 @@ +# REQUIRES: loongarch + +# RUN: llvm-mc --filetype=obj --triple=loongarch32 --mattr=+relax %s -o %t.32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.64.o +# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.32.o -o %t.32 +# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.64.o -o %t.64 +# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.32.o --no-relax -o %t.32n +# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.64.o --no-relax -o %t.64n +# RUN: llvm-objdump -td --no-show-raw-insn %t.32 | FileCheck %s +# RUN: llvm-objdump -td --no-show-raw-insn %t.64 | FileCheck %s +# RUN: llvm-objdump -td --no-show-raw-insn %t.32n | FileCheck %s +# RUN: llvm-objdump -td --no-show-raw-insn %t.64n | FileCheck %s 
+ +## Test the R_LARCH_ALIGN without symbol index. +# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.o64.o --defsym=old=1 +# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.o64.o -o %t.o64 +# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.o64.o --no-relax -o %t.o64n +# RUN: llvm-objdump -td --no-show-raw-insn %t.o64 | FileCheck %s +# RUN: llvm-objdump -td --no-show-raw-insn %t.o64n | FileCheck %s + +## -r keeps section contents unchanged. +# RUN: ld.lld -r %t.64.o -o %t.64.r +# RUN: llvm-objdump -dr --no-show-raw-insn %t.64.r | FileCheck %s --check-prefix=CHECKR + +# CHECK-DAG: {{0*}}10000 l .text {{0*}}44 .Ltext_start +# CHECK-DAG: {{0*}}10038 l .text {{0*}}0c .L1 +# CHECK-DAG: {{0*}}10040 l .text {{0*}}04 .L2 +# CHECK-DAG: {{0*}}20000 l .text2 {{0*}}14 .Ltext2_start + +# CHECK: <.Ltext_start>: +# CHECK-NEXT: break 1 +# CHECK-NEXT: break 2 +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: break 3 +# CHECK-NEXT: break 4 +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: pcalau12i $a0, 0 +# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 0 +# CHECK-NEXT: pcalau12i $a0, 0 +# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 56 +# CHECK-NEXT: pcalau12i $a0, 0 +# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 64 +# CHECK-EMPTY: +# CHECK-NEXT: <.L1>: +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-EMPTY: +# CHECK-NEXT: <.L2>: +# CHECK-NEXT: break 5 + +# CHECK: <.Ltext2_start>: +# CHECK-NEXT: pcalau12i $a0, 0 +# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 0 +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: break 6 + +# CHECKR: <.Ltext2_start>: +# CHECKR-NEXT: pcalau12i $a0, 0 +# CHECKR-NEXT: {{0*}}00: R_LARCH_PCALA_HI20 .Ltext2_start +# CHECKR-NEXT: {{0*}}00: R_LARCH_RELAX *ABS* +# CHECKR-NEXT: addi.d $a0, $a0, 0 +# CHECKR-NEXT: {{0*}}04: R_LARCH_PCALA_LO12 .Ltext2_start +# CHECKR-NEXT: {{0*}}04: R_LARCH_RELAX *ABS* +# CHECKR-NEXT: nop +# CHECKR-NEXT: {{0*}}08: R_LARCH_ALIGN .Lalign_symbol+0x4 +# CHECKR-NEXT: nop +# 
CHECKR-NEXT: nop +# CHECKR-NEXT: break 6 + +.macro .fake_p2align_4 max=0 + .ifdef old + .if \max==0 + .reloc ., R_LARCH_ALIGN, 0xc + nop; nop; nop + .endif + .else + .reloc ., R_LARCH_ALIGN, .Lalign_symbol + 0x4 + (\max << 8) + nop; nop; nop + .endif +.endm + + .text +.Lalign_symbol: +.Ltext_start: + break 1 + break 2 +## +0x8: Emit 2 nops, delete 1 nop. + .fake_p2align_4 + + break 3 +## +0x14: Emit 3 nops > 8 bytes, not emit. + .fake_p2align_4 8 + + break 4 + .fake_p2align_4 8 +## +0x18: Emit 2 nops <= 8 bytes. + +## Compensate +.ifdef old + nop; nop +.endif + +## +0x20: Test symbol value and symbol size can be handled. + la.pcrel $a0, .Ltext_start + la.pcrel $a0, .L1 + la.pcrel $a0, .L2 + +## +0x38: Emit 2 nops, delete 1 nop. +.L1: + .fake_p2align_4 +.L2: + break 5 + .size .L1, . - .L1 + .size .L2, . - .L2 + .size .Ltext_start, . - .Ltext_start + +## Test another text section. + .section .text2,"ax",@progbits +.Ltext2_start: + la.pcrel $a0, .Ltext2_start + .fake_p2align_4 + break 6 + .size .Ltext2_start, . - .Ltext2_start diff --git a/lld/test/ELF/loongarch-relax-emit-relocs.s b/lld/test/ELF/loongarch-relax-emit-relocs.s new file mode 100644 index 0000000000000..581fce8c95caa --- /dev/null +++ b/lld/test/ELF/loongarch-relax-emit-relocs.s @@ -0,0 +1,49 @@ +# REQUIRES: loongarch +## Test that we can handle --emit-relocs while relaxing. + +# RUN: llvm-mc --filetype=obj --triple=loongarch32 --mattr=+relax %s -o %t.32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.64.o +# RUN: ld.lld -Ttext=0x10000 --emit-relocs %t.32.o -o %t.32 +# RUN: ld.lld -Ttext=0x10000 --emit-relocs %t.64.o -o %t.64 +# RUN: llvm-objdump -dr %t.32 | FileCheck %s +# RUN: llvm-objdump -dr %t.64 | FileCheck %s + +## -r should keep original relocations. +# RUN: ld.lld -r %t.64.o -o %t.64.r +# RUN: llvm-objdump -dr %t.64.r | FileCheck %s --check-prefix=CHECKR + +## --no-relax should keep original relocations. 
+## TODO Due to R_LARCH_RELAX is not relaxed, it plays same as --relax now. +# RUN: ld.lld -Ttext=0x10000 --emit-relocs --no-relax %t.64.o -o %t.64.norelax +# RUN: llvm-objdump -dr %t.64.norelax | FileCheck %s + +# CHECK: 00010000 <_start>: +# CHECK-NEXT: pcalau12i $a0, 0 +# CHECK-NEXT: R_LARCH_PCALA_HI20 _start +# CHECK-NEXT: R_LARCH_RELAX *ABS* +# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 0 +# CHECK-NEXT: R_LARCH_PCALA_LO12 _start +# CHECK-NEXT: R_LARCH_RELAX *ABS* +# CHECK-NEXT: nop +# CHECK-NEXT: R_LARCH_ALIGN .Lla-relax-align0+0x4 +# CHECK-NEXT: nop +# CHECK-NEXT: ret + +# CHECKR: <_start>: +# CHECKR-NEXT: pcalau12i $a0, 0 +# CHECKR-NEXT: R_LARCH_PCALA_HI20 _start +# CHECKR-NEXT: R_LARCH_RELAX *ABS* +# CHECKR-NEXT: addi.d $a0, $a0, 0 +# CHECKR-NEXT: R_LARCH_PCALA_LO12 _start +# CHECKR-NEXT: R_LARCH_RELAX *ABS* +# CHECKR-NEXT: nop +# CHECKR-NEXT: R_LARCH_ALIGN .Lla-relax-align0+0x4 +# CHECKR-NEXT: nop +# CHECKR-NEXT: nop +# CHECKR-NEXT: ret + +.global _start +_start: + la.pcrel $a0, _start + .p2align 4 + ret From d01a4ab21044ceb20e39b783a5983a8d4cc93cb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 16 Feb 2024 15:48:29 +0200 Subject: [PATCH 24/54] [LLD] [docs] Add more release notes for COFF and MinGW (#81977) Add review references to all items already mentioned. Move some items to the right section (from the MinGW section to COFF, as the implementation is in the COFF linker side, and may be relevant for non-MinGW cases as well). --- lld/docs/ReleaseNotes.rst | 48 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 82f9d93b8e86a..56ba3463aeadc 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -82,14 +82,46 @@ COFF Improvements * Added support for ``--time-trace`` and associated ``--time-trace-granularity``. This generates a .json profile trace of the linker execution. 
+ (`#68236 <https://github.com/llvm/llvm-project/pull/68236>`_) + +* The ``-dependentloadflag`` option was implemented. + (`#71537 <https://github.com/llvm/llvm-project/pull/71537>`_) * LLD now prefers library paths specified with ``-libpath:`` over the implicitly detected toolchain paths. + (`#78039 <https://github.com/llvm/llvm-project/pull/78039>`_) + +* Added new options ``-lldemit:llvm`` and ``-lldemit:asm`` for getting + the output of LTO compilation as LLVM bitcode or assembly. + (`#66964 <https://github.com/llvm/llvm-project/pull/66964>`_) + (`#67079 <https://github.com/llvm/llvm-project/pull/67079>`_) + +* Added a new option ``-build-id`` for generating a ``.buildid`` section + when not generating a PDB. A new symbol ``__buildid`` is generated by + the linker, allowing code to reference the build ID of the binary. + (`#71433 <https://github.com/llvm/llvm-project/pull/71433>`_) + (`#74652 <https://github.com/llvm/llvm-project/pull/74652>`_) + +* A new, LLD specific option, ``-lld-allow-duplicate-weak``, was added + for allowing duplicate weak symbols. + (`#68077 <https://github.com/llvm/llvm-project/pull/68077>`_) + +* More correctly handle LTO of files that define ``__imp_`` prefixed dllimport + redirections. + (`#70777 <https://github.com/llvm/llvm-project/pull/70777>`_) + (`#71376 <https://github.com/llvm/llvm-project/pull/71376>`_) + (`#72989 <https://github.com/llvm/llvm-project/pull/72989>`_) + +* Linking undefined references to weak symbols with LTO now works. + (`#70430 <https://github.com/llvm/llvm-project/pull/70430>`_) * Use the ``SOURCE_DATE_EPOCH`` environment variable for the PE header and debug directory timestamps, if neither the ``/Brepro`` nor ``/timestamp:`` options have been specified. This makes the linker output reproducible by setting this environment variable. + (`#81326 <https://github.com/llvm/llvm-project/pull/81326>`_) + +* Lots of incremental work towards supporting linking ARM64EC binaries. MinGW Improvements ------------------ @@ -97,19 +129,29 @@ MinGW Improvements * Added support for many LTO and ThinLTO options (most LTO options supported by the ELF driver, that are implemented by the COFF backend as well, should be supported now). + (`D158412 <https://reviews.llvm.org/D158412>`_) + (`D158887 <https://reviews.llvm.org/D158887>`_) + (`#77387 <https://github.com/llvm/llvm-project/pull/77387>`_) + (`#81475 <https://github.com/llvm/llvm-project/pull/81475>`_) * LLD no longer tries to autodetect and use library paths from MSVC/WinSDK installations when run in MinGW mode; that mode of operation shouldn't ever be needed in MinGW mode, and could be a source of unexpected behaviours. + (`D144084 <https://reviews.llvm.org/D144084>`_) * The ``--icf=safe`` option now works as expected; it was previously a no-op. 
- -* More correctly handle LTO of files that define ``__imp_`` prefixed dllimport - redirections. + (`#70037 <https://github.com/llvm/llvm-project/pull/70037>`_) * The strip flags ``-S`` and ``-s`` now can be used to strip out DWARF debug info and symbol tables while emitting a PDB debug info file. + (`#75181 <https://github.com/llvm/llvm-project/pull/75181>`_) + +* The option ``--dll`` is handled as an alias for the ``--shared`` option. + (`#68575 <https://github.com/llvm/llvm-project/pull/68575>`_) + +* The option ``--sort-common`` is ignored now. + (`#66336 <https://github.com/llvm/llvm-project/pull/66336>`_) MachO Improvements ------------------ From 1a69056c899a74c311d700bd0f5618cbfee23518 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 16 Feb 2024 14:50:14 +0100 Subject: [PATCH 25/54] Backport [DAGCombine] Fix multi-use miscompile in load combine (#81586) (#81633) (cherry picked from commit 25b9ed6e4964344e3710359bec4c831e5a8448b9) --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- llvm/test/CodeGen/X86/load-combine.ll | 32 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 98d8a6d9409f2..3135ec73a99e7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9253,7 +9253,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // Transfer chain users from old loads to the new load. 
for (LoadSDNode *L : Loads) - DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1)); + DAG.makeEquivalentMemoryOrdering(L, NewLoad); if (!NeedsBswap) return NewLoad; diff --git a/llvm/test/CodeGen/X86/load-combine.ll b/llvm/test/CodeGen/X86/load-combine.ll index 7f8115dc1ce38..b5f3e78991881 100644 --- a/llvm/test/CodeGen/X86/load-combine.ll +++ b/llvm/test/CodeGen/X86/load-combine.ll @@ -1282,3 +1282,35 @@ define i32 @zext_load_i32_by_i8_bswap_shl_16(ptr %arg) { %tmp8 = or i32 %tmp7, %tmp30 ret i32 %tmp8 } + +define i32 @pr80911_vector_load_multiuse(ptr %ptr, ptr %clobber) nounwind { +; CHECK-LABEL: pr80911_vector_load_multiuse: +; CHECK: # %bb.0: +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl (%edx), %esi +; CHECK-NEXT: movzwl (%edx), %eax +; CHECK-NEXT: movl $0, (%ecx) +; CHECK-NEXT: movl %esi, (%edx) +; CHECK-NEXT: popl %esi +; CHECK-NEXT: retl +; +; CHECK64-LABEL: pr80911_vector_load_multiuse: +; CHECK64: # %bb.0: +; CHECK64-NEXT: movl (%rdi), %ecx +; CHECK64-NEXT: movzwl (%rdi), %eax +; CHECK64-NEXT: movl $0, (%rsi) +; CHECK64-NEXT: movl %ecx, (%rdi) +; CHECK64-NEXT: retq + %load = load <4 x i8>, ptr %ptr, align 16 + store i32 0, ptr %clobber + store <4 x i8> %load, ptr %ptr, align 16 + %e1 = extractelement <4 x i8> %load, i64 1 + %e1.ext = zext i8 %e1 to i32 + %e1.ext.shift = shl nuw nsw i32 %e1.ext, 8 + %e0 = extractelement <4 x i8> %load, i64 0 + %e0.ext = zext i8 %e0 to i32 + %res = or i32 %e1.ext.shift, %e0.ext + ret i32 %res +} From 5226ae4617023e3b8957e9db0b9c2c83ea7e77a2 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 24 Jan 2024 10:57:18 -0800 Subject: [PATCH 26/54] [SLP]Fix PR79229: Check that extractelement is used only in a single node before erasing. Before trying to erase the extractelement instruction, not enough to check for single use, need to check that it is not used in several nodes because of the preliminary nodes reordering. 
(cherry picked from commit 48bbd7658710ef1699bf2a6532ff5830230aacc5) --- .../Transforms/Vectorize/SLPVectorizer.cpp | 11 +- .../extractelement-single-use-many-nodes.ll | 144 ++++++++++++++++++ 2 files changed, 154 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 601d2454c1e16..83f787d7fb624 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10216,7 +10216,16 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { // If the only one use is vectorized - can delete the extractelement // itself. if (!EI->hasOneUse() || any_of(EI->users(), [&](User *U) { - return !R.ScalarToTreeEntry.count(U); + const TreeEntry *UTE = R.getTreeEntry(U); + return !UTE || R.MultiNodeScalars.contains(U) || + count_if(R.VectorizableTree, + [&](const std::unique_ptr &TE) { + return any_of(TE->UserTreeIndices, + [&](const EdgeInfo &Edge) { + return Edge.UserTE == UTE; + }) && + is_contained(TE->Scalars, EI); + }) != 1; })) continue; R.eraseInstruction(EI); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll new file mode 100644 index 0000000000000..f665dac3282b7 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll @@ -0,0 +1,144 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=x86-64-v3 -S < %s | FileCheck %s + +define void @foo(double %i) { +; CHECK-LABEL: define void @foo( +; CHECK-SAME: double [[I:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x double> , double [[I]], i32 2 
+; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x double> zeroinitializer, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[I]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> zeroinitializer, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> , <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x double> [[TMP7]], double [[TMP2]], i32 3 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x double> [[TMP9]], <8 x double> , <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x double> [[TMP10]], double [[TMP5]], i32 6 +; CHECK-NEXT: [[TMP12:%.*]] = fmul <8 x double> [[TMP8]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = fadd <8 x double> zeroinitializer, [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = fadd <8 x double> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = fcmp ult <8 x double> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = freeze <8 x i1> [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP16]]) +; CHECK-NEXT: br i1 [[TMP17]], label [[BB58:%.*]], label [[BB115:%.*]] +; CHECK: bb115: +; CHECK-NEXT: [[TMP18:%.*]] = fmul <2 x double> zeroinitializer, [[TMP4]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP18]], i32 0 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[TMP18]], i32 1 +; CHECK-NEXT: [[I118:%.*]] = fadd double [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = fmul <4 x double> zeroinitializer, [[TMP1]] +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x 
double> , <4 x double> [[TMP22]], <4 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = fadd <4 x double> [[TMP21]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = fadd <4 x double> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = select <4 x i1> zeroinitializer, <4 x double> zeroinitializer, <4 x double> [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = fmul <4 x double> [[TMP26]], zeroinitializer +; CHECK-NEXT: [[TMP28:%.*]] = fmul <4 x double> [[TMP27]], zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = fptosi <4 x double> [[TMP28]] to <4 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = or <4 x i32> zeroinitializer, [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP30]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = icmp slt i32 [[TMP31]], 32000 +; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP31]], i32 32000 +; CHECK-NEXT: [[I163:%.*]] = fcmp ogt double [[I118]], 0.000000e+00 +; CHECK-NEXT: [[I164:%.*]] = icmp slt i32 0, [[OP_RDX1]] +; CHECK-NEXT: unreachable +; CHECK: bb58: +; CHECK-NEXT: ret void +; +bb: + %i75 = fsub double 0.000000e+00, 0.000000e+00 + %i76 = fsub double 0.000000e+00, 0.000000e+00 + %i77 = fmul double 0.000000e+00, %i75 + %i78 = fmul double 0.000000e+00, %i76 + %i79 = fadd double %i78, 0.000000e+00 + %i80 = fadd double %i79, 0.000000e+00 + %i81 = fcmp ult double %i80, 0.000000e+00 + %i82 = fsub double 0.000000e+00, poison + %i83 = fmul double 0.000000e+00, %i82 + %i84 = fadd double 0.000000e+00, %i83 + %i85 = fadd double %i84, 0.000000e+00 + %i86 = fcmp ult double %i85, 0.000000e+00 + %i87 = fsub double 0.000000e+00, %i + %i88 = fadd double 0.000000e+00, %i77 + %i89 = fadd double %i88, 0.000000e+00 + %i90 = fcmp ult double %i89, 0.000000e+00 + %i91 = fsub double 0.000000e+00, 0.000000e+00 + %i92 = fmul double poison, 0.000000e+00 + %i93 = fadd double %i92, 0.000000e+00 + %i94 = fadd double %i93, 0.000000e+00 + %i95 = fcmp ult double %i94, 0.000000e+00 + %i96 = fadd double %i79, 0.000000e+00 + %i97 = fcmp ult double %i96, 
0.000000e+00 + %i98 = fadd double %i84, 0.000000e+00 + %i99 = fcmp ult double %i98, 0.000000e+00 + %i100 = fadd double 0.000000e+00, %i77 + %i101 = fadd double %i100, 0.000000e+00 + %i102 = fcmp ult double %i101, 0.000000e+00 + %i103 = fsub double 0.000000e+00, %i + %i104 = fmul double poison, 0.000000e+00 + %i105 = fadd double %i104, 0.000000e+00 + %i106 = fadd double %i105, 0.000000e+00 + %i107 = fcmp ult double %i106, 0.000000e+00 + %i108 = select i1 %i107, i1 %i102, i1 false + %i109 = select i1 %i108, i1 %i99, i1 false + %i110 = select i1 %i109, i1 %i97, i1 false + %i111 = select i1 %i110, i1 %i95, i1 false + %i112 = select i1 %i111, i1 %i90, i1 false + %i113 = select i1 %i112, i1 %i86, i1 false + %i114 = select i1 %i113, i1 %i81, i1 false + br i1 %i114, label %bb58, label %bb115 + +bb115: + %i116 = fmul double 0.000000e+00, %i103 + %i117 = fmul double 0.000000e+00, %i82 + %i118 = fadd double %i116, %i117 + %i120 = fmul double 0.000000e+00, %i75 + %i121 = fmul double 0.000000e+00, %i76 + %i122 = fadd double %i121, 0.000000e+00 + %i123 = fadd double 0.000000e+00, %i120 + %i124 = fmul double 0.000000e+00, %i91 + %i125 = fadd double %i124, %i82 + %i126 = fadd double %i125, 0.000000e+00 + %i127 = fmul double 0.000000e+00, %i87 + %i128 = fadd double %i127, 0.000000e+00 + %i129 = fadd double %i128, 0.000000e+00 + %i130 = fadd double %i122, 0.000000e+00 + %i131 = fadd double %i123, 0.000000e+00 + %i132 = select i1 false, double 0.000000e+00, double %i131 + %i133 = fmul double %i132, 0.000000e+00 + %i134 = fmul double %i133, 0.000000e+00 + %i135 = fptosi double %i134 to i32 + %i136 = or i32 0, %i135 + %i137 = icmp slt i32 %i136, 32000 + %i138 = select i1 %i137, i32 %i136, i32 32000 + %i139 = select i1 false, double 0.000000e+00, double %i130 + %i140 = fmul double %i139, 0.000000e+00 + %i141 = fmul double %i140, 0.000000e+00 + %i142 = fptosi double %i141 to i32 + %i143 = or i32 0, %i142 + %i144 = icmp slt i32 %i143, %i138 + %i145 = select i1 %i144, i32 %i143, i32 %i138 
+ %i146 = select i1 false, double 0.000000e+00, double %i129 + %i147 = fmul double %i146, 0.000000e+00 + %i148 = fmul double %i147, 0.000000e+00 + %i149 = fptosi double %i148 to i32 + %i150 = or i32 0, %i149 + %i151 = icmp slt i32 %i150, %i145 + %i152 = select i1 %i151, i32 %i150, i32 %i145 + %i153 = select i1 false, double 0.000000e+00, double %i126 + %i154 = fmul double %i153, 0.000000e+00 + %i155 = fmul double %i154, 0.000000e+00 + %i156 = fptosi double %i155 to i32 + %i157 = or i32 0, %i156 + %i158 = icmp slt i32 %i157, %i152 + %i159 = select i1 %i158, i32 %i157, i32 %i152 + %i163 = fcmp ogt double %i118, 0.000000e+00 + %i164 = icmp slt i32 0, %i159 + unreachable + +bb58: + ret void +} From b7a4ff80a4ccaecf1d497db51bfdc9499c3cbb48 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 25 Jan 2024 06:06:15 -0800 Subject: [PATCH 27/54] [SLP]Fix PR79229: Do not erase extractelement, if it used in multiregister node. If the node can be span between several registers and same extractelement instruction is used in several parts, it may be required to keep such extractelement instruction to avoid compiler crash. (cherry picked from commit 6fe21bc1dac883efa0dfa807f327048ae9969b81) --- .../Transforms/Vectorize/SLPVectorizer.cpp | 3 +- .../X86/extractelement-multi-register-use.ll | 107 ++++++++++++++++++ 2 files changed, 109 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 83f787d7fb624..0a9e2c7f49f55 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10215,7 +10215,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { UniqueBases.insert(VecBase); // If the only one use is vectorized - can delete the extractelement // itself. 
- if (!EI->hasOneUse() || any_of(EI->users(), [&](User *U) { + if (!EI->hasOneUse() || (NumParts != 1 && count(E->Scalars, EI) > 1) || + any_of(EI->users(), [&](User *U) { const TreeEntry *UTE = R.getTreeEntry(U); return !UTE || R.MultiNodeScalars.contains(U) || count_if(R.VectorizableTree, diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll new file mode 100644 index 0000000000000..ba406c8f20bb0 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll @@ -0,0 +1,107 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=x86-64-v3 < %s | FileCheck %s + +define void @test(double %i) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: double [[I:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[I]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> zeroinitializer, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> , double [[I]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> zeroinitializer, [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP0]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP3]], <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP5]], <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> [[TMP7]], <8 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x double> [[TMP8]], <8 x double> , <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[TMP4]], i32 7 +; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x double> zeroinitializer, [[TMP10]] +; 
CHECK-NEXT: [[TMP12:%.*]] = fadd <8 x double> zeroinitializer, [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = fadd <8 x double> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = fcmp ult <8 x double> [[TMP13]], zeroinitializer +; CHECK-NEXT: br label [[BB116:%.*]] +; CHECK: bb116: +; CHECK-NEXT: [[TMP15:%.*]] = fmul <2 x double> zeroinitializer, [[TMP5]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[TMP15]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x double> [[TMP15]], i32 1 +; CHECK-NEXT: [[I120:%.*]] = fadd double [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = fmul <2 x double> zeroinitializer, [[TMP1]] +; CHECK-NEXT: [[TMP19:%.*]] = fmul <2 x double> zeroinitializer, [[TMP3]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[TMP18]], i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[TMP18]], i32 1 +; CHECK-NEXT: [[I128:%.*]] = fadd double [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[I139:%.*]] = call double @llvm.maxnum.f64(double [[I128]], double 0.000000e+00) +; CHECK-NEXT: [[TMP22:%.*]] = fadd <2 x double> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP22]], <2 x double> zeroinitializer) +; CHECK-NEXT: [[TMP24:%.*]] = fmul <2 x double> [[TMP23]], zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = fptosi <2 x double> [[TMP24]] to <2 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = sub <2 x i32> zeroinitializer, [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = icmp sgt <2 x i32> [[TMP26]], zeroinitializer +; CHECK-NEXT: [[I147:%.*]] = fcmp ogt double [[I120]], 0.000000e+00 +; CHECK-NEXT: ret void +; +bb: + %i74 = fsub double 0.000000e+00, poison + %i75 = fsub double 0.000000e+00, %i + %i76 = fmul double 0.000000e+00, %i75 + %i77 = fadd double %i76, 0.000000e+00 + %i78 = fadd double %i77, 0.000000e+00 + %i79 = fcmp ult double %i78, 0.000000e+00 + %i81 = fsub double %i, 0.000000e+00 + %i82 = fmul double 0.000000e+00, %i81 + %i83 = fadd double 0.000000e+00, %i82 + 
%i84 = fadd double %i83, 0.000000e+00 + %i85 = fcmp ult double %i84, 0.000000e+00 + %i86 = fsub double 0.000000e+00, %i + %i87 = fmul double 0.000000e+00, %i86 + %i88 = fadd double %i87, 0.000000e+00 + %i89 = fadd double %i88, 0.000000e+00 + %i90 = fcmp ult double %i89, 0.000000e+00 + %i91 = fsub double 0.000000e+00, 0.000000e+00 + %i92 = fmul double 0.000000e+00, 0.000000e+00 + %i93 = fadd double %i92, 0.000000e+00 + %i94 = fadd double %i93, 0.000000e+00 + %i95 = fcmp ult double %i94, 0.000000e+00 + %i96 = fsub double poison, 0.000000e+00 + %i97 = fadd double %i77, 0.000000e+00 + %i98 = fcmp ult double %i97, 0.000000e+00 + %i99 = fadd double %i83, 0.000000e+00 + %i100 = fcmp ult double %i99, 0.000000e+00 + %i101 = fmul double 0.000000e+00, 0.000000e+00 + %i102 = fadd double %i101, 0.000000e+00 + %i103 = fadd double %i102, 0.000000e+00 + %i104 = fcmp ult double %i103, 0.000000e+00 + %i105 = fmul double 0.000000e+00, 0.000000e+00 + %i106 = fadd double %i105, 0.000000e+00 + %i107 = fadd double %i106, 0.000000e+00 + %i108 = fcmp ult double %i107, 0.000000e+00 + br label %bb116 + +bb116: + %i117 = fmul double 0.000000e+00, %i81 + %i119 = fmul double 0.000000e+00, %i96 + %i120 = fadd double %i117, %i119 + %i121 = fmul double 0.000000e+00, %i74 + %i122 = fmul double 0.000000e+00, %i75 + %i123 = fadd double %i122, 0.000000e+00 + %i124 = fmul double 0.000000e+00, %i91 + %i125 = fadd double %i124, 0.000000e+00 + %i127 = fmul double 0.000000e+00, %i86 + %i128 = fadd double %i127, %i121 + %i133 = call double @llvm.maxnum.f64(double %i123, double 0.000000e+00) + %i134 = fmul double %i133, 0.000000e+00 + %i135 = fptosi double %i134 to i32 + %i136 = sub i32 0, %i135 + %i137 = icmp sgt i32 %i136, 0 + %i139 = call double @llvm.maxnum.f64(double %i128, double 0.000000e+00) + %i142 = call double @llvm.maxnum.f64(double %i125, double 0.000000e+00) + %i143 = fmul double %i142, 0.000000e+00 + %i144 = fptosi double %i143 to i32 + %i145 = sub i32 0, %i144 + %i146 = icmp sgt i32 %i145, 0 
+ %i147 = fcmp ogt double %i120, 0.000000e+00 + ret void +} + +declare double @llvm.maxnum.f64(double, double) From 0756378b77054938b2e252c105e91395954366ec Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Tue, 13 Feb 2024 20:04:34 +0100 Subject: [PATCH 28/54] [libc++][modules] Re-add build dir CMakeLists.txt. (#81370) This CMakeLists.txt is used to build modules without build system support. This was removed in d06ae33ec32122bb526fb35025c1f0cf979f1090. This is used in the documentation how to use modules. Made some minor changes to make it work with the std.compat module using the std module. Note the CMakeLists.txt in the build dir should be removed once build system support is generally available. (cherry picked from commit fc0e9c8315564288f9079a633892abadace534cf) --- libcxx/docs/Modules.rst | 4 ++ libcxx/modules/CMakeLists.txt | 20 ++++++++ libcxx/modules/CMakeLists.txt.in | 88 ++++++++++++++++++++++++++++++++ 3 files changed, 112 insertions(+) create mode 100644 libcxx/modules/CMakeLists.txt.in diff --git a/libcxx/docs/Modules.rst b/libcxx/docs/Modules.rst index 533c3fbd2a1ee..ee2b81d3b9e7c 100644 --- a/libcxx/docs/Modules.rst +++ b/libcxx/docs/Modules.rst @@ -218,9 +218,13 @@ Building this project is done with the following steps, assuming the files $ mkdir build $ cmake -G Ninja -S . -B build -DCMAKE_CXX_COMPILER= -DLIBCXX_BUILD= + $ ninja -j1 std -C build $ ninja -C build $ build/main +.. note:: The ``std`` dependencies of ``std.compat`` is not always resolved when + building the ``std`` target using multiple jobs. + .. warning:: ```` should point point to the real binary and not to a symlink. diff --git a/libcxx/modules/CMakeLists.txt b/libcxx/modules/CMakeLists.txt index 0388c048dacb8..0dea8cfca94ac 100644 --- a/libcxx/modules/CMakeLists.txt +++ b/libcxx/modules/CMakeLists.txt @@ -137,6 +137,25 @@ set(LIBCXX_MODULE_STD_COMPAT_SOURCES std.compat/cwctype.inc ) +# TODO MODULES the CMakeLists.txt in the build directory is only temporary. 
+# This allows using as available in the build directory. Once build systems +# have proper support for the installed files this will be removed. +if ("${LIBCXX_GENERATED_INCLUDE_DIR}" STREQUAL "${LIBCXX_GENERATED_INCLUDE_TARGET_DIR}") + # This typically happens when the target is not installed. + set(LIBCXX_CONFIGURED_INCLUDE_DIRS "${LIBCXX_GENERATED_INCLUDE_DIR}") +else() + # It's important that the arch directory be included first so that its header files + # which interpose on the default include dir be included instead of the default ones. + set(LIBCXX_CONFIGURED_INCLUDE_DIRS + "${LIBCXX_GENERATED_INCLUDE_TARGET_DIR};${LIBCXX_GENERATED_INCLUDE_DIR}" + ) +endif() +configure_file( + "CMakeLists.txt.in" + "${LIBCXX_GENERATED_MODULE_DIR}/CMakeLists.txt" + @ONLY +) + set(LIBCXX_MODULE_STD_INCLUDE_SOURCES) foreach(file ${LIBCXX_MODULE_STD_SOURCES}) set( @@ -166,6 +185,7 @@ configure_file( ) set(_all_modules) +list(APPEND _all_modules "${LIBCXX_GENERATED_MODULE_DIR}/CMakeLists.txt") list(APPEND _all_modules "${LIBCXX_GENERATED_MODULE_DIR}/std.cppm") list(APPEND _all_modules "${LIBCXX_GENERATED_MODULE_DIR}/std.compat.cppm") foreach(file ${LIBCXX_MODULE_STD_SOURCES} ${LIBCXX_MODULE_STD_COMPAT_SOURCES}) diff --git a/libcxx/modules/CMakeLists.txt.in b/libcxx/modules/CMakeLists.txt.in new file mode 100644 index 0000000000000..e332d70cc1633 --- /dev/null +++ b/libcxx/modules/CMakeLists.txt.in @@ -0,0 +1,88 @@ +cmake_minimum_required(VERSION 3.26) + +project(libc++-modules LANGUAGES CXX) + +# Enable CMake's module support +if(CMAKE_VERSION VERSION_LESS "3.28.0") + if(CMAKE_VERSION VERSION_LESS "3.27.0") + set(CMAKE_EXPERIMENTAL_CXX_MODULE_CMAKE_API "2182bf5c-ef0d-489a-91da-49dbc3090d2a") + else() + set(CMAKE_EXPERIMENTAL_CXX_MODULE_CMAKE_API "aa1f7df0-828a-4fcd-9afc-2dc80491aca7") + endif() + set(CMAKE_EXPERIMENTAL_CXX_MODULE_DYNDEP 1) +else() + cmake_policy(VERSION 3.28) +endif() + +# Default to C++ extensions being off. 
Libc++'s modules support have trouble +# with extensions right now. +set(CMAKE_CXX_EXTENSIONS OFF) + +# Propagates the CMake options to the modules. +# +# This uses the std module hard-coded since the std.compat module does not +# depend on these flags. +macro(compile_define_if_not condition def) + if (NOT ${condition}) + target_compile_definitions(std PRIVATE ${def}) + endif() +endmacro() +macro(compile_define_if condition def) + if (${condition}) + target_compile_definitions(std PRIVATE ${def}) + endif() +endmacro() + +### STD + +add_library(std) +target_sources(std + PUBLIC FILE_SET cxx_modules TYPE CXX_MODULES FILES + std.cppm +) + +target_include_directories(std SYSTEM PRIVATE @LIBCXX_CONFIGURED_INCLUDE_DIRS@) + +if (NOT @LIBCXX_ENABLE_EXCEPTIONS@) + target_compile_options(std PUBLIC -fno-exceptions) +endif() + +target_compile_options(std + PUBLIC + -nostdinc++ + -Wno-reserved-module-identifier + -Wno-reserved-user-defined-literal + @LIBCXX_COMPILE_FLAGS@ +) +set_target_properties(std + PROPERTIES + OUTPUT_NAME "c++std" +) + +### STD.COMPAT + +add_library(std.compat) +target_sources(std.compat + PUBLIC FILE_SET cxx_modules TYPE CXX_MODULES FILES + std.compat.cppm +) + +target_include_directories(std.compat SYSTEM PRIVATE @LIBCXX_CONFIGURED_INCLUDE_DIRS@) + +if (NOT @LIBCXX_ENABLE_EXCEPTIONS@) + target_compile_options(std.compat PUBLIC -fno-exceptions) +endif() + +target_compile_options(std.compat + PUBLIC + -nostdinc++ + -Wno-reserved-module-identifier + -Wno-reserved-user-defined-literal + -fmodule-file=std=${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/std.dir/std.pcm + @LIBCXX_COMPILE_FLAGS@ +) +set_target_properties(std.compat + PROPERTIES + OUTPUT_NAME "c++std.compat" +) +add_dependencies(std.compat std) From d71aae5f79863ce897e38f6aab46710f0257f72e Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 9 Feb 2024 20:57:05 -0800 Subject: [PATCH 29/54] [lld] Fix test failures when running as root user (#81339) This makes it easier to run the tests in a 
containerized environment. (cherry picked from commit e165bea1d4ec2de96ee0548cece79d71a75ce8f8) --- lld/test/COFF/lto-cache-errors.ll | 2 +- lld/test/COFF/thinlto-emit-imports.ll | 2 +- lld/test/ELF/lto/resolution-err.ll | 2 +- lld/test/ELF/lto/thinlto-cant-write-index.ll | 2 +- lld/test/ELF/lto/thinlto-emit-imports.ll | 2 +- lld/test/MachO/invalid/invalid-lto-object-path.ll | 2 +- lld/test/MachO/thinlto-emit-imports.ll | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lld/test/COFF/lto-cache-errors.ll b/lld/test/COFF/lto-cache-errors.ll index 55244e5690dc3..a46190a81b623 100644 --- a/lld/test/COFF/lto-cache-errors.ll +++ b/lld/test/COFF/lto-cache-errors.ll @@ -1,4 +1,4 @@ -; REQUIRES: x86 +; REQUIRES: x86, non-root-user ;; Not supported on windows since we use permissions to deny the creation ; UNSUPPORTED: system-windows diff --git a/lld/test/COFF/thinlto-emit-imports.ll b/lld/test/COFF/thinlto-emit-imports.ll index a9f22c1dc2dcf..b47a6cea4eb7d 100644 --- a/lld/test/COFF/thinlto-emit-imports.ll +++ b/lld/test/COFF/thinlto-emit-imports.ll @@ -1,4 +1,4 @@ -; REQUIRES: x86 +; REQUIRES: x86, non-root-user ; Generate summary sections and test lld handling. ; RUN: opt -module-summary %s -o %t1.obj diff --git a/lld/test/ELF/lto/resolution-err.ll b/lld/test/ELF/lto/resolution-err.ll index 6dfa64b1b8b9e..f9855abaff327 100644 --- a/lld/test/ELF/lto/resolution-err.ll +++ b/lld/test/ELF/lto/resolution-err.ll @@ -1,5 +1,5 @@ ; UNSUPPORTED: system-windows -; REQUIRES: shell +; REQUIRES: shell, non-root-user ; RUN: llvm-as %s -o %t.bc ; RUN: touch %t.resolution.txt ; RUN: chmod u-w %t.resolution.txt diff --git a/lld/test/ELF/lto/thinlto-cant-write-index.ll b/lld/test/ELF/lto/thinlto-cant-write-index.ll index e664acbb17de1..286fcddd4238a 100644 --- a/lld/test/ELF/lto/thinlto-cant-write-index.ll +++ b/lld/test/ELF/lto/thinlto-cant-write-index.ll @@ -1,4 +1,4 @@ -; REQUIRES: x86 +; REQUIRES: x86, non-root-user ; Basic ThinLTO tests. 
; RUN: opt -module-summary %s -o %t1.o diff --git a/lld/test/ELF/lto/thinlto-emit-imports.ll b/lld/test/ELF/lto/thinlto-emit-imports.ll index 6d0e1e65047db..253ec08619c98 100644 --- a/lld/test/ELF/lto/thinlto-emit-imports.ll +++ b/lld/test/ELF/lto/thinlto-emit-imports.ll @@ -1,4 +1,4 @@ -; REQUIRES: x86 +; REQUIRES: x86, non-root-user ;; Test a few properties not tested by thinlto-index-only.ll ; RUN: opt -module-summary %s -o %t1.o diff --git a/lld/test/MachO/invalid/invalid-lto-object-path.ll b/lld/test/MachO/invalid/invalid-lto-object-path.ll index 75c6a97e446fb..c862538d592ce 100644 --- a/lld/test/MachO/invalid/invalid-lto-object-path.ll +++ b/lld/test/MachO/invalid/invalid-lto-object-path.ll @@ -1,4 +1,4 @@ -; REQUIRES: x86 +; REQUIRES: x86, non-root-user ;; Creating read-only directories with `chmod 400` isn't supported on Windows ; UNSUPPORTED: system-windows diff --git a/lld/test/MachO/thinlto-emit-imports.ll b/lld/test/MachO/thinlto-emit-imports.ll index 47a612bd0a7b5..88f766f59c887 100644 --- a/lld/test/MachO/thinlto-emit-imports.ll +++ b/lld/test/MachO/thinlto-emit-imports.ll @@ -1,4 +1,4 @@ -; REQUIRES: x86 +; REQUIRES: x86, non-root-user ; RUN: rm -rf %t; split-file %s %t ; Generate summary sections and test lld handling. 
From 347977c8b16fc4db809d7e049ceca874a5e4940b Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 14 Feb 2024 18:26:38 +0100 Subject: [PATCH 30/54] [lld/ELF] Avoid unnecessary TPOFF relocations in GOT for -pie (#81739) With the new SystemZ port we noticed that -pie executables generated from files containing R_390_TLS_IEENT relocations will have unnecessary relocations in their GOT: 9e8d8: R_390_TLS_TPOFF *ABS*+0x18 This is caused by the config->isPic condition in addTpOffsetGotEntry: static void addTpOffsetGotEntry(Symbol &sym) { in.got->addEntry(sym); uint64_t off = sym.getGotOffset(); if (!sym.isPreemptible && !config->isPic) { in.got->addConstant({R_TPREL, target->symbolicRel, off, 0, &sym}); return; } It is correct that we need to retain a TPOFF relocation if the target symbol is preemptible or if we're building a shared library. But when building a -pie executable, those values are fixed at link time and there's no need for any remaining dynamic relocation. Note that the equivalent MIPS-specific code in MipsGotSection::build checks for config->shared instead of config->isPic; we should use the same check here. (Note also that on many other platforms we're not even using addTpOffsetGotEntry in this case as an IE->LE relaxation is applied before; we don't have this type of relaxation on SystemZ.) 
(cherry picked from commit 6f907733e65d24edad65f763fb14402464bd578b) --- lld/ELF/Relocations.cpp | 2 +- lld/test/ELF/systemz-tls-ie.s | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index f64b4219e0acc..619fbaf5dc545 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -940,7 +940,7 @@ void elf::addGotEntry(Symbol &sym) { static void addTpOffsetGotEntry(Symbol &sym) { in.got->addEntry(sym); uint64_t off = sym.getGotOffset(); - if (!sym.isPreemptible && !config->isPic) { + if (!sym.isPreemptible && !config->shared) { in.got->addConstant({R_TPREL, target->symbolicRel, off, 0, &sym}); return; } diff --git a/lld/test/ELF/systemz-tls-ie.s b/lld/test/ELF/systemz-tls-ie.s index 27b642ed2dfc5..85e2f24cb61f6 100644 --- a/lld/test/ELF/systemz-tls-ie.s +++ b/lld/test/ELF/systemz-tls-ie.s @@ -12,6 +12,14 @@ # RUN: llvm-objdump --section .data --full-contents %t | FileCheck --check-prefix=LE-DATA %s # RUN: llvm-objdump --section .got --full-contents %t | FileCheck --check-prefix=LE-GOT %s +## With -pie we still have the R_390_RELATIVE for the data element, but all GOT +## entries should be fully resolved without any remaining R_390_TLS_TPOFF. 
+# RUN: ld.lld -pie %t.o -o %t.pie +# RUN: llvm-readelf -r %t.pie | FileCheck --check-prefix=PIE-REL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.pie | FileCheck --check-prefix=PIE %s +# RUN: llvm-objdump --section .data --full-contents %t.pie | FileCheck --check-prefix=PIE-DATA %s +# RUN: llvm-objdump --section .got --full-contents %t.pie | FileCheck --check-prefix=PIE-GOT %s + # IE-REL: Relocation section '.rela.dyn' at offset {{.*}} contains 4 entries: # IE-REL: 0000000000003478 000000000000000c R_390_RELATIVE 2460 # IE-REL: 0000000000002460 0000000100000038 R_390_TLS_TPOFF 0000000000000008 a + 0 @@ -58,6 +66,32 @@ # LE-GOT: 1002248 00000000 00000000 ffffffff fffffff8 # LE-GOT: 1002258 ffffffff fffffffc 00000000 00000000 +# PIE-REL: Relocation section '.rela.dyn' at offset {{.*}} contains 1 entries: +# PIE-REL: 00000000000033d0 000000000000000c R_390_RELATIVE 23b8 + +## TP offset for a is at 0x23b8 +# PIE: lgrl %r1, 0x23b8 +# PIE-NEXT: lgf %r1, 0(%r1,%r7) + +## TP offset for b is at 0x23c0 +# PIE-NEXT: lgrl %r1, 0x23c0 +# PIE-NEXT: lgf %r1, 0(%r1,%r7) + +## TP offset for c is at 0x23c8 +# PIE-NEXT: lgrl %r1, 0x23c8 +# PIE-NEXT: lgf %r1, 0(%r1,%r7) + +## Data element: TP offset for a is at 0x23b8 (relocated via R_390_RELATIVE above) +# PIE-DATA: 33d0 00000000 00000000 + +## TP offsets in GOT: +# a: -8 +# b: -4 +# c: 0 +# PIE-GOT: 23a0 00000000 000022d0 00000000 00000000 +# PIE-GOT: 23b0 00000000 00000000 ffffffff fffffff8 +# PIE-GOT: 23c0 ffffffff fffffffc 00000000 00000000 + ear %r7,%a0 sllg %r7,%r1,32 ear %r7,%a1 From 8a3d8f0c2531877c84203298a979419a2ba652ee Mon Sep 17 00:00:00 2001 From: David Green Date: Sat, 17 Feb 2024 08:57:14 +0000 Subject: [PATCH 31/54] [AArch64][GlobalISel] Fail legalization for unknown libcalls. (#81873) If, like powi on windows, the libcall is unavailable we should fall back to SDAG. Currently we try and generate a call to "". 
(cherry picked from commit 47c65cf62d06add9f55a77c9d45390fa3b986fc5) --- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 2 + llvm/test/CodeGen/AArch64/win64-fpowi.ll | 136 ++++++++++++++++++ 2 files changed, 138 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/win64-fpowi.ll diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 3b2cf31910927..c0c22e36004f7 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -596,6 +596,8 @@ llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, LostDebugLocObserver &LocObserver, MachineInstr *MI) { auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); const char *Name = TLI.getLibcallName(Libcall); + if (!Name) + return LegalizerHelper::UnableToLegalize; const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall); return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI); } diff --git a/llvm/test/CodeGen/AArch64/win64-fpowi.ll b/llvm/test/CodeGen/AArch64/win64-fpowi.ll new file mode 100644 index 0000000000000..3eb74f8394ec4 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/win64-fpowi.ll @@ -0,0 +1,136 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -mtriple=aarch64-pc-windows-msvc19 -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=aarch64-pc-windows-msvc19 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - | FileCheck %s + +define double @powi_f64(double %a, i32 %b) { +; CHECK-LABEL: powi_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: scvtf d1, w0 +; CHECK-NEXT: b pow +entry: + %c = call double @llvm.powi.f64.i32(double %a, i32 %b) + ret double %c +} + +define float @powi_f32(float %a, i32 %b) { +; CHECK-LABEL: powi_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: scvtf s1, w0 +; CHECK-NEXT: b powf +entry: + %c = call float @llvm.powi.f32.i32(float 
%a, i32 %b) + ret float %c +} + +define half @powi_f16(half %a, i32 %b) { +; CHECK-LABEL: powi_f16: +; CHECK: .seh_proc powi_f16 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .seh_save_reg_x x30, 16 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: scvtf s1, w0 +; CHECK-NEXT: bl powf +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .seh_save_reg_x x30, 16 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: ret +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc +entry: + %c = call half @llvm.powi.f16.i32(half %a, i32 %b) + ret half %c +} + +define <2 x double> @powi_v2f64(<2 x double> %a, i32 %b) { +; CHECK-LABEL: powi_v2f64: +; CHECK: .seh_proc powi_v2f64 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: .seh_stackalloc 48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: .seh_save_reg x30, 32 +; CHECK-NEXT: str d8, [sp, #40] // 8-byte Folded Spill +; CHECK-NEXT: .seh_save_freg d8, 40 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: scvtf d8, w0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov d0, v0.d[1] +; CHECK-NEXT: fmov d1, d8 +; CHECK-NEXT: bl pow +; CHECK-NEXT: fmov d1, d8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: bl pow +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: ldr d8, [sp, #40] // 8-byte Folded Reload +; CHECK-NEXT: .seh_save_freg d8, 40 +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: .seh_save_reg x30, 32 +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: 
.seh_stackalloc 48 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: ret +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc +entry: + %c = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> %a, i32 %b) + ret <2 x double> %c +} + +define <2 x float> @powi_v2f32(<2 x float> %a, i32 %b) { +; CHECK-LABEL: powi_v2f32: +; CHECK: .seh_proc powi_v2f32 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: .seh_stackalloc 48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: .seh_save_reg x30, 32 +; CHECK-NEXT: str d8, [sp, #40] // 8-byte Folded Spill +; CHECK-NEXT: .seh_save_freg d8, 40 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: scvtf s8, w0 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: fmov s1, s8 +; CHECK-NEXT: bl powf +; CHECK-NEXT: fmov s1, s8 +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: bl powf +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: mov v0.s[1], v1.s[0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: ldr d8, [sp, #40] // 8-byte Folded Reload +; CHECK-NEXT: .seh_save_freg d8, 40 +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: .seh_save_reg x30, 32 +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: .seh_stackalloc 48 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: ret +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc +entry: + %c = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> %a, i32 %b) + ret <2 x float> %c +} + +declare <2 x double> @llvm.powi.v2f64.i32(<2 x double>, i32) +declare <2 x float> @llvm.powi.v2f32.i32(<2 x float>, i32) +declare double 
@llvm.powi.f64.i32(double, i32) +declare float @llvm.powi.f32.i32(float, i32) +declare half @llvm.powi.f16.i32(half, i32) From 82e17c153611fbb8c5bb9b990f0f143910a4c2e0 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Sat, 17 Feb 2024 16:19:39 -0800 Subject: [PATCH 32/54] Use container on Linux to run llvm-project-tests workflow (#81349) (#81807) (cherry picked from commit fe20a759fcd20e1755ea1b34c5e6447a787925dc) --- .github/workflows/llvm-project-tests.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/llvm-project-tests.yml b/.github/workflows/llvm-project-tests.yml index 68b4a68d1af98..43b90193406fc 100644 --- a/.github/workflows/llvm-project-tests.yml +++ b/.github/workflows/llvm-project-tests.yml @@ -58,6 +58,10 @@ jobs: lit-tests: name: Lit Tests runs-on: ${{ matrix.os }} + container: + image: ${{(startsWith(matrix.os, 'ubuntu') && 'ghcr.io/llvm/ci-ubuntu-22.04:latest') || null}} + volumes: + - /mnt/:/mnt/ strategy: fail-fast: false matrix: @@ -77,6 +81,7 @@ jobs: with: python-version: ${{ inputs.python_version }} - name: Install Ninja + if: runner.os != 'Linux' uses: llvm/actions/install-ninja@main # actions/checkout deletes any existing files in the new git directory, # so this needs to either run before ccache-action or it has to use @@ -108,8 +113,8 @@ jobs: run: | if [ "${{ runner.os }}" == "Linux" ]; then builddir="/mnt/build/" - sudo mkdir -p $builddir - sudo chown `whoami`:`whoami` $builddir + mkdir -p $builddir + extra_cmake_args="-DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang" else builddir="$(pwd)"/build fi @@ -123,6 +128,7 @@ jobs: -DLLDB_INCLUDE_TESTS=OFF \ -DCMAKE_C_COMPILER_LAUNCHER=sccache \ -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \ + $extra_cmake_args \ ${{ inputs.extra_cmake_args }} ninja -C "$builddir" '${{ inputs.build_target }}' From b27f0b4fae68fea5c2468bc080e31bcecfb7faa7 Mon Sep 17 00:00:00 2001 From: Wael Yehia <44115484+w2yehia@users.noreply.github.com> Date: Fri, 16 Feb 2024 
12:55:20 -0500 Subject: [PATCH 33/54] [AIX] Add a dummy variable in the __llvm_orderfile section (#81968) to satisfy the __start___llvm_orderfile reference when linking with -bexpfull and -fprofile-generate on AIX. (cherry picked from commit 15cccc55919d27eb2e89379a65f6c7809f679fda) --- compiler-rt/lib/profile/InstrProfilingPlatformAIX.c | 4 +++- compiler-rt/test/profile/AIX/bexpfull-pgo.c | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 compiler-rt/test/profile/AIX/bexpfull-pgo.c diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformAIX.c b/compiler-rt/lib/profile/InstrProfilingPlatformAIX.c index 9f46a98d78ac4..002bec164d7e8 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformAIX.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformAIX.c @@ -195,6 +195,8 @@ static const int dummy_name[0] COMPILER_RT_SECTION( COMPILER_RT_SEG INSTR_PROF_NAME_SECT_NAME); static int dummy_vnds[0] COMPILER_RT_SECTION( COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME); +static int dummy_orderfile[0] COMPILER_RT_SECTION( + COMPILER_RT_SEG INSTR_PROF_ORDERFILE_SECT_NAME); // To avoid GC'ing of the dummy variables by the linker, reference them in an // array and reference the array in the runtime registration code @@ -206,7 +208,7 @@ static int dummy_vnds[0] COMPILER_RT_SECTION( COMPILER_RT_VISIBILITY void *__llvm_profile_keep[] = {(void *)&dummy_cnts, (void *)&dummy_bits, (void *)&dummy_data, (void *)&dummy_name, - (void *)&dummy_vnds}; + (void *)&dummy_vnds, (void *)&dummy_orderfile}; #ifdef __GNUC__ #pragma GCC diagnostic pop #endif diff --git a/compiler-rt/test/profile/AIX/bexpfull-pgo.c b/compiler-rt/test/profile/AIX/bexpfull-pgo.c new file mode 100644 index 0000000000000..f48242ec6bfea --- /dev/null +++ b/compiler-rt/test/profile/AIX/bexpfull-pgo.c @@ -0,0 +1,7 @@ +// RUN: %clang_pgogen %s -bexpall +// RUN: %clang_pgogen %s -bexpfull + +#include +int ar[10]; +int n; +int main() { memcpy(ar, ar + 1, n); }; From 
ae276000164a41a8fc814bf1d4eccf31347c88f5 Mon Sep 17 00:00:00 2001 From: Xing Xue Date: Fri, 16 Feb 2024 15:12:41 -0500 Subject: [PATCH 34/54] [OpenMP][AIX] Set worker stack size to 2 x KMP_DEFAULT_STKSIZE if system stack size is too big (#81996) This patch sets the stack size of worker threads to `2 x KMP_DEFAULT_STKSIZE` (2 x 4MB) for AIX if the system stack size is too big. Also defines maximum stack size for 32-bit AIX. (cherry picked from commit 2de269a641e4ffbb7a44e559c4c0a91bb66df823) --- openmp/runtime/src/kmp.h | 4 ++++ openmp/runtime/src/kmp_settings.cpp | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index b147063d22826..259c57b5afbca 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -1181,7 +1181,11 @@ extern void __kmp_init_target_task(); #define KMP_MIN_STKSIZE ((size_t)(32 * 1024)) #endif +#if KMP_OS_AIX && KMP_ARCH_PPC +#define KMP_MAX_STKSIZE 0x10000000 /* 256Mb max size on 32-bit AIX */ +#else #define KMP_MAX_STKSIZE (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1))) +#endif #if KMP_ARCH_X86 #define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024)) diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp index d2157b10b7819..ec86ee07472c1 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -255,8 +255,13 @@ static void __kmp_stg_parse_bool(char const *name, char const *value, // placed here in order to use __kmp_round4k static function void __kmp_check_stksize(size_t *val) { // if system stack size is too big then limit the size for worker threads +#if KMP_OS_AIX + if (*val > KMP_DEFAULT_STKSIZE * 2) // Use 2 times, 16 is too large for AIX. + *val = KMP_DEFAULT_STKSIZE * 2; +#else if (*val > KMP_DEFAULT_STKSIZE * 16) // just a heuristics... 
*val = KMP_DEFAULT_STKSIZE * 16; +#endif if (*val < __kmp_sys_min_stksize) *val = __kmp_sys_min_stksize; if (*val > KMP_MAX_STKSIZE) From c5e1885af0426897affc008fab174348eeca7af7 Mon Sep 17 00:00:00 2001 From: Frederic Cambus Date: Sat, 17 Feb 2024 15:38:05 +0100 Subject: [PATCH 35/54] [Support/ELF] Add OpenBSD PT_OPENBSD_SYSCALLS constant. Reference: https://github.com/openbsd/src/blob/master/sys/sys/exec_elf.h (cherry picked from commit 97eff26d0ca4d187a5efb8534af484dbb68bce30) --- llvm/include/llvm/BinaryFormat/ELF.h | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 81cdd39afc6ba..f17ba75e3efa6 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -1464,6 +1464,7 @@ enum { PT_OPENBSD_RANDOMIZE = 0x65a3dbe6, // Fill with random data. PT_OPENBSD_WXNEEDED = 0x65a3dbe7, // Program does W^X violations. PT_OPENBSD_NOBTCFI = 0x65a3dbe8, // Do not enforce branch target CFI. + PT_OPENBSD_SYSCALLS = 0x65a3dbe9, // System call sites. PT_OPENBSD_BOOTDATA = 0x65a41be6, // Section for boot arguments. // ARM program header types. From b845f0662a287912332c89e56f5900037aa7c70f Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sat, 17 Feb 2024 23:30:45 +0800 Subject: [PATCH 36/54] [ValueTracking] Fix computeKnownFPClass for fpext (#81972) This patch adds the missing `subnormal -> normal` part for `fpext` in `computeKnownFPClass`. Fixes the miscompilation reported by https://github.com/llvm/llvm-project/pull/80941#issuecomment-1947302100. 
(cherry picked from commit a5865c3c3dbbd17ae12ecc1c297fe1fc2605df52) --- llvm/lib/Analysis/ValueTracking.cpp | 7 +++- .../Transforms/Attributor/nofpclass-fpext.ll | 34 +++++++++++++++++-- llvm/test/Transforms/InstCombine/pr80941.ll | 32 +++++++++++++++++ 3 files changed, 70 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/pr80941.ll diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 5d6c3465a0c36..412115eb649c2 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -5083,8 +5083,13 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, Op->getOperand(0)->getType()->getScalarType()->getFltSemantics(); // All subnormal inputs should be in the normal range in the result type. - if (APFloat::isRepresentableAsNormalIn(SrcTy, DstTy)) + if (APFloat::isRepresentableAsNormalIn(SrcTy, DstTy)) { + if (Known.KnownFPClasses & fcPosSubnormal) + Known.KnownFPClasses |= fcPosNormal; + if (Known.KnownFPClasses & fcNegSubnormal) + Known.KnownFPClasses |= fcNegNormal; Known.knownNot(fcSubnormal); + } // Sign bit of a nan isn't guaranteed. 
if (!Known.isKnownNeverNaN()) diff --git a/llvm/test/Transforms/Attributor/nofpclass-fpext.ll b/llvm/test/Transforms/Attributor/nofpclass-fpext.ll index 0ba114117ceec..ee36f949529d4 100644 --- a/llvm/test/Transforms/Attributor/nofpclass-fpext.ll +++ b/llvm/test/Transforms/Attributor/nofpclass-fpext.ll @@ -142,7 +142,7 @@ define double @ret_fpext_f32_to_f64_nosub(float nofpclass(sub) %arg0) { } define double @ret_fpext_f32_to_f64_nonorm(float nofpclass(norm) %arg0) { -; CHECK-LABEL: define nofpclass(sub norm) double @ret_fpext_f32_to_f64_nonorm +; CHECK-LABEL: define nofpclass(sub) double @ret_fpext_f32_to_f64_nonorm ; CHECK-SAME: (float nofpclass(sub norm) [[ARG0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[EXT:%.*]] = fpext float [[ARG0]] to double ; CHECK-NEXT: ret double [[EXT]] @@ -482,7 +482,37 @@ define double @ret_fpext_bf16_f64_nosub(bfloat nofpclass(sub) %arg0) { } define double @ret_fpext_bf16_f64_nonorm(bfloat nofpclass(norm) %arg0) { -; CHECK-LABEL: define nofpclass(sub norm) double @ret_fpext_bf16_f64_nonorm +; CHECK-LABEL: define nofpclass(sub) double @ret_fpext_bf16_f64_nonorm +; CHECK-SAME: (bfloat nofpclass(sub norm) [[ARG0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[EXT:%.*]] = fpext bfloat [[ARG0]] to double +; CHECK-NEXT: ret double [[EXT]] +; + %ext = fpext bfloat %arg0 to double + ret double %ext +} + +define double @ret_fpext_bf16_f64_nonorm_psub(bfloat nofpclass(norm psub) %arg0) { +; CHECK-LABEL: define nofpclass(sub pnorm) double @ret_fpext_bf16_f64_nonorm_psub +; CHECK-SAME: (bfloat nofpclass(sub norm) [[ARG0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[EXT:%.*]] = fpext bfloat [[ARG0]] to double +; CHECK-NEXT: ret double [[EXT]] +; + %ext = fpext bfloat %arg0 to double + ret double %ext +} + +define double @ret_fpext_bf16_f64_nonorm_nsub(bfloat nofpclass(norm nsub) %arg0) { +; CHECK-LABEL: define nofpclass(sub nnorm) double @ret_fpext_bf16_f64_nonorm_nsub +; CHECK-SAME: (bfloat nofpclass(sub norm) [[ARG0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[EXT:%.*]] = fpext 
bfloat [[ARG0]] to double +; CHECK-NEXT: ret double [[EXT]] +; + %ext = fpext bfloat %arg0 to double + ret double %ext +} + +define double @ret_fpext_bf16_f64_nonorm_sub(bfloat nofpclass(norm sub) %arg0) { +; CHECK-LABEL: define nofpclass(sub norm) double @ret_fpext_bf16_f64_nonorm_sub ; CHECK-SAME: (bfloat nofpclass(sub norm) [[ARG0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[EXT:%.*]] = fpext bfloat [[ARG0]] to double ; CHECK-NEXT: ret double [[EXT]] diff --git a/llvm/test/Transforms/InstCombine/pr80941.ll b/llvm/test/Transforms/InstCombine/pr80941.ll new file mode 100644 index 0000000000000..95242b1d1407b --- /dev/null +++ b/llvm/test/Transforms/InstCombine/pr80941.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -passes=instcombine < %s | FileCheck %s + +define float @pr80941(float %arg) { +; CHECK-LABEL: define float @pr80941( +; CHECK-SAME: float [[ARG:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[COND:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[ARG]], i32 144) +; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_EXIT:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[FPEXT:%.*]] = fpext float [[ARG]] to double +; CHECK-NEXT: [[SIGN:%.*]] = call double @llvm.copysign.f64(double 0.000000e+00, double [[FPEXT]]) +; CHECK-NEXT: [[FPTRUNC:%.*]] = fptrunc double [[SIGN]] to float +; CHECK-NEXT: br label [[IF_EXIT]] +; CHECK: if.exit: +; CHECK-NEXT: [[RET:%.*]] = phi float [ [[FPTRUNC]], [[IF_THEN]] ], [ [[ARG]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret float [[RET]] +; +entry: + %cond = tail call i1 @llvm.is.fpclass.f32(float %arg, i32 144) + br i1 %cond, label %if.then, label %if.exit + +if.then: + %fpext = fpext float %arg to double + %sign = call double @llvm.copysign.f64(double 0.000000e+00, double %fpext) + %fptrunc = fptrunc double %sign to float + br label %if.exit + +if.exit: + %ret = phi float [ %fptrunc, %if.then ], [ %arg, %entry ] + ret float %ret +} From 
c7b0a6ecd442363620d13e44077ca25a5f59fb59 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Fri, 9 Feb 2024 06:51:11 +0800 Subject: [PATCH 37/54] [RISCV] Check type is legal before combining mgather to vlse intrinsic (#81107) Otherwise we will crash since target intrinsics don't have their types legalized. Let the mgather get legalized first, then do the combine on the legal type. Fixes #81088 Co-authored-by: Craig Topper (cherry picked from commit 06c89bd59ca2279f76a41e851b7b2df634a6191e) --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +- .../RISCV/rvv/fixed-vectors-masked-gather.ll | 448 ++++++++++++++++++ 2 files changed, 449 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 37d94be5316ee..80447d03c000b 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -15561,7 +15561,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, MGN->getMemOperand(), IndexType, MGN->getExtensionType()); if (Index.getOpcode() == ISD::BUILD_VECTOR && - MGN->getExtensionType() == ISD::NON_EXTLOAD) { + MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) { if (std::optional SimpleVID = isSimpleVIDSequence(Index); SimpleVID && SimpleVID->StepDenominator == 1) { const int64_t StepNumerator = SimpleVID->StepNumerator; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index df41ac10f80d3..890707c6337fa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -14638,5 +14638,453 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) { %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison) ret <8 x i16> %v } + +; v32i64 is not a legal type, so make sure we don't try to combine the mgather +; to a vlse intrinsic 
until it is legalized and split. +define <32 x i64> @mgather_strided_split(ptr %base) { +; RV32V-LABEL: mgather_strided_split: +; RV32V: # %bb.0: +; RV32V-NEXT: li a1, 16 +; RV32V-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32V-NEXT: vlse64.v v8, (a0), a1 +; RV32V-NEXT: addi a0, a0, 256 +; RV32V-NEXT: vlse64.v v16, (a0), a1 +; RV32V-NEXT: ret +; +; RV64V-LABEL: mgather_strided_split: +; RV64V: # %bb.0: +; RV64V-NEXT: li a1, 16 +; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64V-NEXT: vlse64.v v8, (a0), a1 +; RV64V-NEXT: addi a0, a0, 256 +; RV64V-NEXT: vlse64.v v16, (a0), a1 +; RV64V-NEXT: ret +; +; RV32ZVE32F-LABEL: mgather_strided_split: +; RV32ZVE32F: # %bb.0: +; RV32ZVE32F-NEXT: addi sp, sp, -512 +; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 512 +; RV32ZVE32F-NEXT: sw ra, 508(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s0, 504(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s2, 500(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s3, 496(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s4, 492(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s5, 488(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s6, 484(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s7, 480(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s8, 476(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s9, 472(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s10, 468(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s11, 464(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: .cfi_offset ra, -4 +; RV32ZVE32F-NEXT: .cfi_offset s0, -8 +; RV32ZVE32F-NEXT: .cfi_offset s2, -12 +; RV32ZVE32F-NEXT: .cfi_offset s3, -16 +; RV32ZVE32F-NEXT: .cfi_offset s4, -20 +; RV32ZVE32F-NEXT: .cfi_offset s5, -24 +; RV32ZVE32F-NEXT: .cfi_offset s6, -28 +; RV32ZVE32F-NEXT: .cfi_offset s7, -32 +; RV32ZVE32F-NEXT: .cfi_offset s8, -36 +; RV32ZVE32F-NEXT: .cfi_offset s9, -40 +; RV32ZVE32F-NEXT: .cfi_offset s10, -44 +; RV32ZVE32F-NEXT: .cfi_offset s11, -48 +; RV32ZVE32F-NEXT: addi s0, sp, 512 +; RV32ZVE32F-NEXT: .cfi_def_cfa 
s0, 0 +; RV32ZVE32F-NEXT: andi sp, sp, -128 +; RV32ZVE32F-NEXT: li a2, 32 +; RV32ZVE32F-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32ZVE32F-NEXT: vid.v v8 +; RV32ZVE32F-NEXT: vsll.vi v8, v8, 4 +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 216(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 208(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 1 +; RV32ZVE32F-NEXT: vmv.x.s a1, v16 +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 252(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 248(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v16 +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 244(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 236(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v16 +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 228(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 220(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v16 +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 240(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 232(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v16 +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 224(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 212(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v16 +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw 
a3, 204(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 200(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 7 +; RV32ZVE32F-NEXT: vmv.x.s a1, v16 +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 196(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 192(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: addi a1, sp, 256 +; RV32ZVE32F-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32ZVE32F-NEXT: vse32.v v8, (a1) +; RV32ZVE32F-NEXT: lw a1, 288(sp) +; RV32ZVE32F-NEXT: lw a2, 292(sp) +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 188(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 184(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 296(sp) +; RV32ZVE32F-NEXT: lw a3, 0(a2) +; RV32ZVE32F-NEXT: sw a3, 180(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 4(a2) +; RV32ZVE32F-NEXT: sw a2, 176(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 300(sp) +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 172(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 168(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 304(sp) +; RV32ZVE32F-NEXT: lw a3, 0(a2) +; RV32ZVE32F-NEXT: sw a3, 164(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 4(a2) +; RV32ZVE32F-NEXT: sw a2, 160(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 308(sp) +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 156(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 152(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 312(sp) +; RV32ZVE32F-NEXT: lw a3, 0(a2) +; RV32ZVE32F-NEXT: sw a3, 148(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 4(a2) +; RV32ZVE32F-NEXT: sw a2, 144(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 316(sp) +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 140(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; 
RV32ZVE32F-NEXT: sw a1, 136(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 320(sp) +; RV32ZVE32F-NEXT: lw a3, 0(a2) +; RV32ZVE32F-NEXT: sw a3, 132(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 4(a2) +; RV32ZVE32F-NEXT: sw a2, 128(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 324(sp) +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 124(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 120(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 328(sp) +; RV32ZVE32F-NEXT: lw a3, 0(a2) +; RV32ZVE32F-NEXT: sw a3, 116(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 4(a2) +; RV32ZVE32F-NEXT: sw a2, 112(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 332(sp) +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 104(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw ra, 4(a1) +; RV32ZVE32F-NEXT: lw a1, 336(sp) +; RV32ZVE32F-NEXT: lw s10, 0(a2) +; RV32ZVE32F-NEXT: lw s8, 4(a2) +; RV32ZVE32F-NEXT: lw a2, 340(sp) +; RV32ZVE32F-NEXT: lw s6, 0(a1) +; RV32ZVE32F-NEXT: lw s4, 4(a1) +; RV32ZVE32F-NEXT: lw a4, 344(sp) +; RV32ZVE32F-NEXT: lw s2, 0(a2) +; RV32ZVE32F-NEXT: lw t5, 4(a2) +; RV32ZVE32F-NEXT: lw a2, 348(sp) +; RV32ZVE32F-NEXT: lw t3, 0(a4) +; RV32ZVE32F-NEXT: lw t2, 4(a4) +; RV32ZVE32F-NEXT: lw a4, 352(sp) +; RV32ZVE32F-NEXT: lw t0, 0(a2) +; RV32ZVE32F-NEXT: lw a7, 4(a2) +; RV32ZVE32F-NEXT: lw a2, 356(sp) +; RV32ZVE32F-NEXT: lw a6, 0(a4) +; RV32ZVE32F-NEXT: lw a5, 4(a4) +; RV32ZVE32F-NEXT: lw a4, 360(sp) +; RV32ZVE32F-NEXT: lw a1, 0(a2) +; RV32ZVE32F-NEXT: sw a1, 108(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a2) +; RV32ZVE32F-NEXT: sw a1, 100(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 364(sp) +; RV32ZVE32F-NEXT: lw s11, 0(a4) +; RV32ZVE32F-NEXT: lw s9, 4(a4) +; RV32ZVE32F-NEXT: lw a1, 368(sp) +; RV32ZVE32F-NEXT: lw s7, 0(a2) +; RV32ZVE32F-NEXT: lw s5, 4(a2) +; RV32ZVE32F-NEXT: lw a3, 372(sp) +; RV32ZVE32F-NEXT: lw s3, 0(a1) +; RV32ZVE32F-NEXT: lw t6, 4(a1) +; 
RV32ZVE32F-NEXT: lw a2, 376(sp) +; RV32ZVE32F-NEXT: lw t4, 0(a3) +; RV32ZVE32F-NEXT: lw a1, 380(sp) +; RV32ZVE32F-NEXT: lw t1, 4(a3) +; RV32ZVE32F-NEXT: lw a4, 0(a2) +; RV32ZVE32F-NEXT: lw a3, 4(a2) +; RV32ZVE32F-NEXT: lw a2, 0(a1) +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a5, 196(a0) +; RV32ZVE32F-NEXT: sw a6, 192(a0) +; RV32ZVE32F-NEXT: sw a7, 188(a0) +; RV32ZVE32F-NEXT: sw t0, 184(a0) +; RV32ZVE32F-NEXT: sw t2, 180(a0) +; RV32ZVE32F-NEXT: sw t3, 176(a0) +; RV32ZVE32F-NEXT: sw t5, 172(a0) +; RV32ZVE32F-NEXT: sw s2, 168(a0) +; RV32ZVE32F-NEXT: sw s4, 164(a0) +; RV32ZVE32F-NEXT: sw s6, 160(a0) +; RV32ZVE32F-NEXT: sw s8, 156(a0) +; RV32ZVE32F-NEXT: sw s10, 152(a0) +; RV32ZVE32F-NEXT: sw ra, 148(a0) +; RV32ZVE32F-NEXT: lw a5, 104(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 144(a0) +; RV32ZVE32F-NEXT: lw a5, 112(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 140(a0) +; RV32ZVE32F-NEXT: lw a5, 116(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 136(a0) +; RV32ZVE32F-NEXT: lw a5, 120(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 132(a0) +; RV32ZVE32F-NEXT: lw a5, 124(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 128(a0) +; RV32ZVE32F-NEXT: lw a5, 128(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 124(a0) +; RV32ZVE32F-NEXT: lw a5, 132(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 120(a0) +; RV32ZVE32F-NEXT: lw a5, 136(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 116(a0) +; RV32ZVE32F-NEXT: lw a5, 140(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 112(a0) +; RV32ZVE32F-NEXT: lw a5, 144(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 108(a0) +; RV32ZVE32F-NEXT: lw a5, 148(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 104(a0) +; RV32ZVE32F-NEXT: lw a5, 152(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 100(a0) +; RV32ZVE32F-NEXT: lw a5, 156(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 96(a0) +; RV32ZVE32F-NEXT: lw a5, 160(sp) # 4-byte Folded Reload +; 
RV32ZVE32F-NEXT: sw a5, 92(a0) +; RV32ZVE32F-NEXT: lw a5, 164(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 88(a0) +; RV32ZVE32F-NEXT: lw a5, 168(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 84(a0) +; RV32ZVE32F-NEXT: lw a5, 172(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 80(a0) +; RV32ZVE32F-NEXT: lw a5, 176(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 76(a0) +; RV32ZVE32F-NEXT: lw a5, 180(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 72(a0) +; RV32ZVE32F-NEXT: lw a5, 184(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 68(a0) +; RV32ZVE32F-NEXT: lw a5, 188(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 64(a0) +; RV32ZVE32F-NEXT: lw a5, 208(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 4(a0) +; RV32ZVE32F-NEXT: lw a5, 216(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 0(a0) +; RV32ZVE32F-NEXT: sw a1, 252(a0) +; RV32ZVE32F-NEXT: sw a2, 248(a0) +; RV32ZVE32F-NEXT: sw a3, 244(a0) +; RV32ZVE32F-NEXT: sw a4, 240(a0) +; RV32ZVE32F-NEXT: sw t1, 236(a0) +; RV32ZVE32F-NEXT: sw t4, 232(a0) +; RV32ZVE32F-NEXT: sw t6, 228(a0) +; RV32ZVE32F-NEXT: sw s3, 224(a0) +; RV32ZVE32F-NEXT: sw s5, 220(a0) +; RV32ZVE32F-NEXT: sw s7, 216(a0) +; RV32ZVE32F-NEXT: sw s9, 212(a0) +; RV32ZVE32F-NEXT: sw s11, 208(a0) +; RV32ZVE32F-NEXT: lw a1, 100(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 204(a0) +; RV32ZVE32F-NEXT: lw a1, 108(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 200(a0) +; RV32ZVE32F-NEXT: lw a1, 220(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 28(a0) +; RV32ZVE32F-NEXT: lw a1, 228(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 24(a0) +; RV32ZVE32F-NEXT: lw a1, 236(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 20(a0) +; RV32ZVE32F-NEXT: lw a1, 244(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 16(a0) +; RV32ZVE32F-NEXT: lw a1, 248(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 12(a0) +; RV32ZVE32F-NEXT: lw a1, 252(sp) # 4-byte Folded Reload +; 
RV32ZVE32F-NEXT: sw a1, 8(a0) +; RV32ZVE32F-NEXT: lw a1, 192(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 60(a0) +; RV32ZVE32F-NEXT: lw a1, 196(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 56(a0) +; RV32ZVE32F-NEXT: lw a1, 200(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 52(a0) +; RV32ZVE32F-NEXT: lw a1, 204(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 48(a0) +; RV32ZVE32F-NEXT: lw a1, 212(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 44(a0) +; RV32ZVE32F-NEXT: lw a1, 224(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 40(a0) +; RV32ZVE32F-NEXT: lw a1, 232(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 36(a0) +; RV32ZVE32F-NEXT: lw a1, 240(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 32(a0) +; RV32ZVE32F-NEXT: addi sp, s0, -512 +; RV32ZVE32F-NEXT: lw ra, 508(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s0, 504(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s2, 500(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s3, 496(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s4, 492(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s5, 488(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s6, 484(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s7, 480(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s8, 476(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s9, 472(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s10, 468(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s11, 464(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: addi sp, sp, 512 +; RV32ZVE32F-NEXT: ret +; +; RV64ZVE32F-LABEL: mgather_strided_split: +; RV64ZVE32F: # %bb.0: +; RV64ZVE32F-NEXT: addi sp, sp, -144 +; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 144 +; RV64ZVE32F-NEXT: sd ra, 136(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s0, 128(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s1, 120(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s2, 112(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s3, 104(sp) # 8-byte Folded 
Spill +; RV64ZVE32F-NEXT: sd s4, 96(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s5, 88(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s6, 80(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s7, 72(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s8, 64(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s9, 56(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s10, 48(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s11, 40(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: .cfi_offset ra, -8 +; RV64ZVE32F-NEXT: .cfi_offset s0, -16 +; RV64ZVE32F-NEXT: .cfi_offset s1, -24 +; RV64ZVE32F-NEXT: .cfi_offset s2, -32 +; RV64ZVE32F-NEXT: .cfi_offset s3, -40 +; RV64ZVE32F-NEXT: .cfi_offset s4, -48 +; RV64ZVE32F-NEXT: .cfi_offset s5, -56 +; RV64ZVE32F-NEXT: .cfi_offset s6, -64 +; RV64ZVE32F-NEXT: .cfi_offset s7, -72 +; RV64ZVE32F-NEXT: .cfi_offset s8, -80 +; RV64ZVE32F-NEXT: .cfi_offset s9, -88 +; RV64ZVE32F-NEXT: .cfi_offset s10, -96 +; RV64ZVE32F-NEXT: .cfi_offset s11, -104 +; RV64ZVE32F-NEXT: ld a2, 0(a1) +; RV64ZVE32F-NEXT: sd a2, 32(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: ld a2, 16(a1) +; RV64ZVE32F-NEXT: sd a2, 24(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: ld a2, 32(a1) +; RV64ZVE32F-NEXT: sd a2, 16(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: ld a2, 48(a1) +; RV64ZVE32F-NEXT: sd a2, 8(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: ld a2, 64(a1) +; RV64ZVE32F-NEXT: sd a2, 0(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: ld a7, 80(a1) +; RV64ZVE32F-NEXT: ld t0, 96(a1) +; RV64ZVE32F-NEXT: ld t1, 112(a1) +; RV64ZVE32F-NEXT: ld t2, 128(a1) +; RV64ZVE32F-NEXT: ld t3, 144(a1) +; RV64ZVE32F-NEXT: ld t4, 160(a1) +; RV64ZVE32F-NEXT: ld t5, 176(a1) +; RV64ZVE32F-NEXT: ld t6, 192(a1) +; RV64ZVE32F-NEXT: ld s0, 208(a1) +; RV64ZVE32F-NEXT: ld s1, 224(a1) +; RV64ZVE32F-NEXT: ld s2, 240(a1) +; RV64ZVE32F-NEXT: ld s3, 256(a1) +; RV64ZVE32F-NEXT: ld s4, 272(a1) +; RV64ZVE32F-NEXT: ld s5, 288(a1) +; RV64ZVE32F-NEXT: ld s6, 304(a1) +; RV64ZVE32F-NEXT: ld s7, 320(a1) +; 
RV64ZVE32F-NEXT: ld s8, 336(a1) +; RV64ZVE32F-NEXT: ld s9, 352(a1) +; RV64ZVE32F-NEXT: ld s10, 368(a1) +; RV64ZVE32F-NEXT: ld s11, 384(a1) +; RV64ZVE32F-NEXT: ld ra, 400(a1) +; RV64ZVE32F-NEXT: ld a6, 416(a1) +; RV64ZVE32F-NEXT: ld a5, 432(a1) +; RV64ZVE32F-NEXT: ld a2, 496(a1) +; RV64ZVE32F-NEXT: ld a3, 480(a1) +; RV64ZVE32F-NEXT: ld a4, 464(a1) +; RV64ZVE32F-NEXT: ld a1, 448(a1) +; RV64ZVE32F-NEXT: sd a2, 248(a0) +; RV64ZVE32F-NEXT: sd a3, 240(a0) +; RV64ZVE32F-NEXT: sd a4, 232(a0) +; RV64ZVE32F-NEXT: sd a1, 224(a0) +; RV64ZVE32F-NEXT: sd a5, 216(a0) +; RV64ZVE32F-NEXT: sd a6, 208(a0) +; RV64ZVE32F-NEXT: sd ra, 200(a0) +; RV64ZVE32F-NEXT: sd s11, 192(a0) +; RV64ZVE32F-NEXT: sd s10, 184(a0) +; RV64ZVE32F-NEXT: sd s9, 176(a0) +; RV64ZVE32F-NEXT: sd s8, 168(a0) +; RV64ZVE32F-NEXT: sd s7, 160(a0) +; RV64ZVE32F-NEXT: sd s6, 152(a0) +; RV64ZVE32F-NEXT: sd s5, 144(a0) +; RV64ZVE32F-NEXT: sd s4, 136(a0) +; RV64ZVE32F-NEXT: sd s3, 128(a0) +; RV64ZVE32F-NEXT: sd s2, 120(a0) +; RV64ZVE32F-NEXT: sd s1, 112(a0) +; RV64ZVE32F-NEXT: sd s0, 104(a0) +; RV64ZVE32F-NEXT: sd t6, 96(a0) +; RV64ZVE32F-NEXT: sd t5, 88(a0) +; RV64ZVE32F-NEXT: sd t4, 80(a0) +; RV64ZVE32F-NEXT: sd t3, 72(a0) +; RV64ZVE32F-NEXT: sd t2, 64(a0) +; RV64ZVE32F-NEXT: sd t1, 56(a0) +; RV64ZVE32F-NEXT: sd t0, 48(a0) +; RV64ZVE32F-NEXT: sd a7, 40(a0) +; RV64ZVE32F-NEXT: ld a1, 0(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: sd a1, 32(a0) +; RV64ZVE32F-NEXT: ld a1, 8(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: sd a1, 24(a0) +; RV64ZVE32F-NEXT: ld a1, 16(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: sd a1, 16(a0) +; RV64ZVE32F-NEXT: ld a1, 24(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: sd a1, 8(a0) +; RV64ZVE32F-NEXT: ld a1, 32(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: sd a1, 0(a0) +; RV64ZVE32F-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s0, 128(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s1, 120(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s2, 112(sp) # 8-byte Folded 
Reload +; RV64ZVE32F-NEXT: ld s3, 104(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s4, 96(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s5, 88(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s6, 80(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s7, 72(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s8, 64(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s9, 56(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s10, 48(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s11, 40(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: addi sp, sp, 144 +; RV64ZVE32F-NEXT: ret + %ptrs = getelementptr inbounds i64, ptr %base, <32 x i64> + %x = call <32 x i64> @llvm.masked.gather.v32i64.v32p0(<32 x ptr> %ptrs, i32 8, <32 x i1> shufflevector(<32 x i1> insertelement(<32 x i1> poison, i1 true, i32 0), <32 x i1> poison, <32 x i32> zeroinitializer), <32 x i64> poison) + ret <32 x i64> %x +} + ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; RV64: {{.*}} From c74afe6f52e9db0c5acd90b78a232176ad460b5e Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 20 Feb 2024 11:08:01 +0100 Subject: [PATCH 38/54] [InstCombine] Add unsigned variants of gep exact div tests (NFC) (cherry picked from commit ec2c770b9f9a0e9eca4a893383d2b27dd4c0bfe7) --- .../Transforms/InstCombine/getelementptr.ll | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll index 642c3eb2a0e41..e6a3b759cf78d 100644 --- a/llvm/test/Transforms/InstCombine/getelementptr.ll +++ b/llvm/test/Transforms/InstCombine/getelementptr.ll @@ -116,6 +116,7 @@ define void @test_overaligned_vec(i8 %B) { ; CHECK-LABEL: @test_overaligned_vec( ; CHECK-NEXT: store i8 [[B:%.*]], ptr getelementptr inbounds ([10 x i8], ptr @Global, i64 0, i64 2), align 1 ; CHECK-NEXT: ret void +; %A = getelementptr <2 x half>, ptr @Global, i64 0, i64 1 store i8 %B, ptr %A ret void @@ 
-1473,6 +1474,17 @@ define ptr @gep_sdiv(ptr %p, i64 %off) { ret ptr %ptr } +define ptr @gep_udiv(ptr %p, i64 %off) { +; CHECK-LABEL: @gep_udiv( +; CHECK-NEXT: [[INDEX:%.*]] = udiv exact i64 [[OFF:%.*]], 7 +; CHECK-NEXT: [[PTR:%.*]] = getelementptr [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 [[INDEX]] +; CHECK-NEXT: ret ptr [[PTR]] +; + %index = udiv exact i64 %off, 7 + %ptr = getelementptr %struct.C, ptr %p, i64 %index + ret ptr %ptr +} + define <2 x ptr> @gep_sdiv_vec(<2 x ptr> %p, <2 x i64> %off) { ; CHECK-LABEL: @gep_sdiv_vec( ; CHECK-NEXT: [[PTR:%.*]] = getelementptr i8, <2 x ptr> [[P:%.*]], <2 x i64> [[OFF:%.*]] @@ -1503,6 +1515,17 @@ define ptr @gep_ashr(ptr %p, i64 %off) { ret ptr %ptr } +define ptr @gep_lshr(ptr %p, i64 %off) { +; CHECK-LABEL: @gep_lshr( +; CHECK-NEXT: [[INDEX:%.*]] = lshr exact i64 [[OFF:%.*]], 2 +; CHECK-NEXT: [[PTR:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[INDEX]] +; CHECK-NEXT: ret ptr [[PTR]] +; + %index = lshr exact i64 %off, 2 + %ptr = getelementptr i32, ptr %p, i64 %index + ret ptr %ptr +} + ; Negative tests define ptr @gep_i8(ptr %p, i64 %off) { @@ -1525,6 +1548,17 @@ define ptr @gep_sdiv_mismatched_size(ptr %p, i64 %off) { ret ptr %ptr } +define ptr @gep_udiv_mismatched_size(ptr %p, i64 %off) { +; CHECK-LABEL: @gep_udiv_mismatched_size( +; CHECK-NEXT: [[INDEX:%.*]] = udiv exact i64 [[OFF:%.*]], 20 +; CHECK-NEXT: [[PTR:%.*]] = getelementptr [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 [[INDEX]] +; CHECK-NEXT: ret ptr [[PTR]] +; + %index = udiv exact i64 %off, 20 + %ptr = getelementptr %struct.C, ptr %p, i64 %index + ret ptr %ptr +} + define ptr @gep_sdiv_without_exact(ptr %p, i64 %off) { ; CHECK-LABEL: @gep_sdiv_without_exact( ; CHECK-NEXT: [[INDEX:%.*]] = sdiv i64 [[OFF:%.*]], 7 @@ -1536,6 +1570,17 @@ define ptr @gep_sdiv_without_exact(ptr %p, i64 %off) { ret ptr %ptr } +define ptr @gep_udiv_without_exact(ptr %p, i64 %off) { +; CHECK-LABEL: @gep_udiv_without_exact( +; CHECK-NEXT: [[INDEX:%.*]] = udiv i64 [[OFF:%.*]], 7 +; CHECK-NEXT: 
[[PTR:%.*]] = getelementptr [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 [[INDEX]] +; CHECK-NEXT: ret ptr [[PTR]] +; + %index = udiv i64 %off, 7 + %ptr = getelementptr %struct.C, ptr %p, i64 %index + ret ptr %ptr +} + define ptr @gep_ashr_without_exact(ptr %p, i64 %off) { ; CHECK-LABEL: @gep_ashr_without_exact( ; CHECK-NEXT: [[INDEX:%.*]] = ashr i64 [[OFF:%.*]], 2 @@ -1547,6 +1592,17 @@ define ptr @gep_ashr_without_exact(ptr %p, i64 %off) { ret ptr %ptr } +define ptr @gep_lshr_without_exact(ptr %p, i64 %off) { +; CHECK-LABEL: @gep_lshr_without_exact( +; CHECK-NEXT: [[INDEX:%.*]] = lshr i64 [[OFF:%.*]], 2 +; CHECK-NEXT: [[PTR:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[INDEX]] +; CHECK-NEXT: ret ptr [[PTR]] +; + %index = lshr i64 %off, 2 + %ptr = getelementptr i32, ptr %p, i64 %index + ret ptr %ptr +} + define i1 @test_only_used_by_icmp(ptr %a, ptr %b, ptr %c) { ; CHECK-LABEL: @test_only_used_by_icmp( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[B:%.*]], [[C:%.*]] From ebc589e44ffe7b77cc500f3d2dc1a7ba11dd82b1 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 20 Feb 2024 12:48:13 +0100 Subject: [PATCH 39/54] [InstCombine] Fold gep of exact unsigned division (#82334) Extend the transform added in https://github.com/llvm/llvm-project/pull/76458 to also handle unsigned division. X exact/ Y * Y == X holds independently of whether the division is signed or unsigned. 
Proofs: https://alive2.llvm.org/ce/z/wFd5Ec (cherry picked from commit 26d4afc3de86ca5416c8e38000362c526b6808cd) --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 6 +++--- llvm/test/Transforms/InstCombine/getelementptr.ll | 6 ++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 249f4a7710e04..5d207dcfd18dd 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2594,10 +2594,10 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { Value *V; if ((has_single_bit(TyAllocSize) && match(GEP.getOperand(1), - m_Exact(m_AShr(m_Value(V), - m_SpecificInt(countr_zero(TyAllocSize)))))) || + m_Exact(m_Shr(m_Value(V), + m_SpecificInt(countr_zero(TyAllocSize)))))) || match(GEP.getOperand(1), - m_Exact(m_SDiv(m_Value(V), m_SpecificInt(TyAllocSize))))) { + m_Exact(m_IDiv(m_Value(V), m_SpecificInt(TyAllocSize))))) { GetElementPtrInst *NewGEP = GetElementPtrInst::Create( Builder.getInt8Ty(), GEP.getPointerOperand(), V); NewGEP->setIsInBounds(GEP.isInBounds()); diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll index e6a3b759cf78d..c90b6c9fb2959 100644 --- a/llvm/test/Transforms/InstCombine/getelementptr.ll +++ b/llvm/test/Transforms/InstCombine/getelementptr.ll @@ -1476,8 +1476,7 @@ define ptr @gep_sdiv(ptr %p, i64 %off) { define ptr @gep_udiv(ptr %p, i64 %off) { ; CHECK-LABEL: @gep_udiv( -; CHECK-NEXT: [[INDEX:%.*]] = udiv exact i64 [[OFF:%.*]], 7 -; CHECK-NEXT: [[PTR:%.*]] = getelementptr [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[PTR:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[OFF:%.*]] ; CHECK-NEXT: ret ptr [[PTR]] ; %index = udiv exact i64 %off, 7 @@ -1517,8 +1516,7 @@ define ptr @gep_ashr(ptr %p, i64 %off) { define ptr @gep_lshr(ptr 
%p, i64 %off) { ; CHECK-LABEL: @gep_lshr( -; CHECK-NEXT: [[INDEX:%.*]] = lshr exact i64 [[OFF:%.*]], 2 -; CHECK-NEXT: [[PTR:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[PTR:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[OFF:%.*]] ; CHECK-NEXT: ret ptr [[PTR]] ; %index = lshr exact i64 %off, 2 From 801a10d3058a44b257236edfa08748c7f7ddbccb Mon Sep 17 00:00:00 2001 From: Xing Xue Date: Tue, 20 Feb 2024 12:08:37 -0500 Subject: [PATCH 40/54] [OpenMP][AIX]Add assembly file containing microtasking routines and unnamed common block definitions (#81770) This patch adds assembly file `z_AIX_asm.S` that contains the 32- and 64-bit XCOFF version of microtasking routines and unnamed common block definitions. This code has been run through the libomp LIT tests and a user package successfully. (cherry picked from commit 94100bc2fb1a39dbeb43d18a95176097c53f1324) --- openmp/runtime/src/z_AIX_asm.S | 410 +++++++++++++++++++++++++++++++++ 1 file changed, 410 insertions(+) create mode 100644 openmp/runtime/src/z_AIX_asm.S diff --git a/openmp/runtime/src/z_AIX_asm.S b/openmp/runtime/src/z_AIX_asm.S new file mode 100644 index 0000000000000..d711fcb7a7854 --- /dev/null +++ b/openmp/runtime/src/z_AIX_asm.S @@ -0,0 +1,410 @@ +// z_AIX_asm.S: - microtasking routines specifically +// written for Power platforms running AIX OS + +// +////===----------------------------------------------------------------------===// +//// +//// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +//// See https://llvm.org/LICENSE.txt for license information. 
+//// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +//// +////===----------------------------------------------------------------------===// +// + +// ----------------------------------------------------------------------- +// macros +// ----------------------------------------------------------------------- + +#include "kmp_config.h" + +#if KMP_OS_AIX +//------------------------------------------------------------------------ +// int +// __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...), +// int gtid, int tid, +// int argc, void *p_argv[] +// #if OMPT_SUPPORT +// , +// void **exit_frame_ptr +// #endif +// ) { +// #if OMPT_SUPPORT +// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); +// #endif +// +// (*pkfn)( & gtid, & tid, p_argv[0], ... ); +// +// // FIXME: This is done at call-site and can be removed here. +// #if OMPT_SUPPORT +// *exit_frame_ptr = 0; +// #endif +// +// return 1; +// } +// +// parameters: +// r3: pkfn +// r4: gtid +// r5: tid +// r6: argc +// r7: p_argv +// r8: &exit_frame +// +// return: r3 (always 1/TRUE) +// + +#if KMP_ARCH_PPC64_XCOFF + + .globl __kmp_invoke_microtask[DS] + .globl .__kmp_invoke_microtask + .align 4 + .csect __kmp_invoke_microtask[DS],3 + .vbyte 8, .__kmp_invoke_microtask + .vbyte 8, TOC[TC0] + .vbyte 8, 0 + .csect .text[PR],2 + .machine "pwr7" +.__kmp_invoke_microtask: + + +// -- Begin __kmp_invoke_microtask +// mark_begin; + +// We need to allocate a stack frame large enough to hold all of the parameters +// on the stack for the microtask plus what this function needs. That's 48 +// bytes under the XCOFF64 ABI, plus max(64, 8*(2 + argc)) for +// the parameters to the microtask (gtid, tid, argc elements of p_argv), +// plus 8 bytes to store the values of r4 and r5, and 8 bytes to store r31. +// With OMP-T support, we need an additional 8 bytes to save r30 to hold +// a copy of r8. 
+// Stack offsets relative to stack pointer: +// r31: -8, r30: -16, gtid: -20, tid: -24 + + mflr 0 + std 31, -8(1) # Save r31 to the stack + std 0, 16(1) # Save LR to the linkage area + +// This is unusual because normally we'd set r31 equal to r1 after the stack +// frame is established. In this case, however, we need to dynamically compute +// the stack frame size, and so we keep a direct copy of r1 to access our +// register save areas and restore the r1 value before returning. + mr 31, 1 + +// Compute the size of the "argc" portion of the parameter save area. +// The parameter save area is always at least 64 bytes long (i.e. 8 regs) +// The microtask has (2 + argc) parameters, so if argc <= 6, we need to +// to allocate 8*6 bytes, not 8*argc. + li 0, 6 + cmpwi 0, 6, 6 + iselgt 0, 6, 0 # r0 = (argc > 6)? argc : 6 + sldi 0, 0, 3 # r0 = 8 * max(argc, 6) + +// Compute the size necessary for the local stack frame. +// 88 = 48 + 4 (for r4) + 4 (for r5) + 8 (for r31) + 8 (for OMP-T r30) + +// 8 (parameter gtid) + 8 (parameter tid) + li 12, 88 + add 12, 0, 12 + neg 12, 12 + +// We need to make sure that the stack frame stays aligned (to 16 bytes). + li 0, -16 + and 12, 0, 12 + +// Establish the local stack frame. + stdux 1, 1, 12 + +#if OMPT_SUPPORT + std 30, -16(31) # Save r30 to the stack + std 1, 0(8) + mr 30, 8 +#endif + +// Store gtid and tid to the stack because they're passed by reference to the microtask. 
+ stw 4, -20(31) # Save gtid to the stack + stw 5, -24(31) # Save tid to the stack + + mr 12, 6 # r12 = argc + mr 4, 7 # r4 = p_argv + + cmpwi 0, 12, 1 + blt 0, .Lcall # if (argc < 1) goto .Lcall + + ld 5, 0(4) # r5 = p_argv[0] + + cmpwi 0, 12, 2 + blt 0, .Lcall # if (argc < 2) goto .Lcall + + ld 6, 8(4) # r6 = p_argv[1] + + cmpwi 0, 12, 3 + blt 0, .Lcall # if (argc < 3) goto .Lcall + + ld 7, 16(4) # r7 = p_argv[2] + + cmpwi 0, 12, 4 + blt 0, .Lcall # if (argc < 4) goto .Lcall + + ld 8, 24(4) # r8 = p_argv[3] + + cmpwi 0, 12, 5 + blt 0, .Lcall # if (argc < 5) goto .Lcall + + ld 9, 32(4) # r9 = p_argv[4] + + cmpwi 0, 12, 6 + blt 0, .Lcall # if (argc < 6) goto .Lcall + + ld 10, 40(4) # r10 = p_argv[5] + + cmpwi 0, 12, 7 + blt 0, .Lcall # if (argc < 7) goto .Lcall + +// There are more than 6 microtask parameters, so we need to store the +// remainder to the stack. + addi 12, 12, -6 # argc -= 6 + mtctr 12 + +// These are set to 8 bytes before the first desired store address (we're using +// pre-increment loads and stores in the loop below). The parameter save area +// for the microtask begins 48 + 8*8 == 112 bytes above r1 for XCOFF64. + addi 4, 4, 40 # p_argv = p_argv + 5 + # (i.e. skip the 5 elements we already processed) + addi 12, 1, 104 # r12 = stack offset (112 - 8) + +.Lnext: + ldu 0, 8(4) + stdu 0, 8(12) + bdnz .Lnext + +.Lcall: + std 2, 40(1) # Save the TOC pointer to the linkage area +// Load the actual function address from the function descriptor. 
+ ld 12, 0(3) # Function address + ld 2, 8(3) # TOC pointer + ld 11, 16(3) # Environment pointer + + addi 3, 31, -20 # r3 = &gtid + addi 4, 31, -24 # r4 = &tid + + mtctr 12 # CTR = function address + bctrl # Branch to CTR + ld 2, 40(1) # Restore TOC pointer from linkage area + +#if OMPT_SUPPORT + li 3, 0 + std 3, 0(30) +#endif + + li 3, 1 + +#if OMPT_SUPPORT + ld 30, -16(31) # Restore r30 from the saved value on the stack +#endif + + mr 1, 31 + ld 31, -8(1) # Restore r31 from the saved value on the stack + ld 0, 16(1) + mtlr 0 # Restore LR from the linkage area + blr # Branch to LR + +#else // KMP_ARCH_PPC_XCOFF + + .globl __kmp_invoke_microtask[DS] + .globl .__kmp_invoke_microtask + .align 4 + .csect __kmp_invoke_microtask[DS],2 + .vbyte 4, .__kmp_invoke_microtask + .vbyte 4, TOC[TC0] + .vbyte 4, 0 + .csect .text[PR],2 + .machine "pwr7" +.__kmp_invoke_microtask: + + +// -- Begin __kmp_invoke_microtask +// mark_begin; + +// We need to allocate a stack frame large enough to hold all of the parameters +// on the stack for the microtask plus what this function needs. That's 24 +// bytes under the XCOFF ABI, plus max(32, 8*(2 + argc)) for +// the parameters to the microtask (gtid, tid, argc elements of p_argv), +// plus 8 bytes to store the values of r4 and r5, and 4 bytes to store r31. +// With OMP-T support, we need an additional 4 bytes to save r30 to hold +// a copy of r8. +// Stack offsets relative to stack pointer: +// r31: -4, r30: -8, gtid: -12, tid: -16 + + mflr 0 + stw 31, -4(1) # Save r31 to the stack + stw 0, 8(1) # Save LR to the linkage area + +// This is unusual because normally we'd set r31 equal to r1 after the stack +// frame is established. In this case, however, we need to dynamically compute +// the stack frame size, and so we keep a direct copy of r1 to access our +// register save areas and restore the r1 value before returning. + mr 31, 1 + +// Compute the size of the "argc" portion of the parameter save area.
+// The parameter save area is always at least 32 bytes long (i.e. 8 regs) +// The microtask has (2 + argc) parameters, so if argc <= 6, we need to +// to allocate 4*6 bytes, not 4*argc. + li 0, 6 + cmpwi 0, 6, 6 + iselgt 0, 6, 0 # r0 = (argc > 6)? argc : 6 + slwi 0, 0, 2 # r0 = 4 * max(argc, 6) + +// Compute the size necessary for the local stack frame. +// 56 = 32 + 4 (for r4) + 4 (for r5) + 4 (for r31) + 4 (for OMP-T r30) + +// 4 (parameter gtid) + 4 (parameter tid) + li 12, 56 + add 12, 0, 12 + neg 12, 12 + +// We need to make sure that the stack frame stays aligned (to 16 bytes). + li 0, -16 + and 12, 0, 12 + +// Establish the local stack frame. + stwux 1, 1, 12 + +#if OMPT_SUPPORT + stw 30, -8(31) # Save r30 to the stack + stw 1, 0(8) + mr 30, 8 +#endif + +// Store gtid and tid to the stack because they're passed by reference to the microtask. + stw 4, -12(31) # Save gtid to the stack + stw 5, -16(31) # Save tid to the stack + + mr 12, 6 # r12 = argc + mr 4, 7 # r4 = p_argv + + cmpwi 0, 12, 1 + blt 0, .Lcall # if (argc < 1) goto .Lcall + + lwz 5, 0(4) # r5 = p_argv[0] + + cmpwi 0, 12, 2 + blt 0, .Lcall # if (argc < 2) goto .Lcall + + lwz 6, 4(4) # r6 = p_argv[1] + + cmpwi 0, 12, 3 + blt 0, .Lcall # if (argc < 3) goto .Lcall + + lwz 7, 8(4) # r7 = p_argv[2] + + cmpwi 0, 12, 4 + blt 0, .Lcall # if (argc < 4) goto .Lcall + + lwz 8, 12(4) # r8 = p_argv[3] + + cmpwi 0, 12, 5 + blt 0, .Lcall # if (argc < 5) goto .Lcall + + lwz 9, 16(4) # r9 = p_argv[4] + + cmpwi 0, 12, 6 + blt 0, .Lcall # if (argc < 6) goto .Lcall + + lwz 10, 20(4) # r10 = p_argv[5] + + cmpwi 0, 12, 7 + blt 0, .Lcall # if (argc < 7) goto .Lcall + +// There are more than 6 microtask parameters, so we need to store the +// remainder to the stack. + addi 12, 12, -6 # argc -= 6 + mtctr 12 + +// These are set to 4 bytes before the first desired store address (we're using +// pre-increment loads and stores in the loop below). 
The parameter save area +// for the microtask begins 24 + 4*8 == 56 bytes above r1 for XCOFF. + addi 4, 4, 20 # p_argv = p_argv + 5 + # (i.e. skip the 5 elements we already processed) + addi 12, 1, 52 # r12 = stack offset (56 - 4) + +.Lnext: + lwzu 0, 4(4) + stwu 0, 4(12) + bdnz .Lnext + +.Lcall: + stw 2, 20(1) # Save the TOC pointer to the linkage area +// Load the actual function address from the function descriptor. + lwz 12, 0(3) # Function address + lwz 2, 4(3) # TOC pointer + lwz 11, 8(3) # Environment pointer + + addi 3, 31, -12 # r3 = &gtid + addi 4, 31, -16 # r4 = &tid + + mtctr 12 # CTR = function address + bctrl # Branch to CTR + lwz 2, 20(1) # Restore TOC pointer from linkage area + +#if OMPT_SUPPORT + li 3, 0 + stw 3, 0(30) +#endif + + li 3, 1 + +#if OMPT_SUPPORT + lwz 30, -8(31) # Restore r30 from the saved value on the stack +#endif + + mr 1, 31 + lwz 31, -4(1) # Restore r31 from the saved value on the stack + lwz 0, 8(1) + mtlr 0 # Restore LR from the linkage area + blr # Branch to LR + +#endif // KMP_ARCH_PPC64_XCOFF + +.Lfunc_end0: + .vbyte 4, 0x00000000 # Traceback table begin + .byte 0x00 # Version = 0 + .byte 0x09 # Language = CPlusPlus + .byte 0x20 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue + # +HasTraceBackTableOffset, -IsInternalProcedure + # -HasControlledStorage, -IsTOCless + # -IsFloatingPointPresent + # -IsFloatingPointOperationLogOrAbortEnabled + .byte 0x61 # -IsInterruptHandler, +IsFunctionNamePresent, +IsAllocaUsed + # OnConditionDirective = 0, -IsCRSaved, +IsLRSaved + .byte 0x80 # +IsBackChainStored, -IsFixup, NumOfFPRsSaved = 0 +#if OMPT_SUPPORT + .byte 0x02 # -HasExtensionTable, -HasVectorInfo, NumOfGPRsSaved = 2 + .byte 0x06 # NumberOfFixedParms = 6 +#else + .byte 0x01 # -HasExtensionTable, -HasVectorInfo, NumOfGPRsSaved = 1 + .byte 0x05 # NumberOfFixedParms = 5 +#endif + .byte 0x01 # NumberOfFPParms = 0, +HasParmsOnStack + .vbyte 4, 0x00000000 # Parameter type = i, i, i, i, i + .vbyte 4, .Lfunc_end0-.__kmp_invoke_microtask #
Function size + .vbyte 2, 0x0016 # Function name len = 22 + .byte "__kmp_invoke_microtask" # Function Name + .byte 0x1f # AllocaRegister = 31 + # -- End function + +// -- End __kmp_invoke_microtask + +// Support for unnamed common blocks. + + .comm .gomp_critical_user_, 32, 3 +#if KMP_ARCH_PPC64_XCOFF + .csect __kmp_unnamed_critical_addr[RW],3 +#else + .csect __kmp_unnamed_critical_addr[RW],2 +#endif + .globl __kmp_unnamed_critical_addr[RW] + .ptr .gomp_critical_user_ + +// -- End unnamed common block + + .toc + +#endif // KMP_OS_AIX From 3af6881cab6c98c304eed112f2153f9bbe67b5ee Mon Sep 17 00:00:00 2001 From: Frederic Cambus Date: Tue, 20 Feb 2024 09:12:58 +0100 Subject: [PATCH 41/54] [llvm-readobj] Add support for the PT_OPENBSD_SYSCALLS segment type. (#82122) Reference: https://github.com/openbsd/src/blob/master/sys/sys/exec_elf.h (cherry picked from commit a8d7511811c7d7c689c3e8f858e8e00a56aba152) --- .../llvm-readobj/ELF/program-headers.test | 254 ++++++++++-------- llvm/tools/llvm-readobj/ELFDumper.cpp | 1 + 2 files changed, 138 insertions(+), 117 deletions(-) diff --git a/llvm/test/tools/llvm-readobj/ELF/program-headers.test b/llvm/test/tools/llvm-readobj/ELF/program-headers.test index 702a06b6403f0..856cf378ddad9 100644 --- a/llvm/test/tools/llvm-readobj/ELF/program-headers.test +++ b/llvm/test/tools/llvm-readobj/ELF/program-headers.test @@ -29,68 +29,70 @@ # RUN: FileCheck %s --check-prefixes=ELF64,MAPPING --strict-whitespace --match-full-lines # RUN: llvm-readobj -l %t64.elf | FileCheck %s --check-prefixes=ELF-LLVM,ELF64-LLVM -# ELF32:There are 25 program headers, starting at offset 52 +# ELF32:There are 26 program headers, starting at offset 52 # ELF32-EMPTY: # ELF32-NEXT:Program Headers: # ELF32-NEXT: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align -# ELF32-NEXT: PHDR 0x000354 0x00001000 0x00001000 0x00003 0x00003 W 0x1 -# ELF32-NEXT: PHDR 0x000357 0x00002000 0x00002000 0x00007 0x00007 E 0x1 -# ELF32-NEXT: NULL 0x000357 0x00002000 0x00002000 
0x00007 0x00007 E 0x1 -# ELF32-NEXT: DYNAMIC 0x000354 0x00001000 0x00001000 0x00003 0x00003 RWE 0x1 -# ELF32-NEXT: INTERP 0x00035e 0x00003000 0x00003000 0x00004 0x00004 RW 0x1 +# ELF32-NEXT: PHDR 0x000374 0x00001000 0x00001000 0x00003 0x00003 W 0x1 +# ELF32-NEXT: PHDR 0x000377 0x00002000 0x00002000 0x00007 0x00007 E 0x1 +# ELF32-NEXT: NULL 0x000377 0x00002000 0x00002000 0x00007 0x00007 E 0x1 +# ELF32-NEXT: DYNAMIC 0x000374 0x00001000 0x00001000 0x00003 0x00003 RWE 0x1 +# ELF32-NEXT: INTERP 0x00037e 0x00003000 0x00003000 0x00004 0x00004 RW 0x1 # ELF32-NEXT: [Requesting program interpreter: ABC] -# ELF32-NEXT: NOTE 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: SHLIB 0x000354 0x00001000 0x00001000 0x00001 0x00001 0x1 -# ELF32-NEXT: TLS 0x000362 0x00004000 0x00004000 0x00001 0x00001 0x1 -# ELF32-NEXT: : 0x60000000 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: GNU_EH_FRAME 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: SUNW_UNWIND 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: GNU_STACK 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: GNU_RELRO 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: GNU_PROPERTY 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: OPENBSD_MUTABLE 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: OPENBSD_RANDOMIZE 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: OPENBSD_WXNEEDED 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: OPENBSD_NOBTCFI 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: OPENBSD_BOOTDATA 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: : 0x6fffffff 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: : 0x70000000 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: : 0x70000001 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: : 0x70000002 0x000354 0x00001000 0x00001000 0x00003 
0x00003 0x1 -# ELF32-NEXT: : 0x70000003 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: : 0x7fffffff 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: NOTE 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: SHLIB 0x000374 0x00001000 0x00001000 0x00001 0x00001 0x1 +# ELF32-NEXT: TLS 0x000382 0x00004000 0x00004000 0x00001 0x00001 0x1 +# ELF32-NEXT: : 0x60000000 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: GNU_EH_FRAME 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: SUNW_UNWIND 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: GNU_STACK 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: GNU_RELRO 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: GNU_PROPERTY 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: OPENBSD_MUTABLE 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: OPENBSD_RANDOMIZE 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: OPENBSD_WXNEEDED 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: OPENBSD_NOBTCFI 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: OPENBSD_SYSCALLS 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: OPENBSD_BOOTDATA 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: : 0x6fffffff 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: : 0x70000000 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: : 0x70000001 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: : 0x70000002 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: : 0x70000003 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: : 0x7fffffff 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 # ELF32-EMPTY: -# ELF64:There are 25 program headers, starting at offset 64 +# ELF64:There are 26 program headers, starting at offset 64 # ELF64-EMPTY: # ELF64-NEXT:Program 
Headers: # ELF64-NEXT: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align -# ELF64-NEXT: PHDR 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 W 0x1 -# ELF64-NEXT: PHDR 0x0005bb 0x0000000000002000 0x0000000000002000 0x000007 0x000007 E 0x1 -# ELF64-NEXT: NULL 0x0005bb 0x0000000000002000 0x0000000000002000 0x000007 0x000007 E 0x1 -# ELF64-NEXT: DYNAMIC 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 RWE 0x1 -# ELF64-NEXT: INTERP 0x0005c2 0x0000000000003000 0x0000000000003000 0x000004 0x000004 RW 0x1 +# ELF64-NEXT: PHDR 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 W 0x1 +# ELF64-NEXT: PHDR 0x0005f3 0x0000000000002000 0x0000000000002000 0x000007 0x000007 E 0x1 +# ELF64-NEXT: NULL 0x0005f3 0x0000000000002000 0x0000000000002000 0x000007 0x000007 E 0x1 +# ELF64-NEXT: DYNAMIC 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 RWE 0x1 +# ELF64-NEXT: INTERP 0x0005fa 0x0000000000003000 0x0000000000003000 0x000004 0x000004 RW 0x1 # ELF64-NEXT: [Requesting program interpreter: ABC] -# ELF64-NEXT: NOTE 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: SHLIB 0x0005b8 0x0000000000001000 0x0000000000001000 0x000001 0x000001 0x1 -# ELF64-NEXT: TLS 0x0005c6 0x0000000000004000 0x0000000000004000 0x000001 0x000001 0x1 -# ELF64-NEXT: : 0x60000000 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: GNU_EH_FRAME 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: SUNW_UNWIND 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: GNU_STACK 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: GNU_RELRO 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: GNU_PROPERTY 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: OPENBSD_MUTABLE 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 
-# ELF64-NEXT: OPENBSD_RANDOMIZE 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: OPENBSD_WXNEEDED 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: OPENBSD_NOBTCFI 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: OPENBSD_BOOTDATA 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: : 0x6fffffff 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: : 0x70000000 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: : 0x70000001 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: : 0x70000002 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: : 0x70000003 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: : 0x7fffffff 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: NOTE 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: SHLIB 0x0005f0 0x0000000000001000 0x0000000000001000 0x000001 0x000001 0x1 +# ELF64-NEXT: TLS 0x0005fe 0x0000000000004000 0x0000000000004000 0x000001 0x000001 0x1 +# ELF64-NEXT: : 0x60000000 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: GNU_EH_FRAME 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: SUNW_UNWIND 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: GNU_STACK 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: GNU_RELRO 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: GNU_PROPERTY 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: OPENBSD_MUTABLE 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: OPENBSD_RANDOMIZE 0x0005f0 0x0000000000001000 
0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: OPENBSD_WXNEEDED 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: OPENBSD_NOBTCFI 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: OPENBSD_SYSCALLS 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: OPENBSD_BOOTDATA 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: : 0x6fffffff 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: : 0x70000000 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: : 0x70000001 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: : 0x70000002 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: : 0x70000003 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: : 0x7fffffff 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 # ELF64-EMPTY: # MAPPING: Section to Segment mapping: @@ -120,13 +122,14 @@ # MAPPING-NEXT: 22 .foo.begin .foo.end {{$}} # MAPPING-NEXT: 23 .foo.begin .foo.end {{$}} # MAPPING-NEXT: 24 .foo.begin .foo.end {{$}} +# MAPPING-NEXT: 25 .foo.begin .foo.end {{$}} # MAPPING-NEXT: None .unused .strtab .shstrtab {{$}} # ELF-LLVM: ProgramHeaders [ # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_PHDR (0x6) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -138,8 +141,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_PHDR (0x6) -# ELF32-LLVM-NEXT: Offset: 0x357 -# ELF64-LLVM-NEXT: Offset: 0x5BB +# ELF32-LLVM-NEXT: Offset: 0x377 +# ELF64-LLVM-NEXT: Offset: 0x5F3 # ELF-LLVM-NEXT: VirtualAddress: 0x2000 # ELF-LLVM-NEXT: PhysicalAddress: 
0x2000 # ELF-LLVM-NEXT: FileSize: 7 @@ -151,8 +154,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_NULL (0x0) -# ELF32-LLVM-NEXT: Offset: 0x357 -# ELF64-LLVM-NEXT: Offset: 0x5BB +# ELF32-LLVM-NEXT: Offset: 0x377 +# ELF64-LLVM-NEXT: Offset: 0x5F3 # ELF-LLVM-NEXT: VirtualAddress: 0x2000 # ELF-LLVM-NEXT: PhysicalAddress: 0x2000 # ELF-LLVM-NEXT: FileSize: 7 @@ -164,8 +167,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_DYNAMIC (0x2) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -179,8 +182,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_INTERP (0x3) -# ELF32-LLVM-NEXT: Offset: 0x35E -# ELF64-LLVM-NEXT: Offset: 0x5C2 +# ELF32-LLVM-NEXT: Offset: 0x37E +# ELF64-LLVM-NEXT: Offset: 0x5FA # ELF-LLVM-NEXT: VirtualAddress: 0x3000 # ELF-LLVM-NEXT: PhysicalAddress: 0x3000 # ELF-LLVM-NEXT: FileSize: 4 @@ -193,8 +196,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_NOTE (0x4) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -205,8 +208,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_SHLIB (0x5) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 1 @@ -217,8 +220,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_TLS (0x7) -# ELF32-LLVM-NEXT: Offset: 0x362 -# ELF64-LLVM-NEXT: Offset: 0x5C6 +# 
ELF32-LLVM-NEXT: Offset: 0x382 +# ELF64-LLVM-NEXT: Offset: 0x5FE # ELF-LLVM-NEXT: VirtualAddress: 0x4000 # ELF-LLVM-NEXT: PhysicalAddress: 0x4000 # ELF-LLVM-NEXT: FileSize: 1 @@ -229,8 +232,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: Unknown (0x60000000) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -241,8 +244,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_GNU_EH_FRAME (0x6474E550) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -253,8 +256,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_SUNW_UNWIND (0x6464E550) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -265,8 +268,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_GNU_STACK (0x6474E551) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -277,8 +280,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_GNU_RELRO (0x6474E552) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: 
FileSize: 3 @@ -289,8 +292,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_GNU_PROPERTY (0x6474E553) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -301,8 +304,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_OPENBSD_MUTABLE (0x65A3DBE5) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -313,8 +316,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_OPENBSD_RANDOMIZE (0x65A3DBE6) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -325,8 +328,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_OPENBSD_WXNEEDED (0x65A3DBE7) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -337,8 +340,20 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_OPENBSD_NOBTCFI (0x65A3DBE8) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 +# ELF-LLVM-NEXT: VirtualAddress: 0x1000 +# ELF-LLVM-NEXT: PhysicalAddress: 0x1000 +# ELF-LLVM-NEXT: FileSize: 3 +# ELF-LLVM-NEXT: MemSize: 3 +# ELF-LLVM-NEXT: Flags [ (0x0) +# ELF-LLVM-NEXT: ] +# ELF-LLVM-NEXT: Alignment: 1 +# 
ELF-LLVM-NEXT: } +# ELF-LLVM-NEXT: ProgramHeader { +# ELF-LLVM-NEXT: Type: PT_OPENBSD_SYSCALLS (0x65A3DBE9) +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -349,8 +364,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_OPENBSD_BOOTDATA (0x65A41BE6) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -361,8 +376,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: Unknown (0x6FFFFFFF) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -373,8 +388,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: Unknown (0x70000000) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -385,8 +400,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: Unknown (0x70000001) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -397,8 +412,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: Unknown (0x70000002) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # 
ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -409,8 +424,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: Unknown (0x70000003) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -421,8 +436,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: Unknown (0x7FFFFFFF) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -569,37 +584,42 @@ ProgramHeaders: VAddr: 0x1000 FirstSec: .foo.begin LastSec: .foo.end -## Case 19: the PT_OPENBSD_BOOTDATA segment. +## Case 19: the PT_OPENBSD_SYSCALLS segment. + - Type: 0x65a3dbe9 ## PT_OPENBSD_SYSCALLS + VAddr: 0x1000 + FirstSec: .foo.begin + LastSec: .foo.end +## Case 20: the PT_OPENBSD_BOOTDATA segment. - Type: 0x65a41be6 ## PT_OPENBSD_BOOTDATA VAddr: 0x1000 FirstSec: .foo.begin LastSec: .foo.end -## Case 20: the PT_HIOS segment. +## Case 21: the PT_HIOS segment. - Type: 0x6fffffff ## PT_HIOS VAddr: 0x1000 FirstSec: .foo.begin LastSec: .foo.end -## Case 21: the PT_LOPROC/PT_ARM_ARCHEXT/PT_MIPS_REGINFO segment. +## Case 22: the PT_LOPROC/PT_ARM_ARCHEXT/PT_MIPS_REGINFO segment. - Type: 0x70000000 ## PT_LOPROC/PT_ARM_ARCHEXT/PT_MIPS_REGINFO VAddr: 0x1000 FirstSec: .foo.begin LastSec: .foo.end -## Case 22: the PT_ARM_EXIDX/PT_MIPS_RTPROC segment. +## Case 23: the PT_ARM_EXIDX/PT_MIPS_RTPROC segment. - Type: 0x70000001 ## PT_ARM_EXIDX, PT_MIPS_RTPROC VAddr: 0x1000 FirstSec: .foo.begin LastSec: .foo.end -## Case 23: the PT_MIPS_OPTIONS segment. +## Case 24: the PT_MIPS_OPTIONS segment. 
- Type: 0x70000002 ## PT_MIPS_OPTIONS VAddr: 0x1000 FirstSec: .foo.begin LastSec: .foo.end -## Case 24: the PT_MIPS_ABIFLAGS/PT_RISCV_ATTRIBUTES segment. +## Case 25: the PT_MIPS_ABIFLAGS/PT_RISCV_ATTRIBUTES segment. - Type: 0x70000003 ## PT_MIPS_ABIFLAGS/PT_RISCV_ATTRIBUTES VAddr: 0x1000 FirstSec: .foo.begin LastSec: .foo.end -## Case 25: the PT_HIPROC segment. +## Case 26: the PT_HIPROC segment. - Type: 0x7fffffff ## PT_HIPROC VAddr: 0x1000 FirstSec: .foo.begin @@ -610,9 +630,9 @@ ProgramHeaders: # RUN: llvm-readelf --program-headers %tarm.elf | FileCheck %s --check-prefix=ARM-GNU # RUN: llvm-readobj --program-headers %tarm.elf | FileCheck %s --check-prefix=ARM-LLVM -# ARM-GNU: : 0x70000000 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ARM-GNU-NEXT: EXIDX 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ARM-GNU-NEXT: : 0x70000002 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ARM-GNU: : 0x70000000 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ARM-GNU-NEXT: EXIDX 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ARM-GNU-NEXT: : 0x70000002 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 # ARM-LLVM: ProgramHeader { # ARM-LLVM: Type: Unknown (0x70000000) @@ -626,10 +646,10 @@ ProgramHeaders: # RUN: llvm-readelf --program-headers %tmips.elf | FileCheck %s --check-prefix=MIPS-GNU # RUN: llvm-readobj --program-headers %tmips.elf | FileCheck %s --check-prefix=MIPS-LLVM -# MIPS-GNU: REGINFO 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# MIPS-GNU-NEXT: RTPROC 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# MIPS-GNU-NEXT: OPTIONS 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# MIPS-GNU-NEXT: ABIFLAGS 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# MIPS-GNU: REGINFO 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# 
MIPS-GNU-NEXT: RTPROC 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# MIPS-GNU-NEXT: OPTIONS 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# MIPS-GNU-NEXT: ABIFLAGS 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 # MIPS-LLVM: ProgramHeader { # MIPS-LLVM: Type: PT_MIPS_REGINFO (0x70000000) @@ -645,7 +665,7 @@ ProgramHeaders: # RUN: llvm-readelf --program-headers %triscv.elf | FileCheck %s --check-prefix=RISCV-GNU # RUN: llvm-readobj --program-headers %triscv.elf | FileCheck %s --check-prefix=RISCV-LLVM -# RISCV-GNU: ATTRIBUTES 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# RISCV-GNU: ATTRIBUTES 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 # RISCV-LLVM: ProgramHeader { # RISCV-LLVM: Type: PT_RISCV_ATTRIBUTES (0x70000003) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index f369a63add114..387124ad53e40 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1478,6 +1478,7 @@ static StringRef segmentTypeToString(unsigned Arch, unsigned Type) { LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_RANDOMIZE); LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_WXNEEDED); LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_NOBTCFI); + LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_SYSCALLS); LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_BOOTDATA); default: return ""; From 12114d2d5a66cf4cb656a6dd83bb17704f2261a0 Mon Sep 17 00:00:00 2001 From: Frederic Cambus Date: Tue, 20 Feb 2024 09:11:54 +0100 Subject: [PATCH 42/54] [llvm-objdump] Add support for the PT_OPENBSD_SYSCALLS segment type. 
(#82121) Reference: https://github.com/openbsd/src/blob/master/sys/sys/exec_elf.h (cherry picked from commit 1b894864862d8049e4a2567a472efdc2eda1e035) --- llvm/test/tools/llvm-objdump/openbsd-headers.test | 3 +++ llvm/tools/llvm-objdump/ELFDump.cpp | 3 +++ 2 files changed, 6 insertions(+) diff --git a/llvm/test/tools/llvm-objdump/openbsd-headers.test b/llvm/test/tools/llvm-objdump/openbsd-headers.test index f547854feeeed..84fa59bdf89f5 100644 --- a/llvm/test/tools/llvm-objdump/openbsd-headers.test +++ b/llvm/test/tools/llvm-objdump/openbsd-headers.test @@ -11,6 +11,8 @@ # CHECK-NEXT: filesz 0x0000000000000000 memsz 0x0000000000000000 flags --- # CHECK-NEXT: OPENBSD_NOBTCFI off 0x0000000000000000 vaddr 0x0000000000000000 paddr 0x0000000000000000 align 2**0 # CHECK-NEXT: filesz 0x0000000000000000 memsz 0x0000000000000000 flags --- +# CHECK-NEXT: OPENBSD_SYSCALLS off 0x0000000000000000 vaddr 0x0000000000000000 paddr 0x0000000000000000 align 2**0 +# CHECK-NEXT: filesz 0x0000000000000000 memsz 0x0000000000000000 flags --- # CHECK-NEXT: OPENBSD_BOOTDATA off 0x0000000000000000 vaddr 0x0000000000000000 paddr 0x0000000000000000 align 2**0 # CHECK-NEXT: filesz 0x0000000000000000 memsz 0x0000000000000000 flags --- @@ -25,4 +27,5 @@ ProgramHeaders: - Type: 0x65a3dbe6 ## PT_OPENBSD_RANDOMIZE - Type: 0x65a3dbe7 ## PT_OPENBSD_WXNEEDED - Type: 0x65a3dbe8 ## PT_OPENBSD_NOBTCFI + - Type: 0x65a3dbe9 ## PT_OPENBSD_SYSCALLS - Type: 0x65a41be6 ## PT_OPENBSD_BOOTDATA diff --git a/llvm/tools/llvm-objdump/ELFDump.cpp b/llvm/tools/llvm-objdump/ELFDump.cpp index 34861ee92128f..fda99bd6d33e1 100644 --- a/llvm/tools/llvm-objdump/ELFDump.cpp +++ b/llvm/tools/llvm-objdump/ELFDump.cpp @@ -291,6 +291,9 @@ template void ELFDumper::printProgramHeaders() { case ELF::PT_OPENBSD_RANDOMIZE: outs() << "OPENBSD_RANDOMIZE "; break; + case ELF::PT_OPENBSD_SYSCALLS: + outs() << "OPENBSD_SYSCALLS "; + break; case ELF::PT_OPENBSD_WXNEEDED: outs() << "OPENBSD_WXNEEDED "; break; From 
3b4b0476423e2340cf6ef370df3c98420014c97b Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Tue, 6 Feb 2024 18:37:31 +0800 Subject: [PATCH 43/54] [PowerPC] Mask constant operands in ValueBit tracking (#67653) In IR or C code, shift amount larger than value size is undefined behavior. But in practice, backend lowering for shift_parts produces add/sub of shift amounts, thus constant shift amounts might be negative or larger than value size, which depends on ISA definition. PowerPC ISA says, the lowest 7 bits (6 bits for 32-bit instruction) will be taken, and if the highest among them is 1, result will be zero, otherwise the low 6 bits (or 5 on 32-bit) are used as shift amount. This commit emulates the behavior and avoids array overflow in bit permutation's value bits calculator. (cherry picked from commit 292d9e869fcfc2ece694848db4022b0b939847e3) --- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 37 ++++-- llvm/test/CodeGen/PowerPC/pr59074.ll | 132 ++++++++++++++++++++ 2 files changed, 156 insertions(+), 13 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/pr59074.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 26ed74108ec36..18a4223d481ef 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1635,7 +1635,8 @@ class BitPermutationSelector { default: break; case ISD::ROTL: if (isa(V.getOperand(1))) { - unsigned RotAmt = V.getConstantOperandVal(1); + assert(isPowerOf2_32(NumBits) && "rotl bits should be power of 2!"); + unsigned RotAmt = V.getConstantOperandVal(1) & (NumBits - 1); const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; @@ -1648,15 +1649,20 @@ class BitPermutationSelector { case ISD::SHL: case PPCISD::SHL: if (isa(V.getOperand(1))) { - unsigned ShiftAmt = V.getConstantOperandVal(1); + // sld takes 7 bits, slw takes 6. 
+ unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1); const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; - for (unsigned i = ShiftAmt; i < NumBits; ++i) - Bits[i] = LHSBits[i - ShiftAmt]; - - for (unsigned i = 0; i < ShiftAmt; ++i) - Bits[i] = ValueBit(ValueBit::ConstZero); + if (ShiftAmt >= NumBits) { + for (unsigned i = 0; i < NumBits; ++i) + Bits[i] = ValueBit(ValueBit::ConstZero); + } else { + for (unsigned i = ShiftAmt; i < NumBits; ++i) + Bits[i] = LHSBits[i - ShiftAmt]; + for (unsigned i = 0; i < ShiftAmt; ++i) + Bits[i] = ValueBit(ValueBit::ConstZero); + } return std::make_pair(Interesting = true, &Bits); } @@ -1664,15 +1670,20 @@ class BitPermutationSelector { case ISD::SRL: case PPCISD::SRL: if (isa(V.getOperand(1))) { - unsigned ShiftAmt = V.getConstantOperandVal(1); + // srd takes lowest 7 bits, srw takes 6. + unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1); const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; - for (unsigned i = 0; i < NumBits - ShiftAmt; ++i) - Bits[i] = LHSBits[i + ShiftAmt]; - - for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i) - Bits[i] = ValueBit(ValueBit::ConstZero); + if (ShiftAmt >= NumBits) { + for (unsigned i = 0; i < NumBits; ++i) + Bits[i] = ValueBit(ValueBit::ConstZero); + } else { + for (unsigned i = 0; i < NumBits - ShiftAmt; ++i) + Bits[i] = LHSBits[i + ShiftAmt]; + for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i) + Bits[i] = ValueBit(ValueBit::ConstZero); + } return std::make_pair(Interesting = true, &Bits); } diff --git a/llvm/test/CodeGen/PowerPC/pr59074.ll b/llvm/test/CodeGen/PowerPC/pr59074.ll new file mode 100644 index 0000000000000..3e328c6ad9f0b --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr59074.ll @@ -0,0 +1,132 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s --check-prefix=LE64 +; RUN: llc 
-mtriple=powerpcle-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s --check-prefix=LE32 +; RUN: llc -mtriple=powerpc64-ibm-aix -mcpu=pwr7 < %s | FileCheck %s --check-prefix=BE64 +; RUN: llc -mtriple=powerpc-ibm-aix -mcpu=pwr7 < %s | FileCheck %s --check-prefix=BE32 + +; To verify this doesn't crash due to array out of bound. +define void @pr59074(ptr %0) { +; LE64-LABEL: pr59074: +; LE64: # %bb.0: # %entry +; LE64-NEXT: lwz 6, 0(3) +; LE64-NEXT: li 7, 12 +; LE64-NEXT: ld 4, 16(3) +; LE64-NEXT: ld 5, 24(3) +; LE64-NEXT: addi 6, 6, -12 +; LE64-NEXT: std 4, 16(3) +; LE64-NEXT: std 5, 24(3) +; LE64-NEXT: srd 6, 7, 6 +; LE64-NEXT: li 7, 0 +; LE64-NEXT: std 7, 8(3) +; LE64-NEXT: std 6, 0(3) +; LE64-NEXT: blr +; +; LE32-LABEL: pr59074: +; LE32: # %bb.0: # %entry +; LE32-NEXT: stwu 1, -80(1) +; LE32-NEXT: .cfi_def_cfa_offset 80 +; LE32-NEXT: lwz 4, 0(3) +; LE32-NEXT: xxlxor 0, 0, 0 +; LE32-NEXT: li 5, 4 +; LE32-NEXT: addi 6, 1, 16 +; LE32-NEXT: li 7, 0 +; LE32-NEXT: li 8, 12 +; LE32-NEXT: xxswapd 0, 0 +; LE32-NEXT: addi 4, 4, -12 +; LE32-NEXT: rlwinm 9, 4, 29, 28, 31 +; LE32-NEXT: stxvd2x 0, 6, 5 +; LE32-NEXT: stw 7, 44(1) +; LE32-NEXT: stw 7, 40(1) +; LE32-NEXT: stw 7, 36(1) +; LE32-NEXT: stw 8, 16(1) +; LE32-NEXT: lwzux 5, 9, 6 +; LE32-NEXT: li 6, 7 +; LE32-NEXT: lwz 7, 8(9) +; LE32-NEXT: nand 6, 4, 6 +; LE32-NEXT: lwz 8, 4(9) +; LE32-NEXT: clrlwi 4, 4, 29 +; LE32-NEXT: lwz 9, 12(9) +; LE32-NEXT: clrlwi 6, 6, 27 +; LE32-NEXT: subfic 11, 4, 32 +; LE32-NEXT: srw 5, 5, 4 +; LE32-NEXT: slwi 10, 7, 1 +; LE32-NEXT: srw 7, 7, 4 +; LE32-NEXT: slw 6, 10, 6 +; LE32-NEXT: srw 10, 8, 4 +; LE32-NEXT: slw 8, 8, 11 +; LE32-NEXT: slw 11, 9, 11 +; LE32-NEXT: srw 4, 9, 4 +; LE32-NEXT: or 5, 8, 5 +; LE32-NEXT: or 7, 11, 7 +; LE32-NEXT: or 6, 10, 6 +; LE32-NEXT: stw 4, 12(3) +; LE32-NEXT: stw 7, 8(3) +; LE32-NEXT: stw 5, 0(3) +; LE32-NEXT: stw 6, 4(3) +; LE32-NEXT: addi 1, 1, 80 +; LE32-NEXT: blr +; +; BE64-LABEL: pr59074: +; BE64: # %bb.0: # %entry +; BE64-NEXT: lwz 6, 12(3) +; BE64-NEXT: 
li 7, 12 +; BE64-NEXT: ld 4, 24(3) +; BE64-NEXT: ld 5, 16(3) +; BE64-NEXT: addi 6, 6, -12 +; BE64-NEXT: std 4, 24(3) +; BE64-NEXT: std 5, 16(3) +; BE64-NEXT: srd 6, 7, 6 +; BE64-NEXT: li 7, 0 +; BE64-NEXT: std 7, 0(3) +; BE64-NEXT: std 6, 8(3) +; BE64-NEXT: blr +; +; BE32-LABEL: pr59074: +; BE32: # %bb.0: # %entry +; BE32-NEXT: lwz 4, 12(3) +; BE32-NEXT: xxlxor 0, 0, 0 +; BE32-NEXT: addi 5, 1, -64 +; BE32-NEXT: li 6, 12 +; BE32-NEXT: li 7, 0 +; BE32-NEXT: addi 8, 1, -48 +; BE32-NEXT: li 10, 7 +; BE32-NEXT: stxvw4x 0, 0, 5 +; BE32-NEXT: addi 4, 4, -12 +; BE32-NEXT: stw 6, -36(1) +; BE32-NEXT: stw 7, -40(1) +; BE32-NEXT: stw 7, -44(1) +; BE32-NEXT: rlwinm 9, 4, 29, 28, 31 +; BE32-NEXT: stw 7, -48(1) +; BE32-NEXT: sub 5, 8, 9 +; BE32-NEXT: nand 6, 4, 10 +; BE32-NEXT: clrlwi 4, 4, 29 +; BE32-NEXT: clrlwi 6, 6, 27 +; BE32-NEXT: lwz 7, 4(5) +; BE32-NEXT: lwz 8, 8(5) +; BE32-NEXT: lwz 9, 0(5) +; BE32-NEXT: lwz 5, 12(5) +; BE32-NEXT: slwi 10, 7, 1 +; BE32-NEXT: srw 11, 8, 4 +; BE32-NEXT: srw 7, 7, 4 +; BE32-NEXT: srw 5, 5, 4 +; BE32-NEXT: slw 6, 10, 6 +; BE32-NEXT: subfic 10, 4, 32 +; BE32-NEXT: srw 4, 9, 4 +; BE32-NEXT: slw 8, 8, 10 +; BE32-NEXT: slw 10, 9, 10 +; BE32-NEXT: or 6, 11, 6 +; BE32-NEXT: or 7, 10, 7 +; BE32-NEXT: or 5, 8, 5 +; BE32-NEXT: stw 4, 0(3) +; BE32-NEXT: stw 6, 8(3) +; BE32-NEXT: stw 5, 12(3) +; BE32-NEXT: stw 7, 4(3) +; BE32-NEXT: blr +entry: + %v1 = load <2 x i128>, <2 x i128>* %0 + %v2 = insertelement <2 x i128> %v1, i128 12, i32 0 + %v3 = sub <2 x i128> %v1, %v2 + %v4 = lshr <2 x i128> %v2, %v3 + store <2 x i128> %v4, <2 x i128>* %0 + ret void +} From 390dcd4cbbf511954ba1e0ca000d3e1c6394060e Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Sat, 10 Feb 2024 19:14:28 +0000 Subject: [PATCH 44/54] [compiler-rt][profile] Fix InstrProfilingFile possible resource leak. 
(#81363) close #79708 (cherry picked from commit 0a255fcf4a90f9e864ae9321b28e4956f7c865fb) --- compiler-rt/lib/profile/InstrProfilingFile.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c index 867ae73f0d3b2..f3b457d786e6b 100644 --- a/compiler-rt/lib/profile/InstrProfilingFile.c +++ b/compiler-rt/lib/profile/InstrProfilingFile.c @@ -677,6 +677,7 @@ static void initializeProfileForContinuousMode(void) { PROF_ERR("Continuous counter sync mode is enabled, but raw profile is not" "page-aligned. CurrentFileOffset = %" PRIu64 ", pagesz = %u.\n", (uint64_t)CurrentFileOffset, PageSize); + fclose(File); return; } if (writeProfileWithFileObject(Filename, File) != 0) { @@ -692,6 +693,8 @@ static void initializeProfileForContinuousMode(void) { if (doMerging()) { lprofUnlockFileHandle(File); + } + if (File != NULL) { fclose(File); } } From 5ef297ab611822537a385d45244867519563d3ef Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Fri, 16 Feb 2024 11:58:05 +0100 Subject: [PATCH 45/54] [llvm-objcopy] Add SystemZ support (#81841) This is also necessary for enabling ClangBuiltLinux: https://github.com/ClangBuiltLinux/linux/issues/1530 (cherry picked from commit 3c02cb7492fc78fb678264cebf57ff88e478e14f) --- llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test | 6 ++++++ llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test | 7 +++++++ llvm/tools/llvm-objcopy/ObjcopyOptions.cpp | 2 ++ 3 files changed, 15 insertions(+) diff --git a/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test b/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test index fc5856691f8dc..f88b7575002a9 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test +++ b/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test @@ -48,6 +48,9 @@ # RUN: llvm-objcopy -I binary -O elf64-loongarch %t.txt %t.la64.o # RUN: llvm-readobj --file-headers %t.la64.o | FileCheck %s 
--check-prefixes=CHECK,LE,LA64,64 +# RUN: llvm-objcopy -I binary -O elf64-s390 %t.txt %t.s390x.o +# RUN: llvm-readobj --file-headers %t.s390x.o | FileCheck %s --check-prefixes=CHECK,BE,S390X,64 + # CHECK: Format: # 32-SAME: elf32- # 64-SAME: elf64- @@ -64,6 +67,7 @@ # PPCLE-SAME: powerpcle{{$}} # SPARC-SAME: sparc # SPARCEL-SAME: sparc +# S390X-SAME: s390 # X86-64-SAME: x86-64 # AARCH64-NEXT: Arch: aarch64 @@ -81,6 +85,7 @@ # RISCV64-NEXT: Arch: riscv64 # SPARC-NEXT: Arch: sparc{{$}} # SPARCEL-NEXT: Arch: sparcel +# S390X-NEXT: Arch: s390x # X86-64-NEXT: Arch: x86_64 # 32-NEXT: AddressSize: 32bit @@ -116,6 +121,7 @@ # RISCV64-NEXT: Machine: EM_RISCV (0xF3) # SPARC-NEXT: Machine: EM_SPARC (0x2) # SPARCEL-NEXT: Machine: EM_SPARC (0x2) +# S390X-NEXT: Machine: EM_S390 (0x16) # X86-64-NEXT: Machine: EM_X86_64 (0x3E) # CHECK-NEXT: Version: 1 diff --git a/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test b/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test index 882940c05e19c..9a8128611792d 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test +++ b/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test @@ -117,6 +117,10 @@ # RUN: llvm-readobj --file-headers %t.elf64_loongarch.o | FileCheck %s --check-prefixes=CHECK,LE,LA64,64,SYSV # RUN: llvm-readobj --file-headers %t.elf64_loongarch.dwo | FileCheck %s --check-prefixes=CHECK,LE,LA64,64,SYSV +# RUN: llvm-objcopy %t.o -O elf64-s390 %t.elf64_s390.o --split-dwo=%t.elf64_s390.dwo +# RUN: llvm-readobj --file-headers %t.elf64_s390.o | FileCheck %s --check-prefixes=CHECK,BE,S390X,64,SYSV +# RUN: llvm-readobj --file-headers %t.elf64_s390.dwo | FileCheck %s --check-prefixes=CHECK,BE,S390X,64,SYSV + !ELF FileHeader: Class: ELFCLASS32 @@ -160,6 +164,7 @@ Symbols: # RISCV32-SAME: riscv{{$}} # RISCV64-SAME: riscv{{$}} # SPARC-SAME: sparc +# S390X-SAME: s390 # X86-64-SAME: x86-64 # DEFAULT-SAME: unknown @@ -182,6 +187,7 @@ Symbols: # RISCV64-NEXT: Arch: riscv64 # SPARC-NEXT: Arch: sparc{{$}} # 
SPARCEL-NEXT: Arch: sparcel +# S390X-NEXT: Arch: s390x # X86-64-NEXT: Arch: x86_64 # DEFAULT-NEXT: Arch: unknown @@ -210,6 +216,7 @@ Symbols: # RISCV32: Machine: EM_RISCV (0xF3) # RISCV64: Machine: EM_RISCV (0xF3) # SPARC: Machine: EM_SPARC (0x2) +# S390X: Machine: EM_S390 (0x16) # X86-64: Machine: EM_X86_64 (0x3E) # 32: HeaderSize: 52 diff --git a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp index f15307181fad6..f63e5c61e802c 100644 --- a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp +++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp @@ -299,6 +299,8 @@ static const StringMap TargetMap{ // LoongArch {"elf32-loongarch", {ELF::EM_LOONGARCH, false, true}}, {"elf64-loongarch", {ELF::EM_LOONGARCH, true, true}}, + // SystemZ + {"elf64-s390", {ELF::EM_S390, true, false}}, }; static Expected From 66351a501e5106353a4919617851469c7c61a660 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Tue, 20 Feb 2024 13:31:28 +0800 Subject: [PATCH 46/54] [Serialization] Record whether the ODR is skipped (#82302) Close https://github.com/llvm/llvm-project/issues/80570. In https://github.com/llvm/llvm-project/commit/a0b6747804e46665ecfd00295b60432bfe1775b6, we skipped ODR checks for decls in GMF. Then it should be natural to skip storing the ODR values in BMI. Generally it should be fine as long as the writer and the reader keep consistent. However, the use of preamble in clangd shows the tricky part. For, ``` // test.cpp module; // any one off these is enough to crash clangd // #include // #include // #include // #include // #include // #include // probably many more // only ok with libc++, not the system provided libstdc++ 13.2.1 // these are ok export module test; ``` clangd will store the headers as preamble to speedup the parsing and the preamble reuses the serialization techniques. (Generally we'd call the preamble as PCH. However it is not true strictly. I've tested the PCH wouldn't be problematic.) 
However, the tricky part is that the preamble is not modules. It literally serializes and deserializes things. So before clangd parsing the above test module, clangd will serialize the headers into the preamble. Note that there is no concept like GMF now. So the ODR bits are stored. However, when clangd actually parses the file, the decls from preamble are thought as in GMF literally, then the ODR bits are skipped. Then mismatch happens. To solve the problem, this patch adds another bit for decls to record whether or not the ODR bits are skipped. (cherry picked from commit 49775b1dc0cdb3a9d18811f67f268e3b3a381669) --- clang/lib/Serialization/ASTReaderDecl.cpp | 10 +- clang/lib/Serialization/ASTWriter.cpp | 6 +- clang/lib/Serialization/ASTWriterDecl.cpp | 15 +- clang/unittests/Serialization/CMakeLists.txt | 1 + .../PreambleInNamedModulesTest.cpp | 132 ++++++++++++++++++ 5 files changed, 154 insertions(+), 10 deletions(-) create mode 100644 clang/unittests/Serialization/PreambleInNamedModulesTest.cpp diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 1fadd8039462d..321c11e55c14e 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -800,11 +800,12 @@ void ASTDeclReader::VisitEnumDecl(EnumDecl *ED) { BitsUnpacker EnumDeclBits(Record.readInt()); ED->setNumPositiveBits(EnumDeclBits.getNextBits(/*Width=*/8)); ED->setNumNegativeBits(EnumDeclBits.getNextBits(/*Width=*/8)); + bool ShouldSkipCheckingODR = EnumDeclBits.getNextBit(); ED->setScoped(EnumDeclBits.getNextBit()); ED->setScopedUsingClassTag(EnumDeclBits.getNextBit()); ED->setFixed(EnumDeclBits.getNextBit()); - if (!shouldSkipCheckingODR(ED)) { + if (!ShouldSkipCheckingODR) { ED->setHasODRHash(true); ED->ODRHash = Record.readInt(); } @@ -1073,6 +1074,7 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) { FD->setCachedLinkage((Linkage)FunctionDeclBits.getNextBits(/*Width=*/3)); 
FD->setStorageClass((StorageClass)FunctionDeclBits.getNextBits(/*Width=*/3)); + bool ShouldSkipCheckingODR = FunctionDeclBits.getNextBit(); FD->setInlineSpecified(FunctionDeclBits.getNextBit()); FD->setImplicitlyInline(FunctionDeclBits.getNextBit()); FD->setHasSkippedBody(FunctionDeclBits.getNextBit()); @@ -1102,7 +1104,7 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) { if (FD->isExplicitlyDefaulted()) FD->setDefaultLoc(readSourceLocation()); - if (!shouldSkipCheckingODR(FD)) { + if (!ShouldSkipCheckingODR) { FD->ODRHash = Record.readInt(); FD->setHasODRHash(true); } @@ -1973,6 +1975,8 @@ void ASTDeclReader::ReadCXXDefinitionData( BitsUnpacker CXXRecordDeclBits = Record.readInt(); + bool ShouldSkipCheckingODR = CXXRecordDeclBits.getNextBit(); + #define FIELD(Name, Width, Merge) \ if (!CXXRecordDeclBits.canGetNextNBits(Width)) \ CXXRecordDeclBits.updateValue(Record.readInt()); \ @@ -1982,7 +1986,7 @@ void ASTDeclReader::ReadCXXDefinitionData( #undef FIELD // We only perform ODR checks for decls not in GMF. - if (!shouldSkipCheckingODR(D)) { + if (!ShouldSkipCheckingODR) { // Note: the caller has deserialized the IsLambda bit already. Data.ODRHash = Record.readInt(); Data.HasODRHash = true; diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 3b79a9238d1af..73018c1170d8f 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6010,6 +6010,9 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) { BitsPacker DefinitionBits; + bool ShouldSkipCheckingODR = shouldSkipCheckingODR(D); + DefinitionBits.addBit(ShouldSkipCheckingODR); + #define FIELD(Name, Width, Merge) \ if (!DefinitionBits.canWriteNextNBits(Width)) { \ Record->push_back(DefinitionBits); \ @@ -6023,11 +6026,10 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) { Record->push_back(DefinitionBits); // We only perform ODR checks for decls not in GMF. 
- if (!shouldSkipCheckingODR(D)) { + if (!ShouldSkipCheckingODR) // getODRHash will compute the ODRHash if it has not been previously // computed. Record->push_back(D->getODRHash()); - } bool ModulesDebugInfo = Writer->Context->getLangOpts().ModulesDebugInfo && !D->isDependentType(); diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index f224075643e99..e73800100e3cc 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -488,13 +488,15 @@ void ASTDeclWriter::VisitEnumDecl(EnumDecl *D) { BitsPacker EnumDeclBits; EnumDeclBits.addBits(D->getNumPositiveBits(), /*BitWidth=*/8); EnumDeclBits.addBits(D->getNumNegativeBits(), /*BitWidth=*/8); + bool ShouldSkipCheckingODR = shouldSkipCheckingODR(D); + EnumDeclBits.addBit(ShouldSkipCheckingODR); EnumDeclBits.addBit(D->isScoped()); EnumDeclBits.addBit(D->isScopedUsingClassTag()); EnumDeclBits.addBit(D->isFixed()); Record.push_back(EnumDeclBits); // We only perform ODR checks for decls not in GMF. - if (!shouldSkipCheckingODR(D)) + if (!ShouldSkipCheckingODR) Record.push_back(D->getODRHash()); if (MemberSpecializationInfo *MemberInfo = D->getMemberSpecializationInfo()) { @@ -678,6 +680,8 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { // FIXME: stable encoding FunctionDeclBits.addBits(llvm::to_underlying(D->getLinkageInternal()), 3); FunctionDeclBits.addBits((uint32_t)D->getStorageClass(), /*BitWidth=*/3); + bool ShouldSkipCheckingODR = shouldSkipCheckingODR(D); + FunctionDeclBits.addBit(ShouldSkipCheckingODR); FunctionDeclBits.addBit(D->isInlineSpecified()); FunctionDeclBits.addBit(D->isInlined()); FunctionDeclBits.addBit(D->hasSkippedBody()); @@ -704,7 +708,7 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { Record.AddSourceLocation(D->getDefaultLoc()); // We only perform ODR checks for decls not in GMF. 
- if (!shouldSkipCheckingODR(D)) + if (!ShouldSkipCheckingODR) Record.push_back(D->getODRHash()); if (D->isDefaulted()) { @@ -2137,12 +2141,13 @@ getFunctionDeclAbbrev(serialization::DeclCode Code) { Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 11)); // IDNS Abv->Add(BitCodeAbbrevOp( BitCodeAbbrevOp::Fixed, - 27)); // Packed Function Bits: StorageClass, Inline, InlineSpecified, + 28)); // Packed Function Bits: StorageClass, Inline, InlineSpecified, // VirtualAsWritten, Pure, HasInheritedProto, HasWrittenProto, // Deleted, Trivial, TrivialForCall, Defaulted, ExplicitlyDefaulted, // IsIneligibleOrNotSelected, ImplicitReturnZero, Constexpr, // UsesSEHTry, SkippedBody, MultiVersion, LateParsed, - // FriendConstraintRefersToEnclosingTemplate, Linkage + // FriendConstraintRefersToEnclosingTemplate, Linkage, + // ShouldSkipCheckingODR Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LocEnd Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // ODRHash // This Array slurps the rest of the record. 
Fortunately we want to encode @@ -2269,7 +2274,7 @@ void ASTWriter::WriteDeclAbbrevs() { Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // AddTypeRef Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // IntegerType Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // getPromotionType - Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 19)); // Enum Decl Bits + Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 20)); // Enum Decl Bits Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));// ODRHash Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // InstantiatedMembEnum // DC diff --git a/clang/unittests/Serialization/CMakeLists.txt b/clang/unittests/Serialization/CMakeLists.txt index 10d7de970c643..e7eebd0cb9823 100644 --- a/clang/unittests/Serialization/CMakeLists.txt +++ b/clang/unittests/Serialization/CMakeLists.txt @@ -10,6 +10,7 @@ add_clang_unittest(SerializationTests InMemoryModuleCacheTest.cpp ModuleCacheTest.cpp NoCommentsTest.cpp + PreambleInNamedModulesTest.cpp SourceLocationEncodingTest.cpp VarDeclConstantInitTest.cpp ) diff --git a/clang/unittests/Serialization/PreambleInNamedModulesTest.cpp b/clang/unittests/Serialization/PreambleInNamedModulesTest.cpp new file mode 100644 index 0000000000000..d26e1cb633654 --- /dev/null +++ b/clang/unittests/Serialization/PreambleInNamedModulesTest.cpp @@ -0,0 +1,132 @@ +//===- unittests/Serialization/PreambleInNamedModulesTest.cpp -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/CompilerInvocation.h" +#include "clang/Frontend/FrontendActions.h" +#include "clang/Frontend/PrecompiledPreamble.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/raw_ostream.h" + +#include "gtest/gtest.h" + +using namespace llvm; +using namespace clang; + +namespace { + +class PreambleInNamedModulesTest : public ::testing::Test { + void SetUp() override { + ASSERT_FALSE(sys::fs::createUniqueDirectory("modules-test", TestDir)); + } + + void TearDown() override { sys::fs::remove_directories(TestDir); } + +public: + using PathType = SmallString<256>; + + PathType TestDir; + + void addFile(StringRef Path, StringRef Contents, PathType &AbsPath) { + ASSERT_FALSE(sys::path::is_absolute(Path)); + + AbsPath = TestDir; + sys::path::append(AbsPath, Path); + + ASSERT_FALSE( + sys::fs::create_directories(llvm::sys::path::parent_path(AbsPath))); + + std::error_code EC; + llvm::raw_fd_ostream OS(AbsPath, EC); + ASSERT_FALSE(EC); + OS << Contents; + } + + void addFile(StringRef Path, StringRef Contents) { + PathType UnusedAbsPath; + addFile(Path, Contents, UnusedAbsPath); + } +}; + +// Testing that the use of Preamble in named modules can work basically. 
+// See https://github.com/llvm/llvm-project/issues/80570 +TEST_F(PreambleInNamedModulesTest, BasicTest) { + addFile("foo.h", R"cpp( +enum class E { + A, + B, + C, + D +}; + )cpp"); + + PathType MainFilePath; + addFile("A.cppm", R"cpp( +module; +#include "foo.h" +export module A; +export using ::E; + )cpp", + MainFilePath); + + IntrusiveRefCntPtr Diags = + CompilerInstance::createDiagnostics(new DiagnosticOptions()); + IntrusiveRefCntPtr VFS = + llvm::vfs::createPhysicalFileSystem(); + + CreateInvocationOptions CIOpts; + CIOpts.Diags = Diags; + CIOpts.VFS = VFS; + + const char *Args[] = {"clang++", "-std=c++20", "-working-directory", + TestDir.c_str(), MainFilePath.c_str()}; + std::shared_ptr Invocation = + createInvocation(Args, CIOpts); + ASSERT_TRUE(Invocation); + + llvm::ErrorOr> ContentsBuffer = + llvm::MemoryBuffer::getFile(MainFilePath, /*IsText=*/true); + EXPECT_TRUE(ContentsBuffer); + std::unique_ptr Buffer = std::move(*ContentsBuffer); + + PreambleBounds Bounds = + ComputePreambleBounds(Invocation->getLangOpts(), *Buffer, 0); + + PreambleCallbacks Callbacks; + llvm::ErrorOr BuiltPreamble = PrecompiledPreamble::Build( + *Invocation, Buffer.get(), Bounds, *Diags, VFS, + std::make_shared(), + /*StoreInMemory=*/false, /*StoragePath=*/TestDir, Callbacks); + + ASSERT_FALSE(Diags->hasErrorOccurred()); + + EXPECT_TRUE(BuiltPreamble); + EXPECT_TRUE(BuiltPreamble->CanReuse(*Invocation, *Buffer, Bounds, *VFS)); + BuiltPreamble->OverridePreamble(*Invocation, VFS, Buffer.get()); + + auto Clang = std::make_unique( + std::make_shared()); + Clang->setInvocation(std::move(Invocation)); + Clang->setDiagnostics(Diags.get()); + + if (auto VFSWithRemapping = createVFSFromCompilerInvocation( + Clang->getInvocation(), Clang->getDiagnostics(), VFS)) + VFS = VFSWithRemapping; + + Clang->createFileManager(VFS); + EXPECT_TRUE(Clang->createTarget()); + + Buffer.release(); + + SyntaxOnlyAction Action; + EXPECT_TRUE(Clang->ExecuteAction(Action)); + 
EXPECT_FALSE(Clang->getDiagnosticsPtr()->hasErrorOccurred()); +} + +} // namespace From 688566b2dfb86c0cd3e896470150282b98b5d8d3 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 1 Feb 2024 12:57:59 +0100 Subject: [PATCH 47/54] [IndVars] Add tests for #79861 (NFC) (cherry picked from commit c105848fd29d3b46eeb794bb6b10dad04f903b09) --- .../test/Transforms/IndVarSimplify/pr79861.ll | 104 ++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 llvm/test/Transforms/IndVarSimplify/pr79861.ll diff --git a/llvm/test/Transforms/IndVarSimplify/pr79861.ll b/llvm/test/Transforms/IndVarSimplify/pr79861.ll new file mode 100644 index 0000000000000..a8e2aa42a365c --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/pr79861.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -passes=indvars < %s | FileCheck %s + +target datalayout = "n64" + +declare void @use(i64) + +define void @or_disjoint() { +; CHECK-LABEL: define void @or_disjoint() { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 2, [[ENTRY:%.*]] ], [ [[IV_DEC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1 +; CHECK-NEXT: call void @use(i64 [[OR]]) +; CHECK-NEXT: [[IV_DEC]] = add nsw i64 [[IV]], -1 +; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_DEC]], 0 +; CHECK-NEXT: br i1 [[EXIT_COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 2, %entry ], [ %iv.dec, %loop ] + %or = or disjoint i64 %iv, 1 + %add = add nsw i64 %iv, 1 + %sel = select i1 false, i64 %or, i64 %add + call void @use(i64 %sel) + + %iv.dec = add nsw i64 %iv, -1 + %exit.cond = icmp eq i64 %iv.dec, 0 + br i1 %exit.cond, label %exit, label %loop + +exit: + ret void +} + +define void @add_nowrap_flags(i64 %n) { +; CHECK-LABEL: define void @add_nowrap_flags( +; CHECK-SAME: i64 
[[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ADD1:%.*]] = add nuw nsw i64 [[IV]], 123 +; CHECK-NEXT: call void @use(i64 [[ADD1]]) +; CHECK-NEXT: [[IV_INC]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_INC]], [[N]] +; CHECK-NEXT: br i1 [[EXIT_COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.inc, %loop ] + %add1 = add nuw nsw i64 %iv, 123 + %add2 = add i64 %iv, 123 + %sel = select i1 false, i64 %add1, i64 %add2 + call void @use(i64 %sel) + + %iv.inc = add i64 %iv, 1 + %exit.cond = icmp eq i64 %iv.inc, %n + br i1 %exit.cond, label %exit, label %loop + +exit: + ret void +} + + +define void @expander_or_disjoint(i64 %n) { +; CHECK-LABEL: define void @expander_or_disjoint( +; CHECK-SAME: i64 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[OR:%.*]] = or i64 [[N]], 1 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_INC]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[IV]], [[OR]] +; CHECK-NEXT: call void @use(i64 [[ADD]]) +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_INC]], [[OR]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %or = or disjoint i64 %n, 1 + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.inc, %loop ] + %iv.inc = add i64 %iv, 1 + %add = add i64 %iv, %or + call void @use(i64 %add) + %cmp = icmp ult i64 %iv, %n + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} From 4223b2264ce5e6d1855b9e7b32fe61152a681046 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 2 Feb 2024 10:52:05 +0100 Subject: [PATCH 48/54] [SCEVExpander] Do not reuse disjoint or (#80281) 
SCEV treats "or disjoint" the same as "add nsw nuw". However, when expanding, we cannot generally replace an add SCEV node with an "or disjoint" instruction. Just dropping the poison flag is insufficient in this case, we would have to actually convert the or into an add. This is a partial fix for #79861. (cherry picked from commit 5b8e1a6ebf11b6e93bcc96a0d009febe4bb3d7bc) --- llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp | 7 +++++++ llvm/test/Transforms/IndVarSimplify/pr79861.ll | 5 +++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index a1d7f0f9ba0f7..e6f93e72c98a7 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -1401,6 +1401,13 @@ canReuseInstruction(ScalarEvolution &SE, const SCEV *S, Instruction *I, if (!I) return false; + // Disjoint or instructions are interpreted as adds by SCEV. However, we + // can't replace an arbitrary add with disjoint or, even if we drop the + // flag. We would need to convert the or into an add. + if (auto *PDI = dyn_cast(I)) + if (PDI->isDisjoint()) + return false; + // FIXME: Ignore vscale, even though it technically could be poison. Do this // because SCEV currently assumes it can't be poison. Remove this special // case once we proper model when vscale can be poison. 
diff --git a/llvm/test/Transforms/IndVarSimplify/pr79861.ll b/llvm/test/Transforms/IndVarSimplify/pr79861.ll index a8e2aa42a365c..7e267d04c94cc 100644 --- a/llvm/test/Transforms/IndVarSimplify/pr79861.ll +++ b/llvm/test/Transforms/IndVarSimplify/pr79861.ll @@ -75,14 +75,15 @@ define void @expander_or_disjoint(i64 %n) { ; CHECK-LABEL: define void @expander_or_disjoint( ; CHECK-SAME: i64 [[N:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[OR:%.*]] = or i64 [[N]], 1 +; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[N]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_INC]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[ADD:%.*]] = add i64 [[IV]], [[OR]] ; CHECK-NEXT: call void @use(i64 [[ADD]]) -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_INC]], [[OR]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_INC]], [[TMP0]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void From dc0ed54ac582357c8e097b2610791b7f802bb0ad Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 2 Feb 2024 16:02:46 +0100 Subject: [PATCH 49/54] [SCEV] Move canReuseInstruction() helper into SCEV (NFC) To allow reusing it in IndVars. 
(cherry picked from commit 43dd1e84df1ecdad872e1004af47b489e08fc228) --- llvm/include/llvm/Analysis/ScalarEvolution.h | 7 ++ llvm/lib/Analysis/ScalarEvolution.cpp | 62 ++++++++++++++++++ .../Utils/ScalarEvolutionExpander.cpp | 64 +------------------ 3 files changed, 70 insertions(+), 63 deletions(-) diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index af3ad822e0b0d..0880f9c65aa45 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1314,6 +1314,13 @@ class ScalarEvolution { void getPoisonGeneratingValues(SmallPtrSetImpl &Result, const SCEV *S); + /// Check whether it is poison-safe to represent the expression S using the + /// instruction I. If such a replacement is performed, the poison flags of + /// instructions in DropPoisonGeneratingInsts must be dropped. + bool canReuseInstruction( + const SCEV *S, Instruction *I, + SmallVectorImpl &DropPoisonGeneratingInsts); + class FoldID { const SCEV *Op = nullptr; const Type *Ty = nullptr; diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 2acb45837c480..4b2db80bc1ec3 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -4184,6 +4184,68 @@ void ScalarEvolution::getPoisonGeneratingValues( Result.insert(SU->getValue()); } +bool ScalarEvolution::canReuseInstruction( + const SCEV *S, Instruction *I, + SmallVectorImpl &DropPoisonGeneratingInsts) { + // If the instruction cannot be poison, it's always safe to reuse. + if (programUndefinedIfPoison(I)) + return true; + + // Otherwise, it is possible that I is more poisonous that S. Collect the + // poison-contributors of S, and then check whether I has any additional + // poison-contributors. Poison that is contributed through poison-generating + // flags is handled by dropping those flags instead. 
+ SmallPtrSet PoisonVals; + getPoisonGeneratingValues(PoisonVals, S); + + SmallVector Worklist; + SmallPtrSet Visited; + Worklist.push_back(I); + while (!Worklist.empty()) { + Value *V = Worklist.pop_back_val(); + if (!Visited.insert(V).second) + continue; + + // Avoid walking large instruction graphs. + if (Visited.size() > 16) + return false; + + // Either the value can't be poison, or the S would also be poison if it + // is. + if (PoisonVals.contains(V) || isGuaranteedNotToBePoison(V)) + continue; + + auto *I = dyn_cast(V); + if (!I) + return false; + + // Disjoint or instructions are interpreted as adds by SCEV. However, we + // can't replace an arbitrary add with disjoint or, even if we drop the + // flag. We would need to convert the or into an add. + if (auto *PDI = dyn_cast(I)) + if (PDI->isDisjoint()) + return false; + + // FIXME: Ignore vscale, even though it technically could be poison. Do this + // because SCEV currently assumes it can't be poison. Remove this special + // case once we proper model when vscale can be poison. + if (auto *II = dyn_cast(I); + II && II->getIntrinsicID() == Intrinsic::vscale) + continue; + + if (canCreatePoison(cast(I), /*ConsiderFlagsAndMetadata*/ false)) + return false; + + // If the instruction can't create poison, we can recurse to its operands. 
+ if (I->hasPoisonGeneratingFlagsOrMetadata()) + DropPoisonGeneratingInsts.push_back(I); + + for (Value *Op : I->operands()) + Worklist.push_back(Op); + } + return true; +} + const SCEV * ScalarEvolution::getSequentialMinMaxExpr(SCEVTypes Kind, SmallVectorImpl &Ops) { diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index e6f93e72c98a7..a3951fdf8a158 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -1366,68 +1366,6 @@ Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) { return V; } -static bool -canReuseInstruction(ScalarEvolution &SE, const SCEV *S, Instruction *I, - SmallVectorImpl &DropPoisonGeneratingInsts) { - // If the instruction cannot be poison, it's always safe to reuse. - if (programUndefinedIfPoison(I)) - return true; - - // Otherwise, it is possible that I is more poisonous that S. Collect the - // poison-contributors of S, and then check whether I has any additional - // poison-contributors. Poison that is contributed through poison-generating - // flags is handled by dropping those flags instead. - SmallPtrSet PoisonVals; - SE.getPoisonGeneratingValues(PoisonVals, S); - - SmallVector Worklist; - SmallPtrSet Visited; - Worklist.push_back(I); - while (!Worklist.empty()) { - Value *V = Worklist.pop_back_val(); - if (!Visited.insert(V).second) - continue; - - // Avoid walking large instruction graphs. - if (Visited.size() > 16) - return false; - - // Either the value can't be poison, or the S would also be poison if it - // is. - if (PoisonVals.contains(V) || isGuaranteedNotToBePoison(V)) - continue; - - auto *I = dyn_cast(V); - if (!I) - return false; - - // Disjoint or instructions are interpreted as adds by SCEV. However, we - // can't replace an arbitrary add with disjoint or, even if we drop the - // flag. We would need to convert the or into an add. 
- if (auto *PDI = dyn_cast(I)) - if (PDI->isDisjoint()) - return false; - - // FIXME: Ignore vscale, even though it technically could be poison. Do this - // because SCEV currently assumes it can't be poison. Remove this special - // case once we proper model when vscale can be poison. - if (auto *II = dyn_cast(I); - II && II->getIntrinsicID() == Intrinsic::vscale) - continue; - - if (canCreatePoison(cast(I), /*ConsiderFlagsAndMetadata*/ false)) - return false; - - // If the instruction can't create poison, we can recurse to its operands. - if (I->hasPoisonGeneratingFlagsOrMetadata()) - DropPoisonGeneratingInsts.push_back(I); - - for (Value *Op : I->operands()) - Worklist.push_back(Op); - } - return true; -} - Value *SCEVExpander::FindValueInExprValueMap( const SCEV *S, const Instruction *InsertPt, SmallVectorImpl &DropPoisonGeneratingInsts) { @@ -1455,7 +1393,7 @@ Value *SCEVExpander::FindValueInExprValueMap( continue; // Make sure reusing the instruction is poison-safe. - if (canReuseInstruction(SE, S, EntInst, DropPoisonGeneratingInsts)) + if (SE.canReuseInstruction(S, EntInst, DropPoisonGeneratingInsts)) return V; DropPoisonGeneratingInsts.clear(); } From 94dcc1d6ae0912910c1a001da54ca64171d36374 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 5 Feb 2024 10:11:39 +0100 Subject: [PATCH 50/54] [IndVarSimplify] Fix poison-safety when reusing instructions (#80458) IndVars may replace an instruction with one of its operands, if they have the same SCEV expression. However, such a replacement may be more poisonous. First, check whether the operand being poison implies that the instruction is also poison, in which case the replacement is always safe. If this fails, check whether SCEV can determine that reusing the instruction is safe, using the same check as SCEVExpander. Fixes https://github.com/llvm/llvm-project/issues/79861. 
(cherry picked from commit 7d2b6f0b355bc98bbe3aa5bae83316a708da33ee) --- llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 18 ++++++++++++++++-- llvm/test/Transforms/IndVarSimplify/pr55925.ll | 4 ++-- llvm/test/Transforms/IndVarSimplify/pr79861.ll | 6 ++++-- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index 0ed3324a27b6c..1b142f14d8113 100644 --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -713,8 +714,11 @@ bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) { bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand) { if (!SE->isSCEVable(UseInst->getType()) || - (UseInst->getType() != IVOperand->getType()) || - (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand))) + UseInst->getType() != IVOperand->getType()) + return false; + + const SCEV *UseSCEV = SE->getSCEV(UseInst); + if (UseSCEV != SE->getSCEV(IVOperand)) return false; // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the @@ -742,6 +746,16 @@ bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst, if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand)) return false; + // Make sure the operand is not more poisonous than the instruction. 
+ if (!impliesPoison(IVOperand, UseInst)) { + SmallVector DropPoisonGeneratingInsts; + if (!SE->canReuseInstruction(UseSCEV, IVOperand, DropPoisonGeneratingInsts)) + return false; + + for (Instruction *I : DropPoisonGeneratingInsts) + I->dropPoisonGeneratingFlagsAndMetadata(); + } + LLVM_DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n'); SE->forgetValue(UseInst); diff --git a/llvm/test/Transforms/IndVarSimplify/pr55925.ll b/llvm/test/Transforms/IndVarSimplify/pr55925.ll index 420fc209949d4..312a8295ccdc9 100644 --- a/llvm/test/Transforms/IndVarSimplify/pr55925.ll +++ b/llvm/test/Transforms/IndVarSimplify/pr55925.ll @@ -18,9 +18,9 @@ define void @test(ptr %p) personality ptr undef { ; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foo(i32 returned [[TMP0]]) ; CHECK-NEXT: to label [[LOOP_LATCH]] unwind label [[EXIT:%.*]] ; CHECK: loop.latch: -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @foo(i32 [[TMP1]]) +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: br label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: [[LP:%.*]] = landingpad { ptr, i32 } @@ -64,8 +64,8 @@ define void @test_critedge(i1 %c, ptr %p) personality ptr undef { ; CHECK-NEXT: br label [[LOOP_LATCH]] ; CHECK: loop.latch: ; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP1]], [[LOOP_INVOKE]] ], [ 0, [[LOOP_OTHER]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @foo(i32 [[PHI]]) +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: br label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: [[LP:%.*]] = landingpad { ptr, i32 } diff --git a/llvm/test/Transforms/IndVarSimplify/pr79861.ll b/llvm/test/Transforms/IndVarSimplify/pr79861.ll index 7e267d04c94cc..6625094496139 100644 --- a/llvm/test/Transforms/IndVarSimplify/pr79861.ll +++ b/llvm/test/Transforms/IndVarSimplify/pr79861.ll @@ -12,7 +12,9 @@ define 
void @or_disjoint() { ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 2, [[ENTRY:%.*]] ], [ [[IV_DEC:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1 -; CHECK-NEXT: call void @use(i64 [[OR]]) +; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 false, i64 [[OR]], i64 [[ADD]] +; CHECK-NEXT: call void @use(i64 [[SEL]]) ; CHECK-NEXT: [[IV_DEC]] = add nsw i64 [[IV]], -1 ; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_DEC]], 0 ; CHECK-NEXT: br i1 [[EXIT_COND]], label [[EXIT:%.*]], label [[LOOP]] @@ -44,7 +46,7 @@ define void @add_nowrap_flags(i64 %n) { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ADD1:%.*]] = add nuw nsw i64 [[IV]], 123 +; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[IV]], 123 ; CHECK-NEXT: call void @use(i64 [[ADD1]]) ; CHECK-NEXT: [[IV_INC]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_INC]], [[N]] From bba39443eb918f842502ee2315306a6e811e1987 Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Tue, 20 Feb 2024 07:26:48 +0100 Subject: [PATCH 51/54] [Release] Don't build during test-release.sh Phase 3 install (#82001) As described in [test-release.sh ninja install does builds in Phase 3](https://github.com/llvm/llvm-project/issues/80999), considerable parts of Phase 3 of a `test-release.sh` build are run by `ninja install`, ignoring both `$Verbose` and the parallelism set via `-j NUM`. This patch fixes this by not specifying any explicit build target for Phase 3, thus running the full build as usual. Tested on `sparc64-unknown-linux-gnu`. 
(cherry picked from commit f6ac598c104ed3c9f4bcbbe830f86500c8d1013e) --- llvm/utils/release/test-release.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llvm/utils/release/test-release.sh b/llvm/utils/release/test-release.sh index 5b1945df47d24..0af16387ce1d8 100755 --- a/llvm/utils/release/test-release.sh +++ b/llvm/utils/release/test-release.sh @@ -537,6 +537,11 @@ function build_llvmCore() { InstallTarget="$InstallTarget install-runtimes" fi fi + if [ "$Phase" -eq "3" ]; then + # Build everything at once, with the proper parallelism and verbosity, + # in Phase 3. + BuildTarget= + fi cd $ObjDir echo "# Compiling llvm $Release-$RC $Flavor" From d84c1e947472ba7d3a8b32621d4e451f2beacce9 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 20 Feb 2024 17:52:38 -0800 Subject: [PATCH 52/54] [workflows] Fix permissions check for creating new releases (#81163) The default GitHub token does not have read permissions on the org, so we need to use a custom token in order to read the members of the llvm-release-managers team. 
(cherry picked from commit 2836d8edbfbcd461b25101ed58f93c862d65903a) --- .github/workflows/release-tasks.yml | 4 +++- llvm/utils/release/github-upload-release.py | 16 ++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/.github/workflows/release-tasks.yml b/.github/workflows/release-tasks.yml index f2a831ad3577a..53da8662b0203 100644 --- a/.github/workflows/release-tasks.yml +++ b/.github/workflows/release-tasks.yml @@ -28,6 +28,7 @@ jobs: name: Create a New Release runs-on: ubuntu-latest needs: validate-tag + steps: - name: Install Dependencies run: | @@ -40,8 +41,9 @@ jobs: - name: Create Release env: GITHUB_TOKEN: ${{ github.token }} + USER_TOKEN: ${{ secrets.RELEASE_TASKS_USER_TOKEN }} run: | - ./llvm/utils/release/./github-upload-release.py --token "$GITHUB_TOKEN" --release ${{ needs.validate-tag.outputs.release-version }} --user ${{ github.actor }} create + ./llvm/utils/release/./github-upload-release.py --token "$GITHUB_TOKEN" --release ${{ needs.validate-tag.outputs.release-version }} --user ${{ github.actor }} --user-token "$USER_TOKEN" create release-documentation: name: Build and Upload Release Documentation needs: diff --git a/llvm/utils/release/github-upload-release.py b/llvm/utils/release/github-upload-release.py index a8bb569d2fc99..14ec05062d88c 100755 --- a/llvm/utils/release/github-upload-release.py +++ b/llvm/utils/release/github-upload-release.py @@ -77,20 +77,28 @@ def upload_files(repo, release, files): parser.add_argument("--token", type=str) parser.add_argument("--release", type=str) parser.add_argument("--user", type=str) +parser.add_argument("--user-token", type=str) # Upload args parser.add_argument("--files", nargs="+", type=str) args = parser.parse_args() -github = github.Github(args.token) -llvm_org = github.get_organization("llvm") +gh = github.Github(args.token) +llvm_org = gh.get_organization("llvm") llvm_repo = llvm_org.get_repo("llvm-project") if args.user: + if not args.user_token: + print("--user-token 
option required when --user is used") + sys.exit(1) # Validate that this user is allowed to modify releases. - user = github.get_user(args.user) - team = llvm_org.get_team_by_slug("llvm-release-managers") + user = gh.get_user(args.user) + team = ( + github.Github(args.user_token) + .get_organization("llvm") + .get_team_by_slug("llvm-release-managers") + ) if not team.has_in_members(user): print("User {} is not a allowed to modify releases".format(args.user)) sys.exit(1) From 235306ba1f89eb1b9f9724edec5eacb295f8198f Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 19 Feb 2024 16:46:16 -0800 Subject: [PATCH 53/54] [cmake] Add minor version to library SONAME (#79376) We need to do this now that we are bumping the minor release number when we create the release branch. This also results in a slight change to the library names for LLVM. The main library now has a more conventional library name: 'libLLVM.so.$major.$minor'. The old library name: libLLVM-$major.so is now a symlink that points to the new library. However, the symlink is not present in the build directory. It is only present in the install directory. The library name was changed because it helped to keep the CMake changes simpler. 
Fixes #76273 (cherry picked from commit 91a384621e5b762d9c173ffd247cfeadd5f436a2) --- llvm/CMakeLists.txt | 2 +- llvm/cmake/modules/AddLLVM.cmake | 8 ++++---- llvm/tools/llvm-shlib/CMakeLists.txt | 5 ++++- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index c2fb77d5a371f..44f2850b92d52 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -35,7 +35,7 @@ endif() if(NOT DEFINED LLVM_SHLIB_SYMBOL_VERSION) # "Symbol version prefix for libLLVM.so" - set(LLVM_SHLIB_SYMBOL_VERSION "LLVM_${LLVM_VERSION_MAJOR}") + set(LLVM_SHLIB_SYMBOL_VERSION "LLVM_${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") endif() if ((CMAKE_GENERATOR MATCHES "Visual Studio") AND (MSVC_TOOLSET_VERSION LESS 142) AND (CMAKE_GENERATOR_TOOLSET STREQUAL "")) diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index 5e98961855282..5fc663d10b8f7 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -108,7 +108,7 @@ function(add_llvm_symbol_exports target_name export_file) COMMAND "${Python3_EXECUTABLE}" "-c" "import sys; \ lines = [' ' + l.rstrip() for l in sys.stdin] + [' local: *;']; \ - print('LLVM_${LLVM_VERSION_MAJOR} {'); \ + print('LLVM_${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR} {'); \ print(' global:') if len(lines) > 1 else None; \ print(';\\n'.join(lines) + '\\n};')" < ${export_file} > ${native_export_file} @@ -646,9 +646,9 @@ function(llvm_add_library name) if(UNIX AND NOT APPLE AND NOT ARG_SONAME) set_target_properties(${name} PROPERTIES - # Since 4.0.0, the ABI version is indicated by the major version - SOVERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX} - VERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX}) + # Since 18.1.0, the ABI version is indicated by the major and minor version. 
+ SOVERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}${LLVM_VERSION_SUFFIX} + VERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}${LLVM_VERSION_SUFFIX}) endif() endif() diff --git a/llvm/tools/llvm-shlib/CMakeLists.txt b/llvm/tools/llvm-shlib/CMakeLists.txt index a47a0ec84c625..09c15e304614c 100644 --- a/llvm/tools/llvm-shlib/CMakeLists.txt +++ b/llvm/tools/llvm-shlib/CMakeLists.txt @@ -33,7 +33,10 @@ if(LLVM_BUILD_LLVM_DYLIB) if (LLVM_LINK_LLVM_DYLIB) set(INSTALL_WITH_TOOLCHAIN INSTALL_WITH_TOOLCHAIN) endif() - add_llvm_library(LLVM SHARED DISABLE_LLVM_LINK_LLVM_DYLIB SONAME ${INSTALL_WITH_TOOLCHAIN} ${SOURCES}) + add_llvm_library(LLVM SHARED DISABLE_LLVM_LINK_LLVM_DYLIB OUTPUT_NAME LLVM ${INSTALL_WITH_TOOLCHAIN} ${SOURCES}) + # Add symlink for backwards compatibility with old library name + get_target_property(LLVM_DYLIB_FILENAME LLVM OUTPUT_NAME) + llvm_install_library_symlink(LLVM-${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX} ${LLVM_DYLIB_FILENAME} SHARED COMPONENT LLVM) list(REMOVE_DUPLICATES LIB_NAMES) if("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin") From 6c90f8dd546334b01c9a86387950e8c72d459f1e Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 21 Feb 2024 00:14:59 +0000 Subject: [PATCH 54/54] Fix llvm-x86_64-debian-dylib buildbot This was broken by 91a384621e5b762d9c173ffd247cfeadd5f436a2. 
(cherry picked from commit ff4d6c64ee4269e4a9b67a4dae7e0b82ae1c3419) --- llvm/test/lit.cfg.py | 7 ++++--- llvm/test/lit.site.cfg.py.in | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index c6f9ee82e08cc..74e7769a48059 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -414,10 +414,11 @@ def version_int(ver): config.available_features.add("llvm-dylib") config.substitutions.append( ( + # libLLVM.so.19.0git "%llvmdylib", - "{}/libLLVM-{}{}".format( - config.llvm_shlib_dir, config.llvm_dylib_version, config.llvm_shlib_ext - ), + "{}/libLLVM{}.{}".format( + config.llvm_shlib_dir, config.llvm_shlib_ext, config.llvm_dylib_version + ) ) ) diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in index 1138b2ccf7bce..b6f255d472d16 100644 --- a/llvm/test/lit.site.cfg.py.in +++ b/llvm/test/lit.site.cfg.py.in @@ -44,7 +44,7 @@ config.build_examples = @LLVM_BUILD_EXAMPLES@ config.enable_threads = @LLVM_ENABLE_THREADS@ config.build_shared_libs = @BUILD_SHARED_LIBS@ config.link_llvm_dylib = @LLVM_LINK_LLVM_DYLIB@ -config.llvm_dylib_version = "@LLVM_VERSION_MAJOR@@LLVM_VERSION_SUFFIX@" +config.llvm_dylib_version = "@LLVM_VERSION_MAJOR@.@LLVM_VERSION_MINOR@@LLVM_VERSION_SUFFIX@" config.llvm_host_triple = '@LLVM_HOST_TRIPLE@' config.host_arch = "@HOST_ARCH@" config.have_opt_viewer_modules = @LLVM_HAVE_OPT_VIEWER_MODULES@