Merge branch 'llvm:main' into main

CLRN · web-flow · commit 0632a35671b9 · 2024-02-15T20:51:12.000Z
diff --git a/bolt/include/bolt/Profile/BoltAddressTranslation.h b/bolt/include/bolt/Profile/BoltAddressTranslation.h
@@ -111,6 +111,9 @@ class BoltAddressTranslation {
   /// addresses when aggregating profile
   bool enabledFor(llvm::object::ELFObjectFileBase *InputFile) const;
 
+  /// Save function and basic block hashes used for metadata dump.
+  void saveMetadata(BinaryContext &BC);
+
 private:
   /// Helper to update \p Map by inserting one or more BAT entries reflecting
   /// \p BB for function located at \p FuncAddress. At least one entry will be
@@ -140,6 +143,9 @@ class BoltAddressTranslation {
 
   std::map<uint64_t, MapTy> Maps;
 
+  using BBHashMap = std::unordered_map<uint32_t, size_t>;
+  std::unordered_map<uint64_t, std::pair<size_t, BBHashMap>> FuncHashes;
+
   /// Links outlined cold bocks to their original function
   std::map<uint64_t, uint64_t> ColdPartSource;
 
diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp
@@ -424,5 +424,20 @@ bool BoltAddressTranslation::enabledFor(
   }
   return false;
 }
+
+void BoltAddressTranslation::saveMetadata(BinaryContext &BC) {
+  for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
+    // We don't need a translation table if the body of the function hasn't
+    // changed
+    if (BF.isIgnored() || (!BC.HasRelocations && !BF.isSimple()))
+      continue;
+    // Look the entry up once and fill in the function and block hashes.
+    auto &[FuncHash, BlockHashes] = FuncHashes[BF.getAddress()];
+    FuncHash = BF.computeHash();
+    BF.computeBlockHashes();
+    for (const BinaryBasicBlock &BB : BF)
+      BlockHashes.emplace(BB.getInputOffset(), BB.getHash());
+  }
+}
 } // namespace bolt
 } // namespace llvm
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -748,6 +748,10 @@ Error RewriteInstance::run() {
 
   processProfileData();
 
+  // Save input binary metadata if BAT section needs to be emitted
+  if (opts::EnableBAT)
+    BAT->saveMetadata(*BC);
+
   postProcessFunctions();
 
   processMetadataPostCFG();
diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h
@@ -37,10 +37,10 @@ template <size_t Bits> struct DyadicFloat {
   int exponent = 0;
   MantissaType mantissa = MantissaType(0);
 
-  constexpr DyadicFloat() = default;
+  LIBC_INLINE constexpr DyadicFloat() = default;
 
   template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
-  DyadicFloat(T x) {
+  LIBC_INLINE constexpr DyadicFloat(T x) {
     static_assert(FPBits<T>::FRACTION_LEN < Bits);
     FPBits<T> x_bits(x);
     sign = x_bits.sign();
@@ -49,14 +49,14 @@ template <size_t Bits> struct DyadicFloat {
     normalize();
   }
 
-  constexpr DyadicFloat(Sign s, int e, MantissaType m)
+  LIBC_INLINE constexpr DyadicFloat(Sign s, int e, MantissaType m)
       : sign(s), exponent(e), mantissa(m) {
     normalize();
   }
 
   // Normalizing the mantissa, bringing the leading 1 bit to the most
   // significant bit.
-  constexpr DyadicFloat &normalize() {
+  LIBC_INLINE constexpr DyadicFloat &normalize() {
     if (!mantissa.is_zero()) {
       int shift_length = static_cast<int>(mantissa.clz());
       exponent -= shift_length;
@@ -66,14 +66,14 @@ template <size_t Bits> struct DyadicFloat {
   }
 
   // Used for aligning exponents.  Output might not be normalized.
-  DyadicFloat &shift_left(int shift_length) {
+  LIBC_INLINE constexpr DyadicFloat &shift_left(int shift_length) {
     exponent -= shift_length;
     mantissa <<= static_cast<size_t>(shift_length);
     return *this;
   }
 
   // Used for aligning exponents.  Output might not be normalized.
-  DyadicFloat &shift_right(int shift_length) {
+  LIBC_INLINE constexpr DyadicFloat &shift_right(int shift_length) {
     exponent += shift_length;
     mantissa >>= static_cast<size_t>(shift_length);
     return *this;
@@ -85,7 +85,7 @@ template <size_t Bits> struct DyadicFloat {
             typename = cpp::enable_if_t<cpp::is_floating_point_v<T> &&
                                             (FPBits<T>::FRACTION_LEN < Bits),
                                         void>>
-  explicit operator T() const {
+  LIBC_INLINE explicit constexpr operator T() const {
     if (LIBC_UNLIKELY(mantissa.is_zero()))
       return FPBits<T>::zero(sign).get_val();
 
@@ -176,7 +176,7 @@ template <size_t Bits> struct DyadicFloat {
     return r;
   }
 
-  explicit operator MantissaType() const {
+  LIBC_INLINE explicit constexpr operator MantissaType() const {
     if (mantissa.is_zero())
       return 0;
 
@@ -208,8 +208,8 @@ template <size_t Bits> struct DyadicFloat {
 // don't need to normalize the inputs again in this function.  If the inputs are
 // not normalized, the results might lose precision significantly.
 template <size_t Bits>
-constexpr DyadicFloat<Bits> quick_add(DyadicFloat<Bits> a,
-                                      DyadicFloat<Bits> b) {
+LIBC_INLINE constexpr DyadicFloat<Bits> quick_add(DyadicFloat<Bits> a,
+                                                  DyadicFloat<Bits> b) {
   if (LIBC_UNLIKELY(a.mantissa.is_zero()))
     return b;
   if (LIBC_UNLIKELY(b.mantissa.is_zero()))
@@ -263,8 +263,8 @@ constexpr DyadicFloat<Bits> quick_add(DyadicFloat<Bits> a,
 // don't need to normalize the inputs again in this function.  If the inputs are
 // not normalized, the results might lose precision significantly.
 template <size_t Bits>
-constexpr DyadicFloat<Bits> quick_mul(DyadicFloat<Bits> a,
-                                      DyadicFloat<Bits> b) {
+LIBC_INLINE constexpr DyadicFloat<Bits> quick_mul(DyadicFloat<Bits> a,
+                                                  DyadicFloat<Bits> b) {
   DyadicFloat<Bits> result;
   result.sign = (a.sign != b.sign) ? Sign::NEG : Sign::POS;
   result.exponent = a.exponent + b.exponent + int(Bits);
@@ -285,16 +285,17 @@ constexpr DyadicFloat<Bits> quick_mul(DyadicFloat<Bits> a,
 
 // Simple polynomial approximation.
 template <size_t Bits>
-constexpr DyadicFloat<Bits> multiply_add(const DyadicFloat<Bits> &a,
-                                         const DyadicFloat<Bits> &b,
-                                         const DyadicFloat<Bits> &c) {
+LIBC_INLINE constexpr DyadicFloat<Bits>
+multiply_add(const DyadicFloat<Bits> &a, const DyadicFloat<Bits> &b,
+             const DyadicFloat<Bits> &c) {
   return quick_add(c, quick_mul(a, b));
 }
 
 // Simple exponentiation implementation for printf. Only handles positive
 // exponents, since division isn't implemented.
 template <size_t Bits>
-constexpr DyadicFloat<Bits> pow_n(DyadicFloat<Bits> a, uint32_t power) {
+LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(DyadicFloat<Bits> a,
+                                              uint32_t power) {
   DyadicFloat<Bits> result = 1.0;
   DyadicFloat<Bits> cur_power = a;
 
@@ -309,7 +310,8 @@ constexpr DyadicFloat<Bits> pow_n(DyadicFloat<Bits> a, uint32_t power) {
 }
 
 template <size_t Bits>
-constexpr DyadicFloat<Bits> mul_pow_2(DyadicFloat<Bits> a, int32_t pow_2) {
+LIBC_INLINE constexpr DyadicFloat<Bits> mul_pow_2(DyadicFloat<Bits> a,
+                                                  int32_t pow_2) {
   DyadicFloat<Bits> result = a;
   result.exponent += pow_2;
   return result;
diff --git a/libc/src/__support/UInt.h b/libc/src/__support/UInt.h
@@ -157,7 +157,7 @@ struct BigInt {
 
   LIBC_INLINE constexpr explicit operator bool() const { return !is_zero(); }
 
-  LIBC_INLINE BigInt &operator=(const BigInt &other) = default;
+  LIBC_INLINE constexpr BigInt &operator=(const BigInt &other) = default;
 
   LIBC_INLINE constexpr bool is_zero() const {
     for (size_t i = 0; i < WORD_COUNT; ++i) {
@@ -172,7 +172,7 @@ struct BigInt {
   LIBC_INLINE constexpr WordType add(const BigInt &x) {
     SumCarry<WordType> s{0, 0};
     for (size_t i = 0; i < WORD_COUNT; ++i) {
-      s = add_with_carry_const(val[i], x.val[i], s.carry);
+      s = add_with_carry(val[i], x.val[i], s.carry);
       val[i] = s.sum;
     }
     return s.carry;
@@ -194,7 +194,7 @@ struct BigInt {
     BigInt result;
     SumCarry<WordType> s{0, 0};
     for (size_t i = 0; i < WORD_COUNT; ++i) {
-      s = add_with_carry_const(val[i], other.val[i], s.carry);
+      s = add_with_carry(val[i], other.val[i], s.carry);
       result.val[i] = s.sum;
     }
     return result;
@@ -210,7 +210,7 @@ struct BigInt {
   LIBC_INLINE constexpr WordType sub(const BigInt &x) {
     DiffBorrow<WordType> d{0, 0};
     for (size_t i = 0; i < WORD_COUNT; ++i) {
-      d = sub_with_borrow_const(val[i], x.val[i], d.borrow);
+      d = sub_with_borrow(val[i], x.val[i], d.borrow);
       val[i] = d.diff;
     }
     return d.borrow;
@@ -230,7 +230,7 @@ struct BigInt {
     BigInt result;
     DiffBorrow<WordType> d{0, 0};
     for (size_t i = 0; i < WORD_COUNT; ++i) {
-      d = sub_with_borrow_const(val[i], other.val[i], d.borrow);
+      d = sub_with_borrow(val[i], other.val[i], d.borrow);
       result.val[i] = d.diff;
     }
     return result;
diff --git a/libc/src/__support/integer_utils.h b/libc/src/__support/integer_utils.h
@@ -19,7 +19,7 @@
 
 namespace LIBC_NAMESPACE {
 
-template <typename T> NumberPair<T> full_mul(T a, T b) {
+template <typename T> constexpr NumberPair<T> full_mul(T a, T b) {
   NumberPair<T> pa = split(a);
   NumberPair<T> pb = split(b);
   NumberPair<T> prod;
@@ -43,7 +43,8 @@ template <typename T> NumberPair<T> full_mul(T a, T b) {
 }
 
 template <>
-LIBC_INLINE NumberPair<uint32_t> full_mul<uint32_t>(uint32_t a, uint32_t b) {
+LIBC_INLINE constexpr NumberPair<uint32_t> full_mul<uint32_t>(uint32_t a,
+                                                              uint32_t b) {
   uint64_t prod = uint64_t(a) * uint64_t(b);
   NumberPair<uint32_t> result;
   result.lo = uint32_t(prod);
@@ -53,7 +54,8 @@ LIBC_INLINE NumberPair<uint32_t> full_mul<uint32_t>(uint32_t a, uint32_t b) {
 
 #ifdef __SIZEOF_INT128__
 template <>
-LIBC_INLINE NumberPair<uint64_t> full_mul<uint64_t>(uint64_t a, uint64_t b) {
+LIBC_INLINE constexpr NumberPair<uint64_t> full_mul<uint64_t>(uint64_t a,
+                                                              uint64_t b) {
   __uint128_t prod = __uint128_t(a) * __uint128_t(b);
   NumberPair<uint64_t> result;
   result.lo = uint64_t(prod);
diff --git a/libc/src/__support/number_pair.h b/libc/src/__support/number_pair.h
@@ -16,8 +16,8 @@
 namespace LIBC_NAMESPACE {
 
 template <typename T> struct NumberPair {
-  T lo;
-  T hi;
+  T lo = T(0);
+  T hi = T(0);
 };
 
 template <typename T>
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -635,21 +635,22 @@ static SmallString<128>
 getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind,
                            Mangler &Mang, const TargetMachine &TM,
                            unsigned EntrySize, bool UniqueSectionName) {
-  SmallString<128> Name;
+  SmallString<128> Name =
+      getSectionPrefixForGlobal(Kind, TM.isLargeGlobalValue(GO));
   if (Kind.isMergeableCString()) {
     // We also need alignment here.
     // FIXME: this is getting the alignment of the character, not the
     // alignment of the global!
     Align Alignment = GO->getParent()->getDataLayout().getPreferredAlign(
         cast<GlobalVariable>(GO));
 
-    std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + ".";
-    Name = SizeSpec + utostr(Alignment.value());
+    Name += ".str";
+    Name += utostr(EntrySize);
+    Name += ".";
+    Name += utostr(Alignment.value());
   } else if (Kind.isMergeableConst()) {
-    Name = ".rodata.cst";
+    Name += ".cst";
     Name += utostr(EntrySize);
-  } else {
-    Name = getSectionPrefixForGlobal(Kind, TM.isLargeGlobalValue(GO));
   }
 
   bool HasPrefix = false;
diff --git a/llvm/test/CodeGen/X86/code-model-elf-merge-sections.ll b/llvm/test/CodeGen/X86/code-model-elf-merge-sections.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -relocation-model=pic -filetype=obj -code-model=medium -large-data-threshold=0 -o %t
+; RUN: llvm-readelf -S %t | FileCheck %s --check-prefix=LARGE
+; RUN: llc < %s -relocation-model=pic -filetype=obj -code-model=medium -large-data-threshold=99 -o %t
+; RUN: llvm-readelf -S %t | FileCheck %s --check-prefix=SMALL
+
+; LARGE: .lrodata.str4.4 {{.*}} AMSl
+; LARGE: .lrodata.cst8 {{.*}} AMl
+
+; SMALL: .rodata.str4.4 {{.*}} AMS
+; SMALL: .rodata.cst8 {{.*}} AM
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64--linux"
+
+@str = internal unnamed_addr constant [3 x i32] [i32 1, i32 2, i32 0]
+@merge = internal unnamed_addr constant i64 2