From e83ad816992838781c70d0af895100a5c78268d1 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Thu, 20 Feb 2025 17:47:34 +0100
Subject: [PATCH 01/29] [NFC][libc++][TZDB] Fixes formatting.

The formatting was broken by #127339.
---
 libcxx/src/experimental/tzdb.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/libcxx/src/experimental/tzdb.cpp b/libcxx/src/experimental/tzdb.cpp
index 1f18226636fd5..ac5c62bb81902 100644
--- a/libcxx/src/experimental/tzdb.cpp
+++ b/libcxx/src/experimental/tzdb.cpp
@@ -763,8 +763,9 @@ void __init_tzdb(tzdb& __tzdb, __tz::__rules_storage_type& __rules) {
   // - The file /etc/timezone. This text file contains the name of the time
   //   zone.
   //
-  // On Linux systems it seems /etc/timezone is deprecated and being phased
-  // out. This file is used when /etc/localtime does not exist, or when it exists but is not a symlink. For more information and links see
+  // On Linux systems it seems /etc/timezone is deprecated and being phased out.
+  // This file is used when /etc/localtime does not exist, or when it exists but
+  // is not a symlink. For more information and links see
   // https://github.com/llvm/llvm-project/issues/105634
 
   string __name = chrono::__current_zone_environment();

From c0c172213b6124a672ee64786260f8860027413f Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Thu, 20 Feb 2025 08:55:35 -0800
Subject: [PATCH 02/29] [Analysis] Avoid repeated hash lookups (NFC) (#127955)

---
 llvm/lib/Analysis/LoopAccessAnalysis.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index cab70c5c01a45..b3a42ed3377cd 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1431,8 +1431,8 @@ void AccessAnalysis::processMemAccesses() {
           typedef SmallVector<const Value *, 16> ValueVector;
           ValueVector TempObjects;
 
-          UnderlyingObjects[Ptr] = {};
           SmallVector<const Value *, 16> &UOs = UnderlyingObjects[Ptr];
+          UOs = {};
           ::getUnderlyingObjects(Ptr, UOs, LI);
           LLVM_DEBUG(dbgs()
                      << "Underlying objects for pointer " << *Ptr << "\n");

From 506b31ec36746732b84fb0e9cd74af3ca885fa86 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Thu, 20 Feb 2025 08:55:52 -0800
Subject: [PATCH 03/29] [IPO] Avoid repeated hash lookups (NFC) (#127957)

---
 llvm/lib/Transforms/IPO/PartialInlining.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp
index f2707afe195c4..56b7b8bfe1f66 100644
--- a/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -1393,9 +1393,12 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
     CallerORE.emit(OR);
 
     // Now update the entry count:
-    if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
-      uint64_t CallSiteCount = CallSiteToProfCountMap[User];
-      CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
+    if (CalleeEntryCountV) {
+      if (auto It = CallSiteToProfCountMap.find(User);
+          It != CallSiteToProfCountMap.end()) {
+        uint64_t CallSiteCount = It->second;
+        CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
+      }
     }
 
     AnyInline = true;

From c612f7961219adfad617a05b3d5f07bb05d52df3 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Thu, 20 Feb 2025 08:56:33 -0800
Subject: [PATCH 04/29] [ObjectYAML] Avoid repeated hash lookups (NFC)
 (#127958)

---
 llvm/lib/ObjectYAML/XCOFFEmitter.cpp | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/ObjectYAML/XCOFFEmitter.cpp b/llvm/lib/ObjectYAML/XCOFFEmitter.cpp
index f3a9fb188f51d..5d7d6a1141ba0 100644
--- a/llvm/lib/ObjectYAML/XCOFFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/XCOFFEmitter.cpp
@@ -145,14 +145,16 @@ bool XCOFFWriter::initSectionHeaders(uint64_t &CurrentOffset) {
   uint64_t CurrentEndTDataAddr = 0;
   for (uint16_t I = 0, E = InitSections.size(); I < E; ++I) {
     // Assign indices for sections.
-    if (InitSections[I].SectionName.size() &&
-        !SectionIndexMap[InitSections[I].SectionName]) {
-      // The section index starts from 1.
-      SectionIndexMap[InitSections[I].SectionName] = I + 1;
-      if ((I + 1) > MaxSectionIndex) {
-        ErrHandler("exceeded the maximum permitted section index of " +
-                   Twine(MaxSectionIndex));
-        return false;
+    if (InitSections[I].SectionName.size()) {
+      int16_t &SectionIndex = SectionIndexMap[InitSections[I].SectionName];
+      if (!SectionIndex) {
+        // The section index starts from 1.
+        SectionIndex = I + 1;
+        if ((I + 1) > MaxSectionIndex) {
+          ErrHandler("exceeded the maximum permitted section index of " +
+                     Twine(MaxSectionIndex));
+          return false;
+        }
       }
     }
 
@@ -779,19 +781,19 @@ bool XCOFFWriter::writeSymbols() {
       W.write<uint32_t>(YamlSym.Value);
     }
     if (YamlSym.SectionName) {
-      if (!SectionIndexMap.count(*YamlSym.SectionName)) {
+      auto It = SectionIndexMap.find(*YamlSym.SectionName);
+      if (It == SectionIndexMap.end()) {
         ErrHandler("the SectionName " + *YamlSym.SectionName +
                    " specified in the symbol does not exist");
         return false;
       }
-      if (YamlSym.SectionIndex &&
-          SectionIndexMap[*YamlSym.SectionName] != *YamlSym.SectionIndex) {
+      if (YamlSym.SectionIndex && It->second != *YamlSym.SectionIndex) {
         ErrHandler("the SectionName " + *YamlSym.SectionName +
                    " and the SectionIndex (" + Twine(*YamlSym.SectionIndex) +
                    ") refer to different sections");
         return false;
       }
-      W.write<int16_t>(SectionIndexMap[*YamlSym.SectionName]);
+      W.write<int16_t>(It->second);
     } else {
       W.write<int16_t>(YamlSym.SectionIndex.value_or(0));
     }

From 4a8f41456515953cb8a5f9f1b927c9f60436f56a Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Thu, 20 Feb 2025 08:56:56 -0800
Subject: [PATCH 05/29] [Utils] Avoid repeated hash lookups (NFC) (#127959)

---
 llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
index 2d9a3d1f8a110..78b9c7d06e183 100644
--- a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
+++ b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
@@ -178,11 +178,9 @@ struct AssumeBuilderState {
     if (tryToPreserveWithoutAddingAssume(RK))
       return;
     MapKey Key{RK.WasOn, RK.AttrKind};
-    auto Lookup = AssumedKnowledgeMap.find(Key);
-    if (Lookup == AssumedKnowledgeMap.end()) {
-      AssumedKnowledgeMap[Key] = RK.ArgValue;
+    auto [Lookup, Inserted] = AssumedKnowledgeMap.try_emplace(Key, RK.ArgValue);
+    if (Inserted)
       return;
-    }
     assert(((Lookup->second == 0 && RK.ArgValue == 0) ||
             (Lookup->second != 0 && RK.ArgValue != 0)) &&
            "inconsistent argument value");

From be51ef4518ad3375e267667a89d379ce46efc4bb Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Thu, 20 Feb 2025 08:57:30 -0800
Subject: [PATCH 06/29] [WebAssembly] Avoid repeated hash lookups (NFC)
 (#127960)

---
 .../WebAssembly/WebAssemblySortRegion.cpp      | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.cpp
index cd84e68aed140..0469fbf15b251 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.cpp
@@ -28,17 +28,17 @@ const SortRegion *SortRegionInfo::getRegionFor(const MachineBasicBlock *MBB) {
   // WE->contains(ML->getHeader()), but not ML->contains(WE->getHeader()).
   if ((ML && !WE) || (ML && WE && WE->contains(ML->getHeader()))) {
     // If the smallest region containing MBB is a loop
-    if (LoopMap.count(ML))
-      return LoopMap[ML].get();
-    LoopMap[ML] = std::make_unique<ConcreteSortRegion<MachineLoop>>(ML);
-    return LoopMap[ML].get();
+    auto [It, Inserted] = LoopMap.try_emplace(ML);
+    if (Inserted)
+      It->second = std::make_unique<ConcreteSortRegion<MachineLoop>>(ML);
+    return It->second.get();
   } else {
     // If the smallest region containing MBB is an exception
-    if (ExceptionMap.count(WE))
-      return ExceptionMap[WE].get();
-    ExceptionMap[WE] =
-        std::make_unique<ConcreteSortRegion<WebAssemblyException>>(WE);
-    return ExceptionMap[WE].get();
+    auto [It, Inserted] = ExceptionMap.try_emplace(WE);
+    if (Inserted)
+      It->second =
+          std::make_unique<ConcreteSortRegion<WebAssemblyException>>(WE);
+    return It->second.get();
   }
 }
 

From 02e8fd7a30903d65c85b3cfb56e8039b71c31972 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard@arm.com>
Date: Thu, 20 Feb 2025 17:07:16 +0000
Subject: [PATCH 07/29] [ARM,AArch64] Fix ABI bugs with over-sized bitfields
 (#126774)

This fixes two bugs in the ABI for over-sized bitfields for ARM and
AArch64:

The container type picked for an over-sized bitfield already contributes
to the alignment of the structure, but it should also contribute to the
"unadjusted alignment" which is used by the ARM and AArch64 PCS.

AAPCS64 defines the bitfield layout algorithm for over-sized bitfields
as picking a container which is the fundamental integer data type with
the largest size less than or equal to the bit-field width. Since
AAPCS64 has a 128-bit integer fundamental data type, we need to consider
Int128 as a container type for AArch64.
---
 clang/include/clang/Basic/TargetInfo.h        |  8 +++
 clang/lib/AST/RecordLayoutBuilder.cpp         | 10 ++-
 clang/lib/Basic/TargetInfo.cpp                |  1 +
 clang/lib/Basic/Targets/AArch64.cpp           |  4 ++
 clang/test/CodeGen/aapcs-align.cpp            | 43 +++++++++++++
 clang/test/CodeGen/aapcs64-align.cpp          | 64 +++++++++++++++++++
 ...debug-info-structured-binding-bitfield.cpp |  4 +-
 7 files changed, 129 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 070cc792ca7db..db23afa6d6f0b 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -199,6 +199,10 @@ struct TransferrableTargetInfo {
   /// zero length bitfield, regardless of the zero length bitfield type.
   unsigned ZeroLengthBitfieldBoundary;
 
+  /// The largest container size which should be used for an over-sized
+  /// bitfield, in bits.
+  unsigned LargestOverSizedBitfieldContainer;
+
   /// If non-zero, specifies a maximum alignment to truncate alignment
   /// specified in the aligned attribute of a static variable to this value.
   unsigned MaxAlignedAttribute;
@@ -954,6 +958,10 @@ class TargetInfo : public TransferrableTargetInfo,
     return ZeroLengthBitfieldBoundary;
   }
 
+  unsigned getLargestOverSizedBitfieldContainer() const {
+    return LargestOverSizedBitfieldContainer;
+  }
+
   /// Get the maximum alignment in bits for a static variable with
   /// aligned attribute.
   unsigned getMaxAlignedAttribute() const { return MaxAlignedAttribute; }
diff --git a/clang/lib/AST/RecordLayoutBuilder.cpp b/clang/lib/AST/RecordLayoutBuilder.cpp
index 3e38ba0a43d98..b8600e6a344a4 100644
--- a/clang/lib/AST/RecordLayoutBuilder.cpp
+++ b/clang/lib/AST/RecordLayoutBuilder.cpp
@@ -1469,15 +1469,18 @@ void ItaniumRecordLayoutBuilder::LayoutWideBitField(uint64_t FieldSize,
   //   sizeof(T')*8 <= n.
 
   QualType IntegralPODTypes[] = {
-    Context.UnsignedCharTy, Context.UnsignedShortTy, Context.UnsignedIntTy,
-    Context.UnsignedLongTy, Context.UnsignedLongLongTy
+      Context.UnsignedCharTy,     Context.UnsignedShortTy,
+      Context.UnsignedIntTy,      Context.UnsignedLongTy,
+      Context.UnsignedLongLongTy, Context.UnsignedInt128Ty,
   };
 
   QualType Type;
+  uint64_t MaxSize =
+      Context.getTargetInfo().getLargestOverSizedBitfieldContainer();
   for (const QualType &QT : IntegralPODTypes) {
     uint64_t Size = Context.getTypeSize(QT);
 
-    if (Size > FieldSize)
+    if (Size > FieldSize || Size > MaxSize)
       break;
 
     Type = QT;
@@ -1520,6 +1523,7 @@ void ItaniumRecordLayoutBuilder::LayoutWideBitField(uint64_t FieldSize,
   setSize(std::max(getSizeInBits(), getDataSizeInBits()));
 
   // Remember max struct/class alignment.
+  UnadjustedAlignment = std::max(UnadjustedAlignment, TypeAlign);
   UpdateAlignment(TypeAlign);
 }
 
diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index c0bf4e686cf03..0699ec686e4e6 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -141,6 +141,7 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) {
   UseLeadingZeroLengthBitfield = true;
   UseExplicitBitFieldAlignment = true;
   ZeroLengthBitfieldBoundary = 0;
+  LargestOverSizedBitfieldContainer = 64;
   MaxAlignedAttribute = 0;
   HalfFormat = &llvm::APFloat::IEEEhalf();
   FloatFormat = &llvm::APFloat::IEEEsingle();
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index fad8d773bfc52..3633bab6e0df9 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -261,6 +261,10 @@ AArch64TargetInfo::AArch64TargetInfo(const llvm::Triple &Triple,
   assert(UseBitFieldTypeAlignment && "bitfields affect type alignment");
   UseZeroLengthBitfieldAlignment = true;
 
+  // AAPCS64 allows any "fundamental integer data type" to be used for
+  // over-sized bitfields, which includes 128-bit integers.
+  LargestOverSizedBitfieldContainer = 128;
+
   HasUnalignedAccess = true;
 
   // AArch64 targets default to using the ARM C++ ABI.
diff --git a/clang/test/CodeGen/aapcs-align.cpp b/clang/test/CodeGen/aapcs-align.cpp
index 4f393d9e6b7f3..c7bc5ba0bbfef 100644
--- a/clang/test/CodeGen/aapcs-align.cpp
+++ b/clang/test/CodeGen/aapcs-align.cpp
@@ -6,6 +6,11 @@
 
 extern "C" {
 
+// CHECK: @sizeof_OverSizedBitfield ={{.*}} global i32 8
+// CHECK: @alignof_OverSizedBitfield ={{.*}} global i32 8
+// CHECK: @sizeof_VeryOverSizedBitfield ={{.*}} global i32 16
+// CHECK: @alignof_VeryOverSizedBitfield ={{.*}} global i32 8
+
 // Base case, nothing interesting.
 struct S {
   int x, y;
@@ -138,4 +143,42 @@ void g6() {
 // CHECK: call void @f6m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, [4 x i32] [i32 6, i32 7, i32 0, i32 undef])
 // CHECK: declare void @f6(i32 noundef, [4 x i32])
 // CHECK: declare void @f6m(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, [4 x i32])
+
+// Over-sized bitfield, which results in a 64-bit container type, so 64-bit
+// alignment.
+struct OverSizedBitfield {
+  int x : 64;
+};
+
+unsigned sizeof_OverSizedBitfield = sizeof(OverSizedBitfield);
+unsigned alignof_OverSizedBitfield = alignof(OverSizedBitfield);
+
+// CHECK: define{{.*}} void @g7
+// CHECK: call void @f7(i32 noundef 1, [1 x i64] [i64 42])
+// CHECK: declare void @f7(i32 noundef, [1 x i64])
+void f7(int a, OverSizedBitfield b);
+void g7() {
+  OverSizedBitfield s = {42};
+  f7(1, s);
+}
+
+// There are no 128-bit fundamental data types defined by AAPCS32, so this gets
+// a 64-bit container plus 64 bits of padding, giving it a size of 16 bytes and
+// alignment of 8 bytes.
+struct VeryOverSizedBitfield {
+  int x : 128;
+};
+
+unsigned sizeof_VeryOverSizedBitfield = sizeof(VeryOverSizedBitfield);
+unsigned alignof_VeryOverSizedBitfield = alignof(VeryOverSizedBitfield);
+
+// CHECK: define{{.*}} void @g8
+// CHECK: call void @f8(i32 noundef 1, [2 x i64] [i64 42, i64 0])
+// CHECK: declare void @f8(i32 noundef, [2 x i64])
+void f8(int a, VeryOverSizedBitfield b);
+void g8() {
+  VeryOverSizedBitfield s = {42};
+  f8(1, s);
+}
+
 }
diff --git a/clang/test/CodeGen/aapcs64-align.cpp b/clang/test/CodeGen/aapcs64-align.cpp
index 7a8151022852e..e69faf231936c 100644
--- a/clang/test/CodeGen/aapcs64-align.cpp
+++ b/clang/test/CodeGen/aapcs64-align.cpp
@@ -5,6 +5,13 @@
 
 extern "C" {
 
+// CHECK: @sizeof_OverSizedBitfield ={{.*}} global i32 8
+// CHECK: @alignof_OverSizedBitfield ={{.*}} global i32 8
+// CHECK: @sizeof_VeryOverSizedBitfield ={{.*}} global i32 16
+// CHECK: @alignof_VeryOverSizedBitfield ={{.*}} global i32 16
+// CHECK: @sizeof_RidiculouslyOverSizedBitfield ={{.*}} global i32 32
+// CHECK: @alignof_RidiculouslyOverSizedBitfield ={{.*}} global i32 16
+
 // Base case, nothing interesting.
 struct S {
   long x, y;
@@ -161,5 +168,62 @@ int test_bitint8(){
 }
 // CHECK:  ret i32 1
 
+// Over-sized bitfield, which results in a 64-bit container type, so 64-bit
+// alignment.
+struct OverSizedBitfield {
+  int x : 64;
+};
+
+unsigned sizeof_OverSizedBitfield = sizeof(OverSizedBitfield);
+unsigned alignof_OverSizedBitfield = alignof(OverSizedBitfield);
+
+// CHECK: define{{.*}} void @g7
+// CHECK: call void @f7(i32 noundef 1, i64 42)
+// CHECK: declare void @f7(i32 noundef, i64)
+void f7(int a, OverSizedBitfield b);
+void g7() {
+  OverSizedBitfield s = {42};
+  f7(1, s);
+}
+
+// AAPCS64 does have a 128-bit integer fundamental data type, so this gets a
+// 128-bit container with 128-bit alignment. This is just within the limit of
+// what can be passed directly.
+struct VeryOverSizedBitfield {
+  int x : 128;
+};
+
+unsigned sizeof_VeryOverSizedBitfield = sizeof(VeryOverSizedBitfield);
+unsigned alignof_VeryOverSizedBitfield = alignof(VeryOverSizedBitfield);
+
+// CHECK: define{{.*}} void @g8
+// CHECK: call void @f8(i32 noundef 1, i128 42)
+// CHECK: declare void @f8(i32 noundef, i128)
+void f8(int a, VeryOverSizedBitfield b);
+void g8() {
+  VeryOverSizedBitfield s = {42};
+  f8(1, s);
+}
+
+// There are no bigger fundamental data types, so this gets a 128-bit container
+// and 128 bits of padding, giving the struct a size of 32 bytes, and an
+// alignment of 16 bytes. This is over the PCS size limit of 16 bytes, so it
+// will be passed indirectly.
+struct RidiculouslyOverSizedBitfield {
+  int x : 256;
+};
+
+unsigned sizeof_RidiculouslyOverSizedBitfield = sizeof(RidiculouslyOverSizedBitfield);
+unsigned alignof_RidiculouslyOverSizedBitfield = alignof(RidiculouslyOverSizedBitfield);
+
+// CHECK: define{{.*}} void @g9
+// CHECK: call void @f9(i32 noundef 1, ptr noundef nonnull %agg.tmp)
+// CHECK: declare void @f9(i32 noundef, ptr noundef)
+void f9(int a, RidiculouslyOverSizedBitfield b);
+void g9() {
+  RidiculouslyOverSizedBitfield s = {42};
+  f9(1, s);
+}
+
 }
 
diff --git a/clang/test/CodeGenCXX/debug-info-structured-binding-bitfield.cpp b/clang/test/CodeGenCXX/debug-info-structured-binding-bitfield.cpp
index e475f032f5ce3..b7aad6a5bcd21 100644
--- a/clang/test/CodeGenCXX/debug-info-structured-binding-bitfield.cpp
+++ b/clang/test/CodeGenCXX/debug-info-structured-binding-bitfield.cpp
@@ -248,8 +248,8 @@ struct S15 {
 };
 
 // CHECK-LABEL: define dso_local void @_Z4fS15v
-// CHECK:                        alloca %struct.S15, align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = alloca %struct.S15, align 8
+// CHECK:                        alloca %struct.S15, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = alloca %struct.S15, align 16
 // CHECK:         #dbg_declare(ptr [[TMP0]], [[S15_A:![0-9]+]], !DIExpression(DW_OP_LLVM_extract_bits_sext, 0, 32),
 // CHECK-NEXT:    #dbg_declare(ptr [[TMP0]], [[S15_B:![0-9]+]], !DIExpression(DW_OP_plus_uconst, 16, DW_OP_LLVM_extract_bits_zext, 0, 32),
 //

From 726c4b9f77862d83b6e5e16c8d5a2fc4fb1589a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?=
 =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?=
 =?UTF-8?q?=E3=83=B3=29?= <clementval@gmail.com>
Date: Thu, 20 Feb 2025 09:10:25 -0800
Subject: [PATCH 08/29] [flang][cuda] Lower match_all_sync functions to nvvm
 intrinsics (#127940)

---
 .../flang/Optimizer/Builder/IntrinsicCall.h   |  1 +
 flang/include/flang/Semantics/tools.h         |  1 +
 flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 52 +++++++++++++++++++
 flang/lib/Optimizer/CodeGen/CodeGen.cpp       |  7 +++
 flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp    |  3 +-
 flang/module/cudadevice.f90                   | 27 ++++++++++
 flang/test/Lower/CUDA/cuda-device-proc.cuf    | 21 ++++++++
 7 files changed, 111 insertions(+), 1 deletion(-)

diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 65732ce7f3224..caec6a913293f 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -335,6 +335,7 @@ struct IntrinsicLibrary {
   mlir::Value genMalloc(mlir::Type, llvm::ArrayRef<mlir::Value>);
   template <typename Shift>
   mlir::Value genMask(mlir::Type, llvm::ArrayRef<mlir::Value>);
+  mlir::Value genMatchAllSync(mlir::Type, llvm::ArrayRef<mlir::Value>);
   fir::ExtendedValue genMatmul(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genMatmulTranspose(mlir::Type,
                                         llvm::ArrayRef<fir::ExtendedValue>);
diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h
index e82446a2ba884..56dcfa88ad92d 100644
--- a/flang/include/flang/Semantics/tools.h
+++ b/flang/include/flang/Semantics/tools.h
@@ -231,6 +231,7 @@ inline bool NeedCUDAAlloc(const Symbol &sym) {
         (*details->cudaDataAttr() == common::CUDADataAttr::Device ||
             *details->cudaDataAttr() == common::CUDADataAttr::Managed ||
             *details->cudaDataAttr() == common::CUDADataAttr::Unified ||
+            *details->cudaDataAttr() == common::CUDADataAttr::Shared ||
             *details->cudaDataAttr() == common::CUDADataAttr::Pinned)) {
       return true;
     }
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 93744fa58ebc0..754496921ca3a 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -469,6 +469,22 @@ static constexpr IntrinsicHandler handlers[]{
     {"malloc", &I::genMalloc},
     {"maskl", &I::genMask<mlir::arith::ShLIOp>},
     {"maskr", &I::genMask<mlir::arith::ShRUIOp>},
+    {"match_all_syncjd",
+     &I::genMatchAllSync,
+     {{{"mask", asValue}, {"value", asValue}, {"pred", asAddr}}},
+     /*isElemental=*/false},
+    {"match_all_syncjf",
+     &I::genMatchAllSync,
+     {{{"mask", asValue}, {"value", asValue}, {"pred", asAddr}}},
+     /*isElemental=*/false},
+    {"match_all_syncjj",
+     &I::genMatchAllSync,
+     {{{"mask", asValue}, {"value", asValue}, {"pred", asAddr}}},
+     /*isElemental=*/false},
+    {"match_all_syncjx",
+     &I::genMatchAllSync,
+     {{{"mask", asValue}, {"value", asValue}, {"pred", asAddr}}},
+     /*isElemental=*/false},
     {"matmul",
      &I::genMatmul,
      {{{"matrix_a", asAddr}, {"matrix_b", asAddr}}},
@@ -6044,6 +6060,42 @@ mlir::Value IntrinsicLibrary::genMask(mlir::Type resultType,
   return result;
 }
 
+mlir::Value
+IntrinsicLibrary::genMatchAllSync(mlir::Type resultType,
+                                  llvm::ArrayRef<mlir::Value> args) {
+  assert(args.size() == 3);
+  bool is32 = args[1].getType().isInteger(32) || args[1].getType().isF32();
+
+  llvm::StringRef funcName =
+      is32 ? "llvm.nvvm.match.all.sync.i32p" : "llvm.nvvm.match.all.sync.i64p";
+  mlir::MLIRContext *context = builder.getContext();
+  mlir::Type i32Ty = builder.getI32Type();
+  mlir::Type i64Ty = builder.getI64Type();
+  mlir::Type i1Ty = builder.getI1Type();
+  mlir::Type retTy = mlir::TupleType::get(context, {resultType, i1Ty});
+  mlir::Type valTy = is32 ? i32Ty : i64Ty;
+
+  mlir::FunctionType ftype =
+      mlir::FunctionType::get(context, {i32Ty, valTy}, {retTy});
+  auto funcOp = builder.createFunction(loc, funcName, ftype);
+  llvm::SmallVector<mlir::Value> filteredArgs;
+  filteredArgs.push_back(args[0]);
+  if (args[1].getType().isF32() || args[1].getType().isF64())
+    filteredArgs.push_back(builder.create<fir::ConvertOp>(loc, valTy, args[1]));
+  else
+    filteredArgs.push_back(args[1]);
+  auto call = builder.create<fir::CallOp>(loc, funcOp, filteredArgs);
+  auto zero = builder.getIntegerAttr(builder.getIndexType(), 0);
+  auto value = builder.create<fir::ExtractValueOp>(
+      loc, resultType, call.getResult(0), builder.getArrayAttr(zero));
+  auto one = builder.getIntegerAttr(builder.getIndexType(), 1);
+  auto pred = builder.create<fir::ExtractValueOp>(loc, i1Ty, call.getResult(0),
+                                                  builder.getArrayAttr(one));
+  auto conv = builder.create<mlir::LLVM::ZExtOp>(loc, resultType, pred);
+  builder.create<fir::StoreOp>(loc, conv, args[2]);
+  return value;
+}
+
 // MATMUL
 fir::ExtendedValue
 IntrinsicLibrary::genMatmul(mlir::Type resultType,
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index c76b7cde55bdd..439cc7a856236 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -292,6 +292,12 @@ struct AllocaOpConversion : public fir::FIROpConversion<fir::AllocaOp> {
       rewriter.setInsertionPointAfter(size.getDefiningOp());
     }
 
+    if (auto dataAttr = alloc->getAttrOfType<cuf::DataAttributeAttr>(
+            cuf::getDataAttrName())) {
+      if (dataAttr.getValue() == cuf::DataAttribute::Shared)
+        allocaAs = 3;
+    }
+
     // NOTE: we used to pass alloc->getAttrs() in the builder for non opaque
     // pointers! Only propagate pinned and bindc_name to help debugging, but
     // this should have no functional purpose (and passing the operand segment
@@ -316,6 +322,7 @@ struct AllocaOpConversion : public fir::FIROpConversion<fir::AllocaOp> {
       rewriter.replaceOpWithNewOp<mlir::LLVM::AddrSpaceCastOp>(
           alloc, ::getLlvmPtrType(alloc.getContext(), programAs), llvmAlloc);
     }
+
     return mlir::success();
   }
 };
diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
index b05991a29a321..fa82f3916a57e 100644
--- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
+++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
@@ -57,7 +57,8 @@ static llvm::LogicalResult checkCudaAttr(Op op) {
   if (op.getDataAttr() == cuf::DataAttribute::Device ||
       op.getDataAttr() == cuf::DataAttribute::Managed ||
       op.getDataAttr() == cuf::DataAttribute::Unified ||
-      op.getDataAttr() == cuf::DataAttribute::Pinned)
+      op.getDataAttr() == cuf::DataAttribute::Pinned ||
+      op.getDataAttr() == cuf::DataAttribute::Shared)
     return mlir::success();
   return op.emitOpError()
          << "expect device, managed, pinned or unified cuda attribute";
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
index e473590a7d78f..c75c5c191ab51 100644
--- a/flang/module/cudadevice.f90
+++ b/flang/module/cudadevice.f90
@@ -562,4 +562,31 @@ attributes(device) integer(8) function clock64()
     end function
   end interface
 
+interface match_all_sync
+  attributes(device) integer function match_all_syncjj(mask, val, pred)
+!dir$ ignore_tkr(d) mask, (d) val, (d) pred
+  integer(4), value :: mask
+  integer(4), value :: val
+  integer(4)        :: pred
+  end function
+  attributes(device) integer function match_all_syncjx(mask, val, pred)
+!dir$ ignore_tkr(d) mask, (d) val, (d) pred
+  integer(4), value :: mask
+  integer(8), value :: val
+  integer(4)        :: pred
+  end function
+  attributes(device) integer function match_all_syncjf(mask, val, pred)
+!dir$ ignore_tkr(d) mask, (d) val, (d) pred
+  integer(4), value :: mask
+  real(4), value    :: val
+  integer(4)        :: pred
+  end function
+  attributes(device) integer function match_all_syncjd(mask, val, pred)
+!dir$ ignore_tkr(d) mask, (d) val, (d) pred
+  integer(4), value :: mask
+  real(8), value    :: val
+  integer(4)        :: pred
+  end function
+end interface
+
 end module
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 6a5524102c0ea..1210dae8608c8 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -112,6 +112,25 @@ end
 ! CHECK: fir.call @llvm.nvvm.barrier0.popc(%c1{{.*}}) fastmath<contract> : (i32) -> i32
 ! CHECK: fir.call @llvm.nvvm.barrier0.or(%c1{{.*}}) fastmath<contract> : (i32) -> i32
 
+attributes(device) subroutine testMatch()
+  integer :: a, ipred, mask, v32
+  integer(8) :: v64
+  real(4) :: r4
+  real(8) :: r8
+  a = match_all_sync(mask, v32, ipred)
+  a = match_all_sync(mask, v64, ipred)
+  a = match_all_sync(mask, r4, ipred)
+  a = match_all_sync(mask, r8, ipred)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtestmatch()
+! CHECK: fir.call @llvm.nvvm.match.all.sync.i32p
+! CHECK: fir.call @llvm.nvvm.match.all.sync.i64p
+! CHECK: fir.convert %{{.*}} : (f32) -> i32
+! CHECK: fir.call @llvm.nvvm.match.all.sync.i32p
+! CHECK: fir.convert %{{.*}} : (f64) -> i64
+! CHECK: fir.call @llvm.nvvm.match.all.sync.i64p
+
 ! CHECK: func.func private @llvm.nvvm.barrier0()
 ! CHECK: func.func private @llvm.nvvm.bar.warp.sync(i32)
 ! CHECK: func.func private @llvm.nvvm.membar.gl()
@@ -120,3 +139,5 @@ end
 ! CHECK: func.func private @llvm.nvvm.barrier0.and(i32) -> i32
 ! CHECK: func.func private @llvm.nvvm.barrier0.popc(i32) -> i32
 ! CHECK: func.func private @llvm.nvvm.barrier0.or(i32) -> i32
+! CHECK: func.func private @llvm.nvvm.match.all.sync.i32p(i32, i32) -> tuple<i32, i1>
+! CHECK: func.func private @llvm.nvvm.match.all.sync.i64p(i32, i64) -> tuple<i32, i1>

From f7a10f0e8d46085cfebfbd53c3ab19a5ab37774f Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka@google.com>
Date: Thu, 20 Feb 2025 09:39:32 -0800
Subject: [PATCH 09/29] [Index] Un-XFAIL test with msan

Passing after #127078.
---
 clang/test/Index/comment-to-html-xml-conversion.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/test/Index/comment-to-html-xml-conversion.cpp b/clang/test/Index/comment-to-html-xml-conversion.cpp
index e0a7cff5a9a3d..941aa8a27b6bf 100644
--- a/clang/test/Index/comment-to-html-xml-conversion.cpp
+++ b/clang/test/Index/comment-to-html-xml-conversion.cpp
@@ -20,7 +20,6 @@
 // RUN: FileCheck %s < %t/out.c-index-direct
 // RUN: FileCheck %s < %t/out.c-index-pch
 
-// XFAIL: msan
 // XFAIL: valgrind
 
 #ifndef HEADER

From 52ded67249210db8762a282a662fe8249c1f4331 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Thu, 20 Feb 2025 19:00:23 +0100
Subject: [PATCH 10/29] [LAA] Always require non-wrapping pointers for runtime
 checks. (#127543)

Currently we only check that the pointers involved in runtime checks do
not wrap when we need to perform dependency checks. When no dependency
checks are needed, we still generate runtime checks, even if the
pointers may wrap (see
test/Analysis/LoopAccessAnalysis/runtime-checks-may-wrap.ll).

If the pointer wraps, then we swap start and end of the runtime check,
leading to incorrect checks.

An Alive2 proof of what the runtime checks conceptually verify (on i4, so
that it completes in reasonable time), which demonstrates the incorrect
result, can be found at https://alive2.llvm.org/ce/z/KsHzn8

Depends on https://github.com/llvm/llvm-project/pull/127410 to avoid
more regressions.

PR: https://github.com/llvm/llvm-project/pull/127543
---
 llvm/lib/Analysis/LoopAccessAnalysis.cpp      | 41 ++++----
 .../LoopAccessAnalysis/nusw-predicates.ll     | 30 +++---
 ...ter-dependence-analysis-forked-pointers.ll |  4 +-
 ...untime-checks-after-dependence-analysis.ll | 28 +++---
 .../runtime-checks-may-wrap.ll                | 11 ++-
 .../AArch64/conditional-branches-cost.ll      | 24 ++---
 .../LoopVectorize/RISCV/strided-accesses.ll   | 39 +++++++-
 .../LoopVectorize/X86/interleave-cost.ll      | 93 ++++++++++++-------
 8 files changed, 166 insertions(+), 104 deletions(-)

diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index b3a42ed3377cd..a1d91de3bb788 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -677,7 +677,7 @@ class AccessAnalysis {
                             const DenseMap<Value *, const SCEV *> &Strides,
                             DenseMap<Value *, unsigned> &DepSetId,
                             Loop *TheLoop, unsigned &RunningDepId,
-                            unsigned ASId, bool ShouldCheckStride, bool Assume);
+                            unsigned ASId, bool Assume);
 
   /// Check whether we can check the pointers at runtime for
   /// non-intersection.
@@ -685,8 +685,9 @@ class AccessAnalysis {
   /// Returns true if we need no check or if we do and we can generate them
   /// (i.e. the pointers have computable bounds).
   bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
-                       Loop *TheLoop, const DenseMap<Value *, const SCEV *> &Strides,
-                       Value *&UncomputablePtr, bool ShouldCheckWrap = false);
+                       Loop *TheLoop,
+                       const DenseMap<Value *, const SCEV *> &Strides,
+                       Value *&UncomputablePtr);
 
   /// Goes over all memory accesses, checks whether a RT check is needed
   /// and builds sets of dependent accesses.
@@ -1115,13 +1116,11 @@ findForkedPointer(PredicatedScalarEvolution &PSE,
   return {{replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false}};
 }
 
-bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
-                                          MemAccessInfo Access, Type *AccessTy,
-                                          const DenseMap<Value *, const SCEV *> &StridesMap,
-                                          DenseMap<Value *, unsigned> &DepSetId,
-                                          Loop *TheLoop, unsigned &RunningDepId,
-                                          unsigned ASId, bool ShouldCheckWrap,
-                                          bool Assume) {
+bool AccessAnalysis::createCheckForAccess(
+    RuntimePointerChecking &RtCheck, MemAccessInfo Access, Type *AccessTy,
+    const DenseMap<Value *, const SCEV *> &StridesMap,
+    DenseMap<Value *, unsigned> &DepSetId, Loop *TheLoop,
+    unsigned &RunningDepId, unsigned ASId, bool Assume) {
   Value *Ptr = Access.getPointer();
 
   SmallVector<PointerIntPair<const SCEV *, 1, bool>> TranslatedPtrs =
@@ -1152,8 +1151,7 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
 
     // When we run after a failing dependency check we have to make sure
     // we don't have wrapping pointers.
-    if (ShouldCheckWrap &&
-        !isNoWrap(PSE, AR, TranslatedPtrs.size() == 1 ? Ptr : nullptr, AccessTy,
+    if (!isNoWrap(PSE, AR, TranslatedPtrs.size() == 1 ? Ptr : nullptr, AccessTy,
                   TheLoop, Assume)) {
       return false;
     }
@@ -1182,10 +1180,10 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
   return true;
 }
 
-bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
-                                     ScalarEvolution *SE, Loop *TheLoop,
-                                     const DenseMap<Value *, const SCEV *> &StridesMap,
-                                     Value *&UncomputablePtr, bool ShouldCheckWrap) {
+bool AccessAnalysis::canCheckPtrAtRT(
+    RuntimePointerChecking &RtCheck, ScalarEvolution *SE, Loop *TheLoop,
+    const DenseMap<Value *, const SCEV *> &StridesMap,
+    Value *&UncomputablePtr) {
   // Find pointers with computable bounds. We are going to use this information
   // to place a runtime bound check.
   bool CanDoRT = true;
@@ -1245,7 +1243,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
       for (const auto &AccessTy : Accesses[Access]) {
         if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
                                   DepSetId, TheLoop, RunningDepId, ASId,
-                                  ShouldCheckWrap, false)) {
+                                  false)) {
           LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
                             << *Access.getPointer() << '\n');
           Retries.emplace_back(Access, AccessTy);
@@ -1275,7 +1273,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
       for (const auto &[Access, AccessTy] : Retries) {
         if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
                                   DepSetId, TheLoop, RunningDepId, ASId,
-                                  ShouldCheckWrap, /*Assume=*/true)) {
+                                  /*Assume=*/true)) {
           CanDoAliasSetRT = false;
           UncomputablePtr = Access.getPointer();
           break;
@@ -2643,9 +2641,8 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
   // Find pointers with computable bounds. We are going to use this information
   // to place a runtime bound check.
   Value *UncomputablePtr = nullptr;
-  bool CanDoRTIfNeeded =
-      Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), TheLoop,
-                               SymbolicStrides, UncomputablePtr, false);
+  bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(
+      *PtrRtChecking, PSE->getSE(), TheLoop, SymbolicStrides, UncomputablePtr);
   if (!CanDoRTIfNeeded) {
     const auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
     recordAnalysis("CantIdentifyArrayBounds", I)
@@ -2676,7 +2673,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
       auto *SE = PSE->getSE();
       UncomputablePtr = nullptr;
       CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(
-          *PtrRtChecking, SE, TheLoop, SymbolicStrides, UncomputablePtr, true);
+          *PtrRtChecking, SE, TheLoop, SymbolicStrides, UncomputablePtr);
 
       // Check that we found the bounds for the pointer.
       if (!CanDoRTIfNeeded) {
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/nusw-predicates.ll b/llvm/test/Analysis/LoopAccessAnalysis/nusw-predicates.ll
index 5234d8f107271..d4f7f82a8cff1 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/nusw-predicates.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/nusw-predicates.ll
@@ -15,6 +15,12 @@ define void @int_and_pointer_predicate(ptr %v, i32 %N) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
 ; CHECK-NEXT:      Grouped accesses:
+; CHECK-NEXT:        Group [[GRP1:0x[0-9a-f]+]]:
+; CHECK-NEXT:          (Low: %v High: (2 + %v))
+; CHECK-NEXT:            Member: %v
+; CHECK-NEXT:        Group [[GRP2:0x[0-9a-f]+]]:
+; CHECK-NEXT:          (Low: %v High: (6 + (4 * (trunc i32 %N to i16)) + %v))
+; CHECK-NEXT:            Member: {%v,+,4}<%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -57,36 +63,36 @@ define void @int_and_multiple_pointer_predicates(ptr %v, ptr %w, i32 %N) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
 ; CHECK-NEXT:      Check 0:
-; CHECK-NEXT:        Comparing group ([[GRP1:0x[0-9a-f]+]]):
+; CHECK-NEXT:        Comparing group ([[GRP3:0x[0-9a-f]+]]):
 ; CHECK-NEXT:        ptr %v
-; CHECK-NEXT:        Against group ([[GRP2:0x[0-9a-f]+]]):
+; CHECK-NEXT:        Against group ([[GRP4:0x[0-9a-f]+]]):
 ; CHECK-NEXT:        ptr %w
 ; CHECK-NEXT:      Check 1:
-; CHECK-NEXT:        Comparing group ([[GRP1]]):
+; CHECK-NEXT:        Comparing group ([[GRP3]]):
 ; CHECK-NEXT:        ptr %v
-; CHECK-NEXT:        Against group ([[GRP3:0x[0-9a-f]+]]):
+; CHECK-NEXT:        Against group ([[GRP5:0x[0-9a-f]+]]):
 ; CHECK-NEXT:          %gep.w = getelementptr { i16, i16 }, ptr %w, i16 %iv.i16
 ; CHECK-NEXT:      Check 2:
-; CHECK-NEXT:        Comparing group ([[GRP4:0x[0-9a-f]+]]):
+; CHECK-NEXT:        Comparing group ([[GRP6:0x[0-9a-f]+]]):
 ; CHECK-NEXT:          %gep.v = getelementptr { i16, i16 }, ptr %v, i16 %iv.i16
-; CHECK-NEXT:        Against group ([[GRP2]]):
+; CHECK-NEXT:        Against group ([[GRP4]]):
 ; CHECK-NEXT:        ptr %w
 ; CHECK-NEXT:      Check 3:
-; CHECK-NEXT:        Comparing group ([[GRP4]]):
+; CHECK-NEXT:        Comparing group ([[GRP6]]):
 ; CHECK-NEXT:          %gep.v = getelementptr { i16, i16 }, ptr %v, i16 %iv.i16
-; CHECK-NEXT:        Against group ([[GRP3]]):
+; CHECK-NEXT:        Against group ([[GRP5]]):
 ; CHECK-NEXT:          %gep.w = getelementptr { i16, i16 }, ptr %w, i16 %iv.i16
 ; CHECK-NEXT:      Grouped accesses:
-; CHECK-NEXT:        Group [[GRP1]]:
+; CHECK-NEXT:        Group [[GRP3]]:
 ; CHECK-NEXT:          (Low: %v High: (2 + %v))
 ; CHECK-NEXT:            Member: %v
-; CHECK-NEXT:        Group [[GRP4]]:
+; CHECK-NEXT:        Group [[GRP6]]:
 ; CHECK-NEXT:          (Low: %v High: (6 + (4 * (trunc i32 %N to i16)) + %v))
 ; CHECK-NEXT:            Member: {%v,+,4}<%loop>
-; CHECK-NEXT:        Group [[GRP2]]:
+; CHECK-NEXT:        Group [[GRP4]]:
 ; CHECK-NEXT:          (Low: %w High: (2 + %w))
 ; CHECK-NEXT:            Member: %w
-; CHECK-NEXT:        Group [[GRP3]]:
+; CHECK-NEXT:        Group [[GRP5]]:
 ; CHECK-NEXT:          (Low: %w High: (6 + (4 * (trunc i32 %N to i16)) + %w))
 ; CHECK-NEXT:            Member: {%w,+,4}<%loop>
 ; CHECK-EMPTY:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll
index 38b7389ae9083..021447d53f943 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll
@@ -163,7 +163,7 @@ exit:
 define void @dependency_check_and_runtime_checks_needed_select_of_ptr_add_recs_may_wrap_1(ptr %a, ptr %b, ptr %c, i64 %offset, i64 %n) {
 ; CHECK-LABEL: 'dependency_check_and_runtime_checks_needed_select_of_ptr_add_recs_may_wrap_1'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: cannot check memory dependencies at runtime
+; CHECK-NEXT:      Report: cannot identify array bounds
 ; CHECK-NEXT:      Dependences:
 ; CHECK-NEXT:      Run-time memory checks:
 ; CHECK-NEXT:      Grouped accesses:
@@ -204,7 +204,7 @@ exit:
 define void @dependency_check_and_runtime_checks_needed_select_of_ptr_add_recs_may_wrap_2(ptr %a, ptr %b, ptr %c, i64 %offset, i64 %n) {
 ; CHECK-LABEL: 'dependency_check_and_runtime_checks_needed_select_of_ptr_add_recs_may_wrap_2'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: cannot check memory dependencies at runtime
+; CHECK-NEXT:      Report: cannot identify array bounds
 ; CHECK-NEXT:      Dependences:
 ; CHECK-NEXT:      Run-time memory checks:
 ; CHECK-NEXT:      Grouped accesses:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll
index 26c571b9cb63a..a15253a901488 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll
@@ -72,27 +72,27 @@ define void @dependency_check_and_runtime_checks_needed_gepb_not_inbounds_iv2_st
 ; CHECK-NEXT:        Comparing group ([[GRP4:0x[0-9a-f]+]]):
 ; CHECK-NEXT:          %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
 ; CHECK-NEXT:        Against group ([[GRP5:0x[0-9a-f]+]]):
-; CHECK-NEXT:          %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
+; CHECK-NEXT:          %gep.b = getelementptr i8, ptr %b, i64 %iv2
 ; CHECK-NEXT:      Check 1:
 ; CHECK-NEXT:        Comparing group ([[GRP4]]):
 ; CHECK-NEXT:          %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
 ; CHECK-NEXT:        Against group ([[GRP6:0x[0-9a-f]+]]):
-; CHECK-NEXT:          %gep.b = getelementptr i8, ptr %b, i64 %iv2
+; CHECK-NEXT:          %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
 ; CHECK-NEXT:      Check 2:
 ; CHECK-NEXT:        Comparing group ([[GRP5]]):
-; CHECK-NEXT:          %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
-; CHECK-NEXT:        Against group ([[GRP6]]):
 ; CHECK-NEXT:          %gep.b = getelementptr i8, ptr %b, i64 %iv2
+; CHECK-NEXT:        Against group ([[GRP6]]):
+; CHECK-NEXT:          %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
 ; CHECK-NEXT:      Grouped accesses:
 ; CHECK-NEXT:        Group [[GRP4]]:
 ; CHECK-NEXT:          (Low: %a High: ((4 * %n) + %a))
 ; CHECK-NEXT:            Member: {%a,+,4}<nuw><%loop>
 ; CHECK-NEXT:        Group [[GRP5]]:
-; CHECK-NEXT:          (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
-; CHECK-NEXT:            Member: {((4 * %offset) + %a),+,4}<%loop>
-; CHECK-NEXT:        Group [[GRP6]]:
 ; CHECK-NEXT:          (Low: %b High: (-1 + (5 * %n) + %b))
 ; CHECK-NEXT:            Member: {%b,+,5}<%loop>
+; CHECK-NEXT:        Group [[GRP6]]:
+; CHECK-NEXT:          (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
+; CHECK-NEXT:            Member: {((4 * %offset) + %a),+,4}<%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -265,27 +265,27 @@ define void @dependency_check_and_runtime_checks_needed_gepb_may_wrap(ptr %a, pt
 ; CHECK-NEXT:        Comparing group ([[GRP13:0x[0-9a-f]+]]):
 ; CHECK-NEXT:          %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
 ; CHECK-NEXT:        Against group ([[GRP14:0x[0-9a-f]+]]):
-; CHECK-NEXT:          %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
+; CHECK-NEXT:          %gep.b = getelementptr float, ptr %b, i64 %iv2
 ; CHECK-NEXT:      Check 1:
 ; CHECK-NEXT:        Comparing group ([[GRP13]]):
 ; CHECK-NEXT:          %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
 ; CHECK-NEXT:        Against group ([[GRP15:0x[0-9a-f]+]]):
-; CHECK-NEXT:          %gep.b = getelementptr float, ptr %b, i64 %iv2
+; CHECK-NEXT:          %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
 ; CHECK-NEXT:      Check 2:
 ; CHECK-NEXT:        Comparing group ([[GRP14]]):
-; CHECK-NEXT:          %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
-; CHECK-NEXT:        Against group ([[GRP15]]):
 ; CHECK-NEXT:          %gep.b = getelementptr float, ptr %b, i64 %iv2
+; CHECK-NEXT:        Against group ([[GRP15]]):
+; CHECK-NEXT:          %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
 ; CHECK-NEXT:      Grouped accesses:
 ; CHECK-NEXT:        Group [[GRP13]]:
 ; CHECK-NEXT:          (Low: %a High: ((4 * %n) + %a))
 ; CHECK-NEXT:            Member: {%a,+,4}<nuw><%loop>
 ; CHECK-NEXT:        Group [[GRP14]]:
-; CHECK-NEXT:          (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
-; CHECK-NEXT:            Member: {((4 * %offset) + %a),+,4}<%loop>
-; CHECK-NEXT:        Group [[GRP15]]:
 ; CHECK-NEXT:          (Low: %b High: (-4 + (8 * %n) + %b))
 ; CHECK-NEXT:            Member: {%b,+,8}<%loop>
+; CHECK-NEXT:        Group [[GRP15]]:
+; CHECK-NEXT:          (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
+; CHECK-NEXT:            Member: {((4 * %offset) + %a),+,4}<%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/runtime-checks-may-wrap.ll b/llvm/test/Analysis/LoopAccessAnalysis/runtime-checks-may-wrap.ll
index b27937862b261..cce6f829d05af 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/runtime-checks-may-wrap.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/runtime-checks-may-wrap.ll
@@ -11,20 +11,21 @@ define void @geps_may_wrap(ptr %a, ptr %b, i64 %N) {
 ; CHECK-NEXT:      Run-time memory checks:
 ; CHECK-NEXT:      Check 0:
 ; CHECK-NEXT:        Comparing group ([[GRP1:0x[0-9a-f]+]]):
-; CHECK-NEXT:          %gep.iv = getelementptr i32, ptr %a, i64 %iv
-; CHECK-NEXT:        Against group ([[GRP2:0x[0-9a-f]+]]):
 ; CHECK-NEXT:        ptr %b
+; CHECK-NEXT:        Against group ([[GRP2:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep.iv = getelementptr i32, ptr %a, i64 %iv
 ; CHECK-NEXT:      Grouped accesses:
 ; CHECK-NEXT:        Group [[GRP1]]:
-; CHECK-NEXT:          (Low: %a High: (16 + (12 * (trunc i128 ((zext i64 %N to i128) /u 3) to i16)) + %a))
-; CHECK-NEXT:            Member: {%a,+,12}<%loop>
-; CHECK-NEXT:        Group [[GRP2]]:
 ; CHECK-NEXT:          (Low: %b High: (4 + %b))
 ; CHECK-NEXT:            Member: %b
+; CHECK-NEXT:        Group [[GRP2]]:
+; CHECK-NEXT:          (Low: %a High: (16 + (12 * (trunc i128 ((zext i64 %N to i128) /u 3) to i16)) + %a))
+; CHECK-NEXT:            Member: {%a,+,12}<%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
 ; CHECK-NEXT:      {0,+,3}<%loop> Added Flags: <nusw>
+; CHECK-NEXT:      {%a,+,12}<%loop> Added Flags: <nusw>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Expressions re-written:
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index 754b86ab2fb87..cf4fc143fe8c3 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -1123,15 +1123,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
 ; DEFAULT-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
 ; DEFAULT:       vector.scevcheck:
-; DEFAULT-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 4
 ; DEFAULT-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
 ; DEFAULT-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
 ; DEFAULT-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
 ; DEFAULT-NEXT:    [[TMP1:%.*]] = sub i64 0, [[MUL_RESULT]]
-; DEFAULT-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
-; DEFAULT-NEXT:    [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]]
+; DEFAULT-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[MUL_RESULT]]
+; DEFAULT-NEXT:    [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[DST]]
 ; DEFAULT-NEXT:    [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
-; DEFAULT-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 8
+; DEFAULT-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 4
 ; DEFAULT-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
 ; DEFAULT-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
 ; DEFAULT-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
@@ -1139,12 +1138,13 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]]
 ; DEFAULT-NEXT:    [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP1]]
 ; DEFAULT-NEXT:    [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW4]]
+; DEFAULT-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 8
 ; DEFAULT-NEXT:    [[MUL5:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
 ; DEFAULT-NEXT:    [[MUL_RESULT6:%.*]] = extractvalue { i64, i1 } [[MUL5]], 0
 ; DEFAULT-NEXT:    [[MUL_OVERFLOW7:%.*]] = extractvalue { i64, i1 } [[MUL5]], 1
 ; DEFAULT-NEXT:    [[TMP9:%.*]] = sub i64 0, [[MUL_RESULT6]]
-; DEFAULT-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[MUL_RESULT6]]
-; DEFAULT-NEXT:    [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[DST]]
+; DEFAULT-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[SCEVGEP4]], i64 [[MUL_RESULT6]]
+; DEFAULT-NEXT:    [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[SCEVGEP4]]
 ; DEFAULT-NEXT:    [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW7]]
 ; DEFAULT-NEXT:    [[TMP13:%.*]] = or i1 [[TMP4]], [[TMP8]]
 ; DEFAULT-NEXT:    [[TMP14:%.*]] = or i1 [[TMP13]], [[TMP12]]
@@ -1337,15 +1337,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED-NEXT:    [[TMP0:%.*]] = add i64 [[N]], 1
 ; PRED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
 ; PRED:       vector.scevcheck:
-; PRED-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 4
 ; PRED-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
 ; PRED-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
 ; PRED-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
 ; PRED-NEXT:    [[TMP1:%.*]] = sub i64 0, [[MUL_RESULT]]
-; PRED-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
-; PRED-NEXT:    [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]]
+; PRED-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[MUL_RESULT]]
+; PRED-NEXT:    [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[DST]]
 ; PRED-NEXT:    [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
-; PRED-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 8
+; PRED-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 4
 ; PRED-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
 ; PRED-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
 ; PRED-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
@@ -1353,12 +1352,13 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]]
 ; PRED-NEXT:    [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP1]]
 ; PRED-NEXT:    [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW4]]
+; PRED-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 8
 ; PRED-NEXT:    [[MUL5:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
 ; PRED-NEXT:    [[MUL_RESULT6:%.*]] = extractvalue { i64, i1 } [[MUL5]], 0
 ; PRED-NEXT:    [[MUL_OVERFLOW7:%.*]] = extractvalue { i64, i1 } [[MUL5]], 1
 ; PRED-NEXT:    [[TMP9:%.*]] = sub i64 0, [[MUL_RESULT6]]
-; PRED-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[MUL_RESULT6]]
-; PRED-NEXT:    [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[DST]]
+; PRED-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[SCEVGEP4]], i64 [[MUL_RESULT6]]
+; PRED-NEXT:    [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[SCEVGEP4]]
 ; PRED-NEXT:    [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW7]]
 ; PRED-NEXT:    [[TMP13:%.*]] = or i1 [[TMP4]], [[TMP8]]
 ; PRED-NEXT:    [[TMP14:%.*]] = or i1 [[TMP13]], [[TMP12]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 4ea248254f2c6..f7b8758084056 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -500,18 +500,47 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
 ; STRIDED-NEXT:  entry:
 ; STRIDED-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; STRIDED-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
-; STRIDED-NEXT:    [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 24, i64 [[TMP1]])
+; STRIDED-NEXT:    [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 80, i64 [[TMP1]])
 ; STRIDED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP2]]
 ; STRIDED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; STRIDED:       vector.scevcheck:
+; STRIDED-NEXT:    [[TMP24:%.*]] = shl i64 [[STRIDE:%.*]], 2
+; STRIDED-NEXT:    [[TMP25:%.*]] = mul i64 [[STRIDE]], -4
+; STRIDED-NEXT:    [[TMP26:%.*]] = icmp slt i64 [[TMP24]], 0
+; STRIDED-NEXT:    [[TMP27:%.*]] = select i1 [[TMP26]], i64 [[TMP25]], i64 [[TMP24]]
+; STRIDED-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[TMP27]], i64 1023)
+; STRIDED-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
+; STRIDED-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; STRIDED-NEXT:    [[TMP28:%.*]] = sub i64 0, [[MUL_RESULT]]
+; STRIDED-NEXT:    [[TMP29:%.*]] = getelementptr i8, ptr [[P2:%.*]], i64 [[MUL_RESULT]]
+; STRIDED-NEXT:    [[TMP30:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP28]]
+; STRIDED-NEXT:    [[TMP31:%.*]] = icmp ult ptr [[TMP29]], [[P2]]
+; STRIDED-NEXT:    [[TMP32:%.*]] = icmp ugt ptr [[TMP30]], [[P2]]
+; STRIDED-NEXT:    [[TMP33:%.*]] = select i1 [[TMP26]], i1 [[TMP32]], i1 [[TMP31]]
+; STRIDED-NEXT:    [[TMP13:%.*]] = or i1 [[TMP33]], [[MUL_OVERFLOW]]
+; STRIDED-NEXT:    [[TMP34:%.*]] = icmp slt i64 [[TMP24]], 0
+; STRIDED-NEXT:    [[TMP15:%.*]] = select i1 [[TMP34]], i64 [[TMP25]], i64 [[TMP24]]
+; STRIDED-NEXT:    [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[TMP15]], i64 1023)
+; STRIDED-NEXT:    [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
+; STRIDED-NEXT:    [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
+; STRIDED-NEXT:    [[TMP16:%.*]] = sub i64 0, [[MUL_RESULT2]]
+; STRIDED-NEXT:    [[TMP35:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[MUL_RESULT2]]
+; STRIDED-NEXT:    [[TMP36:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP16]]
+; STRIDED-NEXT:    [[TMP37:%.*]] = icmp ult ptr [[TMP35]], [[P]]
+; STRIDED-NEXT:    [[TMP38:%.*]] = icmp ugt ptr [[TMP36]], [[P]]
+; STRIDED-NEXT:    [[TMP39:%.*]] = select i1 [[TMP34]], i1 [[TMP38]], i1 [[TMP37]]
+; STRIDED-NEXT:    [[TMP40:%.*]] = or i1 [[TMP39]], [[MUL_OVERFLOW3]]
+; STRIDED-NEXT:    [[TMP23:%.*]] = or i1 [[TMP13]], [[TMP40]]
+; STRIDED-NEXT:    br i1 [[TMP23]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK1:%.*]]
 ; STRIDED:       vector.memcheck:
-; STRIDED-NEXT:    [[TMP3:%.*]] = mul i64 [[STRIDE:%.*]], 4092
-; STRIDED-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P2:%.*]], i64 [[TMP3]]
+; STRIDED-NEXT:    [[TMP3:%.*]] = mul i64 [[STRIDE]], 4092
+; STRIDED-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP3]]
 ; STRIDED-NEXT:    [[TMP4:%.*]] = icmp ult ptr [[P2]], [[SCEVGEP]]
 ; STRIDED-NEXT:    [[UMIN:%.*]] = select i1 [[TMP4]], ptr [[P2]], ptr [[SCEVGEP]]
 ; STRIDED-NEXT:    [[TMP5:%.*]] = icmp ugt ptr [[P2]], [[SCEVGEP]]
 ; STRIDED-NEXT:    [[UMAX:%.*]] = select i1 [[TMP5]], ptr [[P2]], ptr [[SCEVGEP]]
 ; STRIDED-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[UMAX]], i64 4
-; STRIDED-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP3]]
+; STRIDED-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP3]]
 ; STRIDED-NEXT:    [[TMP6:%.*]] = icmp ult ptr [[P]], [[SCEVGEP2]]
 ; STRIDED-NEXT:    [[UMIN3:%.*]] = select i1 [[TMP6]], ptr [[P]], ptr [[SCEVGEP2]]
 ; STRIDED-NEXT:    [[TMP7:%.*]] = icmp ugt ptr [[P]], [[SCEVGEP2]]
@@ -554,7 +583,7 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
 ; STRIDED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
 ; STRIDED-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; STRIDED:       scalar.ph:
-; STRIDED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; STRIDED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[VECTOR_MEMCHECK1]] ]
 ; STRIDED-NEXT:    br label [[LOOP:%.*]]
 ; STRIDED:       loop:
 ; STRIDED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll
index cf66264486095..b885d85a96800 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll
@@ -114,8 +114,18 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr
 ; CHECK-SAME: ptr [[ARG:%.*]], i64 [[ARG1:%.*]], ptr [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[ARG1]], 1
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 18
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK:       [[VECTOR_SCEVCHECK]]:
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ARG]], i64 16
+; CHECK-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[ARG1]])
+; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
+; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 0, [[MUL_RESULT]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
 ; CHECK:       [[VECTOR_MEMCHECK]]:
 ; CHECK-NEXT:    [[TMP20:%.*]] = shl i64 [[ARG1]], 4
 ; CHECK-NEXT:    [[TMP21:%.*]] = add i64 [[TMP20]], 16
@@ -167,7 +177,7 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
@@ -266,19 +276,26 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[N]], 3
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 52
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 64
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
 ; CHECK:       [[VECTOR_SCEVCHECK]]:
 ; CHECK-NEXT:    [[TMP2:%.*]] = lshr i64 [[N]], 3
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 24
 ; CHECK-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
 ; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
 ; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 0, [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[MUL_RESULT]]
+; CHECK-NEXT:    [[TMP41:%.*]] = icmp ult ptr [[TMP32]], [[A]]
+; CHECK-NEXT:    [[TMP44:%.*]] = or i1 [[TMP41]], [[MUL_OVERFLOW]]
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 4
+; CHECK-NEXT:    [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
+; CHECK-NEXT:    [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
+; CHECK-NEXT:    [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
+; CHECK-NEXT:    [[TMP55:%.*]] = sub i64 0, [[MUL_RESULT2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult ptr [[TMP4]], [[SCEVGEP]]
-; CHECK-NEXT:    [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]]
-; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 28
+; CHECK-NEXT:    [[TMP57:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW3]]
+; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 8
 ; CHECK-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
 ; CHECK-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
 ; CHECK-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
@@ -286,7 +303,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp ult ptr [[TMP8]], [[SCEVGEP1]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW4]]
-; CHECK-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[A]], i64 20
+; CHECK-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[A]], i64 12
 ; CHECK-NEXT:    [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
 ; CHECK-NEXT:    [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0
 ; CHECK-NEXT:    [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1
@@ -302,7 +319,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[SCEVGEP9]], i64 [[MUL_RESULT11]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp ult ptr [[TMP16]], [[SCEVGEP9]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW12]]
-; CHECK-NEXT:    [[SCEVGEP13:%.*]] = getelementptr i8, ptr [[A]], i64 12
+; CHECK-NEXT:    [[SCEVGEP13:%.*]] = getelementptr i8, ptr [[A]], i64 20
 ; CHECK-NEXT:    [[MUL14:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
 ; CHECK-NEXT:    [[MUL_RESULT15:%.*]] = extractvalue { i64, i1 } [[MUL14]], 0
 ; CHECK-NEXT:    [[MUL_OVERFLOW16:%.*]] = extractvalue { i64, i1 } [[MUL14]], 1
@@ -310,7 +327,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[SCEVGEP13]], i64 [[MUL_RESULT15]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = icmp ult ptr [[TMP20]], [[SCEVGEP13]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = or i1 [[TMP21]], [[MUL_OVERFLOW16]]
-; CHECK-NEXT:    [[SCEVGEP17:%.*]] = getelementptr i8, ptr [[A]], i64 8
+; CHECK-NEXT:    [[SCEVGEP17:%.*]] = getelementptr i8, ptr [[A]], i64 24
 ; CHECK-NEXT:    [[MUL18:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
 ; CHECK-NEXT:    [[MUL_RESULT19:%.*]] = extractvalue { i64, i1 } [[MUL18]], 0
 ; CHECK-NEXT:    [[MUL_OVERFLOW20:%.*]] = extractvalue { i64, i1 } [[MUL18]], 1
@@ -318,7 +335,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr i8, ptr [[SCEVGEP17]], i64 [[MUL_RESULT19]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = icmp ult ptr [[TMP24]], [[SCEVGEP17]]
 ; CHECK-NEXT:    [[TMP26:%.*]] = or i1 [[TMP25]], [[MUL_OVERFLOW20]]
-; CHECK-NEXT:    [[SCEVGEP21:%.*]] = getelementptr i8, ptr [[A]], i64 4
+; CHECK-NEXT:    [[SCEVGEP21:%.*]] = getelementptr i8, ptr [[A]], i64 28
 ; CHECK-NEXT:    [[MUL22:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
 ; CHECK-NEXT:    [[MUL_RESULT23:%.*]] = extractvalue { i64, i1 } [[MUL22]], 0
 ; CHECK-NEXT:    [[MUL_OVERFLOW24:%.*]] = extractvalue { i64, i1 } [[MUL22]], 1
@@ -326,37 +343,47 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr i8, ptr [[SCEVGEP21]], i64 [[MUL_RESULT23]]
 ; CHECK-NEXT:    [[TMP29:%.*]] = icmp ult ptr [[TMP28]], [[SCEVGEP21]]
 ; CHECK-NEXT:    [[TMP30:%.*]] = or i1 [[TMP29]], [[MUL_OVERFLOW24]]
+; CHECK-NEXT:    [[SCEVGEP31:%.*]] = getelementptr i8, ptr [[B]], i64 4
+; CHECK-NEXT:    [[MUL29:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[TMP2]])
+; CHECK-NEXT:    [[MUL_RESULT30:%.*]] = extractvalue { i64, i1 } [[MUL29]], 0
+; CHECK-NEXT:    [[MUL_OVERFLOW31:%.*]] = extractvalue { i64, i1 } [[MUL29]], 1
+; CHECK-NEXT:    [[TMP67:%.*]] = sub i64 0, [[MUL_RESULT30]]
+; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr i8, ptr [[SCEVGEP31]], i64 [[MUL_RESULT30]]
+; CHECK-NEXT:    [[TMP69:%.*]] = icmp ult ptr [[TMP68]], [[SCEVGEP31]]
+; CHECK-NEXT:    [[TMP70:%.*]] = or i1 [[TMP69]], [[MUL_OVERFLOW31]]
 ; CHECK-NEXT:    [[MUL25:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
 ; CHECK-NEXT:    [[MUL_RESULT26:%.*]] = extractvalue { i64, i1 } [[MUL25]], 0
 ; CHECK-NEXT:    [[MUL_OVERFLOW27:%.*]] = extractvalue { i64, i1 } [[MUL25]], 1
 ; CHECK-NEXT:    [[TMP31:%.*]] = sub i64 0, [[MUL_RESULT26]]
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[MUL_RESULT26]]
-; CHECK-NEXT:    [[TMP33:%.*]] = icmp ult ptr [[TMP32]], [[A]]
+; CHECK-NEXT:    [[TMP71:%.*]] = getelementptr i8, ptr [[B]], i64 [[MUL_RESULT26]]
+; CHECK-NEXT:    [[TMP33:%.*]] = icmp ult ptr [[TMP71]], [[B]]
 ; CHECK-NEXT:    [[TMP34:%.*]] = or i1 [[TMP33]], [[MUL_OVERFLOW27]]
+; CHECK-NEXT:    [[TMP6:%.*]] = or i1 [[TMP44]], [[TMP57]]
 ; CHECK-NEXT:    [[TMP35:%.*]] = or i1 [[TMP6]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP14]]
 ; CHECK-NEXT:    [[TMP37:%.*]] = or i1 [[TMP36]], [[TMP18]]
 ; CHECK-NEXT:    [[TMP38:%.*]] = or i1 [[TMP37]], [[TMP22]]
 ; CHECK-NEXT:    [[TMP39:%.*]] = or i1 [[TMP38]], [[TMP26]]
 ; CHECK-NEXT:    [[TMP40:%.*]] = or i1 [[TMP39]], [[TMP30]]
-; CHECK-NEXT:    [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP34]]
-; CHECK-NEXT:    br i1 [[TMP41]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK-NEXT:    [[TMP72:%.*]] = or i1 [[TMP40]], [[TMP70]]
+; CHECK-NEXT:    [[TMP73:%.*]] = or i1 [[TMP72]], [[TMP34]]
+; CHECK-NEXT:    br i1 [[TMP73]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
 ; CHECK:       [[VECTOR_MEMCHECK]]:
 ; CHECK-NEXT:    [[TMP42:%.*]] = lshr i64 [[N]], 3
 ; CHECK-NEXT:    [[TMP43:%.*]] = shl i64 [[TMP42]], 5
-; CHECK-NEXT:    [[TMP44:%.*]] = add i64 [[TMP43]], 32
-; CHECK-NEXT:    [[SCEVGEP28:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP44]]
 ; CHECK-NEXT:    [[TMP45:%.*]] = add nuw nsw i64 [[TMP43]], 4
 ; CHECK-NEXT:    [[SCEVGEP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP45]]
+; CHECK-NEXT:    [[TMP53:%.*]] = add i64 [[TMP43]], 32
+; CHECK-NEXT:    [[SCEVGEP28:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP53]]
 ; CHECK-NEXT:    [[TMP46:%.*]] = shl i64 [[TMP42]], 4
 ; CHECK-NEXT:    [[TMP47:%.*]] = add nuw nsw i64 [[TMP46]], 8
 ; CHECK-NEXT:    [[SCEVGEP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP47]]
-; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP29]]
 ; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP28]]
-; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; CHECK-NEXT:    [[BOUND031:%.*]] = icmp ult ptr [[A]], [[SCEVGEP30]]
+; CHECK-NEXT:    [[BOUND2:%.*]] = icmp ult ptr [[A]], [[SCEVGEP29]]
+; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND1]], [[BOUND2]]
 ; CHECK-NEXT:    [[BOUND132:%.*]] = icmp ult ptr [[B]], [[SCEVGEP28]]
-; CHECK-NEXT:    [[FOUND_CONFLICT33:%.*]] = and i1 [[BOUND031]], [[BOUND132]]
+; CHECK-NEXT:    [[BOUND133:%.*]] = icmp ult ptr [[A]], [[SCEVGEP30]]
+; CHECK-NEXT:    [[FOUND_CONFLICT33:%.*]] = and i1 [[BOUND132]], [[BOUND133]]
 ; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT33]]
 ; CHECK-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
@@ -378,7 +405,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:    [[STRIDED_VEC34:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
 ; CHECK-NEXT:    [[TMP56:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP50]]
 ; CHECK-NEXT:    [[TMP54:%.*]] = getelementptr i32, ptr [[B]], <4 x i64> [[VEC_IND]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP54]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison), !alias.scope [[META6:![0-9]+]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP54]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison), !alias.scope [[META6:![0-9]+]], !noalias [[META9:![0-9]+]]
 ; CHECK-NEXT:    [[TMP58:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP59:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC34]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP60:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_GATHER]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -390,7 +417,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 32)
 ; CHECK-NEXT:    [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP64]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP64]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    br label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
@@ -431,7 +458,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:    store i32 0, ptr [[GEP_A_7]], align 4
 ; CHECK-NEXT:    [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
-; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;
@@ -498,7 +525,7 @@ define void @interleave_store_double_i64(ptr %dst) {
 ; CHECK-NEXT:    store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
@@ -512,7 +539,7 @@ define void @interleave_store_double_i64(ptr %dst) {
 ; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP_0]], align 8
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
-; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;
@@ -620,7 +647,7 @@ define void @interleave_store_i64_double_2(ptr %dst) {
 ; CHECK-NEXT:    store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
@@ -634,7 +661,7 @@ define void @interleave_store_i64_double_2(ptr %dst) {
 ; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP_1]], align 8
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
-; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;
@@ -670,10 +697,12 @@ attributes #1 = { "min-legal-vector-width"="0" "target-cpu"="cascadelake" }
 ; CHECK: [[META6]] = !{[[META7:![0-9]+]]}
 ; CHECK: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]}
 ; CHECK: [[META8]] = distinct !{[[META8]], !"LVerDomain"}
-; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
-; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]}
+; CHECK: [[META9]] = !{[[META10:![0-9]+]]}
+; CHECK: [[META10]] = distinct !{[[META10]], [[META8]]}
 ; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]}
-; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META1]]}
+; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]]}
 ; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]]}
 ; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META2]], [[META1]]}
+; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META1]], [[META2]]}
+; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META2]], [[META1]]}
 ;.

From 2fab6db72811e52cf18d172caf243e22f8bd3851 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 20 Feb 2025 18:02:34 +0000
Subject: [PATCH 11/29] [VectorCombine] foldSelectShuffle - remove extra adds
 of old shuffles to worklist (#127999)

We already push the old shuffles to the worklist as part of the replaceValue calls, so we shouldn't need to add them to the deferred list as well. My guess is that this was done to ensure that the instructions got erased first, to help clean up unused instructions, but eraseInstruction should handle this now.
---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 746742e14d080..cdb8853f7503c 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -3036,8 +3036,6 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
   Worklist.pushValue(NSV0B);
   Worklist.pushValue(NSV1A);
   Worklist.pushValue(NSV1B);
-  for (auto *S : Shuffles)
-    Worklist.add(S);
   return true;
 }
 

From 00f5aaf841cbd6e2df9158538e36d66632af9cd5 Mon Sep 17 00:00:00 2001
From: Vinay Deshmukh <32487576+vinay-deshmukh@users.noreply.github.com>
Date: Thu, 20 Feb 2025 13:08:26 -0500
Subject: [PATCH 12/29] [libc]: Clean up unnecessary function pointers in scanf
 (#121215)

Resolves #115394

1. Move the definitions of the cross-platform `getc` and `ungetc` to
   `reader.h`.
2. Remove the function pointer members so that the functions are defined once
   per platform in `reader.h`.
3. Built in overlay mode on macOS (M1).
4. Remove `reader.cpp`, as it is now empty.

Also, the full build doesn't yet build on macOS (M1), AFAIK.
---
 libc/src/stdio/scanf_core/CMakeLists.txt     | 17 +++-
 libc/src/stdio/scanf_core/reader.cpp         | 29 -------
 libc/src/stdio/scanf_core/reader.h           | 87 ++++++++++++++++----
 libc/src/stdio/scanf_core/vfscanf_internal.h | 31 +------
 4 files changed, 86 insertions(+), 78 deletions(-)
 delete mode 100644 libc/src/stdio/scanf_core/reader.cpp

diff --git a/libc/src/stdio/scanf_core/CMakeLists.txt b/libc/src/stdio/scanf_core/CMakeLists.txt
index a8935d464417c..35b8b3d318a9f 100644
--- a/libc/src/stdio/scanf_core/CMakeLists.txt
+++ b/libc/src/stdio/scanf_core/CMakeLists.txt
@@ -54,15 +54,26 @@ add_object_library(
     libc.src.__support.arg_list
 )
 
-add_object_library(
+if(LIBC_TARGET_OS_IS_GPU)
+add_header_library(
+  reader
+  HDRS
+    reader.h
+  DEPENDS
+    libc.src.__support.macros.attributes
+)
+elseif((TARGET libc.src.__support.File.file) OR (NOT LLVM_LIBC_FULL_BUILD))
+add_header_library(
   reader
-  SRCS
-    reader.cpp
   HDRS
     reader.h
   DEPENDS
     libc.src.__support.macros.attributes
+    libc.hdr.types.FILE
+    libc.src.__support.File.file
+  ${use_system_file}
 )
+endif()
 
 add_object_library(
   converter
diff --git a/libc/src/stdio/scanf_core/reader.cpp b/libc/src/stdio/scanf_core/reader.cpp
deleted file mode 100644
index ec1f5c098dc7a..0000000000000
--- a/libc/src/stdio/scanf_core/reader.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-//===-- Reader definition for scanf -----------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/stdio/scanf_core/reader.h"
-#include "src/__support/macros/config.h"
-#include <stddef.h>
-
-namespace LIBC_NAMESPACE_DECL {
-namespace scanf_core {
-
-void Reader::ungetc(char c) {
-  --cur_chars_read;
-  if (rb != nullptr && rb->buff_cur > 0) {
-    // While technically c should be written back to the buffer, in scanf we
-    // always write the character that was already there. Additionally, the
-    // buffer is most likely to contain a string that isn't part of a file,
-    // which may not be writable.
-    --(rb->buff_cur);
-    return;
-  }
-  stream_ungetc(static_cast<int>(c), input_stream);
-}
-} // namespace scanf_core
-} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/scanf_core/reader.h b/libc/src/stdio/scanf_core/reader.h
index f984fd9378910..a545a605ff150 100644
--- a/libc/src/stdio/scanf_core/reader.h
+++ b/libc/src/stdio/scanf_core/reader.h
@@ -9,15 +9,68 @@
 #ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_READER_H
 #define LLVM_LIBC_SRC_STDIO_SCANF_CORE_READER_H
 
+#include "hdr/types/FILE.h"
+
+#ifndef LIBC_COPT_STDIO_USE_SYSTEM_FILE
+#include "src/__support/File/file.h"
+#endif
+
 #include "src/__support/macros/attributes.h" // For LIBC_INLINE
 #include "src/__support/macros/config.h"
+
 #include <stddef.h>
 
 namespace LIBC_NAMESPACE_DECL {
 namespace scanf_core {
-
-using StreamGetc = int (*)(void *);
-using StreamUngetc = void (*)(int, void *);
+// We use the name "reader_internal" rather than "internal" because "internal"
+// makes name lookups ambiguous in files that include this header: a use such
+// as `internal::foo` in those files would be looked up in
+// `LIBC_NAMESPACE::scanf_core::internal` instead of `LIBC_NAMESPACE::internal`,
+// e.g. for `internal::ArgList` in `libc/src/stdio/scanf_core/scanf_main.h`.
+namespace reader_internal {
+
+#if defined(LIBC_TARGET_ARCH_IS_GPU)
+// The GPU build provides FILE access through the host operating system's
+// library. So here we simply use the public entrypoints like in the SYSTEM_FILE
+// interface. Entrypoints should normally not call others, this is an exception.
+// FIXME: We do not acquire any locks here, so this is not thread safe.
+LIBC_INLINE int getc(void *f) {
+  return LIBC_NAMESPACE::getc(reinterpret_cast<::FILE *>(f));
+}
+
+LIBC_INLINE void ungetc(int c, void *f) {
+  LIBC_NAMESPACE::ungetc(c, reinterpret_cast<::FILE *>(f));
+}
+
+#elif !defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE)
+
+LIBC_INLINE int getc(void *f) {
+  unsigned char c;
+  auto result =
+      reinterpret_cast<LIBC_NAMESPACE::File *>(f)->read_unlocked(&c, 1);
+  size_t r = result.value;
+  if (result.has_error() || r != 1)
+    return '\0';
+
+  return c;
+}
+
+LIBC_INLINE void ungetc(int c, void *f) {
+  reinterpret_cast<LIBC_NAMESPACE::File *>(f)->ungetc_unlocked(c);
+}
+
+#else  // defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE)
+
+// Since ungetc_unlocked isn't always available, we don't acquire the lock for
+// system files.
+LIBC_INLINE int getc(void *f) { return ::getc(reinterpret_cast<::FILE *>(f)); }
+
+LIBC_INLINE void ungetc(int c, void *f) {
+  ::ungetc(c, reinterpret_cast<::FILE *>(f));
+}
+#endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE
+
+} // namespace reader_internal
 
 // This is intended to be either a raw string or a buffer syncronized with the
 // file's internal buffer.
@@ -29,24 +82,15 @@ struct ReadBuffer {
 
 class Reader {
   ReadBuffer *rb;
-
   void *input_stream = nullptr;
-
-  // TODO: Remove these unnecessary function pointers
-  StreamGetc stream_getc = nullptr;
-  StreamUngetc stream_ungetc = nullptr;
-
   size_t cur_chars_read = 0;
 
 public:
   // TODO: Set buff_len with a proper constant
   LIBC_INLINE Reader(ReadBuffer *string_buffer) : rb(string_buffer) {}
 
-  LIBC_INLINE Reader(void *stream, StreamGetc stream_getc_in,
-                     StreamUngetc stream_ungetc_in,
-                     ReadBuffer *stream_buffer = nullptr)
-      : rb(stream_buffer), input_stream(stream), stream_getc(stream_getc_in),
-        stream_ungetc(stream_ungetc_in) {}
+  LIBC_INLINE Reader(void *stream, ReadBuffer *stream_buffer = nullptr)
+      : rb(stream_buffer), input_stream(stream) {}
 
   // This returns the next character from the input and advances it by one
   // character. When it hits the end of the string or file it returns '\0' to
@@ -59,12 +103,23 @@ class Reader {
       return output;
     }
     // This should reset the buffer if applicable.
-    return static_cast<char>(stream_getc(input_stream));
+    return static_cast<char>(reader_internal::getc(input_stream));
   }
 
   // This moves the input back by one character, placing c into the buffer if
   // this is a file reader, else c is ignored.
-  void ungetc(char c);
+  LIBC_INLINE void ungetc(char c) {
+    --cur_chars_read;
+    if (rb != nullptr && rb->buff_cur > 0) {
+      // While technically c should be written back to the buffer, in scanf we
+      // always write the character that was already there. Additionally, the
+      // buffer is most likely to contain a string that isn't part of a file,
+      // which may not be writable.
+      --(rb->buff_cur);
+      return;
+    }
+    reader_internal::ungetc(static_cast<int>(c), input_stream);
+  }
 
   LIBC_INLINE size_t chars_read() { return cur_chars_read; }
 };
diff --git a/libc/src/stdio/scanf_core/vfscanf_internal.h b/libc/src/stdio/scanf_core/vfscanf_internal.h
index 67126431fcded..84d074711b8fb 100644
--- a/libc/src/stdio/scanf_core/vfscanf_internal.h
+++ b/libc/src/stdio/scanf_core/vfscanf_internal.h
@@ -38,14 +38,6 @@ LIBC_INLINE void flockfile(::FILE *) { return; }
 
 LIBC_INLINE void funlockfile(::FILE *) { return; }
 
-LIBC_INLINE int getc(void *f) {
-  return LIBC_NAMESPACE::getc(reinterpret_cast<::FILE *>(f));
-}
-
-LIBC_INLINE void ungetc(int c, void *f) {
-  LIBC_NAMESPACE::ungetc(c, reinterpret_cast<::FILE *>(f));
-}
-
 LIBC_INLINE int ferror_unlocked(::FILE *f) { return LIBC_NAMESPACE::ferror(f); }
 
 #elif !defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE)
@@ -58,21 +50,6 @@ LIBC_INLINE void funlockfile(FILE *f) {
   reinterpret_cast<LIBC_NAMESPACE::File *>(f)->unlock();
 }
 
-LIBC_INLINE int getc(void *f) {
-  unsigned char c;
-  auto result =
-      reinterpret_cast<LIBC_NAMESPACE::File *>(f)->read_unlocked(&c, 1);
-  size_t r = result.value;
-  if (result.has_error() || r != 1)
-    return '\0';
-
-  return c;
-}
-
-LIBC_INLINE void ungetc(int c, void *f) {
-  reinterpret_cast<LIBC_NAMESPACE::File *>(f)->ungetc_unlocked(c);
-}
-
 LIBC_INLINE int ferror_unlocked(FILE *f) {
   return reinterpret_cast<LIBC_NAMESPACE::File *>(f)->error_unlocked();
 }
@@ -85,12 +62,6 @@ LIBC_INLINE void flockfile(::FILE *) { return; }
 
 LIBC_INLINE void funlockfile(::FILE *) { return; }
 
-LIBC_INLINE int getc(void *f) { return ::getc(reinterpret_cast<::FILE *>(f)); }
-
-LIBC_INLINE void ungetc(int c, void *f) {
-  ::ungetc(c, reinterpret_cast<::FILE *>(f));
-}
-
 LIBC_INLINE int ferror_unlocked(::FILE *f) { return ::ferror(f); }
 
 #endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE
@@ -103,7 +74,7 @@ LIBC_INLINE int vfscanf_internal(::FILE *__restrict stream,
                                  const char *__restrict format,
                                  internal::ArgList &args) {
   internal::flockfile(stream);
-  scanf_core::Reader reader(stream, &internal::getc, internal::ungetc);
+  scanf_core::Reader reader(stream);
   int retval = scanf_core::scanf_main(&reader, format, args);
   if (retval == 0 && internal::ferror_unlocked(stream))
     retval = EOF;

From 9ba438d3217505c48a7b9fb4cbe75ab9e055093f Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Thu, 20 Feb 2025 10:10:14 -0800
Subject: [PATCH 13/29] [RISCV] Remove some unnecessary casts from int64_t to
 uint64_t. NFC

We have a lot of casts near this to avoid undefined behavior or
arithmetic on arbitrary signed integers, but the casts removed here
don't appear to be necessary.
---
 llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index 06ae8e1296e51..e40c85abc8b5d 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -134,7 +134,7 @@ static void generateInstSeqImpl(int64_t Val, const MCSubtargetInfo &STI,
     }
 
     // Try to use SLLI_UW for Val when it is uint32 but not int32.
-    if (isUInt<32>((uint64_t)Val) && !isInt<32>((uint64_t)Val) &&
+    if (isUInt<32>(Val) && !isInt<32>(Val) &&
         STI.hasFeature(RISCV::FeatureStdExtZba)) {
       // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with
       // SLLI_UW.

From 10b99e97ff866b43569531b890c11b4a3011f6a2 Mon Sep 17 00:00:00 2001
From: vporpo <vporpodas@google.com>
Date: Thu, 20 Feb 2025 10:21:25 -0800
Subject: [PATCH 14/29] [SandboxVec][BottomUpVec] Separate vectorization
 decisions from code generation (#127727)

Up until now the generation of vector instructions was taking place
during the top-down post-order traversal of vectorizeRec(). The issue
with this approach is that the vector instructions emitted during the
traversal can be reordered by the scheduler, making it challenging to
place them without breaking the def-before-uses rule.

With this patch we separate the vectorization decisions (done in
`vectorizeRec()`) from the code generation phase (`emitVectors()`). The
vectorization decisions are stored in the `Actions` vector and are used
by `emitVectors()` to drive code generation.
---
 .../Vectorize/SandboxVectorizer/InstrMaps.h   |  67 +++--
 .../Vectorize/SandboxVectorizer/Legality.h    |  33 +--
 .../SandboxVectorizer/Passes/BottomUpVec.h    |  30 +-
 .../Vectorize/SandboxVectorizer/InstrMaps.cpp |  13 +
 .../SandboxVectorizer/Passes/BottomUpVec.cpp  | 268 +++++++++++-------
 .../SandboxVectorizer/bottomup_basic.ll       |  34 +++
 .../Transforms/SandboxVectorizer/scheduler.ll |  10 +-
 .../SandboxVectorizer/InstrMapsTest.cpp       |  77 ++---
 .../SandboxVectorizer/LegalityTest.cpp        |  26 +-
 9 files changed, 346 insertions(+), 212 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h
index 9bdf940fc77b7..4385df518a111 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h
@@ -23,57 +23,54 @@
 
 namespace llvm::sandboxir {
 
+class LegalityResult;
+
+struct Action {
+  unsigned Idx = 0;
+  const LegalityResult *LegalityRes = nullptr;
+  SmallVector<Value *, 4> Bndl;
+  SmallVector<Value *> UserBndl;
+  unsigned Depth;
+  SmallVector<Action *> Operands;
+  Value *Vec = nullptr;
+  Action(const LegalityResult *LR, ArrayRef<Value *> B, ArrayRef<Value *> UB,
+         unsigned Depth)
+      : LegalityRes(LR), Bndl(B), UserBndl(UB), Depth(Depth) {}
+#ifndef NDEBUG
+  void print(raw_ostream &OS) const;
+  void dump() const;
+  friend raw_ostream &operator<<(raw_ostream &OS, const Action &A) {
+    A.print(OS);
+    return OS;
+  }
+#endif // NDEBUG
+};
+
 /// Maps the original instructions to the vectorized instrs and the reverse.
 /// For now an original instr can only map to a single vector.
 class InstrMaps {
   /// A map from the original values that got combined into vectors, to the
-  /// vector value(s).
-  DenseMap<Value *, Value *> OrigToVectorMap;
-  /// A map from the vector value to a map of the original value to its lane.
+  /// vectorization Action.
+  DenseMap<Value *, Action *> OrigToVectorMap;
+  /// A map from the vec Action to a map of the original value to its lane.
   /// Please note that for constant vectors, there may multiple original values
   /// with the same lane, as they may be coming from vectorizing different
   /// original values.
-  DenseMap<Value *, DenseMap<Value *, unsigned>> VectorToOrigLaneMap;
-  Context &Ctx;
+  DenseMap<Action *, DenseMap<Value *, unsigned>> VectorToOrigLaneMap;
   std::optional<Context::CallbackID> EraseInstrCB;
 
-private:
-  void notifyEraseInstr(Value *V) {
-    // We don't know if V is an original or a vector value.
-    auto It = OrigToVectorMap.find(V);
-    if (It != OrigToVectorMap.end()) {
-      // V is an original value.
-      // Remove it from VectorToOrigLaneMap.
-      Value *Vec = It->second;
-      VectorToOrigLaneMap[Vec].erase(V);
-      // Now erase V from OrigToVectorMap.
-      OrigToVectorMap.erase(It);
-    } else {
-      // V is a vector value.
-      // Go over the original values it came from and remove them from
-      // OrigToVectorMap.
-      for (auto [Orig, Lane] : VectorToOrigLaneMap[V])
-        OrigToVectorMap.erase(Orig);
-      // Now erase V from VectorToOrigLaneMap.
-      VectorToOrigLaneMap.erase(V);
-    }
-  }
-
 public:
-  InstrMaps(Context &Ctx) : Ctx(Ctx) {
-    EraseInstrCB = Ctx.registerEraseInstrCallback(
-        [this](Instruction *I) { notifyEraseInstr(I); });
-  }
-  ~InstrMaps() { Ctx.unregisterEraseInstrCallback(*EraseInstrCB); }
+  InstrMaps() = default;
+  ~InstrMaps() = default;
   /// \Returns the vector value that we got from vectorizing \p Orig, or
   /// nullptr if not found.
-  Value *getVectorForOrig(Value *Orig) const {
+  Action *getVectorForOrig(Value *Orig) const {
     auto It = OrigToVectorMap.find(Orig);
     return It != OrigToVectorMap.end() ? It->second : nullptr;
   }
   /// \Returns the lane of \p Orig before it got vectorized into \p Vec, or
   /// nullopt if not found.
-  std::optional<unsigned> getOrigLane(Value *Vec, Value *Orig) const {
+  std::optional<unsigned> getOrigLane(Action *Vec, Value *Orig) const {
     auto It1 = VectorToOrigLaneMap.find(Vec);
     if (It1 == VectorToOrigLaneMap.end())
       return std::nullopt;
@@ -84,7 +81,7 @@ class InstrMaps {
     return It2->second;
   }
   /// Update the map to reflect that \p Origs got vectorized into \p Vec.
-  void registerVector(ArrayRef<Value *> Origs, Value *Vec) {
+  void registerVector(ArrayRef<Value *> Origs, Action *Vec) {
     auto &OrigToLaneMap = VectorToOrigLaneMap[Vec];
     unsigned Lane = 0;
     for (Value *Orig : Origs) {
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
index 132b12a7b4e6c..bc2942f87adcf 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
@@ -17,6 +17,7 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h"
 #include "llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h"
 
 namespace llvm::sandboxir {
@@ -206,22 +207,22 @@ class Widen final : public LegalityResult {
 
 class DiamondReuse final : public LegalityResult {
   friend class LegalityAnalysis;
-  Value *Vec;
-  DiamondReuse(Value *Vec)
+  Action *Vec;
+  DiamondReuse(Action *Vec)
       : LegalityResult(LegalityResultID::DiamondReuse), Vec(Vec) {}
 
 public:
   static bool classof(const LegalityResult *From) {
     return From->getSubclassID() == LegalityResultID::DiamondReuse;
   }
-  Value *getVector() const { return Vec; }
+  Action *getVector() const { return Vec; }
 };
 
 class DiamondReuseWithShuffle final : public LegalityResult {
   friend class LegalityAnalysis;
-  Value *Vec;
+  Action *Vec;
   ShuffleMask Mask;
-  DiamondReuseWithShuffle(Value *Vec, const ShuffleMask &Mask)
+  DiamondReuseWithShuffle(Action *Vec, const ShuffleMask &Mask)
       : LegalityResult(LegalityResultID::DiamondReuseWithShuffle), Vec(Vec),
         Mask(Mask) {}
 
@@ -229,7 +230,7 @@ class DiamondReuseWithShuffle final : public LegalityResult {
   static bool classof(const LegalityResult *From) {
     return From->getSubclassID() == LegalityResultID::DiamondReuseWithShuffle;
   }
-  Value *getVector() const { return Vec; }
+  Action *getVector() const { return Vec; }
   const ShuffleMask &getMask() const { return Mask; }
 };
 
@@ -250,18 +251,18 @@ class CollectDescr {
   /// Describes how to get a value element. If the value is a vector then it
   /// also provides the index to extract it from.
   class ExtractElementDescr {
-    Value *V;
+    PointerUnion<Action *, Value *> V = nullptr;
     /// The index in `V` that the value can be extracted from.
-    /// This is nullopt if we need to use `V` as a whole.
-    std::optional<int> ExtractIdx;
+    int ExtractIdx = 0;
 
   public:
-    ExtractElementDescr(Value *V, int ExtractIdx)
+    ExtractElementDescr(Action *V, int ExtractIdx)
         : V(V), ExtractIdx(ExtractIdx) {}
-    ExtractElementDescr(Value *V) : V(V), ExtractIdx(std::nullopt) {}
-    Value *getValue() const { return V; }
-    bool needsExtract() const { return ExtractIdx.has_value(); }
-    int getExtractIdx() const { return *ExtractIdx; }
+    ExtractElementDescr(Value *V) : V(V) {}
+    Action *getValue() const { return cast<Action *>(V); }
+    Value *getScalar() const { return cast<Value *>(V); }
+    bool needsExtract() const { return isa<Action *>(V); }
+    int getExtractIdx() const { return ExtractIdx; }
   };
 
   using DescrVecT = SmallVector<ExtractElementDescr, 4>;
@@ -272,11 +273,11 @@ class CollectDescr {
       : Descrs(std::move(Descrs)) {}
   /// If all elements come from a single vector input, then return that vector
   /// and also the shuffle mask required to get them in order.
-  std::optional<std::pair<Value *, ShuffleMask>> getSingleInput() const {
+  std::optional<std::pair<Action *, ShuffleMask>> getSingleInput() const {
     const auto &Descr0 = *Descrs.begin();
-    Value *V0 = Descr0.getValue();
     if (!Descr0.needsExtract())
       return std::nullopt;
+    auto *V0 = Descr0.getValue();
     ShuffleMask::IndicesVecT MaskIndices;
     MaskIndices.push_back(Descr0.getExtractIdx());
     for (const auto &Descr : drop_begin(Descrs)) {
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h
index daf6499213d48..b28e9948d6f55 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h
@@ -58,9 +58,33 @@ class BottomUpVec final : public RegionPass {
   /// function helps collect these instructions (along with the pointer operands
   /// for loads/stores) so that they can be cleaned up later.
   void collectPotentiallyDeadInstrs(ArrayRef<Value *> Bndl);
-  /// Recursively try to vectorize \p Bndl and its operands.
-  Value *vectorizeRec(ArrayRef<Value *> Bndl, ArrayRef<Value *> UserBndl,
-                      unsigned Depth);
+
+  /// Helper class describing how (and whether) to vectorize the code.
+  class ActionsVector {
+  private:
+    SmallVector<std::unique_ptr<Action>, 16> Actions;
+
+  public:
+    auto begin() const { return Actions.begin(); }
+    auto end() const { return Actions.end(); }
+    void push_back(std::unique_ptr<Action> &&ActPtr) {
+      ActPtr->Idx = Actions.size();
+      Actions.push_back(std::move(ActPtr));
+    }
+    void clear() { Actions.clear(); }
+#ifndef NDEBUG
+    void print(raw_ostream &OS) const;
+    void dump() const;
+#endif // NDEBUG
+  };
+  ActionsVector Actions;
+  /// Recursively try to vectorize \p Bndl and its operands. This populates the
+  /// `Actions` vector.
+  Action *vectorizeRec(ArrayRef<Value *> Bndl, ArrayRef<Value *> UserBndl,
+                       unsigned Depth);
+  /// Generate vector instructions based on `Actions` and return the last vector
+  /// created.
+  Value *emitVectors();
   /// Entry point for vectorization starting from \p Seeds.
   bool tryVectorize(ArrayRef<Value *> Seeds);
 
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/InstrMaps.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/InstrMaps.cpp
index 4df4829a04c41..37f1ec450f2eb 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/InstrMaps.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/InstrMaps.cpp
@@ -8,10 +8,23 @@
 
 #include "llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h"
 
 namespace llvm::sandboxir {
 
 #ifndef NDEBUG
+void Action::print(raw_ostream &OS) const {
+  OS << Idx << ". " << *LegalityRes << " Depth:" << Depth << "\n";
+  OS.indent(2) << "Bndl:\n";
+  for (Value *V : Bndl)
+    OS.indent(4) << *V << "\n";
+  OS.indent(2) << "UserBndl:\n";
+  for (Value *V : UserBndl)
+    OS.indent(4) << *V << "\n";
+}
+
+void Action::dump() const { print(dbgs()); }
+
 void InstrMaps::dump() const {
   print(dbgs());
   dbgs() << "\n";
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
index d57732090dcd6..14438181f2602 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
@@ -156,12 +156,7 @@ Value *BottomUpVec::createVectorInstr(ArrayRef<Value *> Bndl,
     // TODO: Propagate debug info.
   };
 
-  auto *VecI = CreateVectorInstr(Bndl, Operands);
-  if (VecI != nullptr) {
-    Change = true;
-    IMaps->registerVector(Bndl, VecI);
-  }
-  return VecI;
+  return CreateVectorInstr(Bndl, Operands);
 }
 
 void BottomUpVec::tryEraseDeadInstrs() {
@@ -266,135 +261,196 @@ void BottomUpVec::collectPotentiallyDeadInstrs(ArrayRef<Value *> Bndl) {
   }
 }
 
-Value *BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl,
-                                 ArrayRef<Value *> UserBndl, unsigned Depth) {
-  Value *NewVec = nullptr;
-  auto *UserBB = !UserBndl.empty()
-                     ? cast<Instruction>(UserBndl.front())->getParent()
-                     : cast<Instruction>(Bndl[0])->getParent();
+Action *BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl,
+                                  ArrayRef<Value *> UserBndl, unsigned Depth) {
   const auto &LegalityRes = Legality->canVectorize(Bndl);
+  auto ActionPtr =
+      std::make_unique<Action>(&LegalityRes, Bndl, UserBndl, Depth);
+  SmallVector<Action *> Operands;
   switch (LegalityRes.getSubclassID()) {
   case LegalityResultID::Widen: {
     auto *I = cast<Instruction>(Bndl[0]);
-    SmallVector<Value *, 2> VecOperands;
     switch (I->getOpcode()) {
     case Instruction::Opcode::Load:
-      // Don't recurse towards the pointer operand.
-      VecOperands.push_back(cast<LoadInst>(I)->getPointerOperand());
       break;
     case Instruction::Opcode::Store: {
       // Don't recurse towards the pointer operand.
-      auto *VecOp = vectorizeRec(getOperand(Bndl, 0), Bndl, Depth + 1);
-      VecOperands.push_back(VecOp);
-      VecOperands.push_back(cast<StoreInst>(I)->getPointerOperand());
+      Action *OpA = vectorizeRec(getOperand(Bndl, 0), Bndl, Depth + 1);
+      Operands.push_back(OpA);
       break;
     }
     default:
       // Visit all operands.
       for (auto OpIdx : seq<unsigned>(I->getNumOperands())) {
-        auto *VecOp = vectorizeRec(getOperand(Bndl, OpIdx), Bndl, Depth + 1);
-        VecOperands.push_back(VecOp);
+        Action *OpA = vectorizeRec(getOperand(Bndl, OpIdx), Bndl, Depth + 1);
+        Operands.push_back(OpA);
       }
       break;
     }
-    NewVec = createVectorInstr(Bndl, VecOperands);
-
-    // Collect any potentially dead scalar instructions, including the original
-    // scalars and pointer operands of loads/stores.
-    if (NewVec != nullptr)
-      collectPotentiallyDeadInstrs(Bndl);
+    // Update the maps to mark Bndl as "vectorized".
+    IMaps->registerVector(Bndl, ActionPtr.get());
     break;
   }
-  case LegalityResultID::DiamondReuse: {
-    NewVec = cast<DiamondReuse>(LegalityRes).getVector();
+  case LegalityResultID::DiamondReuse:
+  case LegalityResultID::DiamondReuseWithShuffle:
+  case LegalityResultID::DiamondReuseMultiInput:
+  case LegalityResultID::Pack:
     break;
   }
-  case LegalityResultID::DiamondReuseWithShuffle: {
-    auto *VecOp = cast<DiamondReuseWithShuffle>(LegalityRes).getVector();
-    const ShuffleMask &Mask =
-        cast<DiamondReuseWithShuffle>(LegalityRes).getMask();
-    NewVec = createShuffle(VecOp, Mask, UserBB);
-    assert(NewVec->getType() == VecOp->getType() &&
-           "Expected same type! Bad mask ?");
-    break;
+  // Create actions in post-order.
+  ActionPtr->Operands = std::move(Operands);
+  auto *Action = ActionPtr.get();
+  Actions.push_back(std::move(ActionPtr));
+  return Action;
+}
+
+#ifndef NDEBUG
+void BottomUpVec::ActionsVector::print(raw_ostream &OS) const {
+  for (auto [Idx, Action] : enumerate(Actions)) {
+    Action->print(OS);
+    OS << "\n";
   }
-  case LegalityResultID::DiamondReuseMultiInput: {
-    const auto &Descr =
-        cast<DiamondReuseMultiInput>(LegalityRes).getCollectDescr();
-    Type *ResTy = VecUtils::getWideType(Bndl[0]->getType(), Bndl.size());
+}
+void BottomUpVec::ActionsVector::dump() const { print(dbgs()); }
+#endif // NDEBUG
+
+Value *BottomUpVec::emitVectors() {
+  Value *NewVec = nullptr;
+  for (const auto &ActionPtr : Actions) {
+    ArrayRef<Value *> Bndl = ActionPtr->Bndl;
+    ArrayRef<Value *> UserBndl = ActionPtr->UserBndl;
+    const LegalityResult &LegalityRes = *ActionPtr->LegalityRes;
+    unsigned Depth = ActionPtr->Depth;
+    auto *UserBB = !UserBndl.empty()
+                       ? cast<Instruction>(UserBndl.front())->getParent()
+                       : cast<Instruction>(Bndl[0])->getParent();
 
-    // TODO: Try to get WhereIt without creating a vector.
-    SmallVector<Value *, 4> DescrInstrs;
-    for (const auto &ElmDescr : Descr.getDescrs()) {
-      if (auto *I = dyn_cast<Instruction>(ElmDescr.getValue()))
-        DescrInstrs.push_back(I);
+    switch (LegalityRes.getSubclassID()) {
+    case LegalityResultID::Widen: {
+      auto *I = cast<Instruction>(Bndl[0]);
+      SmallVector<Value *, 2> VecOperands;
+      switch (I->getOpcode()) {
+      case Instruction::Opcode::Load:
+        VecOperands.push_back(cast<LoadInst>(I)->getPointerOperand());
+        break;
+      case Instruction::Opcode::Store: {
+        VecOperands.push_back(ActionPtr->Operands[0]->Vec);
+        VecOperands.push_back(cast<StoreInst>(I)->getPointerOperand());
+        break;
+      }
+      default:
+        // Visit all operands.
+        for (Action *OpA : ActionPtr->Operands) {
+          auto *VecOp = OpA->Vec;
+          VecOperands.push_back(VecOp);
+        }
+        break;
+      }
+      NewVec = createVectorInstr(ActionPtr->Bndl, VecOperands);
+      // Collect any potentially dead scalar instructions, including the
+      // original scalars and pointer operands of loads/stores.
+      if (NewVec != nullptr)
+        collectPotentiallyDeadInstrs(Bndl);
+      break;
+    }
+    case LegalityResultID::DiamondReuse: {
+      NewVec = cast<DiamondReuse>(LegalityRes).getVector()->Vec;
+      break;
+    }
+    case LegalityResultID::DiamondReuseWithShuffle: {
+      auto *VecOp = cast<DiamondReuseWithShuffle>(LegalityRes).getVector()->Vec;
+      const ShuffleMask &Mask =
+          cast<DiamondReuseWithShuffle>(LegalityRes).getMask();
+      NewVec = createShuffle(VecOp, Mask, UserBB);
+      assert(NewVec->getType() == VecOp->getType() &&
+             "Expected same type! Bad mask ?");
+      break;
     }
-    BasicBlock::iterator WhereIt =
-        getInsertPointAfterInstrs(DescrInstrs, UserBB);
+    case LegalityResultID::DiamondReuseMultiInput: {
+      const auto &Descr =
+          cast<DiamondReuseMultiInput>(LegalityRes).getCollectDescr();
+      Type *ResTy = VecUtils::getWideType(Bndl[0]->getType(), Bndl.size());
 
-    Value *LastV = PoisonValue::get(ResTy);
-    unsigned Lane = 0;
-    for (const auto &ElmDescr : Descr.getDescrs()) {
-      Value *VecOp = ElmDescr.getValue();
-      Context &Ctx = VecOp->getContext();
-      Value *ValueToInsert;
-      if (ElmDescr.needsExtract()) {
-        ConstantInt *IdxC =
-            ConstantInt::get(Type::getInt32Ty(Ctx), ElmDescr.getExtractIdx());
-        ValueToInsert = ExtractElementInst::create(VecOp, IdxC, WhereIt,
-                                                   VecOp->getContext(), "VExt");
-      } else {
-        ValueToInsert = VecOp;
+      // TODO: Try to get WhereIt without creating a vector.
+      SmallVector<Value *, 4> DescrInstrs;
+      for (const auto &ElmDescr : Descr.getDescrs()) {
+        auto *V = ElmDescr.needsExtract() ? ElmDescr.getValue()->Vec
+                                          : ElmDescr.getScalar();
+        if (auto *I = dyn_cast<Instruction>(V))
+          DescrInstrs.push_back(I);
       }
-      auto NumLanesToInsert = VecUtils::getNumLanes(ValueToInsert);
-      if (NumLanesToInsert == 1) {
-        // If we are inserting a scalar element then we need a single insert.
-        //   %VIns = insert %DstVec,  %SrcScalar, Lane
-        ConstantInt *LaneC = ConstantInt::get(Type::getInt32Ty(Ctx), Lane);
-        LastV = InsertElementInst::create(LastV, ValueToInsert, LaneC, WhereIt,
-                                          Ctx, "VIns");
-      } else {
-        // If we are inserting a vector element then we need to extract and
-        // insert each vector element one by one with a chain of extracts and
-        // inserts, for example:
-        //   %VExt0 = extract %SrcVec, 0
-        //   %VIns0 = insert  %DstVec, %Vect0, Lane + 0
-        //   %VExt1 = extract %SrcVec, 1
-        //   %VIns1 = insert  %VIns0,  %Vect0, Lane + 1
-        for (unsigned LnCnt = 0; LnCnt != NumLanesToInsert; ++LnCnt) {
-          auto *ExtrIdxC = ConstantInt::get(Type::getInt32Ty(Ctx), LnCnt);
-          auto *ExtrI = ExtractElementInst::create(ValueToInsert, ExtrIdxC,
-                                                   WhereIt, Ctx, "VExt");
-          unsigned InsLane = Lane + LnCnt;
-          auto *InsLaneC = ConstantInt::get(Type::getInt32Ty(Ctx), InsLane);
-          LastV = InsertElementInst::create(LastV, ExtrI, InsLaneC, WhereIt,
-                                            Ctx, "VIns");
+      BasicBlock::iterator WhereIt =
+          getInsertPointAfterInstrs(DescrInstrs, UserBB);
+
+      Value *LastV = PoisonValue::get(ResTy);
+      Context &Ctx = LastV->getContext();
+      unsigned Lane = 0;
+      for (const auto &ElmDescr : Descr.getDescrs()) {
+        Value *VecOp = nullptr;
+        Value *ValueToInsert;
+        if (ElmDescr.needsExtract()) {
+          VecOp = ElmDescr.getValue()->Vec;
+          ConstantInt *IdxC =
+              ConstantInt::get(Type::getInt32Ty(Ctx), ElmDescr.getExtractIdx());
+          ValueToInsert = ExtractElementInst::create(
+              VecOp, IdxC, WhereIt, VecOp->getContext(), "VExt");
+        } else {
+          ValueToInsert = ElmDescr.getScalar();
+        }
+        auto NumLanesToInsert = VecUtils::getNumLanes(ValueToInsert);
+        if (NumLanesToInsert == 1) {
+          // If we are inserting a scalar element then we need a single insert.
+          //   %VIns = insert %DstVec,  %SrcScalar, Lane
+          ConstantInt *LaneC = ConstantInt::get(Type::getInt32Ty(Ctx), Lane);
+          LastV = InsertElementInst::create(LastV, ValueToInsert, LaneC,
+                                            WhereIt, Ctx, "VIns");
+        } else {
+          // If we are inserting a vector element then we need to extract and
+          // insert each vector element one by one with a chain of extracts and
+          // inserts, for example:
+          //   %VExt0 = extract %SrcVec, 0
+          //   %VIns0 = insert  %DstVec, %VExt0, Lane + 0
+          //   %VExt1 = extract %SrcVec, 1
+          //   %VIns1 = insert  %VIns0,  %VExt1, Lane + 1
+          for (unsigned LnCnt = 0; LnCnt != NumLanesToInsert; ++LnCnt) {
+            auto *ExtrIdxC = ConstantInt::get(Type::getInt32Ty(Ctx), LnCnt);
+            auto *ExtrI = ExtractElementInst::create(ValueToInsert, ExtrIdxC,
+                                                     WhereIt, Ctx, "VExt");
+            unsigned InsLane = Lane + LnCnt;
+            auto *InsLaneC = ConstantInt::get(Type::getInt32Ty(Ctx), InsLane);
+            LastV = InsertElementInst::create(LastV, ExtrI, InsLaneC, WhereIt,
+                                              Ctx, "VIns");
+          }
         }
+        Lane += NumLanesToInsert;
       }
-      Lane += NumLanesToInsert;
+      NewVec = LastV;
+      break;
+    }
+    case LegalityResultID::Pack: {
+      // If we can't vectorize the seeds then just return.
+      if (Depth == 0)
+        return nullptr;
+      NewVec = createPack(Bndl, UserBB);
+      break;
+    }
+    }
+    if (NewVec != nullptr) {
+      Change = true;
+      ActionPtr->Vec = NewVec;
     }
-    NewVec = LastV;
-    break;
-  }
-  case LegalityResultID::Pack: {
-    // If we can't vectorize the seeds then just return.
-    if (Depth == 0)
-      return nullptr;
-    NewVec = createPack(Bndl, UserBB);
-    break;
-  }
-  }
 #ifndef NDEBUG
-  if (AlwaysVerify) {
-    // This helps find broken IR by constantly verifying the function. Note that
-    // this is very expensive and should only be used for debugging.
-    Instruction *I0 = isa<Instruction>(Bndl[0])
-                          ? cast<Instruction>(Bndl[0])
-                          : cast<Instruction>(UserBndl[0]);
-    assert(!Utils::verifyFunction(I0->getParent()->getParent(), dbgs()) &&
-           "Broken function!");
+    if (AlwaysVerify) {
+      // This helps find broken IR by constantly verifying the function. Note
+      // that this is very expensive and should only be used for debugging.
+      Instruction *I0 = isa<Instruction>(Bndl[0])
+                            ? cast<Instruction>(Bndl[0])
+                            : cast<Instruction>(UserBndl[0]);
+      assert(!Utils::verifyFunction(I0->getParent()->getParent(), dbgs()) &&
+             "Broken function!");
+    }
+#endif // NDEBUG
   }
-#endif
   return NewVec;
 }
 
@@ -402,7 +458,9 @@ bool BottomUpVec::tryVectorize(ArrayRef<Value *> Bndl) {
   Change = false;
   DeadInstrCandidates.clear();
   Legality->clear();
+  Actions.clear();
   vectorizeRec(Bndl, {}, /*Depth=*/0);
+  emitVectors();
   tryEraseDeadInstrs();
   return Change;
 }
@@ -411,7 +469,7 @@ bool BottomUpVec::runOnRegion(Region &Rgn, const Analyses &A) {
   const auto &SeedSlice = Rgn.getAux();
   assert(SeedSlice.size() >= 2 && "Bad slice!");
   Function &F = *SeedSlice[0]->getParent()->getParent();
-  IMaps = std::make_unique<InstrMaps>(F.getContext());
+  IMaps = std::make_unique<InstrMaps>();
   Legality = std::make_unique<LegalityAnalysis>(
       A.getAA(), A.getScalarEvolution(), F.getParent()->getDataLayout(),
       F.getContext(), *IMaps);
diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
index c076c0e849fa9..fc5795708c7d8 100644
--- a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
@@ -352,3 +352,37 @@ define void @diamondWithConstantVector(ptr %ptr) {
   store i32 %orB1, ptr %gepB1
   ret void
 }
+
+; Check that we don't get def-after-use errors due to wrong placement
+; of new vector instructions.
+define void @vecInstrsPlacement(ptr %ptr0) {
+; CHECK-LABEL: define void @vecInstrsPlacement(
+; CHECK-SAME: ptr [[PTR0:%.*]]) {
+; CHECK-NEXT:    [[VECL2:%.*]] = load <2 x double>, ptr [[PTR0]], align 8
+; CHECK-NEXT:    [[VECL:%.*]] = load <2 x double>, ptr [[PTR0]], align 8
+; CHECK-NEXT:    [[VEC2:%.*]] = fmul <2 x double> [[VECL]], [[VECL2]]
+; CHECK-NEXT:    [[VEC:%.*]] = fmul <2 x double> [[VECL]], [[VECL2]]
+; CHECK-NEXT:    [[VEC5:%.*]] = fadd <2 x double> [[VEC]], [[VEC2]]
+; CHECK-NEXT:    store <2 x double> [[VEC5]], ptr [[PTR0]], align 8
+; CHECK-NEXT:    ret void
+;
+  %ptr1 = getelementptr inbounds double, ptr %ptr0, i64 1
+  %ldA_0 = load double, ptr %ptr0
+  %ldA_1 = load double, ptr %ptr1
+
+  %ldB_0 = load double, ptr %ptr0
+  %ldB_1 = load double, ptr %ptr1
+
+  %mul0 = fmul double %ldA_0, %ldB_0
+  %mul1 = fmul double %ldA_1, %ldB_1
+
+  %mul2 = fmul double %ldA_0, %ldB_0
+  %mul3 = fmul double %ldA_1, %ldB_1
+
+  %add0 = fadd double %mul0, %mul2
+  %add1 = fadd double %mul1, %mul3
+
+  store double %add0, ptr %ptr0
+  store double %add1, ptr %ptr1
+  ret void
+}
diff --git a/llvm/test/Transforms/SandboxVectorizer/scheduler.ll b/llvm/test/Transforms/SandboxVectorizer/scheduler.ll
index 7741d8c64c8fc..5b9177ba4b3bf 100644
--- a/llvm/test/Transforms/SandboxVectorizer/scheduler.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/scheduler.ll
@@ -7,17 +7,17 @@ define void @check_dag_scheduler_update(ptr noalias %p, ptr noalias %p1) {
 ; CHECK-LABEL: define void @check_dag_scheduler_update(
 ; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[P1:%.*]]) {
 ; CHECK-NEXT:    [[I:%.*]] = load i32, ptr [[P]], align 4
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr i32, ptr [[P]], i64 32
+; CHECK-NEXT:    [[VECL:%.*]] = load <4 x i32>, ptr [[P]], align 4
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr i32, ptr [[P]], i64 34
 ; CHECK-NEXT:    [[I2:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr i32, ptr [[P]], i64 33
 ; CHECK-NEXT:    [[I4:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4
-; CHECK-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr i32, ptr [[P]], i64 34
+; CHECK-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr i32, ptr [[P]], i64 32
 ; CHECK-NEXT:    [[I6:%.*]] = load i32, ptr [[ARRAYIDX18]], align 4
 ; CHECK-NEXT:    [[PACK:%.*]] = insertelement <4 x i32> poison, i32 [[I]], i32 0
-; CHECK-NEXT:    [[PACK1:%.*]] = insertelement <4 x i32> [[PACK]], i32 [[I2]], i32 1
+; CHECK-NEXT:    [[PACK1:%.*]] = insertelement <4 x i32> [[PACK]], i32 [[I6]], i32 1
 ; CHECK-NEXT:    [[PACK2:%.*]] = insertelement <4 x i32> [[PACK1]], i32 [[I4]], i32 2
-; CHECK-NEXT:    [[PACK3:%.*]] = insertelement <4 x i32> [[PACK2]], i32 [[I6]], i32 3
-; CHECK-NEXT:    [[VECL:%.*]] = load <4 x i32>, ptr [[P]], align 4
+; CHECK-NEXT:    [[PACK3:%.*]] = insertelement <4 x i32> [[PACK2]], i32 [[I2]], i32 3
 ; CHECK-NEXT:    [[VEC:%.*]] = add nsw <4 x i32> [[PACK3]], [[VECL]]
 ; CHECK-NEXT:    store <4 x i32> [[VEC]], ptr [[P1]], align 4
 ; CHECK-NEXT:    ret void
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp
index 5b033f0edcb02..c8fee1c24dbcb 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp
@@ -53,37 +53,41 @@ define void @foo(i8 %v0, i8 %v1, i8 %v2, i8 %v3, <2 x i8> %vec) {
   auto *VAdd0 = cast<sandboxir::BinaryOperator>(&*It++);
   [[maybe_unused]] auto *Ret = cast<sandboxir::ReturnInst>(&*It++);
 
-  sandboxir::InstrMaps IMaps(Ctx);
-  // Check with empty IMaps.
-  EXPECT_EQ(IMaps.getVectorForOrig(Add0), nullptr);
-  EXPECT_EQ(IMaps.getVectorForOrig(Add1), nullptr);
-  EXPECT_FALSE(IMaps.getOrigLane(Add0, Add0));
-  // Check with 1 match.
-  IMaps.registerVector({Add0, Add1}, VAdd0);
-  EXPECT_EQ(IMaps.getVectorForOrig(Add0), VAdd0);
-  EXPECT_EQ(IMaps.getVectorForOrig(Add1), VAdd0);
-  EXPECT_FALSE(IMaps.getOrigLane(VAdd0, VAdd0)); // Bad Orig value
-  EXPECT_FALSE(IMaps.getOrigLane(Add0, Add0));   // Bad Vector value
-  EXPECT_EQ(*IMaps.getOrigLane(VAdd0, Add0), 0U);
-  EXPECT_EQ(*IMaps.getOrigLane(VAdd0, Add1), 1U);
-  // Check when the same vector maps to different original values (which is
-  // common for vector constants).
-  IMaps.registerVector({Add2, Add3}, VAdd0);
-  EXPECT_EQ(*IMaps.getOrigLane(VAdd0, Add2), 0U);
-  EXPECT_EQ(*IMaps.getOrigLane(VAdd0, Add3), 1U);
-  // Check when we register for a second time.
+  sandboxir::InstrMaps IMaps;
+  {
+    // Check with empty IMaps.
+    sandboxir::Action A(nullptr, {Add0}, {}, 0);
+    EXPECT_EQ(IMaps.getVectorForOrig(Add0), nullptr);
+    EXPECT_EQ(IMaps.getVectorForOrig(Add1), nullptr);
+    EXPECT_FALSE(IMaps.getOrigLane(&A, Add0));
+  }
+  {
+    // Check with 1 match.
+    sandboxir::Action A(nullptr, {Add0, Add1}, {}, 0);
+    sandboxir::Action OtherA(nullptr, {}, {}, 0);
+    IMaps.registerVector({Add0, Add1}, &A);
+    EXPECT_EQ(IMaps.getVectorForOrig(Add0), &A);
+    EXPECT_EQ(IMaps.getVectorForOrig(Add1), &A);
+    EXPECT_FALSE(IMaps.getOrigLane(&A, VAdd0));     // Bad Orig value
+    EXPECT_FALSE(IMaps.getOrigLane(&OtherA, Add0)); // Bad Vector value
+    EXPECT_EQ(*IMaps.getOrigLane(&A, Add0), 0U);
+    EXPECT_EQ(*IMaps.getOrigLane(&A, Add1), 1U);
+  }
+  {
+    // Check when the same vector maps to different original values (which is
+    // common for vector constants).
+    sandboxir::Action A(nullptr, {Add2, Add3}, {}, 0);
+    IMaps.registerVector({Add2, Add3}, &A);
+    EXPECT_EQ(*IMaps.getOrigLane(&A, Add2), 0U);
+    EXPECT_EQ(*IMaps.getOrigLane(&A, Add3), 1U);
+  }
+  {
+    // Check when we register for a second time.
+    sandboxir::Action A(nullptr, {Add2, Add3}, {}, 0);
 #ifndef NDEBUG
-  EXPECT_DEATH(IMaps.registerVector({Add1, Add0}, VAdd0), ".*exists.*");
+    EXPECT_DEATH(IMaps.registerVector({Add1, Add0}, &A), ".*exists.*");
 #endif // NDEBUG
-  // Check callbacks: erase original instr.
-  Add0->eraseFromParent();
-  EXPECT_FALSE(IMaps.getOrigLane(VAdd0, Add0));
-  EXPECT_EQ(*IMaps.getOrigLane(VAdd0, Add1), 1U);
-  EXPECT_EQ(IMaps.getVectorForOrig(Add0), nullptr);
-  // Check callbacks: erase vector instr.
-  VAdd0->eraseFromParent();
-  EXPECT_FALSE(IMaps.getOrigLane(VAdd0, Add1));
-  EXPECT_EQ(IMaps.getVectorForOrig(Add1), nullptr);
+  }
 }
 
 TEST_F(InstrMapsTest, VectorLanes) {
@@ -91,7 +95,6 @@ TEST_F(InstrMapsTest, VectorLanes) {
 define void @foo(<2 x i8> %v0, <2 x i8> %v1, <4 x i8> %v2, <4 x i8> %v3) {
   %vadd0 = add <2 x i8> %v0, %v1
   %vadd1 = add <2 x i8> %v0, %v1
-  %vadd2 = add <4 x i8> %v2, %v3
   ret void
 }
 )IR");
@@ -103,12 +106,14 @@ define void @foo(<2 x i8> %v0, <2 x i8> %v1, <4 x i8> %v2, <4 x i8> %v3) {
 
   auto *VAdd0 = cast<sandboxir::BinaryOperator>(&*It++);
   auto *VAdd1 = cast<sandboxir::BinaryOperator>(&*It++);
-  auto *VAdd2 = cast<sandboxir::BinaryOperator>(&*It++);
 
-  sandboxir::InstrMaps IMaps(Ctx);
+  sandboxir::InstrMaps IMaps;
 
-  // Check that the vector lanes are calculated correctly.
-  IMaps.registerVector({VAdd0, VAdd1}, VAdd2);
-  EXPECT_EQ(*IMaps.getOrigLane(VAdd2, VAdd0), 0U);
-  EXPECT_EQ(*IMaps.getOrigLane(VAdd2, VAdd1), 2U);
+  {
+    // Check that the vector lanes are calculated correctly.
+    sandboxir::Action A(nullptr, {VAdd0, VAdd1}, {}, 0);
+    IMaps.registerVector({VAdd0, VAdd1}, &A);
+    EXPECT_EQ(*IMaps.getOrigLane(&A, VAdd0), 0U);
+    EXPECT_EQ(*IMaps.getOrigLane(&A, VAdd1), 2U);
+  }
 }
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp
index 15f8166b705fc..99519d17d0e8e 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp
@@ -133,7 +133,7 @@ define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg, float
   auto *Sel0 = cast<sandboxir::SelectInst>(&*It++);
   auto *Sel1 = cast<sandboxir::SelectInst>(&*It++);
 
-  llvm::sandboxir::InstrMaps IMaps(Ctx);
+  llvm::sandboxir::InstrMaps IMaps;
   sandboxir::LegalityAnalysis Legality(*AA, *SE, DL, Ctx, IMaps);
   const auto &Result =
       Legality.canVectorize({St0, St1}, /*SkipScheduling=*/true);
@@ -285,7 +285,7 @@ define void @foo(ptr %ptr) {
   auto *St0 = cast<sandboxir::StoreInst>(&*It++);
   auto *St1 = cast<sandboxir::StoreInst>(&*It++);
 
-  llvm::sandboxir::InstrMaps IMaps(Ctx);
+  llvm::sandboxir::InstrMaps IMaps;
   sandboxir::LegalityAnalysis Legality(*AA, *SE, DL, Ctx, IMaps);
   {
     // Can vectorize St0,St1.
@@ -321,7 +321,7 @@ define void @foo() {
   };
 
   sandboxir::Context Ctx(C);
-  llvm::sandboxir::InstrMaps IMaps(Ctx);
+  llvm::sandboxir::InstrMaps IMaps;
   sandboxir::LegalityAnalysis Legality(*AA, *SE, DL, Ctx, IMaps);
   EXPECT_TRUE(
       Matches(Legality.createLegalityResult<sandboxir::Widen>(), "Widen"));
@@ -368,32 +368,34 @@ define void @foo(ptr %ptr) {
 
   sandboxir::CollectDescr::DescrVecT Descrs;
   using EEDescr = sandboxir::CollectDescr::ExtractElementDescr;
-
+  SmallVector<sandboxir::Value *> Bndl({VLd});
+  SmallVector<sandboxir::Value *> UB;
+  sandboxir::Action VLdA(nullptr, Bndl, UB, 0);
   {
     // Check single input, no shuffle.
-    Descrs.push_back(EEDescr(VLd, 0));
-    Descrs.push_back(EEDescr(VLd, 1));
+    Descrs.push_back(EEDescr(&VLdA, 0));
+    Descrs.push_back(EEDescr(&VLdA, 1));
     sandboxir::CollectDescr CD(std::move(Descrs));
     EXPECT_TRUE(CD.getSingleInput());
-    EXPECT_EQ(CD.getSingleInput()->first, VLd);
+    EXPECT_EQ(CD.getSingleInput()->first, &VLdA);
     EXPECT_THAT(CD.getSingleInput()->second, testing::ElementsAre(0, 1));
     EXPECT_TRUE(CD.hasVectorInputs());
   }
   {
     // Check single input, shuffle.
-    Descrs.push_back(EEDescr(VLd, 1));
-    Descrs.push_back(EEDescr(VLd, 0));
+    Descrs.push_back(EEDescr(&VLdA, 1));
+    Descrs.push_back(EEDescr(&VLdA, 0));
     sandboxir::CollectDescr CD(std::move(Descrs));
     EXPECT_TRUE(CD.getSingleInput());
-    EXPECT_EQ(CD.getSingleInput()->first, VLd);
+    EXPECT_EQ(CD.getSingleInput()->first, &VLdA);
     EXPECT_THAT(CD.getSingleInput()->second, testing::ElementsAre(1, 0));
     EXPECT_TRUE(CD.hasVectorInputs());
   }
   {
     // Check multiple inputs.
     Descrs.push_back(EEDescr(Ld0));
-    Descrs.push_back(EEDescr(VLd, 0));
-    Descrs.push_back(EEDescr(VLd, 1));
+    Descrs.push_back(EEDescr(&VLdA, 0));
+    Descrs.push_back(EEDescr(&VLdA, 1));
     sandboxir::CollectDescr CD(std::move(Descrs));
     EXPECT_FALSE(CD.getSingleInput());
     EXPECT_TRUE(CD.hasVectorInputs());

From 2a7d3f055d4ea86354e314c5cd1f682a4ad5853f Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing@udel.edu>
Date: Thu, 20 Feb 2025 10:27:07 -0800
Subject: [PATCH 15/29] [NFC] Add release notes for
 -Wunsafe-buffer-usage-in-libc-call (#126975)

`-Wunsafe-buffer-usage-in-libc-call` is a subgroup of
`-Wunsafe-buffer-usage` that warns about unsafe libc function calls.
---
 clang/docs/ReleaseNotes.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index db42fc5cc0da7..e1c61992512b5 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -144,6 +144,9 @@ Improvements to Clang's diagnostics
 - Fixed a bug where Clang's Analysis did not correctly model the destructor behavior of ``union`` members (#GH119415).
 - A statement attribute applied to a ``case`` label no longer suppresses
   'bypassing variable initialization' diagnostics (#84072).
+- The ``-Wunsafe-buffer-usage`` warning has been updated to warn
+  about unsafe libc function calls.  Those new warnings are emitted
+  under the subgroup ``-Wunsafe-buffer-usage-in-libc-call``.
 
 Improvements to Clang's time-trace
 ----------------------------------

From 19af8581d51b8144f6d041ae1d948443084d8d0b Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas@microsoft.com>
Date: Thu, 20 Feb 2025 10:32:14 -0800
Subject: [PATCH 16/29] [HLSL] Constant Buffers CodeGen (#124886)

Translates `cbuffer` declaration blocks to `target("dx.CBuffer")` type. Creates global variables in `hlsl_constant` address space for all `cbuffer` constants and adds metadata describing which global constant belongs to which constant buffer. For explicit constant buffer layout information an explicit layout type `target("dx.Layout")` is used. This might change in the future.

The constant globals are temporary and will be removed in an upcoming pass that will translate `load` instructions in the `hlsl_constant` address space to constant buffer load intrinsic calls off a CBV handle (#124630, #112992).

See [Constant buffer design
doc](https://github.com/llvm/wg-hlsl/pull/94) for more details.

Fixes #113514, #106596
---
 clang/include/clang/AST/Decl.h                |   9 +
 clang/include/clang/AST/Type.h                |   4 +-
 clang/lib/AST/Decl.cpp                        |  12 +-
 clang/lib/CodeGen/CGHLSLRuntime.cpp           | 316 +++++++++++-------
 clang/lib/CodeGen/CGHLSLRuntime.h             |  32 +-
 clang/lib/CodeGen/CMakeLists.txt              |   1 +
 clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp | 229 +++++++++++++
 clang/lib/CodeGen/HLSLBufferLayoutBuilder.h   |  48 +++
 clang/lib/CodeGen/TargetInfo.h                |   4 +-
 clang/lib/CodeGen/Targets/DirectX.cpp         |  30 +-
 clang/lib/CodeGen/Targets/SPIR.cpp            |   9 +-
 clang/lib/Sema/SemaDecl.cpp                   |   7 +
 clang/lib/Sema/SemaHLSL.cpp                   |   5 +-
 clang/test/CodeGenHLSL/cbuf.hlsl              |  33 --
 clang/test/CodeGenHLSL/cbuf_in_namespace.hlsl |  29 --
 clang/test/CodeGenHLSL/cbuffer.hlsl           | 197 +++++++++++
 .../CodeGenHLSL/cbuffer_and_namespaces.hlsl   |  56 ++++
 .../CodeGenHLSL/cbuffer_with_packoffset.hlsl  |  38 +++
 ...uffer_with_static_global_and_function.hlsl |  29 ++
 .../static_global_and_function_in_cb.hlsl     |  22 --
 20 files changed, 871 insertions(+), 239 deletions(-)
 create mode 100644 clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp
 create mode 100644 clang/lib/CodeGen/HLSLBufferLayoutBuilder.h
 delete mode 100644 clang/test/CodeGenHLSL/cbuf.hlsl
 delete mode 100644 clang/test/CodeGenHLSL/cbuf_in_namespace.hlsl
 create mode 100644 clang/test/CodeGenHLSL/cbuffer.hlsl
 create mode 100644 clang/test/CodeGenHLSL/cbuffer_and_namespaces.hlsl
 create mode 100644 clang/test/CodeGenHLSL/cbuffer_with_packoffset.hlsl
 create mode 100644 clang/test/CodeGenHLSL/cbuffer_with_static_global_and_function.hlsl
 delete mode 100644 clang/test/CodeGenHLSL/static_global_and_function_in_cb.hlsl

diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index f305cbbce4c60..0f96bf0762ca4 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -5039,6 +5039,11 @@ class HLSLBufferDecl final : public NamedDecl, public DeclContext {
   SourceLocation KwLoc;
   /// IsCBuffer - Whether the buffer is a cbuffer (and not a tbuffer).
   bool IsCBuffer;
+  /// HasValidPackoffset - Whether the buffer has valid packoffset annotations
+  ///                      on all declarations
+  bool HasValidPackoffset;
+  /// LayoutStruct - Layout struct for the buffer
+  CXXRecordDecl *LayoutStruct;
 
   HLSLBufferDecl(DeclContext *DC, bool CBuffer, SourceLocation KwLoc,
                  IdentifierInfo *ID, SourceLocation IDLoc,
@@ -5059,6 +5064,10 @@ class HLSLBufferDecl final : public NamedDecl, public DeclContext {
   SourceLocation getRBraceLoc() const { return RBraceLoc; }
   void setRBraceLoc(SourceLocation L) { RBraceLoc = L; }
   bool isCBuffer() const { return IsCBuffer; }
+  void setHasValidPackoffset(bool PO) { HasValidPackoffset = PO; }
+  bool hasValidPackoffset() const { return HasValidPackoffset; }
+  const CXXRecordDecl *getLayoutStruct() const { return LayoutStruct; }
+  void addLayoutStruct(CXXRecordDecl *LS);
 
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 1d9743520654e..c3ff7ebd88516 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -6266,8 +6266,8 @@ class HLSLAttributedResourceType : public Type, public llvm::FoldingSetNode {
     LLVM_PREFERRED_TYPE(bool)
     uint8_t RawBuffer : 1;
 
-    Attributes(llvm::dxil::ResourceClass ResourceClass, bool IsROV,
-               bool RawBuffer)
+    Attributes(llvm::dxil::ResourceClass ResourceClass, bool IsROV = false,
+               bool RawBuffer = false)
         : ResourceClass(ResourceClass), IsROV(IsROV), RawBuffer(RawBuffer) {}
 
     Attributes() : Attributes(llvm::dxil::ResourceClass::UAV, false, false) {}
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 610207cf8b9a4..5a3be1690f335 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -1747,6 +1747,10 @@ void NamedDecl::printNestedNameSpecifier(raw_ostream &OS,
       }
     }
 
+    // Suppress transparent contexts like export or HLSLBufferDecl context
+    if (Ctx->isTransparentContext())
+      continue;
+
     // Skip non-named contexts such as linkage specifications and ExportDecls.
     const NamedDecl *ND = dyn_cast<NamedDecl>(Ctx);
     if (!ND)
@@ -5717,7 +5721,7 @@ HLSLBufferDecl::HLSLBufferDecl(DeclContext *DC, bool CBuffer,
                                SourceLocation IDLoc, SourceLocation LBrace)
     : NamedDecl(Decl::Kind::HLSLBuffer, DC, IDLoc, DeclarationName(ID)),
       DeclContext(Decl::Kind::HLSLBuffer), LBraceLoc(LBrace), KwLoc(KwLoc),
-      IsCBuffer(CBuffer) {}
+      IsCBuffer(CBuffer), HasValidPackoffset(false), LayoutStruct(nullptr) {}
 
 HLSLBufferDecl *HLSLBufferDecl::Create(ASTContext &C,
                                        DeclContext *LexicalParent, bool CBuffer,
@@ -5747,6 +5751,12 @@ HLSLBufferDecl *HLSLBufferDecl::CreateDeserialized(ASTContext &C,
                                     SourceLocation(), SourceLocation());
 }
 
+void HLSLBufferDecl::addLayoutStruct(CXXRecordDecl *LS) {
+  assert(LayoutStruct == nullptr && "layout struct has already been set");
+  LayoutStruct = LS;
+  addDecl(LS);
+}
+
 //===----------------------------------------------------------------------===//
 // ImportDecl Implementation
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 856d8b1b2948d..547220fb1f1e1 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -17,16 +17,22 @@
 #include "CodeGenFunction.h"
 #include "CodeGenModule.h"
 #include "TargetInfo.h"
+#include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/Type.h"
 #include "clang/Basic/TargetOptions.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Support/Alignment.h"
 
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormatVariadic.h"
 
 using namespace clang;
@@ -34,6 +40,9 @@ using namespace CodeGen;
 using namespace clang::hlsl;
 using namespace llvm;
 
+static void createResourceInitFn(CodeGenModule &CGM, llvm::GlobalVariable *GV,
+                                 unsigned Slot, unsigned Space);
+
 namespace {
 
 void addDxilValVersion(StringRef ValVersionStr, llvm::Module &M) {
@@ -56,58 +65,17 @@ void addDxilValVersion(StringRef ValVersionStr, llvm::Module &M) {
   auto *DXILValMD = M.getOrInsertNamedMetadata(DXILValKey);
   DXILValMD->addOperand(Val);
 }
-// cbuffer will be translated into global variable in special address space.
-// If translate into C,
-// cbuffer A {
-//   float a;
-//   float b;
-// }
-// float foo() { return a + b; }
-//
-// will be translated into
-//
-// struct A {
-//   float a;
-//   float b;
-// } cbuffer_A __attribute__((address_space(4)));
-// float foo() { return cbuffer_A.a + cbuffer_A.b; }
-//
-// layoutBuffer will create the struct A type.
-// replaceBuffer will replace use of global variable a and b with cbuffer_A.a
-// and cbuffer_A.b.
-//
-void layoutBuffer(CGHLSLRuntime::Buffer &Buf, const DataLayout &DL) {
-  if (Buf.Constants.empty())
-    return;
-
-  std::vector<llvm::Type *> EltTys;
-  for (auto &Const : Buf.Constants) {
-    GlobalVariable *GV = Const.first;
-    Const.second = EltTys.size();
-    llvm::Type *Ty = GV->getValueType();
-    EltTys.emplace_back(Ty);
-  }
-  Buf.LayoutStruct = llvm::StructType::get(EltTys[0]->getContext(), EltTys);
-}
-
-GlobalVariable *replaceBuffer(CGHLSLRuntime::Buffer &Buf) {
-  // Create global variable for CB.
-  GlobalVariable *CBGV = new GlobalVariable(
-      Buf.LayoutStruct, /*isConstant*/ true,
-      GlobalValue::LinkageTypes::ExternalLinkage, nullptr,
-      llvm::formatv("{0}{1}", Buf.Name, Buf.IsCBuffer ? ".cb." : ".tb."),
-      GlobalValue::NotThreadLocal);
-
-  return CBGV;
-}
 
 } // namespace
 
-llvm::Type *CGHLSLRuntime::convertHLSLSpecificType(const Type *T) {
+llvm::Type *
+CGHLSLRuntime::convertHLSLSpecificType(const Type *T,
+                                       SmallVector<unsigned> *Packoffsets) {
   assert(T->isHLSLSpecificType() && "Not an HLSL specific type!");
 
   // Check if the target has a specific translation for this type first.
-  if (llvm::Type *TargetTy = CGM.getTargetCodeGenInfo().getHLSLType(CGM, T))
+  if (llvm::Type *TargetTy =
+          CGM.getTargetCodeGenInfo().getHLSLType(CGM, T, Packoffsets))
     return TargetTy;
 
   llvm_unreachable("Generic handling of HLSL types is not supported.");
@@ -117,48 +85,174 @@ llvm::Triple::ArchType CGHLSLRuntime::getArch() {
   return CGM.getTarget().getTriple().getArch();
 }
 
-void CGHLSLRuntime::addConstant(VarDecl *D, Buffer &CB) {
-  if (D->getStorageClass() == SC_Static) {
-    // For static inside cbuffer, take as global static.
-    // Don't add to cbuffer.
-    CGM.EmitGlobal(D);
-    return;
-  }
+// Returns true if the type is an HLSL resource class
+static bool isResourceRecordType(const clang::Type *Ty) {
+  return HLSLAttributedResourceType::findHandleTypeOnResource(Ty) != nullptr;
+}
 
-  auto *GV = cast<GlobalVariable>(CGM.GetAddrOfGlobalVar(D));
-  GV->setExternallyInitialized(true);
-  // Add debug info for constVal.
-  if (CGDebugInfo *DI = CGM.getModuleDebugInfo())
-    if (CGM.getCodeGenOpts().getDebugInfo() >=
-        codegenoptions::DebugInfoKind::LimitedDebugInfo)
-      DI->EmitGlobalVariable(cast<GlobalVariable>(GV), D);
-
-  // FIXME: support packoffset.
-  // See https://github.com/llvm/llvm-project/issues/57914.
-  uint32_t Offset = 0;
-  bool HasUserOffset = false;
-
-  unsigned LowerBound = HasUserOffset ? Offset : UINT_MAX;
-  CB.Constants.emplace_back(std::make_pair(GV, LowerBound));
+// Returns true if the type is an HLSL resource class or an array of them
+static bool isResourceRecordTypeOrArrayOf(const clang::Type *Ty) {
+  while (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(Ty))
+    Ty = CAT->getArrayElementTypeNoTypeQual();
+  return isResourceRecordType(Ty);
 }
 
-void CGHLSLRuntime::addBufferDecls(const DeclContext *DC, Buffer &CB) {
-  for (Decl *it : DC->decls()) {
-    if (auto *ConstDecl = dyn_cast<VarDecl>(it)) {
-      addConstant(ConstDecl, CB);
-    } else if (isa<CXXRecordDecl, EmptyDecl>(it)) {
+// Emits constant global variables for buffer constants declarations
+// and creates metadata linking the constant globals with the buffer global.
+void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl,
+                                                 llvm::GlobalVariable *BufGV) {
+  LLVMContext &Ctx = CGM.getLLVMContext();
+
+  // get the layout struct from constant buffer target type
+  llvm::Type *BufType = BufGV->getValueType();
+  llvm::Type *BufLayoutType =
+      cast<llvm::TargetExtType>(BufType)->getTypeParameter(0);
+  llvm::StructType *LayoutStruct = cast<llvm::StructType>(
+      cast<llvm::TargetExtType>(BufLayoutType)->getTypeParameter(0));
+
+  // Start metadata list associating the buffer global variable with its
+  // constants
+  SmallVector<llvm::Metadata *> BufGlobals;
+  BufGlobals.push_back(ValueAsMetadata::get(BufGV));
+
+  const auto *ElemIt = LayoutStruct->element_begin();
+  for (Decl *D : BufDecl->decls()) {
+    if (isa<CXXRecordDecl, EmptyDecl>(D))
       // Nothing to do for this declaration.
-    } else if (isa<FunctionDecl>(it)) {
-      // A function within an cbuffer is effectively a top-level function,
-      // as it only refers to globally scoped declarations.
-      CGM.EmitTopLevelDecl(it);
+      continue;
+    if (isa<FunctionDecl>(D)) {
+      // A function within a cbuffer is effectively a top-level function.
+      CGM.EmitTopLevelDecl(D);
+      continue;
     }
+    VarDecl *VD = dyn_cast<VarDecl>(D);
+    if (!VD)
+      continue;
+
+    QualType VDTy = VD->getType();
+    if (VDTy.getAddressSpace() != LangAS::hlsl_constant) {
+      if (VD->getStorageClass() == SC_Static ||
+          VDTy.getAddressSpace() == LangAS::hlsl_groupshared ||
+          isResourceRecordTypeOrArrayOf(VDTy.getTypePtr())) {
+        // Emit static and groupshared variables and resource classes inside
+        // cbuffer as regular globals
+        CGM.EmitGlobal(VD);
+      } else {
+        // Anything else that is not in the hlsl_constant address space must be
+        // an empty struct or a zero-sized array and can be ignored
+        assert(BufDecl->getASTContext().getTypeSize(VDTy) == 0 &&
+               "constant buffer decl with non-zero sized type outside of "
+               "hlsl_constant address space");
+      }
+      continue;
+    }
+
+    assert(ElemIt != LayoutStruct->element_end() &&
+           "number of elements in layout struct does not match");
+    llvm::Type *LayoutType = *ElemIt++;
+
+    // FIXME: handle resources inside user defined structs
+    // (llvm/wg-hlsl#175)
+
+    // create global variable for the constant and add it to metadata list
+    GlobalVariable *ElemGV =
+        cast<GlobalVariable>(CGM.GetAddrOfGlobalVar(VD, LayoutType));
+    BufGlobals.push_back(ValueAsMetadata::get(ElemGV));
   }
+  assert(ElemIt == LayoutStruct->element_end() &&
+         "number of elements in layout struct does not match");
+
+  // add buffer metadata to the module
+  CGM.getModule()
+      .getOrInsertNamedMetadata("hlsl.cbs")
+      ->addOperand(MDNode::get(Ctx, BufGlobals));
 }
 
-void CGHLSLRuntime::addBuffer(const HLSLBufferDecl *D) {
-  Buffers.emplace_back(Buffer(D));
-  addBufferDecls(D, Buffers.back());
+// Creates resource handle type for the HLSL buffer declaration
+static const clang::HLSLAttributedResourceType *
+createBufferHandleType(const HLSLBufferDecl *BufDecl) {
+  ASTContext &AST = BufDecl->getASTContext();
+  QualType QT = AST.getHLSLAttributedResourceType(
+      AST.HLSLResourceTy,
+      QualType(BufDecl->getLayoutStruct()->getTypeForDecl(), 0),
+      HLSLAttributedResourceType::Attributes(ResourceClass::CBuffer));
+  return cast<HLSLAttributedResourceType>(QT.getTypePtr());
+}
+
+static void fillPackoffsetLayout(const HLSLBufferDecl *BufDecl,
+                                 SmallVector<unsigned> &Layout) {
+  assert(Layout.empty() && "expected empty vector for layout");
+  assert(BufDecl->hasValidPackoffset());
+
+  for (Decl *D : BufDecl->decls()) {
+    if (isa<CXXRecordDecl, EmptyDecl>(D) || isa<FunctionDecl>(D)) {
+      continue;
+    }
+    VarDecl *VD = dyn_cast<VarDecl>(D);
+    if (!VD || VD->getType().getAddressSpace() != LangAS::hlsl_constant)
+      continue;
+    assert(VD->hasAttr<HLSLPackOffsetAttr>() &&
+           "expected packoffset attribute on every declaration");
+    size_t Offset = VD->getAttr<HLSLPackOffsetAttr>()->getOffsetInBytes();
+    Layout.push_back(Offset);
+  }
+}
+
+// Codegen for HLSLBufferDecl
+void CGHLSLRuntime::addBuffer(const HLSLBufferDecl *BufDecl) {
+
+  assert(BufDecl->isCBuffer() && "tbuffer codegen is not supported yet");
+
+  // create resource handle type for the buffer
+  const clang::HLSLAttributedResourceType *ResHandleTy =
+      createBufferHandleType(BufDecl);
+
+  // empty constant buffer is ignored
+  if (ResHandleTy->getContainedType()->getAsCXXRecordDecl()->isEmpty())
+    return;
+
+  // create global variable for the constant buffer
+  SmallVector<unsigned> Layout;
+  if (BufDecl->hasValidPackoffset())
+    fillPackoffsetLayout(BufDecl, Layout);
+
+  llvm::TargetExtType *TargetTy =
+      cast<llvm::TargetExtType>(convertHLSLSpecificType(
+          ResHandleTy, BufDecl->hasValidPackoffset() ? &Layout : nullptr));
+  llvm::GlobalVariable *BufGV =
+      new GlobalVariable(TargetTy, /*isConstant*/ true,
+                         GlobalValue::LinkageTypes::ExternalLinkage, nullptr,
+                         llvm::formatv("{0}{1}", BufDecl->getName(),
+                                       BufDecl->isCBuffer() ? ".cb" : ".tb"),
+                         GlobalValue::NotThreadLocal);
+  CGM.getModule().insertGlobalVariable(BufGV);
+
+  // Add globals for constant buffer elements and create metadata nodes
+  emitBufferGlobalsAndMetadata(BufDecl, BufGV);
+
+  // Resource initialization
+  const HLSLResourceBindingAttr *RBA =
+      BufDecl->getAttr<HLSLResourceBindingAttr>();
+  // FIXME: handle implicit binding if no binding attribute is found
+  // (llvm/llvm-project#110722)
+  if (RBA)
+    createResourceInitFn(CGM, BufGV, RBA->getSlotNumber(),
+                         RBA->getSpaceNumber());
+}
+
+llvm::TargetExtType *
+CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) {
+  const auto Entry = LayoutTypes.find(StructType);
+  if (Entry != LayoutTypes.end())
+    return Entry->getSecond();
+  return nullptr;
+}
+
+void CGHLSLRuntime::addHLSLBufferLayoutType(const RecordType *StructType,
+                                            llvm::TargetExtType *LayoutTy) {
+  assert(getHLSLBufferLayoutType(StructType) == nullptr &&
+         "layout type for this struct already exist");
+  LayoutTypes[StructType] = LayoutTy;
 }
 
 void CGHLSLRuntime::finishCodeGen() {
@@ -169,28 +263,8 @@ void CGHLSLRuntime::finishCodeGen() {
     addDxilValVersion(TargetOpts.DxilValidatorVersion, M);
 
   generateGlobalCtorDtorCalls();
-
-  const DataLayout &DL = M.getDataLayout();
-
-  for (auto &Buf : Buffers) {
-    layoutBuffer(Buf, DL);
-    GlobalVariable *GV = replaceBuffer(Buf);
-    M.insertGlobalVariable(GV);
-    llvm::hlsl::ResourceClass RC = Buf.IsCBuffer
-                                       ? llvm::hlsl::ResourceClass::CBuffer
-                                       : llvm::hlsl::ResourceClass::SRV;
-    llvm::hlsl::ResourceKind RK = Buf.IsCBuffer
-                                      ? llvm::hlsl::ResourceKind::CBuffer
-                                      : llvm::hlsl::ResourceKind::TBuffer;
-    addBufferResourceAnnotation(GV, RC, RK, /*IsROV=*/false,
-                                llvm::hlsl::ElementType::Invalid, Buf.Binding);
-  }
 }
 
-CGHLSLRuntime::Buffer::Buffer(const HLSLBufferDecl *D)
-    : Name(D->getName()), IsCBuffer(D->isCBuffer()),
-      Binding(D->getAttr<HLSLResourceBindingAttr>()) {}
-
 void CGHLSLRuntime::addBufferResourceAnnotation(llvm::GlobalVariable *GV,
                                                 llvm::hlsl::ResourceClass RC,
                                                 llvm::hlsl::ResourceKind RK,
@@ -524,21 +598,15 @@ void CGHLSLRuntime::generateGlobalCtorDtorCalls() {
   }
 }
 
-// Returns true if the type is an HLSL resource class
-static bool isResourceRecordType(const clang::Type *Ty) {
-  return HLSLAttributedResourceType::findHandleTypeOnResource(Ty) != nullptr;
-}
-
-static void createResourceInitFn(CodeGenModule &CGM, const VarDecl *VD,
-                                 llvm::GlobalVariable *GV, unsigned Slot,
-                                 unsigned Space) {
+static void createResourceInitFn(CodeGenModule &CGM, llvm::GlobalVariable *GV,
+                                 unsigned Slot, unsigned Space) {
   LLVMContext &Ctx = CGM.getLLVMContext();
   llvm::Type *Int1Ty = llvm::Type::getInt1Ty(Ctx);
 
   llvm::Function *InitResFunc = llvm::Function::Create(
       llvm::FunctionType::get(CGM.VoidTy, false),
       llvm::GlobalValue::InternalLinkage,
-      ("_init_resource_" + VD->getName()).str(), CGM.getModule());
+      ("_init_resource_" + GV->getName()).str(), CGM.getModule());
   InitResFunc->addFnAttr(llvm::Attribute::AlwaysInline);
 
   llvm::BasicBlock *EntryBB =
@@ -547,20 +615,15 @@ static void createResourceInitFn(CodeGenModule &CGM, const VarDecl *VD,
   const DataLayout &DL = CGM.getModule().getDataLayout();
   Builder.SetInsertPoint(EntryBB);
 
-  const HLSLAttributedResourceType *AttrResType =
-      HLSLAttributedResourceType::findHandleTypeOnResource(
-          VD->getType().getTypePtr());
-
-  // FIXME: Only simple declarations of resources are supported for now.
-  // Arrays of resources or resources in user defined classes are
-  // not implemented yet.
-  assert(AttrResType != nullptr &&
-         "Resource class must have a handle of HLSLAttributedResourceType");
-
-  llvm::Type *TargetTy =
-      CGM.getTargetCodeGenInfo().getHLSLType(CGM, AttrResType);
-  assert(TargetTy != nullptr &&
-         "Failed to convert resource handle to target type");
+  // Make sure the global variable is a resource handle (cbuffer) or
+  // resource class (=class where the first element is a resource handle).
+  llvm::Type *HandleTy = GV->getValueType();
+  assert((HandleTy->isTargetExtTy() ||
+          (HandleTy->isStructTy() &&
+           HandleTy->getStructElementType(0)->isTargetExtTy())) &&
+         "unexpected type of the global");
+  if (!HandleTy->isTargetExtTy())
+    HandleTy = HandleTy->getStructElementType(0);
 
   llvm::Value *Args[] = {
       llvm::ConstantInt::get(CGM.IntTy, Space), /* reg_space */
@@ -572,9 +635,9 @@ static void createResourceInitFn(CodeGenModule &CGM, const VarDecl *VD,
       llvm::ConstantInt::get(Int1Ty, false) /* non-uniform */
   };
   llvm::Value *CreateHandle = Builder.CreateIntrinsic(
-      /*ReturnType=*/TargetTy,
+      /*ReturnType=*/HandleTy,
       CGM.getHLSLRuntime().getCreateHandleFromBindingIntrinsic(), Args, nullptr,
-      Twine(VD->getName()).concat("_h"));
+      Twine(GV->getName()).concat("_h"));
 
   llvm::Value *HandleRef = Builder.CreateStructGEP(GV->getValueType(), GV, 0);
   Builder.CreateAlignedStore(CreateHandle, HandleRef,
@@ -601,8 +664,7 @@ void CGHLSLRuntime::handleGlobalVarDefinition(const VarDecl *VD,
     // not implemented yet.
     return;
 
-  createResourceInitFn(CGM, VD, GV, RBA->getSlotNumber(),
-                       RBA->getSpaceNumber());
+  createResourceInitFn(CGM, GV, RBA->getSlotNumber(), RBA->getSpaceNumber());
 }
 
 llvm::Instruction *CGHLSLRuntime::getConvergenceToken(BasicBlock &BB) {
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 8767a2ddceb96..a9da42324a038 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_CLANG_LIB_CODEGEN_CGHLSLRUNTIME_H
 #define LLVM_CLANG_LIB_CODEGEN_CGHLSLRUNTIME_H
 
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsDirectX.h"
@@ -46,20 +47,26 @@
     }                                                                          \
   }
 
+using ResourceClass = llvm::dxil::ResourceClass;
+
 namespace llvm {
 class GlobalVariable;
 class Function;
 class StructType;
+class Metadata;
 } // namespace llvm
 
 namespace clang {
+class NamedDecl;
 class VarDecl;
 class ParmVarDecl;
 class InitListExpr;
 class HLSLBufferDecl;
 class HLSLResourceBindingAttr;
 class Type;
+class RecordType;
 class DeclContext;
+class HLSLPackOffsetAttr;
 
 class FunctionDecl;
 
@@ -126,16 +133,6 @@ class CGHLSLRuntime {
     unsigned Space;
     BufferResBinding(HLSLResourceBindingAttr *Attr);
   };
-  struct Buffer {
-    Buffer(const HLSLBufferDecl *D);
-    llvm::StringRef Name;
-    // IsCBuffer - Whether the buffer is a cbuffer (and not a tbuffer).
-    bool IsCBuffer;
-    BufferResBinding Binding;
-    // Global variable and offset for each constant.
-    std::vector<std::pair<llvm::GlobalVariable *, unsigned>> Constants;
-    llvm::StructType *LayoutStruct = nullptr;
-  };
 
 protected:
   CodeGenModule &CGM;
@@ -147,7 +144,9 @@ class CGHLSLRuntime {
   CGHLSLRuntime(CodeGenModule &CGM) : CGM(CGM) {}
   virtual ~CGHLSLRuntime() {}
 
-  llvm::Type *convertHLSLSpecificType(const Type *T);
+  llvm::Type *
+  convertHLSLSpecificType(const Type *T,
+                          SmallVector<unsigned> *Packoffsets = nullptr);
 
   void annotateHLSLResource(const VarDecl *D, llvm::GlobalVariable *GV);
   void generateGlobalCtorDtorCalls();
@@ -163,6 +162,10 @@ class CGHLSLRuntime {
 
   llvm::Instruction *getConvergenceToken(llvm::BasicBlock &BB);
 
+  llvm::TargetExtType *
+  getHLSLBufferLayoutType(const RecordType *LayoutStructTy);
+  void addHLSLBufferLayoutType(const RecordType *LayoutStructTy,
+                               llvm::TargetExtType *LayoutTy);
   void emitInitListOpaqueValues(CodeGenFunction &CGF, InitListExpr *E);
 
 private:
@@ -171,10 +174,11 @@ class CGHLSLRuntime {
                                    llvm::hlsl::ResourceKind RK, bool IsROV,
                                    llvm::hlsl::ElementType ET,
                                    BufferResBinding &Binding);
-  void addConstant(VarDecl *D, Buffer &CB);
-  void addBufferDecls(const DeclContext *DC, Buffer &CB);
+  void emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl,
+                                    llvm::GlobalVariable *BufGV);
   llvm::Triple::ArchType getArch();
-  llvm::SmallVector<Buffer> Buffers;
+
+  llvm::DenseMap<const clang::RecordType *, llvm::TargetExtType *> LayoutTypes;
 };
 
 } // namespace CodeGen
diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt
index 868ec847b9634..05ab6671453f8 100644
--- a/clang/lib/CodeGen/CMakeLists.txt
+++ b/clang/lib/CodeGen/CMakeLists.txt
@@ -106,6 +106,7 @@ add_clang_library(clangCodeGen
   ConstantInitBuilder.cpp
   CoverageMappingGen.cpp
   ItaniumCXXABI.cpp
+  HLSLBufferLayoutBuilder.cpp
   LinkInModulesPass.cpp
   MacroPPCallbacks.cpp
   MicrosoftCXXABI.cpp
diff --git a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp
new file mode 100644
index 0000000000000..1ae00023ab2bc
--- /dev/null
+++ b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp
@@ -0,0 +1,229 @@
+//===- HLSLBufferLayoutBuilder.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "HLSLBufferLayoutBuilder.h"
+#include "CGHLSLRuntime.h"
+#include "CodeGenModule.h"
+#include "clang/AST/Type.h"
+
+//===----------------------------------------------------------------------===//
+// Implementation of constant buffer layout common between DirectX and
+// SPIR/SPIR-V.
+//===----------------------------------------------------------------------===//
+
+using namespace clang;
+using namespace clang::CodeGen;
+
+namespace {
+
+// Creates a new (possibly multi-dimensional) array type with the same
+// dimensions as ArrayType but with NewElemType as the innermost element type.
+static llvm::Type *
+createArrayWithNewElementType(CodeGenModule &CGM,
+                              const ConstantArrayType *ArrayType,
+                              llvm::Type *NewElemType) {
+  const clang::Type *ArrayElemType = ArrayType->getArrayElementTypeNoTypeQual();
+  if (ArrayElemType->isConstantArrayType())
+    NewElemType = createArrayWithNewElementType(
+        CGM, cast<const ConstantArrayType>(ArrayElemType), NewElemType);
+  return llvm::ArrayType::get(NewElemType, ArrayType->getSExtSize());
+}
+
+// Returns the size in bytes of a scalar or fixed-vector LLVM type.
+static unsigned getScalarOrVectorSizeInBytes(llvm::Type *Ty) {
+  assert(Ty->isVectorTy() || Ty->isIntegerTy() || Ty->isFloatingPointTy());
+  if (Ty->isVectorTy()) {
+    llvm::FixedVectorType *FVT = cast<llvm::FixedVectorType>(Ty);
+    return FVT->getNumElements() *
+           (FVT->getElementType()->getScalarSizeInBits() / 8);
+  }
+  return Ty->getScalarSizeInBits() / 8;
+}
+
+} // namespace
+
+namespace clang {
+namespace CodeGen {
+
+// Creates a layout type for given struct with HLSL constant buffer layout
+// taking into account Packoffsets, if provided.
+// Previously created layout types are cached by CGHLSLRuntime.
+//
+// The function iterates over all fields of the StructType (including base
+// classes) and calls layoutField to convert each field to its corresponding
+// LLVM type and to calculate its HLSL constant buffer layout. Any embedded
+// structs (or arrays of structs) are converted to target layout types as well.
+llvm::TargetExtType *HLSLBufferLayoutBuilder::createLayoutType(
+    const RecordType *StructType,
+    const llvm::SmallVector<unsigned> *Packoffsets) {
+
+  // check if we already have the layout type for this struct
+  if (llvm::TargetExtType *Ty =
+          CGM.getHLSLRuntime().getHLSLBufferLayoutType(StructType))
+    return Ty;
+
+  SmallVector<unsigned> Layout;
+  SmallVector<llvm::Type *> LayoutElements;
+  unsigned Index = 0; // packoffset index
+  unsigned EndOffset = 0;
+
+  // reserve first spot in the layout vector for buffer size
+  Layout.push_back(0);
+
+  // iterate over all fields of the record, including fields on base classes
+  llvm::SmallVector<const RecordType *> RecordTypes;
+  RecordTypes.push_back(StructType);
+  while (RecordTypes.back()->getAsCXXRecordDecl()->getNumBases()) {
+    CXXRecordDecl *D = RecordTypes.back()->getAsCXXRecordDecl();
+    assert(D->getNumBases() == 1 &&
+           "HLSL doesn't support multiple inheritance");
+    RecordTypes.push_back(D->bases_begin()->getType()->getAs<RecordType>());
+  }
+  while (!RecordTypes.empty()) {
+    const RecordType *RT = RecordTypes.back();
+    RecordTypes.pop_back();
+
+    for (const auto *FD : RT->getDecl()->fields()) {
+      assert((!Packoffsets || Index < Packoffsets->size()) &&
+             "number of elements in layout struct does not match number of "
+             "packoffset annotations");
+
+      if (!layoutField(FD, EndOffset, Layout, LayoutElements,
+                       Packoffsets ? (*Packoffsets)[Index] : -1))
+        return nullptr;
+      Index++;
+    }
+  }
+
+  // set the size of the buffer
+  Layout[0] = EndOffset;
+
+  // create the layout struct type; anonymous structs have an empty name but
+  // non-empty qualified name
+  const CXXRecordDecl *Decl = StructType->getAsCXXRecordDecl();
+  std::string Name =
+      Decl->getName().empty() ? "anon" : Decl->getQualifiedNameAsString();
+  llvm::StructType *StructTy =
+      llvm::StructType::create(LayoutElements, Name, true);
+
+  // create target layout type
+  llvm::TargetExtType *NewLayoutTy = llvm::TargetExtType::get(
+      CGM.getLLVMContext(), LayoutTypeName, {StructTy}, Layout);
+  if (NewLayoutTy)
+    CGM.getHLSLRuntime().addHLSLBufferLayoutType(StructType, NewLayoutTy);
+  return NewLayoutTy;
+}
+
+// The function converts a single field of HLSL Buffer to its corresponding
+// LLVM type and calculates its layout. Any embedded structs (or
+// arrays of structs) are converted to target layout types as well.
+// The converted type is appended to the LayoutElements list, the element
+// offset is added to the Layout list and the EndOffset updated to the offset
+// just after the laid-out element (which is basically the size of the
+// buffer).
+// Returns true if the conversion was successful.
+// The packoffset parameter contains the field's layout offset provided by the
+// user or -1 if there was no packoffset (or register(cX)) annotation.
+bool HLSLBufferLayoutBuilder::layoutField(
+    const FieldDecl *FD, unsigned &EndOffset, SmallVector<unsigned> &Layout,
+    SmallVector<llvm::Type *> &LayoutElements, int Packoffset) {
+
+  // Size of element; for arrays this is a size of a single element in the
+  // array. Total array size is calculated as (ArrayCount-1) * ArrayStride +
+  // ElemSize.
+  unsigned ElemSize = 0;
+  unsigned ElemOffset = 0;
+  unsigned ArrayCount = 1;
+  unsigned ArrayStride = 0;
+
+  const unsigned BufferRowAlign = 16U;
+  unsigned NextRowOffset = llvm::alignTo(EndOffset, BufferRowAlign);
+
+  llvm::Type *ElemLayoutTy = nullptr;
+  QualType FieldTy = FD->getType();
+
+  if (FieldTy->isConstantArrayType()) {
+    // Unwrap array to find the element type and get combined array size.
+    QualType Ty = FieldTy;
+    while (Ty->isConstantArrayType()) {
+      const ConstantArrayType *ArrayTy = cast<ConstantArrayType>(Ty);
+      ArrayCount *= ArrayTy->getSExtSize();
+      Ty = ArrayTy->getElementType();
+    }
+    // For array of structures, create a new array with a layout type
+    // instead of the structure type.
+    if (Ty->isStructureType()) {
+      // createLayoutType returns nullptr on failure, so check the result
+      // before using it (cast<> would assert on a null pointer).
+      llvm::TargetExtType *NewTy = createLayoutType(Ty->getAsStructureType());
+      if (!NewTy)
+        return false;
+      ElemSize = NewTy->getIntParameter(0);
+      ElemLayoutTy = createArrayWithNewElementType(
+          CGM, cast<ConstantArrayType>(FieldTy.getTypePtr()), NewTy);
+    } else {
+      // Array of vectors or scalars
+      ElemSize =
+          getScalarOrVectorSizeInBytes(CGM.getTypes().ConvertTypeForMem(Ty));
+      ElemLayoutTy = CGM.getTypes().ConvertTypeForMem(FieldTy);
+    }
+    ArrayStride = llvm::alignTo(ElemSize, BufferRowAlign);
+    ElemOffset = (Packoffset != -1) ? Packoffset : NextRowOffset;
+
+  } else if (FieldTy->isStructureType()) {
+    // Create a layout type for the structure
+    ElemLayoutTy = createLayoutType(FieldTy->getAsStructureType());
+    if (!ElemLayoutTy)
+      return false;
+    assert(isa<llvm::TargetExtType>(ElemLayoutTy) && "expected target type");
+    ElemSize = cast<llvm::TargetExtType>(ElemLayoutTy)->getIntParameter(0);
+    ElemOffset = (Packoffset != -1) ? Packoffset : NextRowOffset;
+
+  } else {
+    // scalar or vector - find element size and alignment
+    unsigned Align = 0;
+    ElemLayoutTy = CGM.getTypes().ConvertTypeForMem(FieldTy);
+    if (ElemLayoutTy->isVectorTy()) {
+      // align vectors by sub element size
+      const llvm::FixedVectorType *FVT =
+          cast<llvm::FixedVectorType>(ElemLayoutTy);
+      unsigned SubElemSize = FVT->getElementType()->getScalarSizeInBits() / 8;
+      ElemSize = FVT->getNumElements() * SubElemSize;
+      Align = SubElemSize;
+    } else {
+      assert(ElemLayoutTy->isIntegerTy() || ElemLayoutTy->isFloatingPointTy());
+      ElemSize = ElemLayoutTy->getScalarSizeInBits() / 8;
+      Align = ElemSize;
+    }
+
+    // calculate or get element offset for the vector or scalar
+    if (Packoffset != -1) {
+      ElemOffset = Packoffset;
+    } else {
+      ElemOffset = llvm::alignTo(EndOffset, Align);
+      // if the element does not fit, move it to the next row
+      if (ElemOffset + ElemSize > NextRowOffset)
+        ElemOffset = NextRowOffset;
+    }
+  }
+
+  // Update end offset of the layout; do not update it if the EndOffset
+  // is already bigger than the new value (which may happen with unordered
+  // packoffset annotations)
+  unsigned NewEndOffset =
+      ElemOffset + (ArrayCount - 1) * ArrayStride + ElemSize;
+  EndOffset = std::max<unsigned>(EndOffset, NewEndOffset);
+
+  // add the layout element and offset to the lists
+  Layout.push_back(ElemOffset);
+  LayoutElements.push_back(ElemLayoutTy);
+  return true;
+}
+
+} // namespace CodeGen
+} // namespace clang
diff --git a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h
new file mode 100644
index 0000000000000..57bb17c557b9c
--- /dev/null
+++ b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h
@@ -0,0 +1,48 @@
+//===- HLSLBufferLayoutBuilder.h ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/DerivedTypes.h"
+
+namespace clang {
+class RecordType;
+class FieldDecl;
+
+namespace CodeGen {
+class CodeGenModule;
+
+//===----------------------------------------------------------------------===//
+// Implementation of constant buffer layout common between DirectX and
+// SPIR/SPIR-V.
+//===----------------------------------------------------------------------===//
+
+class HLSLBufferLayoutBuilder {
+private:
+  CodeGenModule &CGM;
+  llvm::StringRef LayoutTypeName;
+
+public:
+  HLSLBufferLayoutBuilder(CodeGenModule &CGM, llvm::StringRef LayoutTypeName)
+      : CGM(CGM), LayoutTypeName(LayoutTypeName) {}
+
+  // Returns the LLVM target extension type named LayoutTypeName for the given
+  // struct. Its integer parameters hold the layout: the total size first, then
+  // the offset of each element. Packoffsets optionally supplies the offsets.
+  llvm::TargetExtType *
+  createLayoutType(const RecordType *StructType,
+                   const llvm::SmallVector<unsigned> *Packoffsets = nullptr);
+
+private:
+  bool layoutField(const clang::FieldDecl *FD, unsigned &EndOffset,
+                   llvm::SmallVector<unsigned> &Layout,
+                   llvm::SmallVector<llvm::Type *> &LayoutElements,
+                   int Packoffset);
+};
+
+} // namespace CodeGen
+} // namespace clang
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 4a66683a3b91f..86057c14a549e 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -439,7 +439,9 @@ class TargetCodeGenInfo {
   }
 
   /// Return an LLVM type that corresponds to a HLSL type
-  virtual llvm::Type *getHLSLType(CodeGenModule &CGM, const Type *T) const {
+  virtual llvm::Type *
+  getHLSLType(CodeGenModule &CGM, const Type *T,
+              const SmallVector<unsigned> *Packoffsets = nullptr) const {
     return nullptr;
   }
 
diff --git a/clang/lib/CodeGen/Targets/DirectX.cpp b/clang/lib/CodeGen/Targets/DirectX.cpp
index 7935f7ae37004..77091eb45f5cf 100644
--- a/clang/lib/CodeGen/Targets/DirectX.cpp
+++ b/clang/lib/CodeGen/Targets/DirectX.cpp
@@ -7,8 +7,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "ABIInfoImpl.h"
+#include "CodeGenModule.h"
+#include "HLSLBufferLayoutBuilder.h"
 #include "TargetInfo.h"
+#include "clang/AST/Type.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Type.h"
 
 using namespace clang;
 using namespace clang::CodeGen;
@@ -24,11 +29,14 @@ class DirectXTargetCodeGenInfo : public TargetCodeGenInfo {
   DirectXTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
       : TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {}
 
-  llvm::Type *getHLSLType(CodeGenModule &CGM, const Type *T) const override;
+  llvm::Type *getHLSLType(
+      CodeGenModule &CGM, const Type *T,
+      const SmallVector<unsigned> *Packoffsets = nullptr) const override;
 };
 
-llvm::Type *DirectXTargetCodeGenInfo::getHLSLType(CodeGenModule &CGM,
-                                                  const Type *Ty) const {
+llvm::Type *DirectXTargetCodeGenInfo::getHLSLType(
+    CodeGenModule &CGM, const Type *Ty,
+    const SmallVector<unsigned> *Packoffsets) const {
   auto *ResType = dyn_cast<HLSLAttributedResourceType>(Ty);
   if (!ResType)
     return nullptr;
@@ -56,9 +64,19 @@ llvm::Type *DirectXTargetCodeGenInfo::getHLSLType(CodeGenModule &CGM,
 
     return llvm::TargetExtType::get(Ctx, TypeName, {ElemType}, Ints);
   }
-  case llvm::dxil::ResourceClass::CBuffer:
-    llvm_unreachable("dx.CBuffer handles are not implemented yet");
-    break;
+  case llvm::dxil::ResourceClass::CBuffer: {
+    QualType ContainedTy = ResType->getContainedType();
+    if (ContainedTy.isNull() || !ContainedTy->isStructureType())
+      return nullptr;
+
+    llvm::Type *BufferLayoutTy =
+        HLSLBufferLayoutBuilder(CGM, "dx.Layout")
+            .createLayoutType(ContainedTy->getAsStructureType(), Packoffsets);
+    if (!BufferLayoutTy)
+      return nullptr;
+
+    return llvm::TargetExtType::get(Ctx, "dx.CBuffer", {BufferLayoutTy});
+  }
   case llvm::dxil::ResourceClass::Sampler:
     llvm_unreachable("dx.Sampler handles are not implemented yet");
     break;
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index b81ed29a5159b..c94db31ae1a89 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -52,7 +52,9 @@ class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
 
   unsigned getOpenCLKernelCallingConv() const override;
   llvm::Type *getOpenCLType(CodeGenModule &CGM, const Type *T) const override;
-  llvm::Type *getHLSLType(CodeGenModule &CGM, const Type *Ty) const override;
+  llvm::Type *getHLSLType(
+      CodeGenModule &CGM, const Type *Ty,
+      const SmallVector<unsigned> *Packoffsets = nullptr) const override;
   llvm::Type *getSPIRVImageTypeFromHLSLResource(
       const HLSLAttributedResourceType::Attributes &attributes,
       llvm::Type *ElementType, llvm::LLVMContext &Ctx) const;
@@ -367,8 +369,9 @@ llvm::Type *CommonSPIRTargetCodeGenInfo::getOpenCLType(CodeGenModule &CGM,
   return nullptr;
 }
 
-llvm::Type *CommonSPIRTargetCodeGenInfo::getHLSLType(CodeGenModule &CGM,
-                                                     const Type *Ty) const {
+llvm::Type *CommonSPIRTargetCodeGenInfo::getHLSLType(
+    CodeGenModule &CGM, const Type *Ty,
+    const SmallVector<unsigned> *Packoffsets) const {
   auto *ResType = dyn_cast<HLSLAttributedResourceType>(Ty);
   if (!ResType)
     return nullptr;
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 362df485a025c..d95763b22a819 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -14295,6 +14295,13 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl) {
     if (getLangOpts().OpenCL &&
         Var->getType().getAddressSpace() == LangAS::opencl_local)
       return;
+
+    // In HLSL, objects in the hlsl_constant address space are initialized
+    // externally, so don't synthesize an implicit initializer.
+    if (getLangOpts().HLSL &&
+        Var->getType().getAddressSpace() == LangAS::hlsl_constant)
+      return;
+
     // C++03 [dcl.init]p9:
     //   If no initializer is specified for an object, and the
     //   object is of (possibly cv-qualified) non-POD class type (or
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 20275ded8a561..502a1429ff6e3 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -239,6 +239,7 @@ static void validatePackoffset(Sema &S, HLSLBufferDecl *BufDecl) {
 
   // Make sure there is no overlap in packoffset - sort PackOffsetVec by offset
   // and compare adjacent values.
+  bool IsValid = true;
   ASTContext &Context = S.getASTContext();
   std::sort(PackOffsetVec.begin(), PackOffsetVec.end(),
             [](const std::pair<VarDecl *, HLSLPackOffsetAttr *> &LHS,
@@ -257,8 +258,10 @@ static void validatePackoffset(Sema &S, HLSLBufferDecl *BufDecl) {
       VarDecl *NextVar = PackOffsetVec[i + 1].first;
       S.Diag(NextVar->getLocation(), diag::err_hlsl_packoffset_overlap)
           << NextVar << Var;
+      IsValid = false;
     }
   }
+  BufDecl->setHasValidPackoffset(IsValid);
 }
 
 // Returns true if the array has a zero size = if any of the dimensions is 0
@@ -500,7 +503,7 @@ void createHostLayoutStructForBuffer(Sema &S, HLSLBufferDecl *BufDecl) {
     }
   }
   LS->completeDefinition();
-  BufDecl->addDecl(LS);
+  BufDecl->addLayoutStruct(LS);
 }
 
 // Handle end of cbuffer/tbuffer declaration
diff --git a/clang/test/CodeGenHLSL/cbuf.hlsl b/clang/test/CodeGenHLSL/cbuf.hlsl
deleted file mode 100644
index 825e7b8161a60..0000000000000
--- a/clang/test/CodeGenHLSL/cbuf.hlsl
+++ /dev/null
@@ -1,33 +0,0 @@
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
-
-// RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan-library %s \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
-
-// CHECK: @a = external addrspace(2) externally_initialized global float, align 4
-// CHECK: @b = external addrspace(2) externally_initialized global double, align 8
-// CHECK: @c = external addrspace(2) externally_initialized global float, align 4
-// CHECK: @d = external addrspace(2) externally_initialized global double, align 8
-
-// CHECK: @[[CB:.+]] = external constant { float, double }
-cbuffer A : register(b0, space2) {
-  float a;
-  double b;
-}
-
-// CHECK: @[[TB:.+]] = external constant { float, double }
-tbuffer A : register(t2, space1) {
-  float c;
-  double d;
-}
-
-float foo() {
-// CHECK: load float, ptr addrspace(2) @a, align 4
-// CHECK: load double, ptr addrspace(2) @b, align 8
-// CHECK: load float, ptr addrspace(2) @c, align 4
-// CHECK: load double, ptr addrspace(2) @d, align 8
-  return a + b + c*d;
-}
-
-// CHECK: !hlsl.cbufs = !{![[CBMD:[0-9]+]]}
-// CHECK: ![[CBMD]] = !{ptr @[[CB]], i32 13, i32 0, i1 false, i32 0, i32 2}
diff --git a/clang/test/CodeGenHLSL/cbuf_in_namespace.hlsl b/clang/test/CodeGenHLSL/cbuf_in_namespace.hlsl
deleted file mode 100644
index 13c401d428331..0000000000000
--- a/clang/test/CodeGenHLSL/cbuf_in_namespace.hlsl
+++ /dev/null
@@ -1,29 +0,0 @@
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
-
-// RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan-library %s \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
-
-// Make sure cbuffer inside namespace works.
-
-// CHECK: @_ZN2n02n11aE = external addrspace(2) externally_initialized global float, align 4
-// CHECK: @_ZN2n01bE = external addrspace(2) externally_initialized global float, align 4
-
-// CHECK: @[[CB:.+]] = external constant { float }
-// CHECK: @[[TB:.+]] = external constant { float }
-namespace n0 {
-namespace n1 {
-  cbuffer A {
-    float a;
-  }
-}
-  tbuffer B {
-    float b;
-  }
-}
-
-float foo() {
-// CHECK: load float, ptr addrspace(2) @_ZN2n02n11aE, align 4
-// CHECK: load float, ptr addrspace(2) @_ZN2n01bE, align 4
-  return n0::n1::a + n0::b;
-}
diff --git a/clang/test/CodeGenHLSL/cbuffer.hlsl b/clang/test/CodeGenHLSL/cbuffer.hlsl
new file mode 100644
index 0000000000000..38093c6dfacd7
--- /dev/null
+++ b/clang/test/CodeGenHLSL/cbuffer.hlsl
@@ -0,0 +1,197 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-compute \
+// RUN:   -fnative-half-type -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+
+// CHECK: %__cblayout_CBScalars = type <{ float, double, half, i64, i32, i16, i32, i64 }>
+// CHECK: %__cblayout_CBVectors = type <{ <3 x float>, <3 x double>, <2 x half>, <3 x i64>, <4 x i32>, <3 x i16>, <3 x i64> }>
+// CHECK: %__cblayout_CBArrays = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [2 x [3 x [4 x <4 x i32>]]], [1 x i16], [2 x i64], [4 x i32] }>
+// CHECK: %__cblayout_CBStructs = type <{ target("dx.Layout", %A, 8, 0), target("dx.Layout", %B, 14, 0, 8), 
+// CHECK-SAME: target("dx.Layout", %C, 24, 0, 16), [5 x target("dx.Layout", %A, 8, 0)], 
+// CHECK-SAME: target("dx.Layout", %__cblayout_D, 94, 0), half, <3 x i16> }>
+
+// CHECK: %A = type <{ <2 x float> }>
+// CHECK: %B = type <{ <2 x float>, <3 x i16> }>
+// CHECK: %C = type <{ i32, target("dx.Layout", %A, 8, 0) }>
+// CHECK: %__cblayout_D = type <{ [2 x [3 x target("dx.Layout", %B, 14, 0, 8)]] }>
+
+// CHECK: %__cblayout_CBMix = type <{ [2 x target("dx.Layout", %Test, 8, 0, 4)], float, [3 x [2 x <2 x float>]], float,
+// CHECK-SAME: target("dx.Layout", %anon, 4, 0), double, target("dx.Layout", %anon.0, 8, 0), float, <1 x double>, i16 }>
+
+// CHECK: %Test = type <{ float, float }>
+// CHECK: %anon = type <{ float }>
+// CHECK: %anon.0 = type <{ <2 x i32> }>
+
+cbuffer CBScalars : register(b1, space5) {
+  float a1;
+  double a2;
+  float16_t a3;
+  uint64_t a4;
+  int a5;
+  uint16_t a6;
+  bool a7;
+  int64_t a8;
+}
+
+// CHECK: @CBScalars.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CBScalars, 
+// CHECK-SAME: 56, 0, 8, 16, 24, 32, 36, 40, 48))
+// CHECK: @a1 = external addrspace(2) global float, align 4
+// CHECK: @a2 = external addrspace(2) global double, align 8
+// CHECK: @a3 = external addrspace(2) global half, align 2
+// CHECK: @a4 = external addrspace(2) global i64, align 8
+// CHECK: @a5 = external addrspace(2) global i32, align 4
+// CHECK: @a6 = external addrspace(2) global i16, align 2
+// CHECK: @a7 = external addrspace(2) global i32, align 4
+// CHECK: @a8 = external addrspace(2) global i64, align 8
+
+cbuffer CBVectors {
+  float3 b1;
+  double3 b2;
+  float16_t2 b3;
+  uint64_t3 b4;
+  int4 b5;
+  uint16_t3 b6;
+  int64_t3 b7;
+  // FIXME: add bool vectors after llvm-project/llvm#91639 is added
+}
+
+// CHECK: @CBVectors.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CBVectors, 
+// CHECK-SAME: 136, 0, 16, 40, 48, 80, 96, 112))
+// CHECK: @b1 = external addrspace(2) global <3 x float>, align 16
+// CHECK: @b2 = external addrspace(2) global <3 x double>, align 32
+// CHECK: @b3 = external addrspace(2) global <2 x half>, align 4
+// CHECK: @b4 = external addrspace(2) global <3 x i64>, align 32
+// CHECK: @b5 = external addrspace(2) global <4 x i32>, align 16
+// CHECK: @b6 = external addrspace(2) global <3 x i16>, align 8
+// CHECK: @b7 = external addrspace(2) global <3 x i64>, align 32
+
+cbuffer CBArrays : register(b2) {
+  float c1[3];
+  double3 c2[2];
+  float16_t c3[2][2];
+  uint64_t c4[3];
+  int4 c5[2][3][4];
+  uint16_t c6[1];
+  int64_t c7[2];
+  bool c8[4];
+}
+
+// CHECK: @CBArrays.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CBArrays, 
+// CHECK-SAME: 708, 0, 48, 112, 176, 224, 608, 624, 656))
+// CHECK: @c1 = external addrspace(2) global [3 x float], align 4
+// CHECK: @c2 = external addrspace(2) global [2 x <3 x double>], align 32
+// CHECK: @c3 = external addrspace(2) global [2 x [2 x half]], align 2
+// CHECK: @c4 = external addrspace(2) global [3 x i64], align 8
+// CHECK: @c5 = external addrspace(2) global [2 x [3 x [4 x <4 x i32>]]], align 16
+// CHECK: @c6 = external addrspace(2) global [1 x i16], align 2
+// CHECK: @c7 = external addrspace(2) global [2 x i64], align 8
+// CHECK: @c8 = external addrspace(2) global [4 x i32], align 4
+
+struct Empty {};
+
+struct A {
+  float2 f1;
+};
+
+struct B : A {
+  uint16_t3 f2;
+};
+
+struct C {
+  int i;
+  A f3;
+};
+
+struct D {
+  B array_of_B[2][3];
+  Empty es;
+};
+
+// CHECK: @CBStructs.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CBStructs, 
+// CHECK-SAME: 246, 0, 16, 32, 64, 144, 238, 240))
+// CHECK: @a = external addrspace(2) global target("dx.Layout", %A, 8, 0), align 8
+// CHECK: @b = external addrspace(2) global target("dx.Layout", %B, 14, 0, 8), align 8
+// CHECK: @c = external addrspace(2) global target("dx.Layout", %C, 24, 0, 16), align 8
+// CHECK: @array_of_A = external addrspace(2) global [5 x target("dx.Layout", %A, 8, 0)], align 8
+// CHECK: @d = external addrspace(2) global target("dx.Layout", %__cblayout_D, 94, 0), align 8
+// CHECK: @e = external addrspace(2) global half, align 2
+// CHECK: @f = external addrspace(2) global <3 x i16>, align 8
+
+cbuffer CBStructs {
+  A a;
+  B b;
+  C c;
+  A array_of_A[5];
+  D d;
+  half e;
+  uint16_t3 f;
+};
+
+struct Test {
+    float a, b;
+};
+
+// CHECK: @CBMix.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CBMix,
+// CHECK-SAME: 170, 0, 24, 32, 120, 128, 136, 144, 152, 160, 168))
+// CHECK: @test = external addrspace(2) global [2 x target("dx.Layout", %Test, 8, 0, 4)], align 4
+// CHECK: @f1 = external addrspace(2) global float, align 4
+// CHECK: @f2 = external addrspace(2) global [3 x [2 x <2 x float>]], align 8
+// CHECK: @f3 = external addrspace(2) global float, align 4
+// CHECK: @f4 = external addrspace(2) global target("dx.Layout", %anon, 4, 0), align 4
+// CHECK: @f5 = external addrspace(2) global double, align 8
+// CHECK: @f6 = external addrspace(2) global target("dx.Layout", %anon.0, 8, 0), align 8
+// CHECK: @f7 = external addrspace(2) global float, align 4
+// CHECK: @f8 = external addrspace(2) global <1 x double>, align 8
+// CHECK: @f9 = external addrspace(2) global i16, align 2
+
+cbuffer CBMix {
+    Test test[2];
+    float f1;
+    float2 f2[3][2];
+    float f3;
+    struct { float c; } f4;
+    double f5;
+    struct { int2 i; } f6;
+    float f7;
+    vector<double,1> f8;
+    uint16_t f9;
+};  
+
+// CHECK: define internal void @_init_resource_CBScalars.cb()
+// CHECK-NEXT: entry:
+// CHECK-NEXT: %[[HANDLE1:.*]] = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBScalars, 56, 0, 8, 16, 24, 32, 36, 40, 48))
+// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBScalarss_56_0_8_16_24_32_36_40_48tt(i32 5, i32 1, i32 1, i32 0, i1 false)
+// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBScalars, 56, 0, 8, 16, 24, 32, 36, 40, 48)) %CBScalars.cb_h, ptr @CBScalars.cb, align 4
+
+// CHECK: define internal void @_init_resource_CBArrays.cb()
+// CHECK-NEXT: entry:
+// CHECK-NEXT: %[[HANDLE2:.*]] = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBArrays, 708, 0, 48, 112, 176, 224, 608, 624, 656))
+// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBArrayss_708_0_48_112_176_224_608_624_656tt(i32 0, i32 2, i32 1, i32 0, i1 false)
+// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBArrays, 708, 0, 48, 112, 176, 224, 608, 624, 656)) %CBArrays.cb_h, ptr @CBArrays.cb, align 4
+
+RWBuffer<float> Buf;
+
+[numthreads(4,1,1)]
+void main() {
+  Buf[0] = a1 + b1.z + c1[2] + a.f1.y + f1;
+}
+
+// CHECK: define internal void @_GLOBAL__sub_I_cbuffer.hlsl()
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @_init_resource_CBScalars.cb()
+// CHECK-NEXT: call void @_init_resource_CBArrays.cb()
+
+// CHECK: !hlsl.cbs = !{![[CBSCALARS:[0-9]+]], ![[CBVECTORS:[0-9]+]], ![[CBARRAYS:[0-9]+]], ![[CBSTRUCTS:[0-9]+]], ![[CBMIX:[0-9]+]]}
+
+// CHECK: ![[CBSCALARS]] = !{ptr @CBScalars.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4,
+// CHECK-SAME: ptr addrspace(2) @a5, ptr addrspace(2) @a6, ptr addrspace(2) @a7, ptr addrspace(2) @a8}
+
+// CHECK: ![[CBVECTORS]] = !{ptr @CBVectors.cb, ptr addrspace(2) @b1, ptr addrspace(2) @b2, ptr addrspace(2) @b3, ptr addrspace(2) @b4,
+// CHECK-SAME: ptr addrspace(2) @b5, ptr addrspace(2) @b6, ptr addrspace(2) @b7}
+
+// CHECK: ![[CBARRAYS]] = !{ptr @CBArrays.cb, ptr addrspace(2) @c1, ptr addrspace(2) @c2, ptr addrspace(2) @c3, ptr addrspace(2) @c4, 
+// CHECK-SAME: ptr addrspace(2) @c5, ptr addrspace(2) @c6, ptr addrspace(2) @c7, ptr addrspace(2) @c8}
+
+// CHECK: ![[CBSTRUCTS]] = !{ptr @CBStructs.cb, ptr addrspace(2) @a, ptr addrspace(2) @b, ptr addrspace(2) @c, ptr addrspace(2) @array_of_A, 
+// CHECK-SAME: ptr addrspace(2) @d, ptr addrspace(2) @e, ptr addrspace(2) @f}
+
+// CHECK: ![[CBMIX]] = !{ptr @CBMix.cb, ptr addrspace(2) @test, ptr addrspace(2) @f1, ptr addrspace(2) @f2, ptr addrspace(2) @f3,
+// CHECK-SAME: ptr addrspace(2) @f4, ptr addrspace(2) @f5, ptr addrspace(2) @f6, ptr addrspace(2) @f7, ptr addrspace(2) @f8, ptr addrspace(2) @f9}
diff --git a/clang/test/CodeGenHLSL/cbuffer_and_namespaces.hlsl b/clang/test/CodeGenHLSL/cbuffer_and_namespaces.hlsl
new file mode 100644
index 0000000000000..393ca3825c638
--- /dev/null
+++ b/clang/test/CodeGenHLSL/cbuffer_and_namespaces.hlsl
@@ -0,0 +1,56 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// Make sure cbuffer inside namespace works.
+
+// CHECK: %"n0::n1::__cblayout_A" = type <{ float }>
+// CHECK: %"n0::__cblayout_B" = type <{ float }>
+// CHECK: %"n0::n2::__cblayout_C" = type <{ float, target("dx.Layout", %"n0::Foo", 4, 0) }>
+// CHECK: %"n0::Foo" = type <{ float }>
+
+// CHECK: @A.cb = external constant target("dx.CBuffer", target("dx.Layout", %"n0::n1::__cblayout_A", 4, 0))
+// CHECK: @_ZN2n02n11aE = external addrspace(2) global float, align 4
+
+// CHECK: @B.cb = external constant target("dx.CBuffer", target("dx.Layout", %"n0::__cblayout_B", 4, 0))
+// CHECK: @_ZN2n01aE = external addrspace(2) global float, align 4
+
+// CHECK: @C.cb = external constant target("dx.CBuffer", target("dx.Layout", %"n0::n2::__cblayout_C", 20, 0, 16))
+// CHECK: @_ZN2n02n21aE = external addrspace(2) global float, align 4
+// CHECK: external addrspace(2) global target("dx.Layout", %"n0::Foo", 4, 0), align 4
+
+namespace n0 {
+  struct Foo {
+    float f;
+  };
+
+  namespace n1 {
+    cbuffer A {
+      float a;
+    }
+  }
+  cbuffer B {
+    float a;
+  }
+  namespace n2 {
+    cbuffer C {
+      float a;
+      Foo b;
+    }
+  }
+}
+
+float foo() {
+  // CHECK: load float, ptr addrspace(2) @_ZN2n02n11aE, align 4
+  // CHECK: load float, ptr addrspace(2) @_ZN2n01aE, align 4
+  // CHECK: load float, ptr addrspace(2) @_ZN2n02n21aE, align 4
+  return n0::n1::a + n0::a + n0::n2::a;
+}
+
+[numthreads(4,1,1)]
+void main() {}
+
+// CHECK: !hlsl.cbs = !{![[A:[0-9]+]], ![[B:[0-9]+]], ![[C:[0-9]+]]}
+// CHECK: [[A]] = !{ptr @A.cb, ptr addrspace(2) @_ZN2n02n11aE}
+// CHECK: [[B]] = !{ptr @B.cb, ptr addrspace(2) @_ZN2n01aE}
+// CHECK: [[C]] = !{ptr @C.cb, ptr addrspace(2) @_ZN2n02n21aE, ptr addrspace(2) @_ZN2n02n21bE}
diff --git a/clang/test/CodeGenHLSL/cbuffer_with_packoffset.hlsl b/clang/test/CodeGenHLSL/cbuffer_with_packoffset.hlsl
new file mode 100644
index 0000000000000..870593986a976
--- /dev/null
+++ b/clang/test/CodeGenHLSL/cbuffer_with_packoffset.hlsl
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-compute %s \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// CHECK: %__cblayout_CB = type <{ float, double, <2 x i32> }>
+
+// CHECK: @CB.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 176, 16, 168, 88))
+// CHECK: @a = external addrspace(2) global float, align 4
+// CHECK: @b = external addrspace(2) global double, align 8
+// CHECK: @c = external addrspace(2) global <2 x i32>, align 8
+
+cbuffer CB : register(b1, space3) {
+  float a : packoffset(c1.x);
+  double b : packoffset(c10.z);
+  int2 c : packoffset(c5.z);
+}
+
+// CHECK: define internal void @_init_resource_CB.cb()
+// CHECK-NEXT: entry:
+// CHECK-NEXT: %CB.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 176, 16, 168, 88))
+// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBs_176_16_168_88tt(i32 3, i32 1, i32 1, i32 0, i1 false)
+
+float foo() {
+  // CHECK: load float, ptr addrspace(2) @a, align 4
+  // CHECK: load double, ptr addrspace(2) @b, align 8
+  return a + b;
+}
+// CHECK: define internal void @_GLOBAL__sub_I_cbuffer_with_packoffset.hlsl()
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @_init_resource_CB.cb()
+
+[numthreads(4,1,1)]
+void main() {
+  foo();
+}
+
+// CHECK: !hlsl.cbs = !{![[CB:[0-9]+]]}
+// CHECK: ![[CB]] = !{ptr @CB.cb, ptr addrspace(2) @a, ptr addrspace(2) @b, ptr addrspace(2) @c}
diff --git a/clang/test/CodeGenHLSL/cbuffer_with_static_global_and_function.hlsl b/clang/test/CodeGenHLSL/cbuffer_with_static_global_and_function.hlsl
new file mode 100644
index 0000000000000..99f40d8fc93d7
--- /dev/null
+++ b/clang/test/CodeGenHLSL/cbuffer_with_static_global_and_function.hlsl
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// CHECK: %__cblayout_A = type <{ float }>
+
+// CHECK: @A.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_A, 4, 0))
+// CHECK: @a = external addrspace(2) global float, align 4
+// CHECK-DAG: @_ZL1b = internal global float 3.000000e+00, align 4
+// CHECK-NOT: @B.cb
+
+cbuffer A {
+  float a;
+  static float b = 3;
+  float foo() { return a + b; }
+}
+
+cbuffer B {
+  // intentionally empty
+}
+
+// CHECK: define {{.*}} float @_Z3foov() #0 {
+// CHECK: load float, ptr addrspace(2) @a, align 4
+
+extern float bar() {
+  return foo();
+}
+
+// CHECK: !hlsl.cbs = !{![[CB:[0-9]+]]}
+// CHECK: ![[CB]] = !{ptr @A.cb, ptr addrspace(2) @a}
diff --git a/clang/test/CodeGenHLSL/static_global_and_function_in_cb.hlsl b/clang/test/CodeGenHLSL/static_global_and_function_in_cb.hlsl
deleted file mode 100644
index 25f51cce2017d..0000000000000
--- a/clang/test/CodeGenHLSL/static_global_and_function_in_cb.hlsl
+++ /dev/null
@@ -1,22 +0,0 @@
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
-
-// RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan-library %s \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
-
-cbuffer A {
-  // CHECK: @a = external addrspace(2) externally_initialized global float, align 4
-  float a;
-  // CHECK: @_ZL1b = internal global float 3.000000e+00, align 4
-  static float b = 3;
-  float foo() { return a + b; }
-}
-// CHECK: @[[CB:.+]] = external constant { float }
-
-// CHECK:define {{.*}} float @_Z3foov()
-// CHECK:load float, ptr addrspace(2) @a, align 4
-// CHECK:load float, ptr @_ZL1b, align 4
-
-float bar() {
-  return foo();
-}

From 6e7da07c73c179396e21fb729ac14d6b2a1c3152 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman@google.com>
Date: Thu, 20 Feb 2025 10:32:40 -0800
Subject: [PATCH 17/29] [Github][CI] Enable New Premerge on PRs (#127894)

This patch gets rid of the file restriction for running the new premerge
Github workflow on PRs. This will cause the jobs to be run on all the
PRs. Currently the jobs will succeed regardless of build/test failure
results. This will let us test the new infra hopefully without too much
disruption before eventually letting jobs fail when builds/tests fail
and deprecating the existing premerge system.

This is part of the launch plan as outlined in

https://discourse.llvm.org/t/googles-plan-for-the-llvm-presubmit-infrastructure/78940.
---
 .github/workflows/premerge.yaml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/workflows/premerge.yaml b/.github/workflows/premerge.yaml
index 49753c0746cbe..2d366028c2337 100644
--- a/.github/workflows/premerge.yaml
+++ b/.github/workflows/premerge.yaml
@@ -14,8 +14,6 @@ on:
       # do this is that it allows us to take advantage of concurrency groups
       # to cancel in progress CI jobs whenever the PR is closed.
       - closed
-    paths:
-      - .github/workflows/premerge.yaml
   push:
     branches:
       - 'main'

From b0210fee94bc29a507f900da1fb97f0e50ab2637 Mon Sep 17 00:00:00 2001
From: Sirraide <aeternalmail@gmail.com>
Date: Thu, 20 Feb 2025 19:49:37 +0100
Subject: [PATCH 18/29] [Clang] [NFC] Fix more `-Wreturn-type` warnings in
 tests everywhere (#123470)

With the goal of eventually being able to make `-Wreturn-type` default to an
error in all language modes, this is a follow-up to #123464 and updates even
more tests, mainly clang-tidy and clangd tests.
---
 .../clangd/unittests/ASTTests.cpp             |  4 +--
 .../clangd/unittests/FindSymbolsTests.cpp     |  6 ++--
 .../clangd/unittests/ParsedASTTests.cpp       |  2 +-
 .../clangd/unittests/QualityTests.cpp         |  2 +-
 .../clangd/unittests/RenameTests.cpp          | 10 +++----
 .../unittests/SemanticHighlightingTests.cpp   |  1 +
 .../unittests/SemanticSelectionTests.cpp      |  9 +++---
 .../clangd/unittests/SymbolInfoTests.cpp      |  8 ++++++
 .../clangd/unittests/XRefsTests.cpp           | 14 ++++++----
 .../unittests/tweaks/DefineInlineTests.cpp    |  7 +++--
 .../tweaks/ExpandDeducedTypeTests.cpp         |  4 +--
 .../unittests/tweaks/ExtractVariableTests.cpp |  1 +
 .../Inputs/absl/strings/internal-file.h       |  2 +-
 .../checkers/boost/use-to-string.cpp          |  4 +--
 .../bugprone/exception-escape-coro.cpp        |  2 +-
 .../bugprone/exception-escape-rethrow.cpp     |  2 ++
 .../checkers/bugprone/exception-escape.cpp    |  1 +
 .../checkers/bugprone/fold-init-type.cpp      |  9 +++---
 .../inc-dec-in-conditions-bitint-no-crash.c   |  3 +-
 .../bugprone/spuriously-wake-up-functions.c   |  4 +--
 .../bugprone/spuriously-wake-up-functions.cpp |  8 +++---
 .../checkers/bugprone/stringview-nullptr.cpp  |  2 +-
 .../bugprone/suspicious-string-compare.cpp    |  2 ++
 .../fuchsia/default-arguments-calls.cpp       |  4 +--
 .../checkers/fuchsia/multiple-inheritance.cpp |  2 +-
 .../checkers/google/runtime-int-std.cpp       |  1 +
 .../google/upgrade-googletest-case.cpp        | 28 +++++++++----------
 .../const-correctness-transform-values.cpp    |  4 +--
 .../misc/const-correctness-values.cpp         |  4 +--
 .../checkers/misc/unused-parameters.cpp       |  4 +--
 .../misc/use-internal-linkage-func.cpp        | 20 ++++++-------
 .../modernize/Inputs/use-auto/containers.h    |  4 +--
 .../checkers/modernize/avoid-bind.cpp         |  4 +--
 .../modernize/avoid-c-arrays-c++20.cpp        |  8 +++---
 .../modernize/avoid-c-arrays-ignores-main.cpp |  8 ++++--
 .../avoid-c-arrays-ignores-three-arg-main.cpp | 16 +++++++----
 .../checkers/modernize/loop-convert-basic.cpp |  2 ++
 .../checkers/modernize/use-emplace.cpp        | 24 ++++++++--------
 .../modernize/use-equals-default-copy.cpp     |  2 +-
 .../checkers/modernize/use-override.cpp       |  8 +++---
 .../checkers/modernize/use-std-format.cpp     |  2 +-
 .../modernize/use-trailing-return-type.cpp    | 28 +++++++++----------
 .../unnecessary-value-param/header-fixed.h    |  2 +-
 .../Inputs/unnecessary-value-param/header.h   |  2 +-
 .../inefficient-string-concatenation.cpp      |  6 ++--
 .../unnecessary-value-param-header.cpp        |  6 ++--
 .../identifier-naming/global-style1/header.h  |  2 +-
 .../identifier-naming/global-style2/header.h  |  2 +-
 .../readability/const-return-type-macros.cpp  |  6 ++--
 .../readability/const-return-type.cpp         |  2 +-
 .../convert-member-functions-to-static.cpp    |  1 +
 .../readability/identifier-naming.cpp         |  5 ++--
 .../readability/implicit-bool-conversion.cpp  |  2 +-
 .../checkers/readability/named-parameter.cpp  | 21 +++++++-------
 .../readability/redundant-declaration.c       |  2 +-
 .../readability/redundant-declaration.cpp     |  2 +-
 .../static-accessed-through-instance.cpp      |  2 +-
 .../readability/suspicious-call-argument.cpp  |  2 +-
 .../duplicate-fixes-of-alias-checkers.cpp     |  2 +-
 clang/test/CodeGen/armv7k-abi.c               | 10 +++----
 .../atomics-cas-remarks-gfx90a.cl             |  2 +-
 .../hwasan/TestCases/libc_thread_freeres.c    |  1 +
 .../TestCases/Misc/Posix/diag-stacktrace.cpp  |  4 +--
 .../ubsan/TestCases/Misc/missing_return.cpp   |  2 +-
 64 files changed, 202 insertions(+), 164 deletions(-)

diff --git a/clang-tools-extra/clangd/unittests/ASTTests.cpp b/clang-tools-extra/clangd/unittests/ASTTests.cpp
index 32c8e8a63a215..d0bc3c4d7db98 100644
--- a/clang-tools-extra/clangd/unittests/ASTTests.cpp
+++ b/clang-tools-extra/clangd/unittests/ASTTests.cpp
@@ -329,7 +329,7 @@ TEST(ClangdAST, GetContainedAutoParamType) {
        auto &&d,
        auto *&e,
        auto (*f)(int)
-    ){};
+    ){ return 0; };
 
     int withoutAuto(
       int a,
@@ -338,7 +338,7 @@ TEST(ClangdAST, GetContainedAutoParamType) {
       int &&d,
       int *&e,
       int (*f)(int)
-    ){};
+    ){ return 0; };
   )cpp");
   TU.ExtraArgs.push_back("-std=c++20");
   auto AST = TU.build();
diff --git a/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp b/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp
index 4276a44275f53..282859c51a66f 100644
--- a/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp
+++ b/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp
@@ -113,7 +113,7 @@ TEST(WorkspaceSymbols, Unnamed) {
 TEST(WorkspaceSymbols, InMainFile) {
   TestTU TU;
   TU.Code = R"cpp(
-      int test() {}
+      int test() { return 0; }
       static void test2() {}
       )cpp";
   EXPECT_THAT(getSymbols(TU, "test"),
@@ -537,12 +537,14 @@ TEST(DocumentSymbols, InHeaderFile) {
   TestTU TU;
   TU.AdditionalFiles["bar.h"] = R"cpp(
       int foo() {
+        return 0;
       }
       )cpp";
   TU.Code = R"cpp(
       int i; // declaration to finish preamble
       #include "bar.h"
       int test() {
+        return 0;
       }
       )cpp";
   EXPECT_THAT(getSymbols(TU.build()),
@@ -780,7 +782,7 @@ TEST(DocumentSymbols, FuncTemplates) {
   TestTU TU;
   Annotations Source(R"cpp(
     template <class T>
-    T foo() {}
+    T foo() { return T{}; }
 
     auto x = foo<int>();
     auto y = foo<double>();
diff --git a/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp b/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp
index 6ee641caeefe3..f9752d5d44f97 100644
--- a/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp
+++ b/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp
@@ -251,7 +251,7 @@ TEST(ParsedASTTest, NoCrashOnTokensWithTidyCheck) {
   // this check runs the preprocessor, we need to make sure it does not break
   // our recording logic.
   TU.ClangTidyProvider = addTidyChecks("modernize-use-trailing-return-type");
-  TU.Code = "inline int foo() {}";
+  TU.Code = "inline int foo() { return 0; }";
 
   auto AST = TU.build();
   const syntax::TokenBuffer &T = AST.getTokens();
diff --git a/clang-tools-extra/clangd/unittests/QualityTests.cpp b/clang-tools-extra/clangd/unittests/QualityTests.cpp
index 576779fa3270a..619ea32115357 100644
--- a/clang-tools-extra/clangd/unittests/QualityTests.cpp
+++ b/clang-tools-extra/clangd/unittests/QualityTests.cpp
@@ -108,7 +108,7 @@ TEST(QualityTests, SymbolRelevanceSignalExtraction) {
 
   using flags::FLAGS_FOO;
 
-  int ::header_main() {}
+  int ::header_main() { return 0; }
   int main();
 
   [[deprecated]]
diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp
index 142ed171d1a1c..15866f43affa0 100644
--- a/clang-tools-extra/clangd/unittests/RenameTests.cpp
+++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp
@@ -214,7 +214,7 @@ TEST(RenameTest, WithinFileRename) {
         template<typename T>
         class Foo {
         public:
-          static T [[f^oo]]() {}
+          static T [[f^oo]]() { return T(); }
         };
 
         void bar() {
@@ -225,7 +225,7 @@ TEST(RenameTest, WithinFileRename) {
         template<typename T>
         class Foo {
         public:
-          T [[f^oo]]() {}
+          T [[f^oo]]() { return T(); }
         };
 
         void bar() {
@@ -827,7 +827,7 @@ TEST(RenameTest, WithinFileRename) {
 
       // Issue 170: Rename symbol introduced by UsingDecl
       R"cpp(
-        namespace ns { void [[f^oo]](); } 
+        namespace ns { void [[f^oo]](); }
 
         using ns::[[f^oo]];
 
@@ -1307,7 +1307,7 @@ TEST(RenameTest, Renameable) {
        "no symbol", false},
 
       {R"cpp(// FIXME we probably want to rename both overloads here,
-             // but renaming currently assumes there's only a 
+             // but renaming currently assumes there's only a
              // single canonical declaration.
         namespace ns { int foo(int); char foo(char); }
         using ns::^foo;
@@ -1776,7 +1776,7 @@ TEST(CrossFileRenameTests, WithUpToDateIndex) {
           void [[foo]]() override {};
         };
 
-        void func(Base* b, Derived1* d1, 
+        void func(Base* b, Derived1* d1,
                   Derived2* d2, NotDerived* nd) {
           b->[[foo]]();
           d1->[[foo]]();
diff --git a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
index 1ec51d862d0a6..94cecce1f038c 100644
--- a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
@@ -741,6 +741,7 @@ sizeof...($TemplateParameter[[Elements]]);
           $Class[[Foo]].$Field_static[[sharedInstance]].$Field[[someProperty]] $Operator[[=]] 1;
           self.$Field[[someProperty]] $Operator[[=]] self.$Field[[someProperty]] $Operator[[+]] self.$Field[[otherMethod]] $Operator[[+]] 1;
           self->$Field[[_someProperty]] $Operator[[=]] $Field[[_someProperty]] $Operator[[+]] 1;
+          return 0;
         }
         @end
       )cpp",
diff --git a/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp b/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp
index 7faef6f95d8f9..7ede19c321bc6 100644
--- a/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp
@@ -201,6 +201,7 @@ TEST(FoldingRanges, ASTAll) {
       R"cpp(
         #define FOO int foo() {\
           int Variable = 42; \
+          return 0; \
         }
 
         // Do not generate folding range for braces within macro expansion.
@@ -336,18 +337,18 @@ TEST(FoldingRanges, PseudoParserWithoutLineFoldings) {
         ]]};
       )cpp",
       R"cpp(
-        /*[[ Multi 
+        /*[[ Multi
           * line
-          *  comment 
+          *  comment
           ]]*/
       )cpp",
       R"cpp(
         //[[ Comment
         // 1]]
-        
+
         //[[ Comment
         // 2]]
-        
+
         // No folding for single line comment.
 
         /*[[ comment 3
diff --git a/clang-tools-extra/clangd/unittests/SymbolInfoTests.cpp b/clang-tools-extra/clangd/unittests/SymbolInfoTests.cpp
index 6c91f3783a622..95b6eaedce97c 100644
--- a/clang-tools-extra/clangd/unittests/SymbolInfoTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SymbolInfoTests.cpp
@@ -36,6 +36,7 @@ TEST(SymbolInfoTests, All) {
           void $decl[[foo]]();
           int bar() {
             fo^o();
+            return 0;
           }
         )cpp",
               {ExpectedSymbolDetails{"foo", "", "c:@F@foo#", "decl"}}},
@@ -44,6 +45,7 @@ TEST(SymbolInfoTests, All) {
           void $def[[foo]]() {}
           int bar() {
             fo^o();
+            return 0;
           }
         )cpp",
               {ExpectedSymbolDetails{"foo", "", "c:@F@foo#", "def", "def"}}},
@@ -53,6 +55,7 @@ TEST(SymbolInfoTests, All) {
           void $def[[foo]]() {}
           int bar() {
             fo^o();
+            return 0;
           }
         )cpp",
               {ExpectedSymbolDetails{"foo", "", "c:@F@foo#", "decl", "def"}}},
@@ -83,6 +86,7 @@ TEST(SymbolInfoTests, All) {
             void $decl[[foo]]();
             int baz() {
               fo^o();
+              return 0;
             }
           }
         )cpp",
@@ -96,6 +100,7 @@ TEST(SymbolInfoTests, All) {
           namespace barbar {
             int baz() {
               bar::fo^o();
+              return 0;
             }
           }
         )cpp",
@@ -108,6 +113,7 @@ TEST(SymbolInfoTests, All) {
             namespace Nbaz {
               int baz() {
                 ::fo^o();
+                return 0;
               }
             }
           }
@@ -121,6 +127,7 @@ TEST(SymbolInfoTests, All) {
           namespace barbar {
             int baz() {
               fo^o();
+              return 0;
             }
           }
         )cpp",
@@ -136,6 +143,7 @@ TEST(SymbolInfoTests, All) {
             int baz() {
               bar::BarType b;
               fo^o(b);
+              return 0;
             }
           }
         )cpp",
diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
index 475b56b1dc230..e12d7691c58fb 100644
--- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp
+++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
@@ -95,7 +95,7 @@ TEST(HighlightsTest, All) {
       )cpp",
 
       R"cpp(// Function
-        int [[^foo]](int) {}
+        int [[^foo]](int) { return 0; }
         int main() {
           [[foo]]([[foo]](42));
           auto *X = &[[foo]];
@@ -2140,7 +2140,7 @@ TEST(FindReferences, WithinAST) {
       )cpp",
 
       R"cpp(// Function
-        int $def[[foo]](int) {}
+        int $def[[foo]](int) { return 0; }
         int main() {
           auto *X = &$(main)[[^foo]];
           $(main)[[foo]](42);
@@ -2160,7 +2160,7 @@ TEST(FindReferences, WithinAST) {
 
       R"cpp(// Method call
         struct Foo { int $decl(Foo)[[foo]](); };
-        int Foo::$def(Foo)[[foo]]() {}
+        int Foo::$def(Foo)[[foo]]() { return 0; }
         int main() {
           Foo f;
           f.$(main)[[^foo]]();
@@ -2258,7 +2258,7 @@ TEST(FindReferences, WithinAST) {
       )cpp",
       R"cpp(// Dependent code
         template <typename T> void $decl[[foo]](T t);
-        template <typename T> void bar(T t) { $(bar)[[foo]](t); } // foo in bar is uninstantiated. 
+        template <typename T> void bar(T t) { $(bar)[[foo]](t); } // foo in bar is uninstantiated.
         void baz(int x) { $(baz)[[f^oo]](x); }
       )cpp",
       R"cpp(
@@ -2508,6 +2508,7 @@ TEST(FindReferences, ExplicitSymbols) {
         X $def(test)[[a]];
         $(test)[[a]].operator bool();
         if ($(test)[[a^]]) {} // ignore implicit conversion-operator AST node
+        return 0;
       }
     )cpp",
   };
@@ -2543,7 +2544,7 @@ TEST(FindReferences, UsedSymbolsFromInclude) {
       #define BAR 5
       int bar1();
       int bar2();
-      class Bar {};            
+      class Bar {};
     )cpp");
     TU.AdditionalFiles["system/vector"] = guard(R"cpp(
       namespace std {
@@ -2560,7 +2561,7 @@ TEST(FindReferences, UsedSymbolsFromInclude) {
     std::vector<Matcher<ReferencesResult::Reference>> ExpectedLocations;
     for (const auto &R : T.ranges())
       ExpectedLocations.push_back(AllOf(rangeIs(R), attrsAre(0u)));
-    for (const auto &P : T.points()) 
+    for (const auto &P : T.points())
       EXPECT_THAT(findReferences(AST, P, 0).References,
                   UnorderedElementsAreArray(ExpectedLocations))
           << "Failed for Refs at " << P << "\n"
@@ -2635,6 +2636,7 @@ TEST(FindReferences, NeedsIndexForMacro) {
   Annotations IndexedMain(R"cpp(
     int indexed_main() {
       int a = [[MACRO]](1);
+      return 0;
     }
   )cpp");
 
diff --git a/clang-tools-extra/clangd/unittests/tweaks/DefineInlineTests.cpp b/clang-tools-extra/clangd/unittests/tweaks/DefineInlineTests.cpp
index 8d496b2a3ee73..5ec12396ae927 100644
--- a/clang-tools-extra/clangd/unittests/tweaks/DefineInlineTests.cpp
+++ b/clang-tools-extra/clangd/unittests/tweaks/DefineInlineTests.cpp
@@ -935,10 +935,11 @@ TEST_F(DefineInlineTest, AddInline) {
   // Check we put inline before cv-qualifiers.
   ExtraFiles["a.h"] = "const int foo();";
   apply(R"cpp(#include "a.h"
-              const int fo^o() {})cpp",
+              const int fo^o() { return 0; })cpp",
         &EditedFiles);
-  EXPECT_THAT(EditedFiles, testing::ElementsAre(FileWithContents(
-                               testPath("a.h"), "inline const int foo(){}")));
+  EXPECT_THAT(EditedFiles,
+              testing::ElementsAre(FileWithContents(
+                  testPath("a.h"), "inline const int foo(){ return 0; }")));
 
   // No double inline.
   ExtraFiles["a.h"] = "inline void foo();";
diff --git a/clang-tools-extra/clangd/unittests/tweaks/ExpandDeducedTypeTests.cpp b/clang-tools-extra/clangd/unittests/tweaks/ExpandDeducedTypeTests.cpp
index 3730ab4a87136..8da394d74b54d 100644
--- a/clang-tools-extra/clangd/unittests/tweaks/ExpandDeducedTypeTests.cpp
+++ b/clang-tools-extra/clangd/unittests/tweaks/ExpandDeducedTypeTests.cpp
@@ -69,8 +69,8 @@ TEST_F(ExpandDeducedTypeTest, Test) {
   EXPECT_THAT(apply(R"cpp(au^to s = &"foobar";)cpp"),
               StartsWith("fail: Could not expand type"));
 
-  EXPECT_EQ(apply("ns::Class * foo() { au^to c = foo(); }"),
-            "ns::Class * foo() { ns::Class * c = foo(); }");
+  EXPECT_EQ(apply("ns::Class * foo() { au^to c = foo(); return nullptr; }"),
+            "ns::Class * foo() { ns::Class * c = foo(); return nullptr; }");
   EXPECT_EQ(
       apply("void ns::Func() { au^to x = new ns::Class::Nested{}; }"),
       "void ns::Func() { ns::Class::Nested * x = new ns::Class::Nested{}; }");
diff --git a/clang-tools-extra/clangd/unittests/tweaks/ExtractVariableTests.cpp b/clang-tools-extra/clangd/unittests/tweaks/ExtractVariableTests.cpp
index 552e693c0363a..3c65a58d6c945 100644
--- a/clang-tools-extra/clangd/unittests/tweaks/ExtractVariableTests.cpp
+++ b/clang-tools-extra/clangd/unittests/tweaks/ExtractVariableTests.cpp
@@ -116,6 +116,7 @@ TEST_F(ExtractVariableTest, Test) {
       struct T {
         int bar(int a = [[1]]) {
           int b = [[z]];
+          return 0;
         }
         int z = [[1]];
       } t;
diff --git a/clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/strings/internal-file.h b/clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/strings/internal-file.h
index 31798661a80fc..b9ce1c875ed13 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/strings/internal-file.h
+++ b/clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/strings/internal-file.h
@@ -10,7 +10,7 @@ std::string StringsFunction(std::string s1) { return s1; }
 class SomeContainer {};
 namespace strings_internal {
 void InternalFunction() {}
-template <class P> P InternalTemplateFunction(P a) {}
+template <class P> void InternalTemplateFunction(P a) {}
 } // namespace strings_internal
 
 namespace container_internal {
diff --git a/clang-tools-extra/test/clang-tidy/checkers/boost/use-to-string.cpp b/clang-tools-extra/test/clang-tidy/checkers/boost/use-to-string.cpp
index 44ba172c2ff0b..f888c430e6883 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/boost/use-to-string.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/boost/use-to-string.cpp
@@ -18,7 +18,7 @@ T lexical_cast(const V &) {
 
 struct my_weird_type {};
 
-std::string fun(const std::string &) {}
+std::string fun(const std::string &) { return {}; }
 
 void test_to_string1() {
 
@@ -75,7 +75,7 @@ void test_to_string2() {
   fun(boost::lexical_cast<std::string>(j));
 }
 
-std::string fun(const std::wstring &) {}
+std::string fun(const std::wstring &);
 
 void test_to_wstring() {
   int a;
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape-coro.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape-coro.cpp
index 222577b124dce..aff13d19fd209 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape-coro.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape-coro.cpp
@@ -1,5 +1,5 @@
 // RUN: %check_clang_tidy -std=c++20 %s bugprone-exception-escape %t -- \
-// RUN:     -- -fexceptions
+// RUN:     -- -fexceptions -Wno-error=return-type
 
 namespace std {
 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape-rethrow.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape-rethrow.cpp
index b20333d5b0b3b..6f961a247b9d2 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape-rethrow.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape-rethrow.cpp
@@ -20,6 +20,7 @@ int throwsAndCallsRethrower() noexcept {
     } catch(...) {
         rethrower();
     }
+    return 1;
 }
 
 int throwsAndCallsCallsRethrower() noexcept {
@@ -29,6 +30,7 @@ int throwsAndCallsCallsRethrower() noexcept {
     } catch(...) {
         callsRethrower();
     }
+    return 1;
 }
 
 void rethrowerNoexcept() noexcept {
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape.cpp
index 26c443b139629..aae957dd7e090 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape.cpp
@@ -665,6 +665,7 @@ int indirectly_recursive(int n) noexcept;
 
 int recursion_helper(int n) {
   indirectly_recursive(n);
+  return 0;
 }
 
 int indirectly_recursive(int n) noexcept {
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/fold-init-type.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/fold-init-type.cpp
index 2a49960e02895..c813213c3dd0f 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/fold-init-type.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/fold-init-type.cpp
@@ -8,24 +8,25 @@ T accumulate(InputIt first, InputIt last, T init) {
   // is instantiated. In practice this happens somewhere in the implementation
   // of `accumulate`. For tests, do it here.
   (void)*first;
+  return init;
 }
 
 template <class InputIt, class T>
-T reduce(InputIt first, InputIt last, T init) { (void)*first; }
+T reduce(InputIt first, InputIt last, T init) { (void)*first; return init; }
 template <class ExecutionPolicy, class InputIt, class T>
 T reduce(ExecutionPolicy &&policy,
-         InputIt first, InputIt last, T init) { (void)*first; }
+         InputIt first, InputIt last, T init) { (void)*first; return init; }
 
 struct parallel_execution_policy {};
 constexpr parallel_execution_policy par{};
 
 template <class InputIt1, class InputIt2, class T>
 T inner_product(InputIt1 first1, InputIt1 last1,
-                InputIt2 first2, T value) { (void)*first1; (void)*first2; }
+                InputIt2 first2, T value) { (void)*first1; (void)*first2; return value;  }
 
 template <class ExecutionPolicy, class InputIt1, class InputIt2, class T>
 T inner_product(ExecutionPolicy &&policy, InputIt1 first1, InputIt1 last1,
-                InputIt2 first2, T value) { (void)*first1; (void)*first2; }
+                InputIt2 first2, T value) { (void)*first1; (void)*first2; return value; }
 
 } // namespace std
 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/inc-dec-in-conditions-bitint-no-crash.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone/inc-dec-in-conditions-bitint-no-crash.c
index cfb64c10fe46c..5cfa264e42d68 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/inc-dec-in-conditions-bitint-no-crash.c
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/inc-dec-in-conditions-bitint-no-crash.c
@@ -5,5 +5,6 @@ _BitInt(8) v_401_0() {
     _BitInt(5) y = 0;
     16777215wb ?: ++y;
   });
+  return 0;
 }
-// CHECK-MESSAGES: warning 
+// CHECK-MESSAGES: warning
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/spuriously-wake-up-functions.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone/spuriously-wake-up-functions.c
index 8b84474d3f2d3..36b1215978603 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/spuriously-wake-up-functions.c
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/spuriously-wake-up-functions.c
@@ -12,9 +12,9 @@ typedef struct cnd_t {
 } cnd_t;
 struct timespec {};
 
-int cnd_wait(cnd_t *cond, mtx_t *mutex){};
+int cnd_wait(cnd_t *cond, mtx_t *mutex){ return 0; };
 int cnd_timedwait(cnd_t *cond, mtx_t *mutex,
-                  const struct timespec *time_point){};
+                  const struct timespec *time_point){ return 0; };
 
 struct Node1 list_c;
 static mtx_t lock;
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/spuriously-wake-up-functions.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/spuriously-wake-up-functions.cpp
index 6db92ef939fa3..d7508009e19ad 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/spuriously-wake-up-functions.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/spuriously-wake-up-functions.cpp
@@ -90,18 +90,18 @@ class condition_variable {
   void wait(unique_lock<mutex> &lock, Predicate pred);
   template <class Clock, class Duration>
   cv_status wait_until(unique_lock<mutex> &lock,
-                       const chrono::time_point<Clock, Duration> &abs_time){};
+                       const chrono::time_point<Clock, Duration> &abs_time){ return cv_status::no_timeout; };
   template <class Clock, class Duration, class Predicate>
   bool wait_until(unique_lock<mutex> &lock,
                   const chrono::time_point<Clock, Duration> &abs_time,
-                  Predicate pred){};
+                  Predicate pred){ return false; };
   template <class Rep, class Period>
   cv_status wait_for(unique_lock<mutex> &lock,
-                     const chrono::duration<Rep, Period> &rel_time){};
+                     const chrono::duration<Rep, Period> &rel_time){ return cv_status::no_timeout; };
   template <class Rep, class Period, class Predicate>
   bool wait_for(unique_lock<mutex> &lock,
                 const chrono::duration<Rep, Period> &rel_time,
-                Predicate pred){};
+                Predicate pred){ return false; };
 };
 
 } // namespace std
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/stringview-nullptr.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/stringview-nullptr.cpp
index 02fcab31dcf3e..ff5b256e71781 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/stringview-nullptr.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/stringview-nullptr.cpp
@@ -27,7 +27,7 @@ class basic_string_view {
 
   constexpr basic_string_view(const basic_string_view &) {}
 
-  constexpr basic_string_view &operator=(const basic_string_view &) {}
+  constexpr basic_string_view &operator=(const basic_string_view &) { return *this; }
 };
 
 template <typename CharT>
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/suspicious-string-compare.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/suspicious-string-compare.cpp
index 7e1dd6b444393..c14b094f3fca3 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/suspicious-string-compare.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/suspicious-string-compare.cpp
@@ -89,6 +89,8 @@ int test_warning_patterns() {
   if (strcmp(A, "a") < 0.)
     return 0;
   // CHECK-MESSAGES: [[@LINE-2]]:7: warning: function 'strcmp' has suspicious implicit cast
+
+  return 1;
 }
 
 int test_valid_patterns() {
diff --git a/clang-tools-extra/test/clang-tidy/checkers/fuchsia/default-arguments-calls.cpp b/clang-tools-extra/test/clang-tidy/checkers/fuchsia/default-arguments-calls.cpp
index 50b6d4c5676c3..ed7bcc7dacc30 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/fuchsia/default-arguments-calls.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/fuchsia/default-arguments-calls.cpp
@@ -2,7 +2,7 @@
 
 int foo(int value = 5) { return value; }
 
-int f() {
+void f() {
   foo();
   // CHECK-NOTES: [[@LINE-1]]:3: warning: calling a function that uses a default argument is disallowed [fuchsia-default-arguments-calls]
   // CHECK-NOTES: [[@LINE-5]]:9: note: default parameter was declared here
@@ -10,7 +10,7 @@ int f() {
 
 int bar(int value) { return value; }
 
-int n() {
+void n() {
   foo(0);
   bar(0);
 }
diff --git a/clang-tools-extra/test/clang-tidy/checkers/fuchsia/multiple-inheritance.cpp b/clang-tools-extra/test/clang-tidy/checkers/fuchsia/multiple-inheritance.cpp
index 6ce9ce8e65536..d53b3fde7736b 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/fuchsia/multiple-inheritance.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/fuchsia/multiple-inheritance.cpp
@@ -144,7 +144,7 @@ struct WithTemplBase : T {
   WithTemplBase();
 };
 
-int test_no_crash() {
+void test_no_crash() {
   auto foo = []() {};
   WithTemplBase<decltype(foo)>();
 }
diff --git a/clang-tools-extra/test/clang-tidy/checkers/google/runtime-int-std.cpp b/clang-tools-extra/test/clang-tidy/checkers/google/runtime-int-std.cpp
index 30f9b3cf1e90c..cd65de51a5ce9 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/google/runtime-int-std.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/google/runtime-int-std.cpp
@@ -54,4 +54,5 @@ short bar(const short, unsigned short) {
 
   tmpl<short>();
 // CHECK-MESSAGES: [[@LINE-1]]:8: warning: consider replacing 'short' with 'std::int16_t'
+  return 0;
 }
diff --git a/clang-tools-extra/test/clang-tidy/checkers/google/upgrade-googletest-case.cpp b/clang-tools-extra/test/clang-tidy/checkers/google/upgrade-googletest-case.cpp
index ce70e79183521..39ff9b7f39634 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/google/upgrade-googletest-case.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/google/upgrade-googletest-case.cpp
@@ -221,9 +221,9 @@ class FooTestInfo : public testing::TestInfo {
   // CHECK-FIXES: const char *test_suite_name() const;
 };
 
-const char *FooTestInfo::test_case_name() const {}
+const char *FooTestInfo::test_case_name() const { return nullptr; }
 // CHECK-MESSAGES: [[@LINE-1]]:26: warning: Google Test APIs named with 'case'
-// CHECK-FIXES: const char *FooTestInfo::test_suite_name() const {}
+// CHECK-FIXES: const char *FooTestInfo::test_suite_name() const { return nullptr; }
 
 class BarTestInfo : public testing::TestInfo {
 public:
@@ -491,26 +491,26 @@ class FooUnitTest : public testing::UnitTest {
   // CHECK-FIXES: const testing::TestSuite *GetTestSuite(int) const;
 };
 
-testing::TestCase *FooUnitTest::current_test_case() const {}
+testing::TestCase *FooUnitTest::current_test_case() const { return nullptr; }
 // CHECK-MESSAGES: [[@LINE-1]]:10: warning: Google Test APIs named with 'case'
 // CHECK-MESSAGES: [[@LINE-2]]:33: warning: Google Test APIs named with 'case'
-// CHECK-FIXES: testing::TestSuite *FooUnitTest::current_test_suite() const {}
-int FooUnitTest::successful_test_case_count() const {}
+// CHECK-FIXES: testing::TestSuite *FooUnitTest::current_test_suite() const { return nullptr; }
+int FooUnitTest::successful_test_case_count() const { return 0; }
 // CHECK-MESSAGES: [[@LINE-1]]:18: warning: Google Test APIs named with 'case'
-// CHECK-FIXES: int FooUnitTest::successful_test_suite_count() const {}
-int FooUnitTest::failed_test_case_count() const {}
+// CHECK-FIXES: int FooUnitTest::successful_test_suite_count() const { return 0; }
+int FooUnitTest::failed_test_case_count() const { return 0; }
 // CHECK-MESSAGES: [[@LINE-1]]:18: warning: Google Test APIs named with 'case'
-// CHECK-FIXES: int FooUnitTest::failed_test_suite_count() const {}
-int FooUnitTest::total_test_case_count() const {}
+// CHECK-FIXES: int FooUnitTest::failed_test_suite_count() const { return 0; }
+int FooUnitTest::total_test_case_count() const { return 0; }
 // CHECK-MESSAGES: [[@LINE-1]]:18: warning: Google Test APIs named with 'case'
-// CHECK-FIXES: int FooUnitTest::total_test_suite_count() const {}
-int FooUnitTest::test_case_to_run_count() const {}
+// CHECK-FIXES: int FooUnitTest::total_test_suite_count() const { return 0; }
+int FooUnitTest::test_case_to_run_count() const { return 0; }
 // CHECK-MESSAGES: [[@LINE-1]]:18: warning: Google Test APIs named with 'case'
-// CHECK-FIXES: int FooUnitTest::test_suite_to_run_count() const {}
-const testing::TestCase *FooUnitTest::GetTestCase(int) const {}
+// CHECK-FIXES: int FooUnitTest::test_suite_to_run_count() const { return 0; }
+const testing::TestCase *FooUnitTest::GetTestCase(int) const { return 0; }
 // CHECK-MESSAGES: [[@LINE-1]]:16: warning: Google Test APIs named with 'case'
 // CHECK-MESSAGES: [[@LINE-2]]:39: warning: Google Test APIs named with 'case'
-// CHECK-FIXES: const testing::TestSuite *FooUnitTest::GetTestSuite(int) const {}
+// CHECK-FIXES: const testing::TestSuite *FooUnitTest::GetTestSuite(int) const { return 0; }
 
 // Type derived from testing::TestCase
 class BarUnitTest : public testing::UnitTest {
diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-transform-values.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-transform-values.cpp
index 9a4eb010609b4..109eddc195558 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-transform-values.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-transform-values.cpp
@@ -54,8 +54,8 @@ void template_instantiation() {
 struct ConstNonConstClass {
   ConstNonConstClass();
   ConstNonConstClass(double &np_local0);
-  double nonConstMethod() {}
-  double constMethod() const {}
+  double nonConstMethod() { return 0; }
+  double constMethod() const { return 0; }
   double modifyingMethod(double &np_arg0) const;
 
   double NonConstMember;
diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-values.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-values.cpp
index 0d1ff0db58371..5efb64bca2374 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-values.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-values.cpp
@@ -283,8 +283,8 @@ void template_instantiation() {
 struct ConstNonConstClass {
   ConstNonConstClass();
   ConstNonConstClass(double &np_local0);
-  double nonConstMethod() {}
-  double constMethod() const {}
+  double nonConstMethod() { return 0; }
+  double constMethod() const { return 0; }
   double modifyingMethod(double &np_arg0) const;
 
   double NonConstMember;
diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/unused-parameters.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/unused-parameters.cpp
index 524de45463e36..9b3dd070405b5 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/misc/unused-parameters.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/misc/unused-parameters.cpp
@@ -33,9 +33,9 @@ void f(void (*fn)()) {;}
 // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: parameter 'fn' is unused [misc-unused-parameters]
 // CHECK-FIXES: {{^}}void f(void (* /*fn*/)()) {;}{{$}}
 
-int *k([[clang::lifetimebound]] int *i) {;}
+int *k([[clang::lifetimebound]] int *i) { return nullptr; }
 // CHECK-MESSAGES: :[[@LINE-1]]:38: warning: parameter 'i' is unused [misc-unused-parameters]
-// CHECK-FIXES: {{^}}int *k({{\[\[clang::lifetimebound\]\]}} int * /*i*/) {;}{{$}}
+// CHECK-FIXES: {{^}}int *k({{\[\[clang::lifetimebound\]\]}} int * /*i*/) { return nullptr; }{{$}}
 
 #define ATTR_BEFORE(x) [[clang::lifetimebound]] x
 int* m(ATTR_BEFORE(const int *i)) { return nullptr; }
diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-func.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-func.cpp
index 68951fcf0aaac..abf95b857c192 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-func.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-func.cpp
@@ -17,25 +17,25 @@ void func_cpp_inc() {}
 // CHECK-MESSAGES: :[[@LINE-1]]:6: warning: function 'func_cpp_inc'
 // CHECK-FIXES: static void func_cpp_inc() {}
 
-int* func_cpp_inc_return_ptr() {}
+int* func_cpp_inc_return_ptr() { return nullptr; }
 // CHECK-MESSAGES: :[[@LINE-1]]:6: warning: function 'func_cpp_inc_return_ptr'
-// CHECK-FIXES: static int* func_cpp_inc_return_ptr() {}
+// CHECK-FIXES: static int* func_cpp_inc_return_ptr() { return nullptr; }
 
-const int* func_cpp_inc_return_const_ptr() {}
+const int* func_cpp_inc_return_const_ptr() { return nullptr; }
 // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: function 'func_cpp_inc_return_const_ptr'
-// CHECK-FIXES: static const int* func_cpp_inc_return_const_ptr() {}
+// CHECK-FIXES: static const int* func_cpp_inc_return_const_ptr() { return nullptr; }
 
-int const* func_cpp_inc_return_ptr_const() {}
+int const* func_cpp_inc_return_ptr_const() { return nullptr; }
 // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: function 'func_cpp_inc_return_ptr_const'
-// CHECK-FIXES: static int const* func_cpp_inc_return_ptr_const() {}
+// CHECK-FIXES: static int const* func_cpp_inc_return_ptr_const() { return nullptr; }
 
-int * const func_cpp_inc_return_const() {}
+int * const func_cpp_inc_return_const() { return nullptr; }
 // CHECK-MESSAGES: :[[@LINE-1]]:13: warning: function 'func_cpp_inc_return_const'
-// CHECK-FIXES: static int * const func_cpp_inc_return_const() {}
+// CHECK-FIXES: static int * const func_cpp_inc_return_const() { return nullptr; }
 
-volatile const int* func_cpp_inc_return_volatile_const_ptr() {}
+volatile const int* func_cpp_inc_return_volatile_const_ptr() { return nullptr; }
 // CHECK-MESSAGES: :[[@LINE-1]]:21: warning: function 'func_cpp_inc_return_volatile_const_ptr'
-// CHECK-FIXES: static volatile const int* func_cpp_inc_return_volatile_const_ptr() {}
+// CHECK-FIXES: static volatile const int* func_cpp_inc_return_volatile_const_ptr() { return nullptr; }
 
 [[nodiscard]] void func_nodiscard() {}
 // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: function 'func_nodiscard'
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/Inputs/use-auto/containers.h b/clang-tools-extra/test/clang-tidy/checkers/modernize/Inputs/use-auto/containers.h
index c99b7a4407d5c..2c90d762f5d8a 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/Inputs/use-auto/containers.h
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/Inputs/use-auto/containers.h
@@ -214,8 +214,8 @@ class map : public bidirectional_iterable<iterator<pair<key, value>>> {
 public:
   map() {}
 
-  iterator<pair<key, value>> find(const key &) {}
-  const_iterator<iterator<pair<key, value>>> find(const key &) const {}
+  iterator<pair<key, value>> find(const key &);
+  const_iterator<iterator<pair<key, value>>> find(const key &) const;
 };
 
 template <typename key, typename value>
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-bind.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-bind.cpp
index 22b24d45fe63f..0d100ffa38b27 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-bind.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-bind.cpp
@@ -46,7 +46,7 @@ struct D {
   operator bool() const { return true; }
 
   void MemberFunction(int x) {}
-  int MemberFunctionWithReturn(int x) {}
+  int MemberFunctionWithReturn(int x) { return 0; }
 
   static D *create();
 };
@@ -342,7 +342,7 @@ void testCapturedSubexpressions() {
 
 struct E {
   void MemberFunction(int x) {}
-  int MemberFunctionWithReturn(int x) {}
+  int MemberFunctionWithReturn(int x) { return 0; }
   int operator()(int x, int y) const { return x + y; }
 
   void testMemberFunctions() {
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-c++20.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-c++20.cpp
index 1eb8ebe3d51e3..c9391e3339623 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-c++20.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-c++20.cpp
@@ -1,11 +1,11 @@
 // RUN: %check_clang_tidy -std=c++20 %s modernize-avoid-c-arrays %t
 
-int f1(int data[], int size) {
-  // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: do not declare C-style arrays, use 'std::span' instead
+void f1(int data[], int size) {
+  // CHECK-MESSAGES: :[[@LINE-1]]:9: warning: do not declare C-style arrays, use 'std::span' instead
   int f4[] = {1, 2};
   // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use 'std::array' instead
 }
 
-int f2(int data[100]) {
-  // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: do not declare C-style arrays, use 'std::array' instead
+void f2(int data[100]) {
+  // CHECK-MESSAGES: :[[@LINE-1]]:9: warning: do not declare C-style arrays, use 'std::array' instead
 }
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-main.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-main.cpp
index a0c79bb55a686..8e1890c234223 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-main.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-main.cpp
@@ -1,9 +1,13 @@
 // RUN: %check_clang_tidy -std=c++17 %s modernize-avoid-c-arrays %t
 
-int not_main(int argc, char *argv[]) {
-  // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: do not declare C-style arrays, use 'std::array' or 'std::vector' instead
+namespace X {
+// Not main
+int main(int argc, char *argv[]) {
+  // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: do not declare C-style arrays, use 'std::array' or 'std::vector' instead
   int f4[] = {1, 2};
   // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use 'std::array' instead
+  return 0;
+}
 }
 
 int main(int argc, char *argv[]) {
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-three-arg-main.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-three-arg-main.cpp
index bd39f0fb4f1c8..58eced408733a 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-three-arg-main.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-three-arg-main.cpp
@@ -1,19 +1,23 @@
 // RUN: %check_clang_tidy -std=c++17 %s modernize-avoid-c-arrays %t
 
-int not_main(int argc, char *argv[], char *argw[]) {
-  // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: do not declare C-style arrays, use 'std::array' or 'std::vector' instead
-  // CHECK-MESSAGES: :[[@LINE-2]]:38: warning: do not declare C-style arrays, use 'std::array' or 'std::vector' instead
+namespace X {
+// Not main.
+int main(int argc, char *argv[], char *argw[]) {
+  // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: do not declare C-style arrays, use 'std::array' or 'std::vector' instead
+  // CHECK-MESSAGES: :[[@LINE-2]]:34: warning: do not declare C-style arrays, use 'std::array' or 'std::vector' instead
   int f4[] = {1, 2};
   // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use 'std::array' instead
+  return 0;
+}
 }
 
 int main(int argc, char *argv[], char *argw[]) {
   int f5[] = {1, 2};
   // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use 'std::array' instead
 
-  auto not_main = [](int argc, char *argv[], char *argw[]) {
-    // CHECK-MESSAGES: :[[@LINE-1]]:32: warning: do not declare C-style arrays, use 'std::array' or 'std::vector' instead
-    // CHECK-MESSAGES: :[[@LINE-2]]:46: warning: do not declare C-style arrays, use 'std::array' or 'std::vector' instead
+  auto main = [](int argc, char *argv[], char *argw[]) {
+    // CHECK-MESSAGES: :[[@LINE-1]]:28: warning: do not declare C-style arrays, use 'std::array' or 'std::vector' instead
+    // CHECK-MESSAGES: :[[@LINE-2]]:42: warning: do not declare C-style arrays, use 'std::array' or 'std::vector' instead
     int f6[] = {1, 2};
     // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: do not declare C-style arrays, use 'std::array' instead
   };
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/loop-convert-basic.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/loop-convert-basic.cpp
index df2a2c1af1f54..8d1d7378e5cff 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/loop-convert-basic.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/loop-convert-basic.cpp
@@ -170,6 +170,8 @@ const int *constArray() {
   // CHECK-FIXES: for (const int & I : ConstArr)
   // CHECK-FIXES-NEXT: if (Something)
   // CHECK-FIXES-NEXT: return &I;
+
+  return nullptr;
 }
 
 struct HasArr {
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-emplace.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-emplace.cpp
index 3f4a14cd9bb64..e6562cd18dbab 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-emplace.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-emplace.cpp
@@ -49,7 +49,7 @@ class vector {
   template <typename... Args>
   void emplace_back(Args &&... args){};
   template <typename... Args>
-  iterator emplace(const_iterator pos, Args &&...args){};
+  iterator emplace(const_iterator pos, Args &&...args);
   ~vector();
 };
 
@@ -69,7 +69,7 @@ class list {
   void push_back(T &&) {}
 
   template <typename... Args>
-  iterator emplace(const_iterator pos, Args &&...args){};
+  iterator emplace(const_iterator pos, Args &&...args);
   template <typename... Args>
   void emplace_back(Args &&... args){};
   template <typename... Args>
@@ -93,7 +93,7 @@ class deque {
   void push_front(T &&) {}
 
   template <typename... Args>
-  iterator emplace(const_iterator pos, Args &&...args){};
+  iterator emplace(const_iterator pos, Args &&...args);
   template <typename... Args>
   void emplace_back(Args &&... args){};
   template <typename... Args>
@@ -116,7 +116,7 @@ class forward_list {
   template <typename... Args>
   void emplace_front(Args &&...args){};
   template <typename... Args>
-  iterator emplace_after(const_iterator pos, Args &&...args){};
+  iterator emplace_after(const_iterator pos, Args &&...args);
 };
 
 template <typename T>
@@ -131,7 +131,7 @@ class set {
   template <typename... Args>
   void emplace(Args &&...args){};
   template <typename... Args>
-  iterator emplace_hint(const_iterator pos, Args &&...args){};
+  iterator emplace_hint(const_iterator pos, Args &&...args);
 };
 
 template <typename Key, typename T>
@@ -146,7 +146,7 @@ class map {
   template <typename... Args>
   void emplace(Args &&...args){};
   template <typename... Args>
-  iterator emplace_hint(const_iterator pos, Args &&...args){};
+  iterator emplace_hint(const_iterator pos, Args &&...args);
 };
 
 template <typename T>
@@ -161,7 +161,7 @@ class multiset {
   template <typename... Args>
   void emplace(Args &&...args){};
   template <typename... Args>
-  iterator emplace_hint(const_iterator pos, Args &&...args){};
+  iterator emplace_hint(const_iterator pos, Args &&...args);
 };
 
 template <typename Key, typename T>
@@ -176,7 +176,7 @@ class multimap {
   template <typename... Args>
   void emplace(Args &&...args){};
   template <typename... Args>
-  iterator emplace_hint(const_iterator pos, Args &&...args){};
+  iterator emplace_hint(const_iterator pos, Args &&...args);
 };
 
 template <typename T>
@@ -191,7 +191,7 @@ class unordered_set {
   template <typename... Args>
   void emplace(Args &&...args){};
   template <typename... Args>
-  iterator emplace_hint(const_iterator pos, Args &&...args){};
+  iterator emplace_hint(const_iterator pos, Args &&...args);
 };
 
 template <typename Key, typename T>
@@ -206,7 +206,7 @@ class unordered_map {
   template <typename... Args>
   void emplace(Args &&...args){};
   template <typename... Args>
-  iterator emplace_hint(const_iterator pos, Args &&...args){};
+  iterator emplace_hint(const_iterator pos, Args &&...args);
 };
 
 template <typename T>
@@ -221,7 +221,7 @@ class unordered_multiset {
   template <typename... Args>
   void emplace(Args &&...args){};
   template <typename... Args>
-  iterator emplace_hint(const_iterator pos, Args &&...args){};
+  iterator emplace_hint(const_iterator pos, Args &&...args);
 };
 
 template <typename Key, typename T>
@@ -236,7 +236,7 @@ class unordered_multimap {
   template <typename... Args>
   void emplace(Args &&...args){};
   template <typename... Args>
-  iterator emplace_hint(const_iterator pos, Args &&...args){};
+  iterator emplace_hint(const_iterator pos, Args &&...args);
 };
 
 template <typename T>
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-equals-default-copy.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-equals-default-copy.cpp
index 4abb9c8555970..7f737148a7cd1 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-equals-default-copy.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-equals-default-copy.cpp
@@ -1,6 +1,6 @@
 // RUN: %check_clang_tidy %s modernize-use-equals-default %t -- \
 // RUN:   -config="{CheckOptions: {modernize-use-equals-default.IgnoreMacros: false}}" \
-// RUN:   -- -fno-delayed-template-parsing -fexceptions
+// RUN:   -- -fno-delayed-template-parsing -fexceptions -Wno-error=return-type
 
 // Out of line definition.
 struct OL {
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-override.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-override.cpp
index 89d1aa48c46a3..bad8b7a8d7f08 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-override.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-override.cpp
@@ -203,13 +203,13 @@ struct InlineDefinitions : public Base {
   // CHECK-MESSAGES: :[[@LINE-2]]:16: warning: prefer using
   // CHECK-FIXES: {{^}}  void j() const override
 
-  virtual MustUseResultObject k() {}  // Has an implicit attribute.
+  virtual MustUseResultObject k();  // Has an implicit attribute.
   // CHECK-MESSAGES: :[[@LINE-1]]:31: warning: prefer using
-  // CHECK-FIXES: {{^}}  MustUseResultObject k() override {}
+  // CHECK-FIXES: {{^}}  MustUseResultObject k() override;
 
-  virtual bool l() MUST_USE_RESULT UNUSED {}
+  virtual bool l() MUST_USE_RESULT UNUSED;
   // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: prefer using
-  // CHECK-FIXES: {{^}}  bool l() override MUST_USE_RESULT UNUSED {}
+  // CHECK-FIXES: {{^}}  bool l() override MUST_USE_RESULT UNUSED;
 
   virtual void r() &
   {}
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-format.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-format.cpp
index 0a5a63eba2596..2af2e8949a814 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-format.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-format.cpp
@@ -100,7 +100,7 @@ std::string StrFormat_field_width_and_precision() {
   return s1 + s2 + s3 + s4 + s5 + s6;
 }
 
-std::string StrFormat_macros() {
+void StrFormat_macros() {
   // The function call is replaced even though it comes from a macro.
 #define FORMAT absl::StrFormat
   auto s1 = FORMAT("Hello %d", 42);
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-trailing-return-type.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-trailing-return-type.cpp
index d9efc006b22ef..e1f36c52a7c01 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-trailing-return-type.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-trailing-return-type.cpp
@@ -106,9 +106,9 @@ extern "C" int d2(int arg);
 inline int d3(int arg) noexcept(true);
 // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: use a trailing return type for this function [modernize-use-trailing-return-type]
 // CHECK-FIXES: {{^}}inline auto d3(int arg) noexcept(true) -> int;{{$}}
-inline int d4(int arg) try { } catch(...) { }
+inline int d4(int arg) try { return 0; } catch(...) { return 0; }
 // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: use a trailing return type for this function [modernize-use-trailing-return-type]
-// CHECK-FIXES: {{^}}inline auto d4(int arg) -> int try { } catch(...) { }{{$}}
+// CHECK-FIXES: {{^}}inline auto d4(int arg) -> int try { return 0; } catch(...) { return 0; }{{$}}
 int d5(int arg) throw();
 // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: use a trailing return type for this function [modernize-use-trailing-return-type]
 // CHECK-FIXES: {{^}}auto d5(int arg) throw() -> int;{{$}}
@@ -167,9 +167,9 @@ namespace N {
 }
 // CHECK-MESSAGES: :[[@LINE-2]]:9: warning: use a trailing return type for this function [modernize-use-trailing-return-type]
 // CHECK-FIXES: {{^}}    auto e1() -> int;{{$}}
-int N::e1() {}
+int N::e1() { return 0; }
 // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: use a trailing return type for this function [modernize-use-trailing-return-type]
-// CHECK-FIXES: {{^}}auto N::e1() -> int {}{{$}}
+// CHECK-FIXES: {{^}}auto N::e1() -> int { return 0; }{{$}}
 
 //
 // Functions with unsupported return types
@@ -260,14 +260,14 @@ struct B {
     B& operator=(const B&);
 // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: use a trailing return type for this function [modernize-use-trailing-return-type]
 // CHECK-FIXES: {{^}}    auto operator=(const B&) -> B&;{{$}}
-    
+
     double base1(int, bool b);
 // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: use a trailing return type for this function [modernize-use-trailing-return-type]
 // CHECK-FIXES: {{^}}    auto base1(int, bool b) -> double;{{$}}
 
-    virtual double base2(int, bool b) {}
+    virtual double base2(int, bool b) { return 0; }
 // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: use a trailing return type for this function [modernize-use-trailing-return-type]
-// CHECK-FIXES: {{^}}    virtual auto base2(int, bool b) -> double {}{{$}}
+// CHECK-FIXES: {{^}}    virtual auto base2(int, bool b) -> double { return 0; }{{$}}
 
     virtual float base3() const = 0;
 // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use a trailing return type for this function [modernize-use-trailing-return-type]
@@ -298,9 +298,9 @@ struct B {
 // CHECK-FIXES: {{^}}    virtual auto base9() const noexcept -> const char * { return ""; }{{$}}
 };
 
-double B::base1(int, bool b) {}
+double B::base1(int, bool b) { return 0; }
 // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: use a trailing return type for this function [modernize-use-trailing-return-type]
-// CHECK-FIXES: {{^}}auto B::base1(int, bool b) -> double {}{{$}}
+// CHECK-FIXES: {{^}}auto B::base1(int, bool b) -> double { return 0; }{{$}}
 
 struct D : B {
     virtual double f1(int, bool b) final;
@@ -311,9 +311,9 @@ struct D : B {
 // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: use a trailing return type for this function [modernize-use-trailing-return-type]
 // CHECK-FIXES: {{^}}    virtual auto base2(int, bool b) -> double override;{{$}}
 
-    virtual float base3() const override final { }
+    virtual float base3() const override final { return 0; }
 // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use a trailing return type for this function [modernize-use-trailing-return-type]
-// CHECK-FIXES: {{^}}    virtual auto base3() const -> float override final { }{{$}}
+// CHECK-FIXES: {{^}}    virtual auto base3() const -> float override final { return 0; }{{$}}
 
     const char * base9() const noexcept override { return ""; }
 // CHECK-MESSAGES: :[[@LINE-1]]:18: warning: use a trailing return type for this function [modernize-use-trailing-return-type]
@@ -586,13 +586,13 @@ void c(int arg) { return; }
 struct D2 : B {
     D2();
     virtual ~D2();
-    
+
     virtual auto f1(int, bool b) -> double final;
     virtual auto base2(int, bool b) -> double override;
-    virtual auto base3() const -> float override final { }
+    virtual auto base3() const -> float override final { return 0;  }
 
     operator double();
 };
 
 auto l1 = [](int arg) {};
-auto l2 = [](int arg) -> double {};
+auto l2 = [](int arg) -> double { return 0; };
diff --git a/clang-tools-extra/test/clang-tidy/checkers/performance/Inputs/unnecessary-value-param/header-fixed.h b/clang-tools-extra/test/clang-tidy/checkers/performance/Inputs/unnecessary-value-param/header-fixed.h
index a40b2b2ece52e..1dcdd7a5ea4b4 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/performance/Inputs/unnecessary-value-param/header-fixed.h
+++ b/clang-tools-extra/test/clang-tidy/checkers/performance/Inputs/unnecessary-value-param/header-fixed.h
@@ -12,4 +12,4 @@ int f1(int n, ABC v1); // line 11
 
 
 
-int f2(        int n,       const ABC& v2); // line 15
+void f2(        int n,       const ABC& v2); // line 15
diff --git a/clang-tools-extra/test/clang-tidy/checkers/performance/Inputs/unnecessary-value-param/header.h b/clang-tools-extra/test/clang-tidy/checkers/performance/Inputs/unnecessary-value-param/header.h
index 94916755ddafe..d6f6e65ace79d 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/performance/Inputs/unnecessary-value-param/header.h
+++ b/clang-tools-extra/test/clang-tidy/checkers/performance/Inputs/unnecessary-value-param/header.h
@@ -12,4 +12,4 @@ int f1(int n, ABC v1); // line 11
 
 
 
-int f2(        int n,       ABC v2); // line 15
+void f2(        int n,       ABC v2); // line 15
diff --git a/clang-tools-extra/test/clang-tidy/checkers/performance/inefficient-string-concatenation.cpp b/clang-tools-extra/test/clang-tidy/checkers/performance/inefficient-string-concatenation.cpp
index 1dbd56b322202..a1edf5fae2f9e 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/performance/inefficient-string-concatenation.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/performance/inefficient-string-concatenation.cpp
@@ -6,15 +6,15 @@ class basic_string {
 public:
   basic_string() {}
   ~basic_string() {}
-  basic_string<T> *operator+=(const basic_string<T> &) {}
-  friend basic_string<T> operator+(const basic_string<T> &, const basic_string<T> &) {}
+  basic_string<T> *operator+=(const basic_string<T> &);
+  friend basic_string<T> operator+(const basic_string<T> &, const basic_string<T> &);
 };
 typedef basic_string<char> string;
 typedef basic_string<wchar_t> wstring;
 }
 
 void f(std::string) {}
-std::string g(std::string) {}
+std::string g(std::string);
 
 int main() {
   std::string mystr1, mystr2;
diff --git a/clang-tools-extra/test/clang-tidy/checkers/performance/unnecessary-value-param-header.cpp b/clang-tools-extra/test/clang-tidy/checkers/performance/unnecessary-value-param-header.cpp
index 2b45bb719dbc5..8461248982447 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/performance/unnecessary-value-param-header.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/performance/unnecessary-value-param-header.cpp
@@ -14,7 +14,7 @@ int f1(int n, ABC v1, ABC v2) {
   // CHECK-FIXES: int f1(int n, const ABC& v1, const ABC& v2) {
   return v1.get(n) + v2.get(n);
 }
-int f2(int n, ABC v2) {
-  // CHECK-MESSAGES: [[@LINE-1]]:19: warning: the parameter 'v2' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
-  // CHECK-FIXES: int f2(int n, const ABC& v2) {
+void f2(int n, ABC v2) {
+  // CHECK-MESSAGES: [[@LINE-1]]:20: warning: the parameter 'v2' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
+  // CHECK-FIXES: void f2(int n, const ABC& v2) {
 }
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/Inputs/identifier-naming/global-style1/header.h b/clang-tools-extra/test/clang-tidy/checkers/readability/Inputs/identifier-naming/global-style1/header.h
index abbf7dfa48395..bbedc9b1df2dc 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/Inputs/identifier-naming/global-style1/header.h
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/Inputs/identifier-naming/global-style1/header.h
@@ -4,4 +4,4 @@ void style_first_good();
 
 void styleFirstBad();
 
-int thisIsMainLikeIgnored(int argc, const char *argv[]) {}
+int thisIsMainLikeIgnored(int argc, const char *argv[]) { return 0; }
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/Inputs/identifier-naming/global-style2/header.h b/clang-tools-extra/test/clang-tidy/checkers/readability/Inputs/identifier-naming/global-style2/header.h
index 9d3e846a080b9..3b3b1e9508e8f 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/Inputs/identifier-naming/global-style2/header.h
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/Inputs/identifier-naming/global-style2/header.h
@@ -4,4 +4,4 @@ void STYLE_SECOND_GOOD();
 
 void styleSecondBad();
 
-int thisIsMainLikeNotIgnored(int argc, const char *argv[]) {}
+int thisIsMainLikeNotIgnored(int argc, const char *argv[]) { return 0; }
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/const-return-type-macros.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/const-return-type-macros.cpp
index 5131011118f30..0a154c5d23d47 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/const-return-type-macros.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/const-return-type-macros.cpp
@@ -6,16 +6,16 @@
 
 // Regression tests involving macros
 #define CONCAT(a, b) a##b
-CONCAT(cons, t) int p22(){}
+CONCAT(cons, t) int p22(){ return 0; }
 // CHECK-MESSAGES: [[@LINE-1]]:1: warning: return type 'const int' is 'const'-qu
 // We warn, but we can't give a fix
 
 #define CONSTINT const int
-CONSTINT p23() {}
+CONSTINT p23() { return 0; }
 // CHECK-MESSAGES: [[@LINE-1]]:1: warning: return type 'const int' is 'const'-qu
 
 #define CONST const
-CONST int p24() {}
+CONST int p24() { return 0; }
 // CHECK-MESSAGES: [[@LINE-1]]:1: warning: return type 'const int' is 'const'-qu
 
 #define CREATE_FUNCTION()                    \
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/const-return-type.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/const-return-type.cpp
index 76a3555663b18..d913ab4dee9ba 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/const-return-type.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/const-return-type.cpp
@@ -1,4 +1,4 @@
-// RUN: %check_clang_tidy -std=c++14-or-later %s readability-const-return-type %t
+// RUN: %check_clang_tidy -std=c++14-or-later %s readability-const-return-type %t -- -- -Wno-error=return-type
 
 //  p# = positive test
 //  n# = negative test
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/convert-member-functions-to-static.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/convert-member-functions-to-static.cpp
index 5ec1f221b2207..a6b95bdb57e4d 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/convert-member-functions-to-static.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/convert-member-functions-to-static.cpp
@@ -32,6 +32,7 @@ class A {
     // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: method 'call_static_member' can be made static
     // CHECK-FIXES: {{^}}  static int call_static_member() {
     already_static();
+    return 0;
   }
 
   int read_static() {
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming.cpp
index be5ba54513c67..1771836539d86 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming.cpp
@@ -547,6 +547,7 @@ struct_type GlobalTypedefTestFunction(struct_type a_argument1) {
 // CHECK-FIXES: {{^}}struct_type_t GlobalTypedefTestFunction(struct_type_t a_argument1) {
     struct_type typedef_test_1;
 // CHECK-FIXES: {{^}}    struct_type_t typedef_test_1;
+  return {};
 }
 
 using my_struct_type = THIS___Structure;
@@ -777,8 +778,8 @@ STATIC_MACRO void someFunc(ValueType a_v1, const ValueType& a_v2) {}
 // CHECK-FIXES: {{^}}STATIC_MACRO void someFunc(value_type_t a_v1, const value_type_t& a_v2) {}
 STATIC_MACRO void someFunc(const ValueType** p_a_v1, ValueType (*p_a_v2)()) {}
 // CHECK-FIXES: {{^}}STATIC_MACRO void someFunc(const value_type_t** p_a_v1, value_type_t (*p_a_v2)()) {}
-STATIC_MACRO ValueType someFunc() {}
-// CHECK-FIXES: {{^}}STATIC_MACRO value_type_t someFunc() {}
+STATIC_MACRO ValueType someFunc() { return {}; }
+// CHECK-FIXES: {{^}}STATIC_MACRO value_type_t someFunc() { return {}; }
 STATIC_MACRO void someFunc(MyFunPtr, const MyFunPtr****) {}
 // CHECK-FIXES: {{^}}STATIC_MACRO void someFunc(my_fun_ptr_t, const my_fun_ptr_t****) {}
 #undef STATIC_MACRO
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.cpp
index c4b7a77b92f0a..75f666e3e07e5 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.cpp
@@ -465,7 +465,7 @@ struct S {
   // CHECK-FIXES: S(bool a, bool b, bool c) : a(static_cast<int>(a)), b(b), c(static_cast<int>(c)) {}
 };
 
-bool f(S& s) {
+void f(S& s) {
   functionTaking<bool>(s.a);
   // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: implicit conversion 'int' -> 'bool'
   // CHECK-FIXES: functionTaking<bool>(s.a != 0);
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/named-parameter.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/named-parameter.cpp
index 8c6fb123ac023..c22e9c564e3ee 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/named-parameter.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/named-parameter.cpp
@@ -37,8 +37,8 @@ void operator delete[](void *x) throw();
 void operator delete[](void * /*x*/) throw();
 
 struct X {
-  X operator++(int) {}
-  X operator--(int) {}
+  X operator++(int) { throw 0; }
+  X operator--(int) { throw 0; }
 
   X(X&) = delete;
   X &operator=(X&) = default;
@@ -86,22 +86,23 @@ void FDef2(int n, int) {}
 void FNoDef(int);
 
 class Z {};
+Z the_z;
 
-Z &operator++(Z&) {}
+Z &operator++(Z&) { return the_z; }
 // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: all parameters should be named in a function
-// CHECK-FIXES: Z &operator++(Z& /*unused*/) {}
+// CHECK-FIXES: Z &operator++(Z& /*unused*/) { return the_z; }
 
-Z &operator++(Z&, int) {}
+Z &operator++(Z&, int) { return the_z; }
 // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: all parameters should be named in a function
-// CHECK-FIXES: Z &operator++(Z& /*unused*/, int) {}
+// CHECK-FIXES: Z &operator++(Z& /*unused*/, int) { return the_z; }
 
-Z &operator--(Z&) {}
+Z &operator--(Z&) { return the_z; }
 // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: all parameters should be named in a function
-// CHECK-FIXES: Z &operator--(Z& /*unused*/) {}
+// CHECK-FIXES: Z &operator--(Z& /*unused*/) { return the_z; }
 
-Z &operator--(Z&, int) {}
+Z &operator--(Z&, int) { return the_z; }
 // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: all parameters should be named in a function
-// CHECK-FIXES: Z &operator--(Z& /*unused*/, int) {}
+// CHECK-FIXES: Z &operator--(Z& /*unused*/, int) { return the_z; }
 
 namespace testing {
 namespace internal {
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-declaration.c b/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-declaration.c
index c2e8bf68b4ad7..dbcc4cf6d1022 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-declaration.c
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-declaration.c
@@ -20,7 +20,7 @@ static int f(void);
 static int f(void); // f
 // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: redundant 'f' declaration
 // CHECK-FIXES: {{^}}// f{{$}}
-static int f(void) {}
+static int f(void) { return 0; }
 
 inline void g(void) {}
 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-declaration.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-declaration.cpp
index be505f55b86b0..595eccf8854ba 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-declaration.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-declaration.cpp
@@ -38,7 +38,7 @@ static int f();
 static int f(); // f
 // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: redundant 'f' declaration
 // CHECK-FIXES: {{^}}// f{{$}}
-static int f() {}
+static int f() { return 0; }
 
 // Original check crashed for the code below.
 namespace std {
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/static-accessed-through-instance.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/static-accessed-through-instance.cpp
index 202fe9be6d00c..a0d51dec7f32d 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/static-accessed-through-instance.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/static-accessed-through-instance.cpp
@@ -264,7 +264,7 @@ struct Qptr {
   }
 };
 
-int func(Qptr qp) {
+void func(Qptr qp) {
   qp->y = 10;
   qp->K = 10;
   // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: static member accessed through instance [readability-static-accessed-through-instance]
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/suspicious-call-argument.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/suspicious-call-argument.cpp
index edd3591517af3..27db92be21f20 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/suspicious-call-argument.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/suspicious-call-argument.cpp
@@ -382,7 +382,7 @@ enum opcode { Foo,
               Bar };
 static value *SimplifyRightShift(
     opcode Opcode, value *Op0, value *Op1, bool isExact,
-    const type1 &Q, unsigned MaxRecurse) {}
+    const type1 &Q, unsigned MaxRecurse) { return nullptr; }
 static value *SimplifyLShrInst(value *Op0, value *Op1, bool isExact,
                                const type1 &Q, unsigned MaxRecurse) {
   if (value *V = SimplifyRightShift(Foo, Op0, Op1, isExact, Q, MaxRecurse))
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/duplicate-fixes-of-alias-checkers.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/duplicate-fixes-of-alias-checkers.cpp
index f67c20635064a..ff216298cfd60 100644
--- a/clang-tools-extra/test/clang-tidy/infrastructure/duplicate-fixes-of-alias-checkers.cpp
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/duplicate-fixes-of-alias-checkers.cpp
@@ -31,7 +31,7 @@ class Foo {
   // CHECK-FIXES: _num2{};
 };
 
-int should_use_emplace(std::vector<Foo> &v) {
+void should_use_emplace(std::vector<Foo> &v) {
   v.push_back(Foo());
   // CHECK-FIXES: v.emplace_back();
   // CHECK-MESSAGES: warning: use emplace_back instead of push_back [hicpp-use-emplace,modernize-use-emplace]
diff --git a/clang/test/CodeGen/armv7k-abi.c b/clang/test/CodeGen/armv7k-abi.c
index fd18dafa7d03f..872e6423a4a99 100644
--- a/clang/test/CodeGen/armv7k-abi.c
+++ b/clang/test/CodeGen/armv7k-abi.c
@@ -16,7 +16,7 @@ typedef struct {
 void simple_hfa(HFA h) {}
 
 // CHECK: define{{.*}} %struct.HFA @return_simple_hfa
-HFA return_simple_hfa() {}
+HFA return_simple_hfa() { return (HFA){0}; }
 
 typedef struct {
   double arr[4];
@@ -43,7 +43,7 @@ typedef struct {
 void big_struct_indirect(BigStruct b) {}
 
 // CHECK: define{{.*}} void @return_big_struct_indirect(ptr dead_on_unwind noalias writable sret
-BigStruct return_big_struct_indirect() {}
+BigStruct return_big_struct_indirect() { return (BigStruct){0}; }
 
 // Structs smaller than 16 bytes should be passed directly, and coerced to
 // either [N x i32] or [N x i64] depending on alignment requirements.
@@ -58,7 +58,7 @@ typedef struct {
 void small_struct_direct(SmallStruct s) {}
 
 // CHECK: define{{.*}} [4 x i32] @return_small_struct_direct()
-SmallStruct return_small_struct_direct() {}
+SmallStruct return_small_struct_direct() { return (SmallStruct){0}; }
 
 typedef struct {
   float x;
@@ -75,14 +75,14 @@ typedef struct {
 } PaddedSmallStruct;
 
 // CHECK: define{{.*}} i32 @return_padded_small_struct()
-PaddedSmallStruct return_padded_small_struct() {}
+PaddedSmallStruct return_padded_small_struct() { return (PaddedSmallStruct){0}; }
 
 typedef struct {
   char arr[7];
 } OddlySizedStruct;
 
 // CHECK: define{{.*}} [2 x i32] @return_oddly_sized_struct()
-OddlySizedStruct return_oddly_sized_struct() {}
+OddlySizedStruct return_oddly_sized_struct() { return (OddlySizedStruct){0}; }
 
 // CHECK: define{{.*}} <4 x float> @test_va_arg_vec(ptr noundef %l)
 
diff --git a/clang/test/CodeGenOpenCL/atomics-cas-remarks-gfx90a.cl b/clang/test/CodeGenOpenCL/atomics-cas-remarks-gfx90a.cl
index 72027eda4571d..59bf87b554af3 100644
--- a/clang/test/CodeGenOpenCL/atomics-cas-remarks-gfx90a.cl
+++ b/clang/test/CodeGenOpenCL/atomics-cas-remarks-gfx90a.cl
@@ -36,7 +36,7 @@ typedef enum memory_scope {
 // GFX90A-CAS: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("agent-one-as") monotonic
 // GFX90A-CAS: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("one-as") monotonic
 // GFX90A-CAS: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("wavefront-one-as") monotonic
-float atomic_cas(__global atomic_float *d, float a) {
+void atomic_cas(__global atomic_float *d, float a) {
   float ret1 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
   float ret2 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_device);
   float ret3 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_all_svm_devices);
diff --git a/compiler-rt/test/hwasan/TestCases/libc_thread_freeres.c b/compiler-rt/test/hwasan/TestCases/libc_thread_freeres.c
index e6d1731f30e37..e95f4c8c4fd43 100644
--- a/compiler-rt/test/hwasan/TestCases/libc_thread_freeres.c
+++ b/compiler-rt/test/hwasan/TestCases/libc_thread_freeres.c
@@ -11,6 +11,7 @@ void *ThreadFn(void *) {
   __hwasan_enable_allocator_tagging();
   // This will trigger memory deallocation in __strerror_thread_freeres,
   // at a point when HwasanThread is already gone.
+  return NULL;
 }
 
 int main() {
diff --git a/compiler-rt/test/ubsan/TestCases/Misc/Posix/diag-stacktrace.cpp b/compiler-rt/test/ubsan/TestCases/Misc/Posix/diag-stacktrace.cpp
index 8b7cb6ade35ac..296171848255f 100644
--- a/compiler-rt/test/ubsan/TestCases/Misc/Posix/diag-stacktrace.cpp
+++ b/compiler-rt/test/ubsan/TestCases/Misc/Posix/diag-stacktrace.cpp
@@ -2,10 +2,10 @@
 // UNSUPPORTED: target=thumb{{.*}}
 // UNSUPPORTED: android
 
-// RUN: %clangxx -fsanitize=return %gmlt -O2 -fno-omit-frame-pointer -fasynchronous-unwind-tables %s -o %t
+// RUN: %clangxx -Wno-error=return-type -fsanitize=return %gmlt -O2 -fno-omit-frame-pointer -fasynchronous-unwind-tables %s -o %t
 // RUN: %env_ubsan_opts=print_stacktrace=1:fast_unwind_on_fatal=0 not %run %t 2>&1 | FileCheck %s
 // RUN: %env_ubsan_opts=print_stacktrace=1:fast_unwind_on_fatal=1 not %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx -fsanitize=return %gmlt -O2 -fno-omit-frame-pointer -fno-exceptions -fno-asynchronous-unwind-tables %s -o %t
+// RUN: %clangxx -Wno-error=return-type -fsanitize=return %gmlt -O2 -fno-omit-frame-pointer -fno-exceptions -fno-asynchronous-unwind-tables %s -o %t
 // RUN: %env_ubsan_opts=print_stacktrace=1:fast_unwind_on_fatal=0 not %run %t 2>&1 | FileCheck %s
 // RUN: %env_ubsan_opts=print_stacktrace=1:fast_unwind_on_fatal=1 not %run %t 2>&1 | FileCheck %s
 
diff --git a/compiler-rt/test/ubsan/TestCases/Misc/missing_return.cpp b/compiler-rt/test/ubsan/TestCases/Misc/missing_return.cpp
index 2ea76daf1fc16..6c1bc5525cd17 100644
--- a/compiler-rt/test/ubsan/TestCases/Misc/missing_return.cpp
+++ b/compiler-rt/test/ubsan/TestCases/Misc/missing_return.cpp
@@ -1,4 +1,4 @@
-// RUN: %clangxx -fsanitize=return %gmlt %s -O3 -o %t
+// RUN: %clangxx -Wno-error=return-type -fsanitize=return %gmlt %s -O3 -o %t
 // RUN: not %run %t 2>&1 | FileCheck %s
 // RUN: %env_ubsan_opts=print_stacktrace=1 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-STACKTRACE
 // Error message does not exact what expected

From 5fadb3d680909ab30b37eb559f80046b5a17045e Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Thu, 20 Feb 2025 10:49:53 -0800
Subject: [PATCH 19/29] [CodeGen] Remove static member function
 Register::isPhysicalRegister. NFC

Prefer the non-static member isPhysical(); callers that hold an unsigned now convert it to Register first.
---
 llvm/include/llvm/CodeGen/RDFRegisters.h               |  2 +-
 llvm/include/llvm/CodeGen/Register.h                   | 10 +++-------
 .../CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp  |  2 +-
 llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp       |  2 +-
 llvm/lib/CodeGen/LiveRangeCalc.cpp                     |  2 +-
 llvm/lib/CodeGen/MachineScheduler.cpp                  |  3 +--
 llvm/lib/CodeGen/RegAllocFast.cpp                      |  4 ++--
 llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp      |  4 ++--
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp  |  7 +++----
 llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp        | 10 +++++-----
 llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp               |  2 +-
 llvm/lib/Target/ARM/ARMLatencyMutations.cpp            |  2 +-
 llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp       |  6 +++---
 llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp        |  8 ++++----
 llvm/lib/Target/Hexagon/RDFCopy.cpp                    |  4 ++--
 llvm/lib/Target/M68k/M68kRegisterInfo.cpp              |  3 +--
 16 files changed, 32 insertions(+), 39 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/RDFRegisters.h b/llvm/include/llvm/CodeGen/RDFRegisters.h
index 174ee115a1501..cc30b977ae421 100644
--- a/llvm/include/llvm/CodeGen/RDFRegisters.h
+++ b/llvm/include/llvm/CodeGen/RDFRegisters.h
@@ -111,7 +111,7 @@ struct RegisterRef {
   }
 
   static constexpr bool isRegId(unsigned Id) {
-    return Register::isPhysicalRegister(Id);
+    return Register(Id).isPhysical();
   }
   static constexpr bool isUnitId(unsigned Id) {
     return Register(Id).isVirtual();
diff --git a/llvm/include/llvm/CodeGen/Register.h b/llvm/include/llvm/CodeGen/Register.h
index 03e462872d3c2..6c02ffef89363 100644
--- a/llvm/include/llvm/CodeGen/Register.h
+++ b/llvm/include/llvm/CodeGen/Register.h
@@ -48,12 +48,6 @@ class Register {
     return Register(FI + MCRegister::FirstStackSlot);
   }
 
-  /// Return true if the specified register number is in
-  /// the physical register namespace.
-  static constexpr bool isPhysicalRegister(unsigned Reg) {
-    return MCRegister::isPhysicalRegister(Reg);
-  }
-
   /// Convert a 0-based index to a virtual register number.
   /// This is the inverse operation of VirtReg2IndexFunctor below.
   static Register index2VirtReg(unsigned Index) {
@@ -67,7 +61,9 @@ class Register {
 
   /// Return true if the specified register number is in the physical register
   /// namespace.
-  constexpr bool isPhysical() const { return isPhysicalRegister(Reg); }
+  constexpr bool isPhysical() const {
+    return MCRegister::isPhysicalRegister(Reg);
+  }
 
   /// Convert a virtual register number to a 0-based index. The first virtual
   /// register in a function will get the index 0.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index d87649c4e6567..0f11423a84930 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -525,7 +525,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
           // Don't consider SP to be clobbered by register masks.
           for (auto It : RegVars) {
             unsigned int Reg = It.first;
-            if (Reg != SP && Register::isPhysicalRegister(Reg) &&
+            if (Reg != SP && Register(Reg).isPhysical() &&
                 MO.clobbersPhysReg(Reg))
               RegsToClobber.push_back(Reg);
           }
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index ddf0275ddfe6a..cf3673058c8e7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -564,7 +564,7 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP,
         TFI->getDwarfFrameBase(*Asm->MF);
     switch (FrameBase.Kind) {
     case TargetFrameLowering::DwarfFrameBase::Register: {
-      if (Register::isPhysicalRegister(FrameBase.Location.Reg)) {
+      if (Register(FrameBase.Location.Reg).isPhysical()) {
         MachineLocation Location(FrameBase.Location.Reg);
         addAddress(*SPDie, dwarf::DW_AT_frame_base, Location);
       }
diff --git a/llvm/lib/CodeGen/LiveRangeCalc.cpp b/llvm/lib/CodeGen/LiveRangeCalc.cpp
index 1a9bc694ed0fd..a7c8c3fc8a25a 100644
--- a/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -216,7 +216,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
       report_fatal_error("Use not jointly dominated by defs.");
     }
 
-    if (Register::isPhysicalRegister(PhysReg)) {
+    if (Register(PhysReg).isPhysical()) {
       const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
       bool IsLiveIn = MBB->isLiveIn(PhysReg);
       for (MCRegAliasIterator Alias(PhysReg, TRI, false); !IsLiveIn && Alias.isValid(); ++Alias)
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 0da7535031a7d..1cc1b2cbd81b9 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -3966,8 +3966,7 @@ void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
   // Find already scheduled copies with a single physreg dependence and move
   // them just above the scheduled instruction.
   for (SDep &Dep : Deps) {
-    if (Dep.getKind() != SDep::Data ||
-        !Register::isPhysicalRegister(Dep.getReg()))
+    if (Dep.getKind() != SDep::Data || !Register(Dep.getReg()).isPhysical())
       continue;
     SUnit *DepSU = Dep.getSUnit();
     if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 14128dafbe4ee..2809056bfeba2 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -708,7 +708,7 @@ void RegAllocFastImpl::reloadAtBegin(MachineBasicBlock &MBB) {
 /// not used by a virtreg. Kill the physreg, marking it free. This may add
 /// implicit kills to MO->getParent() and invalidate MO.
 bool RegAllocFastImpl::usePhysReg(MachineInstr &MI, MCPhysReg Reg) {
-  assert(Register::isPhysicalRegister(Reg) && "expected physreg");
+  assert(Register(Reg).isPhysical() && "expected physreg");
   bool displacedAny = displacePhysReg(MI, Reg);
   setPhysRegState(Reg, regPreAssigned);
   markRegUsedInInstr(Reg);
@@ -1289,7 +1289,7 @@ void RegAllocFastImpl::dumpState() const {
     assert(VirtReg.isVirtual() && "Bad map key");
     MCPhysReg PhysReg = LR.PhysReg;
     if (PhysReg != 0) {
-      assert(Register::isPhysicalRegister(PhysReg) && "mapped to physreg");
+      assert(Register(PhysReg).isPhysical() && "mapped to physreg");
       for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
         assert(RegUnitStates[Unit] == VirtReg && "inverse map valid");
       }
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index fd4641ec6f124..288b9d9553b1d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -501,8 +501,8 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
             F.isClobberKind()) {
           // Check for def of register or earlyclobber register.
           for (; NumVals; --NumVals, ++i) {
-            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
-            if (Register::isPhysicalRegister(Reg))
+            Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+            if (Reg.isPhysical())
               CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
           }
         } else
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 133ac6b1327dd..a76498fcab8f2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -10125,9 +10125,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
     auto DetectWriteToReservedRegister = [&]() {
       const MachineFunction &MF = DAG.getMachineFunction();
       const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
-      for (unsigned Reg : OpInfo.AssignedRegs.Regs) {
-        if (Register::isPhysicalRegister(Reg) &&
-            TRI.isInlineAsmReadOnlyReg(MF, Reg)) {
+      for (Register Reg : OpInfo.AssignedRegs.Regs) {
+        if (Reg.isPhysical() && TRI.isInlineAsmReadOnlyReg(MF, Reg)) {
           const char *RegName = TRI.getName(Reg);
           emitInlineAsmError(Call, "write to reserved register '" +
                                        Twine(RegName) + "'");
@@ -11389,7 +11388,7 @@ void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V,
   assert((Op.getOpcode() != ISD::CopyFromReg ||
           cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
          "Copy from a reg to the same reg!");
-  assert(!Register::isPhysicalRegister(Reg) && "Is a physreg");
+  assert(!Register(Reg).isPhysical() && "Is a physreg");
 
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   // If this is an InlineAsm we have to match the registers required, not the
diff --git a/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
index 174438c1863dd..c636719d86ca0 100644
--- a/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
@@ -155,11 +155,11 @@ bool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
 
   LiveIntervals &LIs = G.getMetadata().LIS;
 
-  if (Register::isPhysicalRegister(Rd) || Register::isPhysicalRegister(Ra)) {
-    LLVM_DEBUG(dbgs() << "Rd is a physical reg:"
-                      << Register::isPhysicalRegister(Rd) << '\n');
-    LLVM_DEBUG(dbgs() << "Ra is a physical reg:"
-                      << Register::isPhysicalRegister(Ra) << '\n');
+  if (Register(Rd).isPhysical() || Register(Ra).isPhysical()) {
+    LLVM_DEBUG(dbgs() << "Rd is a physical reg:" << Register(Rd).isPhysical()
+                      << '\n');
+    LLVM_DEBUG(dbgs() << "Ra is a physical reg:" << Register(Ra).isPhysical()
+                      << '\n');
     return false;
   }
 
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 839b7e81f8998..9a021925a6bd1 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1108,7 +1108,7 @@ ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
   if (!SubIdx)
     return MIB.addReg(Reg, State);
 
-  if (Register::isPhysicalRegister(Reg))
+  if (Register(Reg).isPhysical())
     return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
   return MIB.addReg(Reg, State, SubIdx);
 }
diff --git a/llvm/lib/Target/ARM/ARMLatencyMutations.cpp b/llvm/lib/Target/ARM/ARMLatencyMutations.cpp
index 30e7ede68d787..601b3fa19978d 100644
--- a/llvm/lib/Target/ARM/ARMLatencyMutations.cpp
+++ b/llvm/lib/Target/ARM/ARMLatencyMutations.cpp
@@ -802,7 +802,7 @@ signed M85Overrides::modifyMixedWidthFP(const MachineInstr *SrcMI,
             OP.getSubReg() == ARM::ssub_1)
           return 1;
     }
-  } else if (Register::isPhysicalRegister(RegID)) {
+  } else if (Register(RegID).isPhysical()) {
     // Note that when the producer is narrower, not all of the producers
     // may be present in the scheduling graph; somewhere earlier in the
     // compiler, an implicit def/use of the aliased full register gets
diff --git a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index 3b157006d9224..df182613d1661 100644
--- a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -223,8 +223,8 @@ static bool areCombinableOperations(const TargetRegisterInfo *TRI,
   return true;
 }
 
-static bool isEvenReg(unsigned Reg) {
-  assert(Register::isPhysicalRegister(Reg));
+static bool isEvenReg(Register Reg) {
+  assert(Reg.isPhysical());
   if (Hexagon::IntRegsRegClass.contains(Reg))
     return (Reg - Hexagon::R0) % 2 == 0;
   if (Hexagon::HvxVRRegClass.contains(Reg))
@@ -546,7 +546,7 @@ MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr &I1,
     // is even.
     bool IsI1LowReg = (I2DestReg - I1DestReg) == 1;
     bool IsI2LowReg = (I1DestReg - I2DestReg) == 1;
-    unsigned FirstRegIndex = IsI1LowReg ? I1DestReg : I2DestReg;
+    Register FirstRegIndex = IsI1LowReg ? I1DestReg : I2DestReg;
     if ((!IsI1LowReg && !IsI2LowReg) || !isEvenReg(FirstRegIndex))
       continue;
 
diff --git a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
index ee01ebc4daa26..3bb7175bbf8b9 100644
--- a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -275,7 +275,7 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
       return false;
   }
 
-  unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning.
+  Register cmpReg1, cmpOp2;
   cmpReg1 = MI.getOperand(1).getReg();
 
   if (secondReg) {
@@ -290,7 +290,7 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
     // at machine code level, we don't need this, but if we decide
     // to move new value jump prior to RA, we would be needing this.
     MachineRegisterInfo &MRI = MF.getRegInfo();
-    if (!Register::isPhysicalRegister(cmpOp2)) {
+    if (!cmpOp2.isPhysical()) {
       MachineInstr *def = MRI.getVRegDef(cmpOp2);
       if (def->getOpcode() == TargetOpcode::COPY)
         return false;
@@ -480,7 +480,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
     bool foundJump    = false;
     bool foundCompare = false;
     bool invertPredicate = false;
-    unsigned predReg = 0; // predicate reg of the jump.
+    Register predReg; // predicate reg of the jump.
     unsigned cmpReg1 = 0;
     int cmpOp2 = 0;
     MachineBasicBlock::iterator jmpPos;
@@ -516,7 +516,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
         jmpPos = MII;
         jmpInstr = &MI;
         predReg = MI.getOperand(0).getReg();
-        afterRA = Register::isPhysicalRegister(predReg);
+        afterRA = predReg.isPhysical();
 
         // If ifconverter had not messed up with the kill flags of the
         // operands, the following check on the kill flag would suffice.
diff --git a/llvm/lib/Target/Hexagon/RDFCopy.cpp b/llvm/lib/Target/Hexagon/RDFCopy.cpp
index fafdad08909dd..76177901f658a 100644
--- a/llvm/lib/Target/Hexagon/RDFCopy.cpp
+++ b/llvm/lib/Target/Hexagon/RDFCopy.cpp
@@ -44,8 +44,8 @@ bool CopyPropagation::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) {
       const MachineOperand &Src = MI->getOperand(1);
       RegisterRef DstR = DFG.makeRegRef(Dst.getReg(), Dst.getSubReg());
       RegisterRef SrcR = DFG.makeRegRef(Src.getReg(), Src.getSubReg());
-      assert(Register::isPhysicalRegister(DstR.Reg));
-      assert(Register::isPhysicalRegister(SrcR.Reg));
+      assert(Register(DstR.Reg).isPhysical());
+      assert(Register(SrcR.Reg).isPhysical());
       const TargetRegisterInfo &TRI = DFG.getTRI();
       if (TRI.getMinimalPhysRegClass(DstR.Reg) !=
           TRI.getMinimalPhysRegClass(SrcR.Reg))
diff --git a/llvm/lib/Target/M68k/M68kRegisterInfo.cpp b/llvm/lib/Target/M68k/M68kRegisterInfo.cpp
index 62fb72ba4fd5e..5375d4484a7ab 100644
--- a/llvm/lib/Target/M68k/M68kRegisterInfo.cpp
+++ b/llvm/lib/Target/M68k/M68kRegisterInfo.cpp
@@ -83,8 +83,7 @@ M68kRegisterInfo::getMatchingMegaReg(unsigned Reg,
 
 const TargetRegisterClass *
 M68kRegisterInfo::getMaximalPhysRegClass(unsigned reg, MVT VT) const {
-  assert(Register::isPhysicalRegister(reg) &&
-         "reg must be a physical register");
+  assert(Register(reg).isPhysical() && "reg must be a physical register");
 
   // Pick the most sub register class of the right type that contains
   // this physreg.

From b10ddfafcacf9d8f9cfa7e23f65730be60261554 Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj@google.com>
Date: Thu, 20 Feb 2025 10:55:40 -0800
Subject: [PATCH 20/29] [libc] Fix missing includes in GPU scanf reader
 (#128049)

In #121215 the reader was reorganized and the definitions of the
internal getc and ungetc functions were moved, but the includes that the
GPU builder depends on were not. This patch moves the includes to the
correct new place.
---
 libc/src/stdio/scanf_core/reader.h           | 5 +++++
 libc/src/stdio/scanf_core/vfscanf_internal.h | 2 --
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/libc/src/stdio/scanf_core/reader.h b/libc/src/stdio/scanf_core/reader.h
index a545a605ff150..1f8ec9695a314 100644
--- a/libc/src/stdio/scanf_core/reader.h
+++ b/libc/src/stdio/scanf_core/reader.h
@@ -15,6 +15,11 @@
 #include "src/__support/File/file.h"
 #endif
 
+#if defined(LIBC_TARGET_ARCH_IS_GPU)
+#include "src/stdio/getc.h"
+#include "src/stdio/ungetc.h"
+#endif
+
 #include "src/__support/macros/attributes.h" // For LIBC_INLINE
 #include "src/__support/macros/config.h"
 
diff --git a/libc/src/stdio/scanf_core/vfscanf_internal.h b/libc/src/stdio/scanf_core/vfscanf_internal.h
index 84d074711b8fb..4e20fa3b93091 100644
--- a/libc/src/stdio/scanf_core/vfscanf_internal.h
+++ b/libc/src/stdio/scanf_core/vfscanf_internal.h
@@ -18,8 +18,6 @@
 
 #if defined(LIBC_TARGET_ARCH_IS_GPU)
 #include "src/stdio/ferror.h"
-#include "src/stdio/getc.h"
-#include "src/stdio/ungetc.h"
 #endif
 
 #include "hdr/types/FILE.h"

From 5bc51611446ee3a9fc3538623ee87f18aada30c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Susan=20Tan=20=28=E3=82=B9-=E3=82=B6=E3=83=B3=E3=80=80?=
 =?UTF-8?q?=E3=82=BF=E3=83=B3=29?= <zujunt@nvidia.com>
Date: Thu, 20 Feb 2025 14:04:26 -0500
Subject: [PATCH 21/29] [flang] Add support to fir::cg in alias analysis
 (#127827)

Currently the alias analysis doesn't trace the source whenever there are
operations from the fir::cg dialect. This PR adds support for
fir::cg::XEmboxOp, fir::cg::XReboxOp, and fir::cg::XDeclareOp for a specific
application I'm working on.
---
 .../lib/Optimizer/Analysis/AliasAnalysis.cpp  |  89 ++++++++-------
 .../AliasAnalysis/fircg-as-sources.fir        | 108 ++++++++++++++++++
 2 files changed, 158 insertions(+), 39 deletions(-)
 create mode 100644 flang/test/Analysis/AliasAnalysis/fircg-as-sources.fir

diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
index 70fa18ad65b9b..436f7a1154c7c 100644
--- a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
+++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "flang/Optimizer/Analysis/AliasAnalysis.h"
+#include "flang/Optimizer/CodeGen/CGOps.h"
 #include "flang/Optimizer/Dialect/FIROps.h"
 #include "flang/Optimizer/Dialect/FIROpsSupport.h"
 #include "flang/Optimizer/Dialect/FIRType.h"
@@ -61,13 +62,17 @@ getOriginalDef(mlir::Value v,
     mlir::Type ty = defOp->getResultTypes()[0];
     llvm::TypeSwitch<Operation *>(defOp)
         .Case<fir::ConvertOp>([&](fir::ConvertOp op) { v = op.getValue(); })
-        .Case<fir::DeclareOp, hlfir::DeclareOp>([&](auto op) {
-          v = op.getMemref();
-          auto varIf = llvm::cast<fir::FortranVariableOpInterface>(defOp);
-          attributes |= getAttrsFromVariable(varIf);
-          isCapturedInInternalProcedure |=
-              varIf.isCapturedInInternalProcedure();
-        })
+        .Case<fir::DeclareOp, hlfir::DeclareOp, fir::cg::XDeclareOp>(
+            [&](auto op) {
+              v = op.getMemref();
+              auto varIf =
+                  llvm::dyn_cast<fir::FortranVariableOpInterface>(defOp);
+              if (varIf) {
+                attributes |= getAttrsFromVariable(varIf);
+                isCapturedInInternalProcedure |=
+                    varIf.isCapturedInInternalProcedure();
+              }
+            })
         .Case<fir::CoordinateOp>([&](auto op) {
           if (fir::AliasAnalysis::isPointerReference(ty))
             attributes.set(fir::AliasAnalysis::Attribute::Pointer);
@@ -591,19 +596,21 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
             followBoxData = true;
           approximateSource = true;
         })
-        .Case<fir::EmboxOp, fir::ReboxOp>([&](auto op) {
-          if (followBoxData) {
-            v = op->getOperand(0);
-            defOp = v.getDefiningOp();
-          } else
-            breakFromLoop = true;
-        })
+        .Case<fir::EmboxOp, fir::ReboxOp, fir::cg::XEmboxOp, fir::cg::XReboxOp>(
+            [&](auto op) {
+              if (followBoxData) {
+                v = op->getOperand(0);
+                defOp = v.getDefiningOp();
+              } else
+                breakFromLoop = true;
+            })
         .Case<fir::LoadOp>([&](auto op) {
           // If load is inside target and it points to mapped item,
           // continue tracking.
           Operation *loadMemrefOp = op.getMemref().getDefiningOp();
           bool isDeclareOp =
               llvm::isa_and_present<fir::DeclareOp>(loadMemrefOp) ||
+              llvm::isa_and_present<fir::cg::XDeclareOp>(loadMemrefOp) ||
               llvm::isa_and_present<hlfir::DeclareOp>(loadMemrefOp);
           if (isDeclareOp &&
               llvm::isa<omp::TargetOp>(loadMemrefOp->getParentOp())) {
@@ -666,7 +673,8 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
           global = llvm::cast<fir::AddrOfOp>(op).getSymbol();
           breakFromLoop = true;
         })
-        .Case<hlfir::DeclareOp, fir::DeclareOp>([&](auto op) {
+        .Case<hlfir::DeclareOp, fir::DeclareOp,
+              fir::cg::XDeclareOp>([&](auto op) {
           bool isPrivateItem = false;
           if (omp::BlockArgOpenMPOpInterface argIface =
                   dyn_cast<omp::BlockArgOpenMPOpInterface>(op->getParentOp())) {
@@ -700,30 +708,33 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
               return;
             }
           }
-          auto varIf = llvm::cast<fir::FortranVariableOpInterface>(defOp);
-          // While going through a declare operation collect
-          // the variable attributes from it. Right now, some
-          // of the attributes are duplicated, e.g. a TARGET dummy
-          // argument has the target attribute both on its declare
-          // operation and on the entry block argument.
-          // In case of host associated use, the declare operation
-          // is the only carrier of the variable attributes,
-          // so we have to collect them here.
-          attributes |= getAttrsFromVariable(varIf);
-          isCapturedInInternalProcedure |=
-              varIf.isCapturedInInternalProcedure();
-          if (varIf.isHostAssoc()) {
-            // Do not track past such DeclareOp, because it does not
-            // currently provide any useful information. The host associated
-            // access will end up dereferencing the host association tuple,
-            // so we may as well stop right now.
-            v = defOp->getResult(0);
-            // TODO: if the host associated variable is a dummy argument
-            // of the host, I think, we can treat it as SourceKind::Argument
-            // for the purpose of alias analysis inside the internal procedure.
-            type = SourceKind::HostAssoc;
-            breakFromLoop = true;
-            return;
+          auto varIf = llvm::dyn_cast<fir::FortranVariableOpInterface>(defOp);
+          if (varIf) {
+            // While going through a declare operation collect
+            // the variable attributes from it. Right now, some
+            // of the attributes are duplicated, e.g. a TARGET dummy
+            // argument has the target attribute both on its declare
+            // operation and on the entry block argument.
+            // In case of host associated use, the declare operation
+            // is the only carrier of the variable attributes,
+            // so we have to collect them here.
+            attributes |= getAttrsFromVariable(varIf);
+            isCapturedInInternalProcedure |=
+                varIf.isCapturedInInternalProcedure();
+            if (varIf.isHostAssoc()) {
+              // Do not track past such DeclareOp, because it does not
+              // currently provide any useful information. The host associated
+              // access will end up dereferencing the host association tuple,
+              // so we may as well stop right now.
+              v = defOp->getResult(0);
+              // TODO: if the host associated variable is a dummy argument
+              // of the host, I think, we can treat it as SourceKind::Argument
+              // for the purpose of alias analysis inside the internal
+              // procedure.
+              type = SourceKind::HostAssoc;
+              breakFromLoop = true;
+              return;
+            }
           }
           if (getLastInstantiationPoint) {
             // Fetch only the innermost instantiation point.
diff --git a/flang/test/Analysis/AliasAnalysis/fircg-as-sources.fir b/flang/test/Analysis/AliasAnalysis/fircg-as-sources.fir
new file mode 100644
index 0000000000000..edb3b1dadb8cd
--- /dev/null
+++ b/flang/test/Analysis/AliasAnalysis/fircg-as-sources.fir
@@ -0,0 +1,108 @@
+// Check aliasing with the address *in* (not *of*) a local (fir.alloca) pointer
+// variable.
+//
+// Throughout this test, the ".fir" suffix on symbols indicates a version of the
+// MLIR after convert-hlfir-to-fir.  We would like alias analysis results to be
+// the same in both versions.
+
+// RUN: fir-opt %s -split-input-file -o /dev/null --mlir-disable-threading  \
+// RUN:   -pass-pipeline='builtin.module(func.func(test-fir-alias-analysis))' \
+// RUN:   2>&1 | FileCheck -match-full-lines %s
+
+// subroutine test(p1, arr, t_arr, alloc, t_alloc, t, v)
+//   real, pointer :: p1
+//   real :: arr(:)
+//   real, target :: t_arr(:)
+//   real, allocatable :: alloc
+//   real, allocatable, target :: t_alloc
+//   real, target :: t
+//   real :: v
+//   real, pointer :: p0
+// end subroutine test
+
+// check when fircg.ext_rebox and fircg.ext_declare are in the path of tracing the source
+// CHECK-LABEL: Testing : "_QPtest.fir"
+// CHECK-DAG: p0.tgt.fir#0 <-> arr(1).fir#0: NoAlias
+// CHECK-DAG: p0.tgt.fir#0 <-> t_arr(1).fir#0: MayAlias
+// CHECK-DAG: p0.tgt.fir#0 <-> alloc.tgt.fir#0: NoAlias
+// CHECK-DAG: p0.tgt.fir#0 <-> t_alloc.tgt.fir#0: MayAlias
+// CHECK-DAG: alloc.fir#0 <-> alloc.tgt.fir#0: NoAlias
+
+func.func @_QPtest.fir(%arg0: !fir.ref<!fir.box<!fir.ptr<f32>>> {fir.bindc_name = "p1"}, %arg1: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "arr"}, %arg2: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "t_arr", fir.target}, %arg3: !fir.ref<!fir.box<!fir.heap<f32>>> {fir.bindc_name = "alloc"}, %arg4: !fir.ref<!fir.box<!fir.heap<f32>>> {fir.bindc_name = "t_alloc", fir.target}, %arg5: !fir.ref<f32> {fir.bindc_name = "t", fir.target}, %arg6: !fir.ref<f32> {fir.bindc_name = "v"}) {
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fircg.ext_declare %arg3 dummy_scope %0 {test.ptr = "alloc.fir", fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtestEalloc"} : (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.dscope) -> !fir.ref<!fir.box<!fir.heap<f32>>>
+  %2 = fir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtestEarr"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  %3 = fircg.ext_rebox %2 : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %4 = fir.alloca !fir.box<!fir.ptr<f32>> {bindc_name = "p0", uniq_name = "_QFtestEp0"}
+  %5 = fircg.ext_declare %4 {test.ptr = "p0.fir", fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtestEp0"} : (!fir.ref<!fir.box<!fir.ptr<f32>>>) -> !fir.ref<!fir.box<!fir.ptr<f32>>>
+  %6 = fir.declare %arg0 dummy_scope %0 {test.ptr = "p1.fir", fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtestEp1"} : (!fir.ref<!fir.box<!fir.ptr<f32>>>, !fir.dscope) -> !fir.ref<!fir.box<!fir.ptr<f32>>>
+  %7 = fir.declare %arg5 dummy_scope %0 {test.ptr = "t.fir", fortran_attrs = #fir.var_attrs<target>, uniq_name = "_QFtestEt"} : (!fir.ref<f32>, !fir.dscope) -> !fir.ref<f32>
+  %8 = fir.declare %arg4 dummy_scope %0 {fortran_attrs = #fir.var_attrs<allocatable, target>, uniq_name = "_QFtestEt_alloc"} : (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.dscope) -> !fir.ref<!fir.box<!fir.heap<f32>>>
+  %9 = fir.declare %arg2 dummy_scope %0 {fortran_attrs = #fir.var_attrs<target>, uniq_name = "_QFtestEt_arr"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  %10 = fircg.ext_rebox %9 : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %11 = fir.declare %arg6 dummy_scope %0 {test.ptr = "v.fir", uniq_name = "_QFtestEv"} : (!fir.ref<f32>, !fir.dscope) -> !fir.ref<f32>
+  %12 = fir.load %5 : !fir.ref<!fir.box<!fir.ptr<f32>>>
+  %13 = fir.box_addr %12 {test.ptr = "p0.tgt.fir"} : (!fir.box<!fir.ptr<f32>>) -> !fir.ptr<f32>
+  %14 = fir.load %6 : !fir.ref<!fir.box<!fir.ptr<f32>>>
+  %15 = fir.box_addr %14 {test.ptr = "p1.tgt.fir"} : (!fir.box<!fir.ptr<f32>>) -> !fir.ptr<f32>
+  %c1 = arith.constant 1 : index
+  %16 = fir.array_coor %3 %c1 {test.ptr="arr(1).fir"} : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+  %c1_0 = arith.constant 1 : index
+  %17 = fir.array_coor %10 %c1_0 {test.ptr="t_arr(1).fir"} : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+  %18 = fir.load %1 : !fir.ref<!fir.box<!fir.heap<f32>>>
+  %19 = fir.box_addr %18 {test.ptr = "alloc.tgt.fir"} : (!fir.box<!fir.heap<f32>>) -> !fir.heap<f32>
+  %20 = fir.load %8 : !fir.ref<!fir.box<!fir.heap<f32>>>
+  %21 = fir.box_addr %20 {test.ptr = "t_alloc.tgt.fir"} : (!fir.box<!fir.heap<f32>>) -> !fir.heap<f32>
+  return
+}
+
+// -----
+// CHECK-LABEL: Testing : "_QFPtest3"
+
+// module pointers
+//   real, pointer :: p
+// end module
+//
+// program main
+//   use pointers
+//   real, target :: var1 = 1, var2 =2
+//   p => var1
+//
+//   call test3(p)
+//
+// contains
+//   subroutine test3(p1)
+//     real, pointer :: p1
+//     p1 => var2
+//     print *, p
+//   end subroutine
+// end
+
+// check when there are fircg.ext_embox in the paths
+// CHECK-DAG: p#0 <-> box.addr#0: NoAlias
+// CHECK-DAG: box.addr#0 <-> func.region0#0: NoAlias
+// CHECK-DAG: var2#0 <-> p#0: NoAlias
+// CHECK-DAG: var2#0 <-> box.addr#0: MustAlias
+// CHECK-DAG: var2#0 <-> func.region0#1: NoAlias
+// CHECK-DAG: box.addr#0 <-> func.region0#1: NoAlias
+
+fir.global @_QMpointersEp : !fir.box<!fir.ptr<f32>> {
+  %0 = fir.zero_bits !fir.ptr<f32>
+  %1 = fircg.ext_embox %0 : (!fir.ptr<f32>) -> !fir.box<!fir.ptr<f32>>
+  fir.has_value %1 : !fir.box<!fir.ptr<f32>>
+}
+
+fir.global internal @_QFEvar2 target : f32 {
+  %cst = arith.constant 2.000000e+00 : f32
+  fir.has_value %cst : f32
+}
+
+func.func @_QFPtest3(%arg0: !fir.ref<!fir.box<!fir.ptr<f32>>> {fir.bindc_name = "p1"}, %arg1: !fir.ref<f32>) attributes {test.ptr = "func"} {
+  %3 = fir.load %arg0 {test.ptr = "arg0.load"}: !fir.ref<!fir.box<!fir.ptr<f32>>>
+  %4 = fir.address_of(@_QFEvar2) {test.ptr = "var2"} : !fir.ref<f32>
+  %5 = fir.address_of(@_QMpointersEp) {test.ptr = "p"} : !fir.ref<!fir.box<!fir.ptr<f32>>>
+  %6 = fircg.ext_embox %4 : (!fir.ref<f32>) -> !fir.box<!fir.ptr<f32>>
+  %13 = fir.box_addr %6 {test.ptr = "box.addr"} : (!fir.box<!fir.ptr<f32>>) -> !fir.ptr<f32>
+  return
+}
+

From 29361b326bd865f7c4f07d9fc5a908d3f2b177fc Mon Sep 17 00:00:00 2001
From: Sirraide <aeternalmail@gmail.com>
Date: Thu, 20 Feb 2025 20:06:56 +0100
Subject: [PATCH 22/29] [Clang] Fix failing clang-tidy test (#128051)

#123470 broke one of the clang-tidy tests; this fixes that.
---
 .../test/clang-tidy/checkers/readability/named-parameter.cpp  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/named-parameter.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/named-parameter.cpp
index c22e9c564e3ee..50433d5d12ea9 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/named-parameter.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/named-parameter.cpp
@@ -37,8 +37,8 @@ void operator delete[](void *x) throw();
 void operator delete[](void * /*x*/) throw();
 
 struct X {
-  X operator++(int) { throw 0; }
-  X operator--(int) { throw 0; }
+  void operator++(int) {}
+  void operator--(int) {}
 
   X(X&) = delete;
   X &operator=(X&) = default;

From 6d84fae60ed2c227dfcb349a144cbc0cdd3bcc4b Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot@gmail.com>
Date: Thu, 20 Feb 2025 19:07:06 +0000
Subject: [PATCH 23/29] [gn build] Port 19af8581d51b

---
 llvm/utils/gn/secondary/clang/lib/CodeGen/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/CodeGen/BUILD.gn
index ff4f558ca2fcf..fdd631bc40398 100644
--- a/llvm/utils/gn/secondary/clang/lib/CodeGen/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/CodeGen/BUILD.gn
@@ -93,6 +93,7 @@ static_library("CodeGen") {
     "CodeGenTypes.cpp",
     "ConstantInitBuilder.cpp",
     "CoverageMappingGen.cpp",
+    "HLSLBufferLayoutBuilder.cpp",
     "ItaniumCXXABI.cpp",
     "LinkInModulesPass.cpp",
     "MacroPPCallbacks.cpp",

From 78d82d3ae7ac99833e1b9c0b529c256f90b6c6cc Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Thu, 20 Feb 2025 11:13:46 -0800
Subject: [PATCH 24/29] [lldb] Store StreamAsynchronousIO in a unique_ptr (NFC)
 (#127961)

Make StreamAsynchronousIO a unique_ptr instead of a shared_ptr. I tried
passing the class by value, but the llvm::raw_ostream forwarder stored
in the Stream parent class isn't movable and I don't think it's worth
changing that. Additionally, there are a few places that expect a
StreamSP, which are easily created from a StreamUP.
---
 lldb/include/lldb/Core/Debugger.h             |  4 +-
 lldb/include/lldb/lldb-forward.h              |  1 +
 lldb/source/Breakpoint/BreakpointOptions.cpp  |  6 +-
 .../source/Commands/CommandObjectCommands.cpp |  7 +-
 .../CommandObjectWatchpointCommand.cpp        |  6 +-
 lldb/source/Core/Debugger.cpp                 | 85 +++++++++----------
 lldb/source/Core/DynamicLoader.cpp            |  2 +-
 .../DynamicLoaderDarwinKernel.cpp             |  6 +-
 .../DynamicLoaderFreeBSDKernel.cpp            | 10 +--
 .../Process/MacOSX-Kernel/ProcessKDP.cpp      | 18 +---
 .../Process/gdb-remote/ProcessGDBRemote.cpp   |  3 +-
 lldb/source/Target/Process.cpp                | 14 ++-
 lldb/source/Target/StopInfo.cpp               |  8 +-
 13 files changed, 71 insertions(+), 99 deletions(-)

diff --git a/lldb/include/lldb/Core/Debugger.h b/lldb/include/lldb/Core/Debugger.h
index 9c8a9623fe689..6ebc6147800e1 100644
--- a/lldb/include/lldb/Core/Debugger.h
+++ b/lldb/include/lldb/Core/Debugger.h
@@ -156,9 +156,9 @@ class Debugger : public std::enable_shared_from_this<Debugger>,
 
   void RestoreInputTerminalState();
 
-  lldb::StreamSP GetAsyncOutputStream();
+  lldb::StreamUP GetAsyncOutputStream();
 
-  lldb::StreamSP GetAsyncErrorStream();
+  lldb::StreamUP GetAsyncErrorStream();
 
   CommandInterpreter &GetCommandInterpreter() {
     assert(m_command_interpreter_up.get());
diff --git a/lldb/include/lldb/lldb-forward.h b/lldb/include/lldb/lldb-forward.h
index cda55ef06e549..c664d1398f74d 100644
--- a/lldb/include/lldb/lldb-forward.h
+++ b/lldb/include/lldb/lldb-forward.h
@@ -432,6 +432,7 @@ typedef std::unique_ptr<lldb_private::StackFrameRecognizerManager>
     StackFrameRecognizerManagerUP;
 typedef std::shared_ptr<lldb_private::StopInfo> StopInfoSP;
 typedef std::shared_ptr<lldb_private::Stream> StreamSP;
+typedef std::unique_ptr<lldb_private::Stream> StreamUP;
 typedef std::shared_ptr<lldb_private::StreamFile> StreamFileSP;
 typedef std::shared_ptr<lldb_private::LockableStreamFile> LockableStreamFileSP;
 typedef std::shared_ptr<lldb_private::StringSummaryFormat>
diff --git a/lldb/source/Breakpoint/BreakpointOptions.cpp b/lldb/source/Breakpoint/BreakpointOptions.cpp
index 09abcf5e081d2..242b5b30168c5 100644
--- a/lldb/source/Breakpoint/BreakpointOptions.cpp
+++ b/lldb/source/Breakpoint/BreakpointOptions.cpp
@@ -620,10 +620,8 @@ bool BreakpointOptions::BreakpointOptionsCallbackFunction(
 
       // Rig up the results secondary output stream to the debugger's, so the
       // output will come out synchronously if the debugger is set up that way.
-      StreamSP output_stream(debugger.GetAsyncOutputStream());
-      StreamSP error_stream(debugger.GetAsyncErrorStream());
-      result.SetImmediateOutputStream(output_stream);
-      result.SetImmediateErrorStream(error_stream);
+      result.SetImmediateOutputStream(debugger.GetAsyncOutputStream());
+      result.SetImmediateErrorStream(debugger.GetAsyncErrorStream());
 
       CommandInterpreterRunOptions options;
       options.SetStopOnContinue(true);
diff --git a/lldb/source/Commands/CommandObjectCommands.cpp b/lldb/source/Commands/CommandObjectCommands.cpp
index dd841cb5cb4cc..9510cf4d14467 100644
--- a/lldb/source/Commands/CommandObjectCommands.cpp
+++ b/lldb/source/Commands/CommandObjectCommands.cpp
@@ -815,10 +815,9 @@ a number follows 'f':"
         for (const std::string &line : lines) {
           Status error = AppendRegexSubstitution(line, check_only);
           if (error.Fail()) {
-            if (!GetDebugger().GetCommandInterpreter().GetBatchCommandMode()) {
-              StreamSP out_stream = GetDebugger().GetAsyncOutputStream();
-              out_stream->Printf("error: %s\n", error.AsCString());
-            }
+            if (!GetDebugger().GetCommandInterpreter().GetBatchCommandMode())
+              GetDebugger().GetAsyncOutputStream()->Printf("error: %s\n",
+                                                           error.AsCString());
           }
         }
       }
diff --git a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp
index 507ef3fbe4759..32cb80b421fd6 100644
--- a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp
+++ b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp
@@ -252,10 +252,8 @@ are no syntax errors may indicate that a function was declared but never called.
         // Rig up the results secondary output stream to the debugger's, so the
         // output will come out synchronously if the debugger is set up that
         // way.
-        StreamSP output_stream(debugger.GetAsyncOutputStream());
-        StreamSP error_stream(debugger.GetAsyncErrorStream());
-        result.SetImmediateOutputStream(output_stream);
-        result.SetImmediateErrorStream(error_stream);
+        result.SetImmediateOutputStream(debugger.GetAsyncOutputStream());
+        result.SetImmediateErrorStream(debugger.GetAsyncErrorStream());
 
         CommandInterpreterRunOptions options;
         options.SetStopOnContinue(true);
diff --git a/lldb/source/Core/Debugger.cpp b/lldb/source/Core/Debugger.cpp
index 242ef1c8a4596..585138535203d 100644
--- a/lldb/source/Core/Debugger.cpp
+++ b/lldb/source/Core/Debugger.cpp
@@ -257,7 +257,7 @@ Status Debugger::SetPropertyValue(const ExecutionContext *exe_ctx,
         std::list<Status> errors;
         StreamString feedback_stream;
         if (!target_sp->LoadScriptingResources(errors, feedback_stream)) {
-          lldb::StreamSP s = GetAsyncErrorStream();
+          lldb::StreamUP s = GetAsyncErrorStream();
           for (auto &error : errors)
             s->Printf("%s\n", error.AsCString());
           if (feedback_stream.GetSize())
@@ -1328,13 +1328,13 @@ bool Debugger::PopIOHandler(const IOHandlerSP &pop_reader_sp) {
   return true;
 }
 
-StreamSP Debugger::GetAsyncOutputStream() {
-  return std::make_shared<StreamAsynchronousIO>(*this,
+StreamUP Debugger::GetAsyncOutputStream() {
+  return std::make_unique<StreamAsynchronousIO>(*this,
                                                 StreamAsynchronousIO::STDOUT);
 }
 
-StreamSP Debugger::GetAsyncErrorStream() {
-  return std::make_shared<StreamAsynchronousIO>(*this,
+StreamUP Debugger::GetAsyncErrorStream() {
+  return std::make_unique<StreamAsynchronousIO>(*this,
                                                 StreamAsynchronousIO::STDERR);
 }
 
@@ -1577,8 +1577,7 @@ static void PrivateReportDiagnostic(Debugger &debugger, Severity severity,
     // diagnostic directly to the debugger's error stream.
     DiagnosticEventData event_data(severity, std::move(message),
                                    debugger_specific);
-    StreamSP stream = debugger.GetAsyncErrorStream();
-    event_data.Dump(stream.get());
+    event_data.Dump(debugger.GetAsyncErrorStream().get());
     return;
   }
   EventSP event_sp = std::make_shared<Event>(
@@ -1774,12 +1773,11 @@ void Debugger::HandleBreakpointEvent(const EventSP &event_sp) {
     if (num_new_locations > 0) {
       BreakpointSP breakpoint =
           Breakpoint::BreakpointEventData::GetBreakpointFromEvent(event_sp);
-      StreamSP output_sp(GetAsyncOutputStream());
-      if (output_sp) {
-        output_sp->Printf("%d location%s added to breakpoint %d\n",
+      if (StreamUP output_up = GetAsyncOutputStream()) {
+        output_up->Printf("%d location%s added to breakpoint %d\n",
                           num_new_locations, num_new_locations == 1 ? "" : "s",
                           breakpoint->GetID());
-        output_sp->Flush();
+        output_up->Flush();
       }
     }
   }
@@ -1823,8 +1821,8 @@ void Debugger::HandleProcessEvent(const EventSP &event_sp) {
           ? EventDataStructuredData::GetProcessFromEvent(event_sp.get())
           : Process::ProcessEventData::GetProcessFromEvent(event_sp.get());
 
-  StreamSP output_stream_sp = GetAsyncOutputStream();
-  StreamSP error_stream_sp = GetAsyncErrorStream();
+  StreamUP output_stream_up = GetAsyncOutputStream();
+  StreamUP error_stream_up = GetAsyncErrorStream();
   const bool gui_enabled = IsForwardingEvents();
 
   if (!gui_enabled) {
@@ -1849,7 +1847,7 @@ void Debugger::HandleProcessEvent(const EventSP &event_sp) {
     if (got_state_changed && !state_is_stopped) {
       // This is a public stop which we are going to announce to the user, so
       // we should force the most relevant frame selection here.
-      Process::HandleProcessStateChangedEvent(event_sp, output_stream_sp.get(),
+      Process::HandleProcessStateChangedEvent(event_sp, output_stream_up.get(),
                                               SelectMostRelevantFrame,
                                               pop_process_io_handler);
     }
@@ -1865,37 +1863,35 @@ void Debugger::HandleProcessEvent(const EventSP &event_sp) {
       if (plugin_sp) {
         auto structured_data_sp =
             EventDataStructuredData::GetObjectFromEvent(event_sp.get());
-        if (output_stream_sp) {
-          StreamString content_stream;
-          Status error =
-              plugin_sp->GetDescription(structured_data_sp, content_stream);
-          if (error.Success()) {
-            if (!content_stream.GetString().empty()) {
-              // Add newline.
-              content_stream.PutChar('\n');
-              content_stream.Flush();
-
-              // Print it.
-              output_stream_sp->PutCString(content_stream.GetString());
-            }
-          } else {
-            error_stream_sp->Format("Failed to print structured "
-                                    "data with plugin {0}: {1}",
-                                    plugin_sp->GetPluginName(), error);
+        StreamString content_stream;
+        Status error =
+            plugin_sp->GetDescription(structured_data_sp, content_stream);
+        if (error.Success()) {
+          if (!content_stream.GetString().empty()) {
+            // Add newline.
+            content_stream.PutChar('\n');
+            content_stream.Flush();
+
+            // Print it.
+            output_stream_up->PutCString(content_stream.GetString());
           }
+        } else {
+          error_stream_up->Format("Failed to print structured "
+                                  "data with plugin {0}: {1}",
+                                  plugin_sp->GetPluginName(), error);
         }
       }
     }
 
     // Now display any stopped state changes after any STDIO
     if (got_state_changed && state_is_stopped) {
-      Process::HandleProcessStateChangedEvent(event_sp, output_stream_sp.get(),
+      Process::HandleProcessStateChangedEvent(event_sp, output_stream_up.get(),
                                               SelectMostRelevantFrame,
                                               pop_process_io_handler);
     }
 
-    output_stream_sp->Flush();
-    error_stream_sp->Flush();
+    output_stream_up->Flush();
+    error_stream_up->Flush();
 
     if (pop_process_io_handler)
       process_sp->PopProcessIOHandler();
@@ -1995,22 +1991,18 @@ lldb::thread_result_t Debugger::DefaultEventHandler() {
               const char *data = static_cast<const char *>(
                   EventDataBytes::GetBytesFromEvent(event_sp.get()));
               if (data && data[0]) {
-                StreamSP error_sp(GetAsyncErrorStream());
-                if (error_sp) {
-                  error_sp->PutCString(data);
-                  error_sp->Flush();
-                }
+                StreamUP error_up = GetAsyncErrorStream();
+                error_up->PutCString(data);
+                error_up->Flush();
               }
             } else if (event_type & CommandInterpreter::
                                         eBroadcastBitAsynchronousOutputData) {
               const char *data = static_cast<const char *>(
                   EventDataBytes::GetBytesFromEvent(event_sp.get()));
               if (data && data[0]) {
-                StreamSP output_sp(GetAsyncOutputStream());
-                if (output_sp) {
-                  output_sp->PutCString(data);
-                  output_sp->Flush();
-                }
+                StreamUP output_up = GetAsyncOutputStream();
+                output_up->PutCString(data);
+                output_up->Flush();
               }
             }
           } else if (broadcaster == &m_broadcaster) {
@@ -2125,7 +2117,7 @@ void Debugger::HandleProgressEvent(const lldb::EventSP &event_sp) {
   if (!file_sp->GetIsInteractive() || !file_sp->GetIsTerminalWithColors())
     return;
 
-  StreamSP output = GetAsyncOutputStream();
+  StreamUP output = GetAsyncOutputStream();
 
   // Print over previous line, if any.
   output->Printf("\r");
@@ -2175,8 +2167,7 @@ void Debugger::HandleDiagnosticEvent(const lldb::EventSP &event_sp) {
   if (!data)
     return;
 
-  StreamSP stream = GetAsyncErrorStream();
-  data->Dump(stream.get());
+  data->Dump(GetAsyncErrorStream().get());
 }
 
 bool Debugger::HasIOHandlerThread() const {
diff --git a/lldb/source/Core/DynamicLoader.cpp b/lldb/source/Core/DynamicLoader.cpp
index 9c6ca1e5f910c..76c71d2a49a48 100644
--- a/lldb/source/Core/DynamicLoader.cpp
+++ b/lldb/source/Core/DynamicLoader.cpp
@@ -328,7 +328,7 @@ ModuleSP DynamicLoader::LoadBinaryWithUUIDAndAddress(
     }
   } else {
     if (force_symbol_search) {
-      lldb::StreamSP s = target.GetDebugger().GetAsyncErrorStream();
+      lldb::StreamUP s = target.GetDebugger().GetAsyncErrorStream();
       s->Printf("Unable to find file");
       if (!name.empty())
         s->Printf(" %s", name.str().c_str());
diff --git a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp
index 1d4cda6c046b7..60724f3900ae7 100644
--- a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp
+++ b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp
@@ -738,7 +738,7 @@ bool DynamicLoaderDarwinKernel::KextImageInfo::LoadImageUsingMemoryModule(
   }
 
   if (IsKernel() && m_uuid.IsValid()) {
-    lldb::StreamSP s = target.GetDebugger().GetAsyncOutputStream();
+    lldb::StreamUP s = target.GetDebugger().GetAsyncOutputStream();
     s->Printf("Kernel UUID: %s\n", m_uuid.GetAsString().c_str());
     s->Printf("Load Address: 0x%" PRIx64 "\n", m_load_address);
 
@@ -830,7 +830,7 @@ bool DynamicLoaderDarwinKernel::KextImageInfo::LoadImageUsingMemoryModule(
       }
 
       if (IsKernel() && !m_module_sp) {
-        lldb::StreamSP s = target.GetDebugger().GetAsyncErrorStream();
+        lldb::StreamUP s = target.GetDebugger().GetAsyncErrorStream();
         s->Printf("WARNING: Unable to locate kernel binary on the debugger "
                   "system.\n");
         if (kernel_search_error.Fail() && kernel_search_error.AsCString("") &&
@@ -974,7 +974,7 @@ bool DynamicLoaderDarwinKernel::KextImageInfo::LoadImageUsingMemoryModule(
   bool is_loaded = IsLoaded();
 
   if (is_loaded && m_module_sp && IsKernel()) {
-    lldb::StreamSP s = target.GetDebugger().GetAsyncOutputStream();
+    lldb::StreamUP s = target.GetDebugger().GetAsyncOutputStream();
     ObjectFile *kernel_object_file = m_module_sp->GetObjectFile();
     if (kernel_object_file) {
       addr_t file_address =
diff --git a/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp b/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp
index 3bf0a46de57af..a23ba3ad5c545 100644
--- a/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp
+++ b/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp
@@ -327,7 +327,7 @@ bool DynamicLoaderFreeBSDKernel::KModImageInfo::LoadImageUsingMemoryModule(
   Target &target = process->GetTarget();
 
   if (IsKernel() && m_uuid.IsValid()) {
-    lldb::StreamSP s = target.GetDebugger().GetAsyncOutputStream();
+    lldb::StreamUP s = target.GetDebugger().GetAsyncOutputStream();
     s->Printf("Kernel UUID: %s\n", m_uuid.GetAsString().c_str());
     s->Printf("Load Address: 0x%" PRIx64 "\n", m_load_address);
   }
@@ -355,9 +355,9 @@ bool DynamicLoaderFreeBSDKernel::KModImageInfo::LoadImageUsingMemoryModule(
       if (!m_module_sp)
         m_module_sp = target.GetOrCreateModule(module_spec, true);
       if (IsKernel() && !m_module_sp) {
-        lldb::StreamSP s = target.GetDebugger().GetAsyncOutputStream();
-        s->Printf("WARNING: Unable to locate kernel binary on the debugger "
-                  "system.\n");
+        target.GetDebugger().GetAsyncOutputStream()->Printf(
+            "WARNING: Unable to locate kernel binary on the debugger "
+            "system.\n");
       }
     }
 
@@ -464,7 +464,7 @@ bool DynamicLoaderFreeBSDKernel::KModImageInfo::LoadImageUsingMemoryModule(
   }
 
   if (IsLoaded() && m_module_sp && IsKernel()) {
-    lldb::StreamSP s = target.GetDebugger().GetAsyncOutputStream();
+    lldb::StreamUP s = target.GetDebugger().GetAsyncOutputStream();
     ObjectFile *kernel_object_file = m_module_sp->GetObjectFile();
     if (kernel_object_file) {
       addr_t file_address =
diff --git a/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.cpp b/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.cpp
index 9b2907c680996..406e1d45dc39a 100644
--- a/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.cpp
+++ b/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.cpp
@@ -321,20 +321,10 @@ Status ProcessKDP::DoConnectRemote(llvm::StringRef remote_url) {
           SetID(1);
           GetThreadList();
           SetPrivateState(eStateStopped);
-          StreamSP async_strm_sp(target.GetDebugger().GetAsyncOutputStream());
-          if (async_strm_sp) {
-            const char *cstr;
-            if ((cstr = m_comm.GetKernelVersion()) != NULL) {
-              async_strm_sp->Printf("Version: %s\n", cstr);
-              async_strm_sp->Flush();
-            }
-            //                      if ((cstr = m_comm.GetImagePath ()) != NULL)
-            //                      {
-            //                          async_strm_sp->Printf ("Image Path:
-            //                          %s\n", cstr);
-            //                          async_strm_sp->Flush();
-            //                      }
-          }
+          const char *cstr;
+          if ((cstr = m_comm.GetKernelVersion()) != NULL)
+            target.GetDebugger().GetAsyncOutputStream()->Printf("Version: %s\n",
+                                                                cstr);
         } else {
           return Status::FromErrorString("KDP_REATTACH failed");
         }
diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
index f36595145e035..8a8c0f92fbbc2 100644
--- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
@@ -5495,8 +5495,7 @@ class CommandObjectProcessGDBRemoteSpeedTest : public CommandObjectParsed {
       if (process) {
         StreamSP output_stream_sp = result.GetImmediateOutputStream();
         if (!output_stream_sp)
-          output_stream_sp =
-              StreamSP(m_interpreter.GetDebugger().GetAsyncOutputStream());
+          output_stream_sp = m_interpreter.GetDebugger().GetAsyncOutputStream();
         result.SetImmediateOutputStream(output_stream_sp);
 
         const uint32_t num_packets =
diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index 0041c8f2b2db2..6db582096155f 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -2743,10 +2743,9 @@ Status Process::LaunchPrivate(ProcessLaunchInfo &launch_info, StateType &state,
 
     // Now that we know the process type, update its signal responses from the
     // ones stored in the Target:
-    if (m_unix_signals_sp) {
-      StreamSP warning_strm = GetTarget().GetDebugger().GetAsyncErrorStream();
-      GetTarget().UpdateSignalsFromDummy(m_unix_signals_sp, warning_strm);
-    }
+    if (m_unix_signals_sp)
+      GetTarget().UpdateSignalsFromDummy(
+          m_unix_signals_sp, GetTarget().GetDebugger().GetAsyncErrorStream());
 
     DynamicLoader *dyld = GetDynamicLoader();
     if (dyld)
@@ -3131,10 +3130,9 @@ void Process::CompleteAttach() {
   }
   // Now that we know the process type, update its signal responses from the
   // ones stored in the Target:
-  if (m_unix_signals_sp) {
-    StreamSP warning_strm = GetTarget().GetDebugger().GetAsyncErrorStream();
-    GetTarget().UpdateSignalsFromDummy(m_unix_signals_sp, warning_strm);
-  }
+  if (m_unix_signals_sp)
+    GetTarget().UpdateSignalsFromDummy(
+        m_unix_signals_sp, GetTarget().GetDebugger().GetAsyncErrorStream());
 
   // We have completed the attach, now it is time to find the dynamic loader
   // plug-in
diff --git a/lldb/source/Target/StopInfo.cpp b/lldb/source/Target/StopInfo.cpp
index 092d78d87a2b1..1c9ecbfe70c3c 100644
--- a/lldb/source/Target/StopInfo.cpp
+++ b/lldb/source/Target/StopInfo.cpp
@@ -1016,11 +1016,9 @@ class StopInfoWatchpoint : public StopInfo {
           wp_sp->CaptureWatchedValue(exe_ctx);
 
           Debugger &debugger = exe_ctx.GetTargetRef().GetDebugger();
-          StreamSP output_sp = debugger.GetAsyncOutputStream();
-          if (wp_sp->DumpSnapshots(output_sp.get())) {
-            output_sp->EOL();
-            output_sp->Flush();
-          }
+          StreamUP output_up = debugger.GetAsyncOutputStream();
+          if (wp_sp->DumpSnapshots(output_up.get()))
+            output_up->EOL();
         }
 
       } else {

From 0ba391a85f03541635c337ab5648704175e19bec Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks@fb.com>
Date: Thu, 20 Feb 2025 11:16:01 -0800
Subject: [PATCH 25/29] [BOLT] Improve constant island disassembly (#127971)

* Add label that identifies constant island.
* Support cases where the island is located after the function.
---
 bolt/lib/Core/BinaryFunction.cpp |  7 ++++++-
 bolt/test/AArch64/data-in-code.s | 10 ++++++++--
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index bc45caf3ec8b7..1e427b2df11cf 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -498,6 +498,11 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
     if (!IslandOffset)
       return;
 
+    // Print label if it exists at this offset.
+    if (const BinaryData *BD =
+            BC.getBinaryDataAtAddress(getAddress() + *IslandOffset))
+      OS << BD->getName() << ":\n";
+
     const size_t IslandSize = getSizeOfDataInCodeAt(*IslandOffset);
     BC.printData(OS, BC.extractData(getAddress() + *IslandOffset, IslandSize),
                  *IslandOffset);
@@ -1066,7 +1071,7 @@ size_t BinaryFunction::getSizeOfDataInCodeAt(uint64_t Offset) const {
   auto Iter = Islands->CodeOffsets.upper_bound(Offset);
   if (Iter != Islands->CodeOffsets.end())
     return *Iter - Offset;
-  return getSize() - Offset;
+  return getMaxSize() - Offset;
 }
 
 std::optional<uint64_t>
diff --git a/bolt/test/AArch64/data-in-code.s b/bolt/test/AArch64/data-in-code.s
index 8d3179a0c3350..1df5d4568542f 100644
--- a/bolt/test/AArch64/data-in-code.s
+++ b/bolt/test/AArch64/data-in-code.s
@@ -7,7 +7,8 @@
 ## Check disassembly of BOLT input.
 # RUN: llvm-objdump %t.exe -d | FileCheck %s
 
-# RUN: llvm-bolt %t.exe -o %t.bolt --print-disasm | FileCheck %s
+# RUN: llvm-bolt %t.exe -o %t.bolt --print-disasm \
+# RUN:   | FileCheck %s --check-prefixes CHECK,CHECK-BOLT-ONLY
 
 .text
 .balign 4
@@ -16,16 +17,21 @@
 .type _start, %function
 _start:
   mov x0, #0x0
+  ldr x1, .L1
   .word 0x4f82e010
   ret
+.size _start, .-_start
+.L1:
   .byte 0x0, 0xff, 0x42
 # CHECK-LABEL: _start
 # CHECK:        mov x0, #0x0
+# CHECK-NEXT:   ldr x1
+# CHECK-BOLT-ONLY-SAME: ISLANDat[[ADDR:]]
 # CHECK-NEXT:   .word 0x4f82e010
 # CHECK-NEXT:   ret
+# CHECK-BOLT-ONLY-NEXT: ISLANDat[[ADDR]]
 # CHECK-NEXT:   .short 0xff00
 # CHECK-NEXT:   .byte 0x42
-.size _start, .-_start
 
 ## Force relocation mode.
   .reloc 0, R_AARCH64_NONE

From 2e5ec1cc5b8ef30f04f53d927860184acf7150b3 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Thu, 20 Feb 2025 11:29:54 -0800
Subject: [PATCH 26/29] [flang] Add FLANG_PARALLEL_COMPILE_JOBS option
 (#127364)

This is a re-apply of 083c683969b2436afdc45becadc955841f5f4d31 with a
fix for the flang runtime build.

This works the same way as LLVM_PARALLEL_COMPILE_JOBS except that it is
specific to the flang source rather than for the whole project.

Configuring with -DFLANG_PARALLEL_COMPILE_JOBS=1 would mean that there
would only ever be one flang source being compiled at a time.

Some of the flang sources require large amounts of memory to compile, so
this option can be used to avoid OOM errors when compiling those files
while still allowing the rest of the project to compile using the
maximum number of jobs.

Update flang/CMakeLists.txt

---------

Co-authored-by: Nikita Popov <github@npopov.com>
Co-authored-by: Michael Kruse <github@meinersbur.de>
---
 flang/CMakeLists.txt               | 11 +++++++++++
 flang/cmake/modules/AddFlang.cmake |  3 +++
 2 files changed, 14 insertions(+)

diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index ac8f784fd811e..4b703b456cae2 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -467,6 +467,17 @@ if (APPLE)
   endif()
 endif()
 
+# Set up job pools for flang.  Some of the flang sources take a lot of memory to
+# compile, so allow users to limit the number of parallel flang jobs.  This is
+# useful for building flang alongside several other projects since you can use
+# the maximum number of build jobs for the other projects while limiting the
+# number of flang compile jobs.
+set(FLANG_PARALLEL_COMPILE_JOBS CACHE STRING
+  "The maximum number of concurrent compilation jobs for Flang (Ninja only)")
+if (FLANG_PARALLEL_COMPILE_JOBS)
+  set_property(GLOBAL APPEND PROPERTY JOB_POOLS flang_compile_job_pool=${FLANG_PARALLEL_COMPILE_JOBS})
+endif()
+
 include(AddFlang)
 include(FlangCommon)
 
diff --git a/flang/cmake/modules/AddFlang.cmake b/flang/cmake/modules/AddFlang.cmake
index badbd4e7b964b..ca233103ccdbe 100644
--- a/flang/cmake/modules/AddFlang.cmake
+++ b/flang/cmake/modules/AddFlang.cmake
@@ -94,6 +94,9 @@ function(add_flang_library name)
       set_property(GLOBAL APPEND PROPERTY FLANG_LIBS ${name})
     endif()
     set_property(GLOBAL APPEND PROPERTY FLANG_EXPORTS ${name})
+    if (FLANG_PARALLEL_COMPILE_JOBS)
+      set_property(TARGET ${name} PROPERTY JOB_POOL_COMPILE flang_compile_job_pool)
+    endif()
   else()
     # Add empty "phony" target
     add_custom_target(${name})

From df427992da4492a664b60ba5201bb2f4b5275228 Mon Sep 17 00:00:00 2001
From: Renaud Kauffmann <rkauffmann@nvidia.com>
Date: Thu, 20 Feb 2025 11:30:25 -0800
Subject: [PATCH 27/29] Adding dependency to FIRCodeGen to fix buildbots
 (#128053)

---
 flang/lib/Optimizer/Analysis/CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/flang/lib/Optimizer/Analysis/CMakeLists.txt b/flang/lib/Optimizer/Analysis/CMakeLists.txt
index 4d4ad882c27d3..3249f8a76ae3e 100644
--- a/flang/lib/Optimizer/Analysis/CMakeLists.txt
+++ b/flang/lib/Optimizer/Analysis/CMakeLists.txt
@@ -6,12 +6,14 @@ add_flang_library(FIRAnalysis
   FIRDialect
   FIRSupport
   HLFIRDialect
+  FIRCodeGen
 
   LINK_LIBS
   FIRBuilder
   FIRDialect
   FIRSupport
   HLFIRDialect
+  FIRCodeGen
 
   MLIR_DEPS
   MLIRIR

From f34f21a1f61b0413dd5ac90db58b4685ff492367 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Thu, 20 Feb 2025 20:30:05 +0100
Subject: [PATCH 28/29] [bazel] Fix mlir python build after
 a72616de18c0814ad37b5748d6bdc60b825dd889

---
 utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 05385ba491525..8f9a7e388ebc7 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -11136,6 +11136,7 @@ td_library(
         "include/mlir/Dialect/Linalg/IR/LinalgEnums.td",
         "include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td",
         "include/mlir/Dialect/Linalg/IR/LinalgOps.td",
+        "include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td",
     ],
     includes = ["include"],
     deps = [

From 5bf37484c6b0fc38a63465a36890a11494e4fc07 Mon Sep 17 00:00:00 2001
From: Ashley Coleman <ascoleman@microsoft.com>
Date: Thu, 20 Feb 2025 12:41:44 -0700
Subject: [PATCH 29/29] [NFC][hlsl][Sema] Simplify CBuffer Legacy Size
 Calculation Control Flow (#127921)

NFC: Small refactor to `calculateLegacyCbufferSize()`'s control flow to
make each branch easier to follow/more visually distinct from each other
---
 clang/lib/Sema/SemaHLSL.cpp | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 502a1429ff6e3..d26d85d5861b1 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -176,9 +176,9 @@ Decl *SemaHLSL::ActOnStartBuffer(Scope *BufferScope, bool CBuffer,
 // https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-packing-rules
 static unsigned calculateLegacyCbufferSize(const ASTContext &Context,
                                            QualType T) {
-  unsigned Size = 0;
   constexpr unsigned CBufferAlign = 16;
   if (const RecordType *RT = T->getAs<RecordType>()) {
+    unsigned Size = 0;
     const RecordDecl *RD = RT->getDecl();
     for (const FieldDecl *Field : RD->fields()) {
       QualType Ty = Field->getType();
@@ -191,22 +191,28 @@ static unsigned calculateLegacyCbufferSize(const ASTContext &Context,
       Size = llvm::alignTo(Size, FieldAlign);
       Size += FieldSize;
     }
-  } else if (const ConstantArrayType *AT = Context.getAsConstantArrayType(T)) {
-    if (unsigned ElementCount = AT->getSize().getZExtValue()) {
-      unsigned ElementSize =
-          calculateLegacyCbufferSize(Context, AT->getElementType());
-      unsigned AlignedElementSize = llvm::alignTo(ElementSize, CBufferAlign);
-      Size = AlignedElementSize * (ElementCount - 1) + ElementSize;
-    }
-  } else if (const VectorType *VT = T->getAs<VectorType>()) {
+    return Size;
+  }
+
+  if (const ConstantArrayType *AT = Context.getAsConstantArrayType(T)) {
+    unsigned ElementCount = AT->getSize().getZExtValue();
+    if (ElementCount == 0)
+      return 0;
+
+    unsigned ElementSize =
+        calculateLegacyCbufferSize(Context, AT->getElementType());
+    unsigned AlignedElementSize = llvm::alignTo(ElementSize, CBufferAlign);
+    return AlignedElementSize * (ElementCount - 1) + ElementSize;
+  }
+
+  if (const VectorType *VT = T->getAs<VectorType>()) {
     unsigned ElementCount = VT->getNumElements();
     unsigned ElementSize =
         calculateLegacyCbufferSize(Context, VT->getElementType());
-    Size = ElementSize * ElementCount;
-  } else {
-    Size = Context.getTypeSize(T) / 8;
+    return ElementSize * ElementCount;
   }
-  return Size;
+
+  return Context.getTypeSize(T) / 8;
 }
 
 // Validate packoffset: