GPUOpen-Drivers
diff --git a/‎clang/test/Driver/sanitizer-ld.c
Lines changed: 2 additions & 2 deletions b/‎clang/test/Driver/sanitizer-ld.c
Lines changed: 2 additions & 2 deletions
diff --git a/‎compiler-rt/lib/builtins/CMakeLists.txt
Lines changed: 1 addition & 0 deletions b/‎compiler-rt/lib/builtins/CMakeLists.txt
Lines changed: 1 addition & 0 deletions
diff --git a/‎compiler-rt/lib/builtins/truncxfhf2.c
Lines changed: 15 additions & 0 deletions b/‎compiler-rt/lib/builtins/truncxfhf2.c
Lines changed: 15 additions & 0 deletions
diff --git a/‎compiler-rt/test/builtins/Unit/truncxfhf2_test.c
Lines changed: 74 additions & 0 deletions b/‎compiler-rt/test/builtins/Unit/truncxfhf2_test.c
Lines changed: 74 additions & 0 deletions
diff --git a/‎libc/test/UnitTest/FPMatcher.h
Lines changed: 4 additions & 4 deletions b/‎libc/test/UnitTest/FPMatcher.h
Lines changed: 4 additions & 4 deletions
diff --git a/‎llvm/include/llvm/Config/llvm-config.h.cmake
Lines changed: 1 addition & 1 deletion b/‎llvm/include/llvm/Config/llvm-config.h.cmake
Lines changed: 1 addition & 1 deletion
diff --git a/‎llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Lines changed: 7 additions & 7 deletions b/‎llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Lines changed: 7 additions & 7 deletions
diff --git a/‎llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Lines changed: 23 additions & 56 deletions b/‎llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Lines changed: 23 additions & 56 deletions
diff --git a/‎llvm/lib/Transforms/Vectorize/VectorCombine.cpp
Lines changed: 2 additions & 9 deletions b/‎llvm/lib/Transforms/Vectorize/VectorCombine.cpp
Lines changed: 2 additions & 9 deletions
@@ -332,8 +332,8 @@
 // RUN:   | %{filecheck} --check-prefix=CHECK-ASAN-ANDROID-SHARED-LIBASAN
 //
 // CHECK-ASAN-ANDROID-SHARED-LIBASAN-NOT: argument unused during compilation: '-shared-libsan'
-// CHECK-ASAN-ANDROID-SHARED-LIBASAN: libclang_rt.asan.so"
-// CHECK-ASAN-ANDROID-SHARED-LIBASAN: libclang_rt.asan_static.a"
+// CHECK-ASAN-ANDROID-SHARED-LIBASAN: libclang_rt.asan{{.*}}.so"
+// CHECK-ASAN-ANDROID-SHARED-LIBASAN: libclang_rt.asan_static{{.*}}.a"
 //
 // RUN: %clang -### %s 2>&1 \
 // RUN:     --target=arm-linux-androideabi -fuse-ld=ld -fsanitize=address \
 
@@ -310,6 +310,7 @@ set(x86_80_BIT_SOURCES
   mulxc3.c
   powixf2.c
   trunctfxf2.c
+  truncxfhf2.c
 )
 
 if (NOT MSVC)
 
@@ -0,0 +1,15 @@
+//===-- lib/truncxfhf2.c - long double -> half conversion ---------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define SRC_SINGLE
+#define DST_HALF
+#include "fp_trunc_impl.inc"
+
+COMPILER_RT_ABI dst_t __truncxfhf2(xf_float a) {
+  return __truncXfYf2__((float)a); // NOTE(review): float hop may double-round
+}
@@ -0,0 +1,74 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+// REQUIRES: librt_has_truncxfhf2
+
+#include <stdio.h>
+
+#include "fp_test.h"
+
+#if HAS_80_BIT_LONG_DOUBLE
+
+TYPE_FP16 __truncxfhf2(xf_float f);
+
+int test_truncxfhf2(uint16_t inputHi, uint64_t inputLo, uint16_t e) {
+  xf_float a = F80FromRep80(inputHi, inputLo);
+  TYPE_FP16 x = __truncxfhf2(a);
+  int ret = compareResultH(x, e); // e: expected result, printed as hex below
+  if (ret) { // non-zero: report actual vs. expected representation
+    printf("error in test__truncxfhf2(%Lf) = %#.4x, "
+           "expected %#.4x\n",
+           a, toRep16(x), e);
+  }
+  return ret;
+}
+
+int main() {
+  // Small positive value
+  if (test_truncxfhf2(UINT16_C(0x3ffb), UINT64_C(0xccc0000000000000),
+                      UINT16_C(0x2e66)))
+    return 1;
+
+  // Small negative value
+  if (test_truncxfhf2(UINT16_C(0xbffb), UINT64_C(0xccc0000000000000),
+                      UINT16_C(0xae66)))
+    return 1;
+
+  // Zero
+  if (test_truncxfhf2(UINT16_C(0x0), UINT64_C(0x0), UINT16_C(0)))
+    return 1;
+
+  // Positive 2^-16, which is subnormal in half precision
+  if (test_truncxfhf2(UINT16_C(0x3fef), UINT64_C(0x8000000000000000),
+                      UINT16_C(0x0100)))
+    return 1;
+
+  // Negative 2^-16, which is subnormal in half precision
+  if (test_truncxfhf2(UINT16_C(0xbfef), UINT64_C(0x8000000000000000),
+                      UINT16_C(0x8100)))
+    return 1;
+
+  // Positive infinity
+  if (test_truncxfhf2(UINT16_C(0x7fff), UINT64_C(0x8000000000000000),
+                      UINT16_C(0x7c00U)))
+    return 1;
+
+  // Negative infinity
+  if (test_truncxfhf2(UINT16_C(0xffff), UINT64_C(0x8000000000000000),
+                      UINT16_C(0xfc00U)))
+    return 1;
+
+  // NaN
+  if (test_truncxfhf2(UINT16_C(0x7fff), UINT64_C(0xc000000000000000),
+                      UINT16_C(0x7e00U)))
+    return 1;
+
+  return 0;
+}
+
+#else
+
+int main() {
+  printf("skipped\n"); // HAS_80_BIT_LONG_DOUBLE is false: nothing to test
+  return 0;
+}
+
+#endif
@@ -131,11 +131,11 @@ template <typename T, TestCond Condition> class CFPMatcher : public Matcher<T> {
     else if constexpr (cpp::is_complex_type_same<T, _Complex long double>())
       return matchComplex<long double>();
 #ifdef LIBC_TYPES_HAS_CFLOAT16
-    else if constexpr (cpp::is_complex_type_same<T, cfloat16>)
+    else if constexpr (cpp::is_complex_type_same<T, cfloat16>())
       return matchComplex<float16>();
 #endif
 #ifdef LIBC_TYPES_HAS_CFLOAT128
-    else if constexpr (cpp::is_complex_type_same<T, cfloat128>)
+    else if constexpr (cpp::is_complex_type_same<T, cfloat128>())
       return matchComplex<float128>();
 #endif
   }
@@ -148,11 +148,11 @@ template <typename T, TestCond Condition> class CFPMatcher : public Matcher<T> {
     else if constexpr (cpp::is_complex_type_same<T, _Complex long double>())
       return explainErrorComplex<long double>();
 #ifdef LIBC_TYPES_HAS_CFLOAT16
-    else if constexpr (cpp::is_complex_type_same<T, cfloat16>)
+    else if constexpr (cpp::is_complex_type_same<T, cfloat16>())
       return explainErrorComplex<float16>();
 #endif
 #ifdef LIBC_TYPES_HAS_CFLOAT128
-    else if constexpr (cpp::is_complex_type_same<T, cfloat128>)
+    else if constexpr (cpp::is_complex_type_same<T, cfloat128>())
       return explainErrorComplex<float128>();
 #endif
   }
 
@@ -16,7 +16,7 @@
 
 /* Indicate that this is LLVM compiled from the amd-gfx branch. */
 #define LLVM_HAVE_BRANCH_AMD_GFX
-#define LLVM_MAIN_REVISION 522523
+#define LLVM_MAIN_REVISION 522528
 
 /* Define if LLVM_ENABLE_DUMP is enabled */
 #cmakedefine LLVM_ENABLE_DUMP
 
@@ -3489,10 +3489,10 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
   if (hasIrregularType(ScalarTy, DL))
     return false;
 
-  // For scalable vectors, the only interleave factor currently supported
-  // must be power of 2 since we require the (de)interleave2 intrinsics
-  // instead of shufflevectors.
-  if (VF.isScalable() && !isPowerOf2_32(InterleaveFactor))
+  // We currently only know how to emit interleave/deinterleave with
+  // Factor=2 for scalable vectors. This is purely an implementation
+  // limit.
+  if (VF.isScalable() && InterleaveFactor != 2)
     return false;
 
   // If the group involves a non-integral pointer, we may not be able to
@@ -9193,9 +9193,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
                      CM.getWideningDecision(IG->getInsertPos(), VF) ==
                          LoopVectorizationCostModel::CM_Interleave);
       // For scalable vectors, the only interleave factor currently supported
-      // must be power of 2 since we require the (de)interleave2 intrinsics
-      // instead of shufflevectors.
-      assert((!Result || !VF.isScalable() || isPowerOf2_32(IG->getFactor())) &&
+      // is 2 since we require the (de)interleave2 intrinsics instead of
+      // shufflevectors.
+      assert((!Result || !VF.isScalable() || IG->getFactor() == 2) &&
              "Unsupported interleave factor for scalable vectors");
       return Result;
     };
 
@@ -2789,21 +2789,10 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
   // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
   // must use intrinsics to interleave.
   if (VecTy->isScalableTy()) {
-    assert(isPowerOf2_32(Factor) && "Unsupported interleave factor for "
-                                    "scalable vectors, must be power of 2");
-    SmallVector<Value *> InterleavingValues(Vals);
-    // When interleaving, the number of values will be shrunk until we have the
-    // single final interleaved value.
-    auto *InterleaveTy = cast<VectorType>(InterleavingValues[0]->getType());
-    for (unsigned Midpoint = Factor / 2; Midpoint > 0; Midpoint /= 2) {
-      InterleaveTy = VectorType::getDoubleElementsVectorType(InterleaveTy);
-      for (unsigned I = 0; I < Midpoint; ++I)
-        InterleavingValues[I] = Builder.CreateIntrinsic(
-            InterleaveTy, Intrinsic::vector_interleave2,
-            {InterleavingValues[I], InterleavingValues[Midpoint + I]},
-            /*FMFSource=*/nullptr, Name);
-    }
-    return InterleavingValues[0];
+    VectorType *WideVecTy = VectorType::getDoubleElementsVectorType(VecTy);
+    return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2,
+                                   Vals,
+                                   /*FMFSource=*/nullptr, Name);
   }
 
   // Fixed length. Start by concatenating all vectors into a wide vector.
@@ -2889,11 +2878,15 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
                           &InterleaveFactor](Value *MaskForGaps) -> Value * {
     if (State.VF.isScalable()) {
       assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
-      assert(isPowerOf2_32(InterleaveFactor) &&
+      assert(InterleaveFactor == 2 &&
              "Unsupported deinterleave factor for scalable vectors");
       auto *ResBlockInMask = State.get(BlockInMask);
-      SmallVector<Value *> Ops(InterleaveFactor, ResBlockInMask);
-      return interleaveVectors(State.Builder, Ops, "interleaved.mask");
+      SmallVector<Value *, 2> Ops = {ResBlockInMask, ResBlockInMask};
+      auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(),
+                                     State.VF.getKnownMinValue() * 2, true);
+      return State.Builder.CreateIntrinsic(
+          MaskTy, Intrinsic::vector_interleave2, Ops,
+          /*FMFSource=*/nullptr, "interleaved.mask");
     }
 
     if (!BlockInMask)
@@ -2933,48 +2926,22 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
     ArrayRef<VPValue *> VPDefs = definedValues();
     const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
     if (VecTy->isScalableTy()) {
-      assert(isPowerOf2_32(InterleaveFactor) &&
+      assert(InterleaveFactor == 2 &&
              "Unsupported deinterleave factor for scalable vectors");
 
-      // Scalable vectors cannot use arbitrary shufflevectors (only splats),
-      // so must use intrinsics to deinterleave.
-      SmallVector<Value *> DeinterleavedValues(InterleaveFactor);
-      DeinterleavedValues[0] = NewLoad;
-      // For the case of InterleaveFactor > 2, we will have to do recursive
-      // deinterleaving, because the current available deinterleave intrinsic
-      // supports only Factor of 2, otherwise it will bailout after first
-      // iteration.
-      // When deinterleaving, the number of values will double until we
-      // have "InterleaveFactor".
-      for (unsigned NumVectors = 1; NumVectors < InterleaveFactor;
-           NumVectors *= 2) {
-        // Deinterleave the elements within the vector
-        SmallVector<Value *> TempDeinterleavedValues(NumVectors);
-        for (unsigned I = 0; I < NumVectors; ++I) {
-          auto *DiTy = DeinterleavedValues[I]->getType();
-          TempDeinterleavedValues[I] = State.Builder.CreateIntrinsic(
-              Intrinsic::vector_deinterleave2, DiTy, DeinterleavedValues[I],
-              /*FMFSource=*/nullptr, "strided.vec");
-        }
-        // Extract the deinterleaved values:
-        for (unsigned I = 0; I < 2; ++I)
-          for (unsigned J = 0; J < NumVectors; ++J)
-            DeinterleavedValues[NumVectors * I + J] =
-                State.Builder.CreateExtractValue(TempDeinterleavedValues[J], I);
-      }
-
-#ifndef NDEBUG
-      for (Value *Val : DeinterleavedValues)
-        assert(Val && "NULL Deinterleaved Value");
-#endif
-      for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) {
+      // Scalable vectors cannot use arbitrary shufflevectors (only splats),
+      // so must use intrinsics to deinterleave.
+      Value *DI = State.Builder.CreateIntrinsic(
+          Intrinsic::vector_deinterleave2, VecTy, NewLoad,
+          /*FMFSource=*/nullptr, "strided.vec");
+      unsigned J = 0;
+      for (unsigned I = 0; I < InterleaveFactor; ++I) {
         Instruction *Member = Group->getMember(I);
-        Value *StridedVec = DeinterleavedValues[I];
-        if (!Member) {
-          // This value is not needed as it's not used
-          static_cast<Instruction *>(StridedVec)->eraseFromParent();
+
+        if (!Member)
           continue;
-        }
+
+        Value *StridedVec = State.Builder.CreateExtractValue(DI, I);
         // If this member has different type, cast the result type.
         if (Member->getType() != ScalarTy) {
           VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
 
@@ -139,17 +139,10 @@ class VectorCombine {
 
   void eraseInstruction(Instruction &I) {
     LLVM_DEBUG(dbgs() << "VC: Erasing: " << I << '\n');
-    SmallVector<Value *> Ops(I.operands());
+    for (Value *Op : I.operands())
+      Worklist.pushValue(Op);
     Worklist.remove(&I);
     I.eraseFromParent();
-
-    // Push remaining users and then the operand itself - allows further folds
-    // that were hindered by OneUse limits.
-    for (Value *Op : Ops)
-      if (auto *OpI = dyn_cast<Instruction>(Op)) {
-        Worklist.pushUsersToWorkList(*OpI);
-        Worklist.pushValue(OpI);
-      }
   }
 };
 } // namespace
Original file line number	Diff line number	Diff line change
`@@ -310,6 +310,7 @@ set(x86_80_BIT_SOURCES`
`310`	`310`	`mulxc3.c`
`311`	`311`	`powixf2.c`
`312`	`312`	`trunctfxf2.c`
	`313`	`+ truncxfhf2.c`
`313`	`314`	`)`
`314`	`315`
`315`	`316`	`if (NOT MSVC)`