llvm · Sterling-Augustine · Oct 7, 2024 · Sep 24, 2024 · Oct 1, 2024 · Oct 3, 2024
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
@@ -0,0 +1,132 @@
+//===- SeedCollector.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This file contains the mechanism for collecting the seed instructions that
+// are used as starting points for forming the vectorization graph.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SEEDCOLLECTOR_H
+#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SEEDCOLLECTOR_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/SandboxIR/Instruction.h"
+#include "llvm/SandboxIR/Utils.h"
+#include "llvm/SandboxIR/Value.h"
+#include <iterator>
+#include <memory>
+
+namespace llvm::sandboxir {
+
+/// A set of candidate Instructions for vectorizing together.
+///
+/// Elements are never physically removed from the bundle. Instead each element
+/// ("lane") can be marked as used, and the bundle keeps track of how many
+/// lanes are used and how many bits are still unused.
+class SeedBundle {
+public:
+  /// Initialize a bundle with \p I.
+  explicit SeedBundle(Instruction *I) { insertAt(begin(), I); }
+  /// Initialize a bundle by taking over the seeds in \p L. All of them start
+  /// out unused.
+  explicit SeedBundle(SmallVector<Instruction *> &&L) : Seeds(std::move(L)) {
+    for (Instruction *S : Seeds)
+      NumUnusedBits += Utils::getNumBits(S);
+  }
+  /// No need to allow copies.
+  SeedBundle(const SeedBundle &) = delete;
+  SeedBundle &operator=(const SeedBundle &) = delete;
+  virtual ~SeedBundle() = default;
+
+  using iterator = SmallVector<Instruction *>::iterator;
+  using const_iterator = SmallVector<Instruction *>::const_iterator;
+  iterator begin() { return Seeds.begin(); }
+  iterator end() { return Seeds.end(); }
+  const_iterator begin() const { return Seeds.begin(); }
+  const_iterator end() const { return Seeds.end(); }
+
+  /// \Returns the seed stored at position \p Idx.
+  Instruction *operator[](unsigned Idx) const { return Seeds[Idx]; }
+
+  /// Insert \p I into position \p Pos. Clients should choose \p Pos
+  /// by symbol, symbol-offset, and program order (which depends if scheduling
+  /// bottom-up or top-down).
+  /// The used-lane bookkeeping is indexed by position and is not shifted
+  /// here, so inserting in front of a lane that is already marked used
+  /// changes which element that mark refers to.
+  void insertAt(iterator Pos, Instruction *I) {
+#ifdef EXPENSIVE_CHECKS
+    // Seeds holds Instruction pointers, so compare the pointers themselves.
+    assert(std::find(Seeds.begin(), Seeds.end(), I) == Seeds.end() &&
+           "Attempt to insert an instruction twice.");
+#endif
+    Seeds.insert(Pos, I);
+    NumUnusedBits += Utils::getNumBits(I);
+  }
+
+  /// \Returns the index of the first element that is not marked used, or the
+  /// number of elements if every element is used.
+  unsigned getFirstUnusedElementIdx() const {
+    for (unsigned ElmIdx : seq<unsigned>(0, Seeds.size()))
+      if (!isUsed(ElmIdx))
+        return ElmIdx;
+    return Seeds.size();
+  }
+  /// Marks instruction \p I "used" within the bundle. Clients
+  /// use this property when assembling a vectorized instruction from
+  /// the seeds in a bundle. Marking instead of erasing keeps the positions of
+  /// the remaining seeds stable. \p I must be in the bundle; marking an
+  /// instruction that is already used is a no-op.
+  void setUsed(Instruction *I) {
+    auto It = std::find(begin(), end(), I);
+    assert(It != end() && "Instruction not in the bundle!");
+    auto Idx = It - begin();
+    setUsed(Idx, 1, /*VerifyUnused=*/false);
+  }
+
+  /// Marks the \p Sz consecutive elements starting at \p ElementIdx as used
+  /// and removes their bits from the unused-bit count. If \p VerifyUnused is
+  /// true, asserts that none of them was used before; otherwise lanes that
+  /// are already used are left untouched so that nothing is counted twice.
+  void setUsed(unsigned ElementIdx, unsigned Sz = 1, bool VerifyUnused = true) {
+    assert(ElementIdx + Sz <= Seeds.size() && "Range is out of bounds!");
+    if (ElementIdx + Sz > UsedLanes.size())
+      UsedLanes.resize(ElementIdx + Sz);
+    for (unsigned Idx : seq<unsigned>(ElementIdx, ElementIdx + Sz)) {
+      assert((!VerifyUnused || !UsedLanes.test(Idx)) &&
+             "Already marked as used!");
+      // Adjust the counters at most once per lane, and per lane rather than
+      // per call, so they stay in sync with UsedLanes for any Sz.
+      if (UsedLanes.test(Idx))
+        continue;
+      UsedLanes.set(Idx);
+      ++UsedLaneCount;
+      NumUnusedBits -= Utils::getNumBits(Seeds[Idx]);
+    }
+  }
+  /// \Returns whether or not \p Element has been used. Indices that were
+  /// never marked (including ones past the tracked range) count as unused.
+  bool isUsed(unsigned Element) const {
+    return Element < UsedLanes.size() && UsedLanes.test(Element);
+  }
+  /// \Returns true if every element of the bundle has been marked used.
+  bool allUsed() const { return UsedLaneCount == Seeds.size(); }
+  /// \Returns the sum of the bit widths of the elements not yet marked used.
+  unsigned getNumUnusedBits() const { return NumUnusedBits; }
+
+  /// \Returns a slice of consecutive unused seed elements, starting at the
+  /// element \p StartIdx, with a total size <= \p MaxVecRegBits, or an empty
+  /// slice if the requirements cannot be met. A slice of fewer than two
+  /// elements is reported as empty. If \p ForcePowOf2 is true, then the
+  /// returned slice will have a total number of bits that is a power of 2.
+  MutableArrayRef<Instruction *>
+  getSlice(unsigned StartIdx, unsigned MaxVecRegBits, bool ForcePowOf2);
+
+protected:
+  /// The candidate instructions, in the order chosen by the client.
+  SmallVector<Instruction *> Seeds;
+  /// The lanes that we have already vectorized.
+  BitVector UsedLanes;
+  /// Tracks used lanes for constant-time accessor.
+  unsigned UsedLaneCount = 0;
+  /// Tracks the remaining bits available to vectorize
+  unsigned NumUnusedBits = 0;
+
+public:
+#ifndef NDEBUG
+  /// Prints one line per element to \p OS: its index followed by either the
+  /// instruction or "[USED]" for lanes that are marked used.
+  void dump(raw_ostream &OS) const {
+    for (auto [ElmIdx, I] : enumerate(*this)) {
+      OS.indent(2) << ElmIdx << ". ";
+      if (isUsed(ElmIdx))
+        OS << "[USED]";
+      else
+        OS << *I;
+      OS << "\n";
+    }
+  }
+  /// Dumps the bundle to the debug stream.
+  LLVM_DUMP_METHOD void dump() const {
+    dump(dbgs());
+    dbgs() << "\n";
+  }
+#endif // NDEBUG
+};
+} // namespace llvm::sandboxir
+#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SEEDCOLLECTOR_H
diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_component_library(LLVMVectorize
   SandboxVectorizer/DependencyGraph.cpp
   SandboxVectorizer/Passes/BottomUpVec.cpp
   SandboxVectorizer/SandboxVectorizer.cpp
+  SandboxVectorizer/SeedCollector.cpp
   SLPVectorizer.cpp
   Vectorize.cpp
   VectorCombine.cpp

diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
@@ -0,0 +1,65 @@
+//===- SeedCollector.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Type.h"
+#include "llvm/SandboxIR/Instruction.h"
+#include "llvm/SandboxIR/Utils.h"
+#include "llvm/Support/Debug.h"
+#include <span>
+
+using namespace llvm;
+namespace llvm::sandboxir {
+
+/// Collects the longest run of consecutive unused seeds that starts at
+/// \p StartIdx and whose accumulated bit width does not exceed
+/// \p MaxVecRegBits. If \p ForcePowerOf2 is set, the run is cut back to its
+/// longest prefix whose total bit width is a power of two.
+/// \Returns the run if it holds at least two elements, otherwise an empty
+/// slice.
+MutableArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx,
+                                                    unsigned MaxVecRegBits,
+                                                    bool ForcePowerOf2) {
+  // Use uint32_t here for compatibility with isPowerOf2_32.
+
+  // BitCount tracks the size of the working slice. From that we can tell
+  // when the working slice's size is a power-of-two and when it exceeds
+  // the legal size in MaxVecRegBits.
+  uint32_t BitCount = 0;
+  uint32_t NumElements = 0;
+  // Tracks the most recent slice where NumElements gave a power-of-2 BitCount
+  uint32_t NumElementsPowerOfTwo = 0;
+  uint32_t BitCountPowerOfTwo = 0;
+  // StartIdx == Seeds.size() is tolerated and simply yields an empty slice.
+  assert(StartIdx <= Seeds.size() && "StartIdx is out of bounds");
+  // Can't start a slice with a used instruction.
+  assert(!isUsed(StartIdx) && "Expected unused at StartIdx");
+  for (Instruction *S : make_range(Seeds.begin() + StartIdx, Seeds.end())) {
+    uint32_t InstBits = Utils::getNumBits(S);
+    // Stop if this instruction is used, or if adding it puts the slice over
+    // the limit.
+    if (isUsed(StartIdx + NumElements) || BitCount + InstBits > MaxVecRegBits)
+      break;
+    NumElements++;
+    BitCount += InstBits;
+    if (ForcePowerOf2 && isPowerOf2_32(BitCount)) {
+      NumElementsPowerOfTwo = NumElements;
+      BitCountPowerOfTwo = BitCount;
+    }
+  }
+  if (ForcePowerOf2) {
+    NumElements = NumElementsPowerOfTwo;
+    BitCount = BitCountPowerOfTwo;
+  }
+
+  // Slices with fewer than two elements are reported as "no slice".
+  if (NumElements <= 1)
+    return {};
+
+  // Only a slice that is actually returned has to honor the power-of-two
+  // request. When no prefix qualified, BitCount is 0, which is not a power of
+  // two, so checking before the early return above would fire spuriously.
+  assert((!ForcePowerOf2 || isPowerOf2_32(BitCount)) &&
+         "Must be a power of two");
+  return MutableArrayRef<Instruction *>(&Seeds[StartIdx], NumElements);
+}
+
+} // namespace llvm::sandboxir
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt
@@ -11,4 +11,5 @@ add_llvm_unittest(SandboxVectorizerTests
   DependencyGraphTest.cpp
   IntervalTest.cpp
   LegalityTest.cpp
-  )
+  SeedCollectorTest.cpp	
+)
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp
@@ -0,0 +1,125 @@
+//===- SeedCollectorTest.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/SandboxIR/Function.h"
+#include "llvm/SandboxIR/Instruction.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Testing/Support/SupportHelpers.h"
+#include "gtest/gtest.h"
+#include <span>
+using namespace llvm;
+
+/// Test fixture that owns the LLVMContext and the Module parsed from the IR
+/// text of a test.
+struct SeedBundleTest : public testing::Test {
+  LLVMContext C;
+  std::unique_ptr<Module> M;
+
+  /// Parses the textual \p IR into M using context \p C. If parsing fails, M
+  /// is left null and the parser diagnostic is printed to stderr.
+  void parseIR(LLVMContext &C, const char *IR) {
+    SMDiagnostic Err;
+    M = parseAssemblyString(IR, Err, C);
+    if (!M)
+      // Report under this test's own name so failures are attributed to the
+      // right file.
+      Err.print("SeedCollectorTest", errs());
+  }
+};
+
+// Exercises the SeedBundle API end to end: both constructors, insertAt, the
+// two setUsed overloads, the used/unused queries and getSlice.
+TEST_F(SeedBundleTest, SeedBundle) {
+  parseIR(C, R"IR(
+define void @foo(float %v0, i32 %i0, i16 %i1, i8 %i2) {
+bb:
+  %add0 = fadd float %v0, %v0
+  %add1 = fadd float %v0, %v0
+  %add2 = add i8 %i2, %i2
+  %add3 = add i16 %i1, %i1
+  %add4 = add i32 %i0, %i0
+  %add5 = add i16 %i1, %i1
+  %add6 = add i8 %i2, %i2
+  %add7 = add i8 %i2, %i2
+  ret void
+}
+)IR");
+  Function &LLVMF = *M->getFunction("foo");
+  sandboxir::Context Ctx(C);
+  auto &F = *Ctx.createFunction(&LLVMF);
+  DataLayout DL(M->getDataLayout());
+  auto *BB = &*F.begin();
+  auto It = BB->begin();
+  auto *I0 = &*It++;
+  auto *I1 = &*It++;
+  // Assume first two instructions are identical in the number of bits.
+  const unsigned I0Bits = sandboxir::Utils::getNumBits(I0, DL);
+
+  // --- Single-instruction bundle ---
+  sandboxir::SeedBundle SB0(I0);
+  EXPECT_EQ(*SB0.begin(), I0);
+  // A fresh bundle reports all of its bits as unused.
+  EXPECT_EQ(SB0.getNumUnusedBits(), I0Bits);
+  // Marking the only seed as used leaves nothing unused.
+  SB0.setUsed(I0);
+  EXPECT_TRUE(SB0.allUsed());
+  EXPECT_TRUE(SB0.isUsed(0));
+  EXPECT_EQ(SB0.getNumUnusedBits(), 0u);
+  // Appending a second seed makes the bundle partially unused again.
+  SB0.insertAt(SB0.end(), I1);
+  EXPECT_NE(*SB0.begin(), I1);
+  EXPECT_EQ(SB0.getNumUnusedBits(), I0Bits);
+  EXPECT_FALSE(SB0.allUsed());
+  EXPECT_EQ(SB0.getFirstUnusedElementIdx(), 1u);
+
+  // --- Multi-instruction bundle built from %add2 through %add7 ---
+  SmallVector<sandboxir::Instruction *> Insts;
+  for (unsigned Cnt = 0; Cnt != 6; ++Cnt)
+    Insts.push_back(&*It++);
+  unsigned BundleBits = 0;
+  for (sandboxir::Instruction *I : Insts)
+    BundleBits += sandboxir::Utils::getNumBits(I);
+  // Ensure the instructions are as expected.
+  EXPECT_EQ(BundleBits, 88u);
+  // Hand a copy to the bundle so that Insts stays available for comparisons.
+  auto SeedsCopy = Insts;
+  sandboxir::SeedBundle SB1(std::move(SeedsCopy));
+  EXPECT_EQ(SB1.getNumUnusedBits(), BundleBits);
+  // Using element 1 first must not hide the still-unused element 0.
+  SB1.setUsed(1);
+  EXPECT_EQ(SB1.getFirstUnusedElementIdx(), 0u);
+  // An unsigned literal selects the index overload rather than the
+  // Instruction * one.
+  SB1.setUsed(0u);
+  // The first unused element is now in the middle of the bundle.
+  EXPECT_EQ(SB1.getFirstUnusedElementIdx(), 2u);
+
+  // getSlice is (StartIdx, MaxVecRegBits, ForcePowerOf2). It's easier to
+  // compare test cases without the parameter-name comments inline.
+  auto Slice0 = SB1.getSlice(2, 64, true);
+  EXPECT_THAT(Slice0,
+              testing::ElementsAre(Insts[2], Insts[3], Insts[4], Insts[5]));
+  auto Slice1 = SB1.getSlice(2, 72, true);
+  EXPECT_THAT(Slice1,
+              testing::ElementsAre(Insts[2], Insts[3], Insts[4], Insts[5]));
+  auto Slice2 = SB1.getSlice(2, 80, true);
+  EXPECT_THAT(Slice2,
+              testing::ElementsAre(Insts[2], Insts[3], Insts[4], Insts[5]));
+
+  // Without the power-of-two requirement the remaining three seeds fit.
+  SB1.setUsed(2);
+  auto Slice3 = SB1.getSlice(3, 64, false);
+  EXPECT_THAT(Slice3, testing::ElementsAre(Insts[3], Insts[4], Insts[5]));
+  // Only a single seed fits into 8 bits, which yields the empty slice.
+  SB1.setUsed(3);
+  auto Slice4 = SB1.getSlice(4, /* MaxVecRegBits */ 8,
+                             /* ForcePowerOf2 */ true);
+  EXPECT_EQ(Slice4.size(), 0u);
+}