Skip to content

[SandboxVectorizer] Define SeedBundle: a set of instructions to be vectorized [retry] #111073

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Oct 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
//===- SeedCollector.h ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file contains the mechanism for collecting the seed instructions that
// are used as starting points for forming the vectorization graph.
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SEEDCOLLECTOR_H
#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SEEDCOLLECTOR_H

#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/SandboxIR/Instruction.h"
#include "llvm/SandboxIR/Utils.h"
#include "llvm/SandboxIR/Value.h"
#include <iterator>
#include <memory>

namespace llvm::sandboxir {

/// A set of candidate Instructions for vectorizing together.
class SeedBundle {
public:
/// Initialize a bundle with \p I.
explicit SeedBundle(Instruction *I) { insertAt(begin(), I); }
explicit SeedBundle(SmallVector<Instruction *> &&L) : Seeds(std::move(L)) {
for (auto &S : Seeds)
NumUnusedBits += Utils::getNumBits(S);
}
/// No need to allow copies.
SeedBundle(const SeedBundle &) = delete;
SeedBundle &operator=(const SeedBundle &) = delete;
virtual ~SeedBundle() {}

using iterator = SmallVector<Instruction *>::iterator;
using const_iterator = SmallVector<Instruction *>::const_iterator;
iterator begin() { return Seeds.begin(); }
iterator end() { return Seeds.end(); }
const_iterator begin() const { return Seeds.begin(); }
const_iterator end() const { return Seeds.end(); }

Instruction *operator[](unsigned Idx) const { return Seeds[Idx]; }

/// Insert \p I into position \p P. Clients should choose Pos
/// by symbol, symbol-offset, and program order (which depends if scheduling
/// bottom-up or top-down).
void insertAt(iterator Pos, Instruction *I) {
#ifdef EXPENSIVE_CHECKS
for (auto Itr : Seeds) {
assert(*Itr != I && "Attempt to insert an instruction twice.");
}
#endif
Seeds.insert(Pos, I);
NumUnusedBits += Utils::getNumBits(I);
}

unsigned getFirstUnusedElementIdx() const {
for (unsigned ElmIdx : seq<unsigned>(0, Seeds.size()))
if (!isUsed(ElmIdx))
return ElmIdx;
return Seeds.size();
}
/// Marks instruction \p I "used" within the bundle. Clients
/// use this property when assembling a vectorized instruction from
/// the seeds in a bundle. This allows constant time evaluation
/// and "removal" from the list.
void setUsed(Instruction *I) {
auto It = std::find(begin(), end(), I);
assert(It != end() && "Instruction not in the bundle!");
auto Idx = It - begin();
setUsed(Idx, 1, /*VerifyUnused=*/false);
}

void setUsed(unsigned ElementIdx, unsigned Sz = 1, bool VerifyUnused = true) {
if (ElementIdx + Sz >= UsedLanes.size())
UsedLanes.resize(ElementIdx + Sz);
for (unsigned Idx : seq<unsigned>(ElementIdx, ElementIdx + Sz)) {
assert((!VerifyUnused || !UsedLanes.test(Idx)) &&
"Already marked as used!");
UsedLanes.set(Idx);
UsedLaneCount++;
}
NumUnusedBits -= Utils::getNumBits(Seeds[ElementIdx]);
}
/// \Returns whether or not \p Element has been used.
bool isUsed(unsigned Element) const {
return Element < UsedLanes.size() && UsedLanes.test(Element);
}
bool allUsed() const { return UsedLaneCount == Seeds.size(); }
unsigned getNumUnusedBits() const { return NumUnusedBits; }

/// \Returns a slice of seed elements, starting at the element \p StartIdx,
/// with a total size <= \p MaxVecRegBits, or an empty slice if the
/// requirements cannot be met . If \p ForcePowOf2 is true, then the returned
/// slice will have a total number of bits that is a power of 2.
MutableArrayRef<Instruction *>
getSlice(unsigned StartIdx, unsigned MaxVecRegBits, bool ForcePowOf2);

protected:
SmallVector<Instruction *> Seeds;
/// The lanes that we have already vectorized.
BitVector UsedLanes;
/// Tracks used lanes for constant-time accessor.
unsigned UsedLaneCount = 0;
/// Tracks the remaining bits available to vectorize
unsigned NumUnusedBits = 0;

public:
#ifndef NDEBUG
void dump(raw_ostream &OS) const {
for (auto [ElmIdx, I] : enumerate(*this)) {
OS.indent(2) << ElmIdx << ". ";
if (isUsed(ElmIdx))
OS << "[USED]";
else
OS << *I;
OS << "\n";
}
}
LLVM_DUMP_METHOD void dump() const {
dump(dbgs());
dbgs() << "\n";
}
#endif // NDEBUG
};
} // namespace llvm::sandboxir
#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SEEDCOLLECTOR_H
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ add_llvm_component_library(LLVMVectorize
SandboxVectorizer/DependencyGraph.cpp
SandboxVectorizer/Passes/BottomUpVec.cpp
SandboxVectorizer/SandboxVectorizer.cpp
SandboxVectorizer/SeedCollector.cpp
SLPVectorizer.cpp
Vectorize.cpp
VectorCombine.cpp
Expand Down
65 changes: 65 additions & 0 deletions llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
//===- SeedCollection.cpp -0000000----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Type.h"
#include "llvm/SandboxIR/Instruction.h"
#include "llvm/SandboxIR/Utils.h"
#include "llvm/Support/Debug.h"
#include <span>

using namespace llvm;
namespace llvm::sandboxir {

MutableArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx,
unsigned MaxVecRegBits,
bool ForcePowerOf2) {
// Use uint32_t here for compatibility with IsPowerOf2_32

// BitCount tracks the size of the working slice. From that we can tell
// when the working slice's size is a power-of-two and when it exceeds
// the legal size in MaxVecBits.
uint32_t BitCount = 0;
uint32_t NumElements = 0;
// Tracks the most recent slice where NumElements gave a power-of-2 BitCount
uint32_t NumElementsPowerOfTwo = 0;
uint32_t BitCountPowerOfTwo = 0;
// Can't start a slice with a used instruction.
assert(!isUsed(StartIdx) && "Expected unused at StartIdx");
for (auto S : make_range(Seeds.begin() + StartIdx, Seeds.end())) {
uint32_t InstBits = Utils::getNumBits(S);
// Stop if this instruction is used, or if adding it puts the slice over
// the limit.
if (isUsed(StartIdx + NumElements) || BitCount + InstBits > MaxVecRegBits)
break;
NumElements++;
BitCount += InstBits;
if (ForcePowerOf2 && isPowerOf2_32(BitCount)) {
NumElementsPowerOfTwo = NumElements;
BitCountPowerOfTwo = BitCount;
}
}
if (ForcePowerOf2) {
NumElements = NumElementsPowerOfTwo;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

won't this potentially make the slice contain a "used" instruction that we didn't check for? e.g. 0-2 are unused but 3 is used, force power of 2 will make the slice contain 3 which is used

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The early exit on line 42 guarantees that there will never be a used instruction between StartIdx and NumElements.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh I see, we only set NumElementsPowerOfTwo if the seed is unused

BitCount = BitCountPowerOfTwo;
}

assert((!ForcePowerOf2 || isPowerOf2_32(BitCount)) &&
"Must be a power of two");
// Return any non-empty slice
if (NumElements > 1)
return MutableArrayRef<Instruction *>(&Seeds[StartIdx], NumElements);
else
return {};
}

} // namespace llvm::sandboxir
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ add_llvm_unittest(SandboxVectorizerTests
DependencyGraphTest.cpp
IntervalTest.cpp
LegalityTest.cpp
)
SeedCollectorTest.cpp
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
//===- SeedCollectorTest.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/SandboxIR/Function.h"
#include "llvm/SandboxIR/Instruction.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Testing/Support/SupportHelpers.h"
#include "gtest/gtest.h"
#include <span>
using namespace llvm;

struct SeedBundleTest : public testing::Test {
LLVMContext C;
std::unique_ptr<Module> M;

void parseIR(LLVMContext &C, const char *IR) {
SMDiagnostic Err;
M = parseAssemblyString(IR, Err, C);
if (!M)
Err.print("LegalityTest", errs());
}
};

TEST_F(SeedBundleTest, SeedBundle) {
parseIR(C, R"IR(
define void @foo(float %v0, i32 %i0, i16 %i1, i8 %i2) {
bb:
%add0 = fadd float %v0, %v0
%add1 = fadd float %v0, %v0
%add2 = add i8 %i2, %i2
%add3 = add i16 %i1, %i1
%add4 = add i32 %i0, %i0
%add5 = add i16 %i1, %i1
%add6 = add i8 %i2, %i2
%add7 = add i8 %i2, %i2
ret void
}
)IR");
Function &LLVMF = *M->getFunction("foo");
sandboxir::Context Ctx(C);
auto &F = *Ctx.createFunction(&LLVMF);
DataLayout DL(M->getDataLayout());
auto *BB = &*F.begin();
auto It = BB->begin();
auto *I0 = &*It++;
auto *I1 = &*It++;
// Assume first two instructions are identical in the number of bits.
const unsigned IOBits = sandboxir::Utils::getNumBits(I0, DL);
// Constructor
sandboxir::SeedBundle SBO(I0);
EXPECT_EQ(*SBO.begin(), I0);
// getNumUnusedBits after constructor
EXPECT_EQ(SBO.getNumUnusedBits(), IOBits);
// setUsed
SBO.setUsed(I0);
// allUsed
EXPECT_TRUE(SBO.allUsed());
// isUsed
EXPECT_TRUE(SBO.isUsed(0));
// getNumUnusedBits after setUsed
EXPECT_EQ(SBO.getNumUnusedBits(), 0u);
// insertAt
SBO.insertAt(SBO.end(), I1);
EXPECT_NE(*SBO.begin(), I1);
// getNumUnusedBits after insertAt
EXPECT_EQ(SBO.getNumUnusedBits(), IOBits);
// allUsed
EXPECT_FALSE(SBO.allUsed());
// getFirstUnusedElement
EXPECT_EQ(SBO.getFirstUnusedElementIdx(), 1u);

SmallVector<sandboxir::Instruction *> Insts;
// add2 through add7
Insts.push_back(&*It++);
Insts.push_back(&*It++);
Insts.push_back(&*It++);
Insts.push_back(&*It++);
Insts.push_back(&*It++);
Insts.push_back(&*It++);
unsigned BundleBits = 0;
for (auto &S : Insts)
BundleBits += sandboxir::Utils::getNumBits(S);
// Ensure the instructions are as expected.
EXPECT_EQ(BundleBits, 88u);
auto Seeds = Insts;
// Constructor
sandboxir::SeedBundle SB1(std::move(Seeds));
// getNumUnusedBits after constructor
EXPECT_EQ(SB1.getNumUnusedBits(), BundleBits);
// setUsed with index
SB1.setUsed(1);
// getFirstUnusedElementIdx
EXPECT_EQ(SB1.getFirstUnusedElementIdx(), 0u);
SB1.setUsed(unsigned(0));
// getFirstUnusedElementIdx not at end
EXPECT_EQ(SB1.getFirstUnusedElementIdx(), 2u);

// getSlice is (StartIdx, MaxVecRegBits, ForcePowerOf2). It's easier to
// compare test cases without the parameter-name comments inline.
auto Slice0 = SB1.getSlice(2, 64, true);
EXPECT_THAT(Slice0,
testing::ElementsAre(Insts[2], Insts[3], Insts[4], Insts[5]));
auto Slice1 = SB1.getSlice(2, 72, true);
EXPECT_THAT(Slice1,
testing::ElementsAre(Insts[2], Insts[3], Insts[4], Insts[5]));
auto Slice2 = SB1.getSlice(2, 80, true);
EXPECT_THAT(Slice2,
testing::ElementsAre(Insts[2], Insts[3], Insts[4], Insts[5]));

SB1.setUsed(2);
auto Slice3 = SB1.getSlice(3, 64, false);
EXPECT_THAT(Slice3, testing::ElementsAre(Insts[3], Insts[4], Insts[5]));
// getSlice empty case
SB1.setUsed(3);
auto Slice4 = SB1.getSlice(4, /* MaxVecRegBits */ 8,
/* ForcePowerOf2 */ true);
EXPECT_EQ(Slice4.size(), 0u);
}
Loading