TableGen: Generate reverseComposeSubRegIndices #127050
Conversation
This is necessary to enable composing subregisters in peephole-opt. For now, use a brute-force table to find the return value. The worst-case target is AMDGPU, with a 399 x 399 entry table.
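For scale: 399 × 399 is 159,201 entries, and since 400 distinct index values no longer fit in a uint8_t, the emitter picks a 16-bit element type via getMinimalTypeForRange, so the AMDGPU table comes to roughly 318 KB (~311 KiB) of static data.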
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-llvm-regalloc

Author: Matt Arsenault (arsenm)

Changes

This is necessary to enable composing subregisters in peephole-opt.

Full diff: https://github.com/llvm/llvm-project/pull/127050.diff

3 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 114149ff53d85..66fd3fb9b0526 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -732,6 +732,22 @@ class TargetRegisterInfo : public MCRegisterInfo {
return composeSubRegIndicesImpl(a, b);
}
+ /// Return a subregister index that composes with \p a to give the
+ /// subregister index \p b.
+ ///
+ /// Finds a subregister index x such that composeSubRegIndices(a, x) ==
+ /// b. Note that this relationship does not hold if
+ /// reverseComposeSubRegIndices returns the null subregister.
+ ///
+ /// The special null sub-register index composes as the identity.
+ unsigned reverseComposeSubRegIndices(unsigned a, unsigned b) const {
+ if (!a)
+ return b;
+ if (!b)
+ return a;
+ return reverseComposeSubRegIndicesImpl(a, b);
+ }
+
/// Transforms a LaneMask computed for one subregister to the lanemask that
/// would have been computed when composing the subsubregisters with IdxA
/// first. @sa composeSubRegIndices()
@@ -774,6 +790,11 @@ class TargetRegisterInfo : public MCRegisterInfo {
llvm_unreachable("Target has no sub-registers");
}
+ /// Overridden by TableGen in targets that have sub-registers.
+ virtual unsigned reverseComposeSubRegIndicesImpl(unsigned, unsigned) const {
+ llvm_unreachable("Target has no sub-registers");
+ }
+
/// Overridden by TableGen in targets that have sub-registers.
virtual LaneBitmask
composeSubRegIndexLaneMaskImpl(unsigned, LaneBitmask) const {
diff --git a/llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp b/llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp
index d0a3cfa84ee01..8fbd470815b79 100644
--- a/llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp
+++ b/llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp
@@ -164,3 +164,83 @@ TEST(AMDGPU, TestVGPRLimitsPerOccupancy) {
testGPRLimits("VGPR", true, test);
}
+
+static const char *printSubReg(const TargetRegisterInfo &TRI, unsigned SubReg) {
+ return SubReg ? TRI.getSubRegIndexName(SubReg) : "<none>";
+}
+
+TEST(AMDGPU, TestReverseComposeSubRegIndices) {
+ auto TM = createAMDGPUTargetMachine("amdgcn-amd-", "gfx900", "");
+ if (!TM)
+ return;
+ GCNSubtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
+ std::string(TM->getTargetFeatureString()), *TM);
+
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+#define EXPECT_SUBREG_EQ(A, B, Expect) \
+ do { \
+ unsigned Reversed = TRI->reverseComposeSubRegIndices(A, B); \
+ EXPECT_EQ(Reversed, Expect) \
+ << printSubReg(*TRI, A) << ", " << printSubReg(*TRI, B) << " => " \
+ << printSubReg(*TRI, Reversed) << ", *" << printSubReg(*TRI, Expect); \
+ } while (0);
+
+ EXPECT_SUBREG_EQ(AMDGPU::NoSubRegister, AMDGPU::sub0, AMDGPU::sub0);
+ EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::NoSubRegister, AMDGPU::sub0);
+
+ EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub0, AMDGPU::sub0);
+
+ EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub1);
+ EXPECT_SUBREG_EQ(AMDGPU::sub1, AMDGPU::sub0, AMDGPU::NoSubRegister);
+
+ EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1, AMDGPU::sub0, AMDGPU::sub0);
+ EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1);
+
+ EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub0_sub1,
+ AMDGPU::sub0_sub1);
+ EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2_sub3,
+ AMDGPU::sub0_sub1_sub2_sub3);
+
+ EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2,
+ AMDGPU::sub1_sub2);
+ EXPECT_SUBREG_EQ(AMDGPU::sub1_sub2, AMDGPU::sub0_sub1_sub2_sub3,
+ AMDGPU::NoSubRegister);
+
+ EXPECT_SUBREG_EQ(AMDGPU::sub1_sub2_sub3, AMDGPU::sub0_sub1_sub2_sub3,
+ AMDGPU::NoSubRegister);
+ EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3,
+ AMDGPU::sub1_sub2_sub3);
+
+ EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub30, AMDGPU::NoSubRegister);
+ EXPECT_SUBREG_EQ(AMDGPU::sub30, AMDGPU::sub0, AMDGPU::NoSubRegister);
+
+ EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub31, AMDGPU::NoSubRegister);
+ EXPECT_SUBREG_EQ(AMDGPU::sub31, AMDGPU::sub0, AMDGPU::NoSubRegister);
+
+ EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1, AMDGPU::sub30, AMDGPU::NoSubRegister);
+ EXPECT_SUBREG_EQ(AMDGPU::sub30, AMDGPU::sub0_sub1, AMDGPU::NoSubRegister);
+
+ EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1, AMDGPU::sub30_sub31,
+ AMDGPU::NoSubRegister);
+ EXPECT_SUBREG_EQ(AMDGPU::sub30_sub31, AMDGPU::sub0_sub1,
+ AMDGPU::NoSubRegister);
+
+ for (unsigned SubIdx0 = 1, LastSubReg = TRI->getNumSubRegIndices();
+ SubIdx0 != LastSubReg; ++SubIdx0) {
+ for (unsigned SubIdx1 = 1; SubIdx1 != LastSubReg; ++SubIdx1) {
+ if (unsigned ForwardCompose =
+ TRI->composeSubRegIndices(SubIdx0, SubIdx1)) {
+ unsigned ReverseComposed =
+ TRI->reverseComposeSubRegIndices(SubIdx0, ForwardCompose);
+ EXPECT_EQ(ReverseComposed, SubIdx1);
+ }
+
+ if (unsigned ReverseCompose =
+ TRI->reverseComposeSubRegIndices(SubIdx0, SubIdx1)) {
+ unsigned Recompose = TRI->composeSubRegIndices(SubIdx0, ReverseCompose);
+ EXPECT_EQ(Recompose, SubIdx1);
+ }
+ }
+ }
+}
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index 2f9ec2e6e7a22..752ebdf01b948 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -680,8 +680,6 @@ static bool combine(const CodeGenSubRegIndex *Idx,
void RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS,
StringRef ClassName) {
const auto &SubRegIndices = RegBank.getSubRegIndices();
- OS << "unsigned " << ClassName
- << "::composeSubRegIndicesImpl(unsigned IdxA, unsigned IdxB) const {\n";
// Many sub-register indexes are composition-compatible, meaning that
//
@@ -713,7 +711,10 @@ void RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS,
RowMap.push_back(Found);
}
- // Output the row map if there is multiple rows.
+ OS << "unsigned " << ClassName
+ << "::composeSubRegIndicesImpl(unsigned IdxA, unsigned IdxB) const {\n";
+
+ // Output the row map if there are multiple rows.
if (Rows.size() > 1) {
OS << " static const " << getMinimalTypeForRange(Rows.size(), 32)
<< " RowMap[" << SubRegIndicesSize << "] = {\n ";
@@ -743,6 +744,51 @@ void RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS,
else
OS << " return Rows[0][IdxB];\n";
OS << "}\n\n";
+
+ // Generate the reverse case.
+ //
+ // FIXME: This is the brute force approach. Compress the table similar to the
+ // forward case.
+ OS << "unsigned " << ClassName
+ << "::reverseComposeSubRegIndicesImpl(unsigned IdxA, unsigned IdxB) const "
+ "{\n";
+ OS << " static const " << getMinimalTypeForRange(SubRegIndicesSize + 1, 32)
+ << " Table[" << SubRegIndicesSize << "][" << SubRegIndicesSize
+ << "] = {\n";
+
+ // Find values X where composeSubRegIndices(A, X) == B.
+ for (const auto &IdxA : SubRegIndices) {
+ OS << " { ";
+
+ SmallVectorImpl<const CodeGenSubRegIndex *> &Row =
+ Rows[RowMap[IdxA.EnumValue - 1]];
+ for (const auto &IdxB : SubRegIndices) {
+ const CodeGenSubRegIndex *FoundReverse = nullptr;
+
+ for (unsigned i = 0, e = SubRegIndicesSize; i != e; ++i) {
+ const CodeGenSubRegIndex *This = &SubRegIndices[i];
+ const CodeGenSubRegIndex *Composed = Row[i];
+ if (Composed == &IdxB) {
+ if (FoundReverse && FoundReverse != This) // Not unique
+ break;
+ FoundReverse = This;
+ }
+ }
+
+ if (FoundReverse) {
+ OS << FoundReverse->getQualifiedName() << ", ";
+ } else {
+ OS << "0, ";
+ }
+ }
+ OS << "},\n";
+ }
+
+ OS << " };\n\n";
+ OS << " --IdxA; assert(IdxA < " << SubRegIndicesSize << ");\n"
+ << " --IdxB; assert(IdxB < " << SubRegIndicesSize << ");\n";
+ OS << " return Table[IdxA][IdxB];\n";
+ OS << " }\n\n";
}
void RegisterInfoEmitter::emitComposeSubRegIndexLaneMask(raw_ostream &OS,
@@ -1113,6 +1159,8 @@ void RegisterInfoEmitter::runTargetHeader(raw_ostream &OS) {
<< " unsigned PC = 0, unsigned HwMode = 0);\n";
if (!RegBank.getSubRegIndices().empty()) {
OS << " unsigned composeSubRegIndicesImpl"
+ << "(unsigned, unsigned) const override;\n"
+ << " unsigned reverseComposeSubRegIndicesImpl"
<< "(unsigned, unsigned) const override;\n"
<< " LaneBitmask composeSubRegIndexLaneMaskImpl"
<< "(unsigned, LaneBitmask) const override;\n"
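For readers not familiar with the emitter, the generated reverseComposeSubRegIndicesImpl is a plain two-dimensional lookup: for every pair (A, B) the emitter scans A's forward-composition row for an X with compose(A, X) == B and records the first such X (or 0 if none exists). Below is a minimal standalone sketch of that construction; the index set and forward table are invented for illustration (they are not the AMDGPU indices), and the null-index handling mirrors the TargetRegisterInfo wrapper above.

```cpp
#include <array>
#include <cassert>
#include <cstdio>

// Toy sub-register index set: 0 = null, 1 = lo half, 2 = hi half,
// 3 = low quarter (lo of lo), 4 = second quarter (hi of lo).
constexpr unsigned NumIdx = 5;

// Forward composition table: Compose[A][B] == composeSubRegIndices(A, B),
// with 0 meaning the composition is undefined. Row/column 0 are unused
// because the null index is handled by the wrapper, not the table.
constexpr unsigned Compose[NumIdx][NumIdx] = {
    {0, 0, 0, 0, 0},
    {0, 3, 4, 0, 0}, // lo: lo-of-lo = 3, hi-of-lo = 4
    {0, 0, 0, 0, 0}, // hi: its quarters have no named index in this toy set
    {0, 0, 0, 0, 0}, // low quarter: no sub-registers
    {0, 0, 0, 0, 0}, // second quarter: no sub-registers
};

// Mirror of the emitter's scan: Reverse[A][B] is an X with Compose[A][X] == B,
// keeping the first match (and stopping if a second, different match shows
// up); 0 when no X composes to B.
static std::array<std::array<unsigned, NumIdx>, NumIdx> buildReverseTable() {
  std::array<std::array<unsigned, NumIdx>, NumIdx> Rev{};
  for (unsigned A = 1; A != NumIdx; ++A) {
    for (unsigned B = 1; B != NumIdx; ++B) {
      unsigned Found = 0;
      for (unsigned X = 1; X != NumIdx; ++X) {
        if (Compose[A][X] == B) {
          if (Found && Found != X)
            break; // not unique; keep the first match, as the emitter does
          Found = X;
        }
      }
      Rev[A][B] = Found;
    }
  }
  return Rev;
}

int main() {
  const auto Rev = buildReverseTable();
  // Defining property: whenever the reverse lookup is non-null, composing it
  // back onto A reproduces B.
  for (unsigned A = 1; A != NumIdx; ++A)
    for (unsigned B = 1; B != NumIdx; ++B)
      if (unsigned X = Rev[A][B]) {
        assert(Compose[A][X] == B);
        std::printf("reverseCompose(%u, %u) = %u\n", A, B, X);
      }
  return 0;
}
```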