diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h index 114149ff53d85..66fd3fb9b0526 100644 --- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -732,6 +732,22 @@ class TargetRegisterInfo : public MCRegisterInfo { return composeSubRegIndicesImpl(a, b); } + /// Return a subregister index that will compose to give you the subregister + /// index. + /// + /// Finds a subregister index x such that composeSubRegIndices(a, x) == + /// b. Note that this relationship does not hold if + /// reverseComposeSubRegIndices returns the null subregister. + /// + /// The special null sub-register index composes as the identity. + unsigned reverseComposeSubRegIndices(unsigned a, unsigned b) const { + if (!a) + return b; + if (!b) + return a; + return reverseComposeSubRegIndicesImpl(a, b); + } + /// Transforms a LaneMask computed for one subregister to the lanemask that /// would have been computed when composing the subsubregisters with IdxA /// first. @sa composeSubRegIndices() @@ -774,6 +790,11 @@ class TargetRegisterInfo : public MCRegisterInfo { llvm_unreachable("Target has no sub-registers"); } + /// Overridden by TableGen in targets that have sub-registers. + virtual unsigned reverseComposeSubRegIndicesImpl(unsigned, unsigned) const { + llvm_unreachable("Target has no sub-registers"); + } + /// Overridden by TableGen in targets that have sub-registers. 
virtual LaneBitmask composeSubRegIndexLaneMaskImpl(unsigned, LaneBitmask) const { diff --git a/llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp b/llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp index d0a3cfa84ee01..8fbd470815b79 100644 --- a/llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp +++ b/llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp @@ -164,3 +164,83 @@ TEST(AMDGPU, TestVGPRLimitsPerOccupancy) { testGPRLimits("VGPR", true, test); } + +static const char *printSubReg(const TargetRegisterInfo &TRI, unsigned SubReg) { + return SubReg ? TRI.getSubRegIndexName(SubReg) : ""; +} + +TEST(AMDGPU, TestReverseComposeSubRegIndices) { + auto TM = createAMDGPUTargetMachine("amdgcn-amd-", "gfx900", ""); + if (!TM) + return; + GCNSubtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()), + std::string(TM->getTargetFeatureString()), *TM); + + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + +#define EXPECT_SUBREG_EQ(A, B, Expect) \ + do { \ + unsigned Reversed = TRI->reverseComposeSubRegIndices(A, B); \ + EXPECT_EQ(Reversed, Expect) \ + << printSubReg(*TRI, A) << ", " << printSubReg(*TRI, B) << " => " \ + << printSubReg(*TRI, Reversed) << ", *" << printSubReg(*TRI, Expect); \ + } while (0); + + EXPECT_SUBREG_EQ(AMDGPU::NoSubRegister, AMDGPU::sub0, AMDGPU::sub0); + EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::NoSubRegister, AMDGPU::sub0); + + EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub0, AMDGPU::sub0); + + EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub1); + EXPECT_SUBREG_EQ(AMDGPU::sub1, AMDGPU::sub0, AMDGPU::NoSubRegister); + + EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1, AMDGPU::sub0, AMDGPU::sub0); + EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1); + + EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub0_sub1, + AMDGPU::sub0_sub1); + EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2_sub3, + AMDGPU::sub0_sub1_sub2_sub3); + + EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2, + AMDGPU::sub1_sub2); + 
EXPECT_SUBREG_EQ(AMDGPU::sub1_sub2, AMDGPU::sub0_sub1_sub2_sub3, + AMDGPU::NoSubRegister); + + EXPECT_SUBREG_EQ(AMDGPU::sub1_sub2_sub3, AMDGPU::sub0_sub1_sub2_sub3, + AMDGPU::NoSubRegister); + EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3, + AMDGPU::sub1_sub2_sub3); + + EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub30, AMDGPU::NoSubRegister); + EXPECT_SUBREG_EQ(AMDGPU::sub30, AMDGPU::sub0, AMDGPU::NoSubRegister); + + EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub31, AMDGPU::NoSubRegister); + EXPECT_SUBREG_EQ(AMDGPU::sub31, AMDGPU::sub0, AMDGPU::NoSubRegister); + + EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1, AMDGPU::sub30, AMDGPU::NoSubRegister); + EXPECT_SUBREG_EQ(AMDGPU::sub30, AMDGPU::sub0_sub1, AMDGPU::NoSubRegister); + + EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1, AMDGPU::sub30_sub31, + AMDGPU::NoSubRegister); + EXPECT_SUBREG_EQ(AMDGPU::sub30_sub31, AMDGPU::sub0_sub1, + AMDGPU::NoSubRegister); + + for (unsigned SubIdx0 = 1, LastSubReg = TRI->getNumSubRegIndices(); + SubIdx0 != LastSubReg; ++SubIdx0) { + for (unsigned SubIdx1 = 1; SubIdx1 != LastSubReg; ++SubIdx1) { + if (unsigned ForwardCompose = + TRI->composeSubRegIndices(SubIdx0, SubIdx1)) { + unsigned ReverseComposed = + TRI->reverseComposeSubRegIndices(SubIdx0, ForwardCompose); + EXPECT_EQ(ReverseComposed, SubIdx1); + } + + if (unsigned ReverseCompose = + TRI->reverseComposeSubRegIndices(SubIdx0, SubIdx1)) { + unsigned Recompose = TRI->composeSubRegIndices(SubIdx0, ReverseCompose); + EXPECT_EQ(Recompose, SubIdx1); + } + } + } +} diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp index 2f9ec2e6e7a22..752ebdf01b948 100644 --- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp +++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp @@ -680,8 +680,6 @@ static bool combine(const CodeGenSubRegIndex *Idx, void RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS, StringRef ClassName) { const auto &SubRegIndices = RegBank.getSubRegIndices(); - OS << "unsigned " << 
ClassName - << "::composeSubRegIndicesImpl(unsigned IdxA, unsigned IdxB) const {\n"; // Many sub-register indexes are composition-compatible, meaning that // @@ -713,7 +711,10 @@ void RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS, RowMap.push_back(Found); } - // Output the row map if there is multiple rows. + OS << "unsigned " << ClassName + << "::composeSubRegIndicesImpl(unsigned IdxA, unsigned IdxB) const {\n"; + + // Output the row map if there are multiple rows. if (Rows.size() > 1) { OS << " static const " << getMinimalTypeForRange(Rows.size(), 32) << " RowMap[" << SubRegIndicesSize << "] = {\n "; @@ -743,6 +744,51 @@ void RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS, else OS << " return Rows[0][IdxB];\n"; OS << "}\n\n"; + + // Generate the reverse case. + // + // FIXME: This is the brute force approach. Compress the table similar to the + // forward case. + OS << "unsigned " << ClassName + << "::reverseComposeSubRegIndicesImpl(unsigned IdxA, unsigned IdxB) const " + "{\n"; + OS << " static const " << getMinimalTypeForRange(SubRegIndicesSize + 1, 32) + << " Table[" << SubRegIndicesSize << "][" << SubRegIndicesSize + << "] = {\n"; + + // Find values where composeSubReg(A, X) == B; + for (const auto &IdxA : SubRegIndices) { + OS << " { "; + + SmallVectorImpl<const CodeGenSubRegIndex *> &Row = + Rows[RowMap[IdxA.EnumValue - 1]]; + for (const auto &IdxB : SubRegIndices) { + const CodeGenSubRegIndex *FoundReverse = nullptr; + + for (unsigned i = 0, e = SubRegIndicesSize; i != e; ++i) { + const CodeGenSubRegIndex *This = &SubRegIndices[i]; + const CodeGenSubRegIndex *Composed = Row[i]; + if (Composed == &IdxB) { + if (FoundReverse && FoundReverse != This) // Not unique + break; + FoundReverse = This; + } + } + + if (FoundReverse) { + OS << FoundReverse->getQualifiedName() << ", "; + } else { + OS << "0, "; + } + } + OS << "},\n"; + } + + OS << " };\n\n"; + OS << " --IdxA; assert(IdxA < " << SubRegIndicesSize << ");\n" + << " --IdxB; assert(IdxB < " << 
SubRegIndicesSize << ");\n"; + OS << " return Table[IdxA][IdxB];\n"; + OS << " }\n\n"; } void RegisterInfoEmitter::emitComposeSubRegIndexLaneMask(raw_ostream &OS, @@ -1113,6 +1159,8 @@ void RegisterInfoEmitter::runTargetHeader(raw_ostream &OS) { << " unsigned PC = 0, unsigned HwMode = 0);\n"; if (!RegBank.getSubRegIndices().empty()) { OS << " unsigned composeSubRegIndicesImpl" + << "(unsigned, unsigned) const override;\n" + << " unsigned reverseComposeSubRegIndicesImpl" << "(unsigned, unsigned) const override;\n" << " LaneBitmask composeSubRegIndexLaneMaskImpl" << "(unsigned, LaneBitmask) const override;\n"