Skip to content

Commit ab2d330

Browse files
authored
TableGen: Generate reverseComposeSubRegIndices (#127050)
This is necessary to enable composing subregisters in peephole-opt. For now, use a brute-force table to find the return value. The worst-case target is AMDGPU, with a 399 x 399 entry table.
1 parent 9d48705 commit ab2d330

File tree

3 files changed

+152
-3
lines changed

3 files changed

+152
-3
lines changed

llvm/include/llvm/CodeGen/TargetRegisterInfo.h

+21
Original file line numberDiff line numberDiff line change
@@ -732,6 +732,22 @@ class TargetRegisterInfo : public MCRegisterInfo {
732732
return composeSubRegIndicesImpl(a, b);
733733
}
734734

735+
/// Return a subregister index that will compose to give you the subregister
736+
/// index.
737+
///
738+
/// Finds a subregister index x such that composeSubRegIndices(a, x) ==
739+
/// b. Note that this relationship does not hold if
740+
/// reverseComposeSubRegIndices returns the null subregister.
741+
///
742+
/// The special null sub-register index composes as the identity.
743+
unsigned reverseComposeSubRegIndices(unsigned a, unsigned b) const {
744+
if (!a)
745+
return b;
746+
if (!b)
747+
return a;
748+
return reverseComposeSubRegIndicesImpl(a, b);
749+
}
750+
735751
/// Transforms a LaneMask computed for one subregister to the lanemask that
736752
/// would have been computed when composing the subsubregisters with IdxA
737753
/// first. @sa composeSubRegIndices()
@@ -774,6 +790,11 @@ class TargetRegisterInfo : public MCRegisterInfo {
774790
llvm_unreachable("Target has no sub-registers");
775791
}
776792

793+
/// Overridden by TableGen in targets that have sub-registers.
794+
virtual unsigned reverseComposeSubRegIndicesImpl(unsigned, unsigned) const {
795+
llvm_unreachable("Target has no sub-registers");
796+
}
797+
777798
/// Overridden by TableGen in targets that have sub-registers.
778799
virtual LaneBitmask
779800
composeSubRegIndexLaneMaskImpl(unsigned, LaneBitmask) const {

llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp

+80
Original file line numberDiff line numberDiff line change
@@ -164,3 +164,83 @@ TEST(AMDGPU, TestVGPRLimitsPerOccupancy) {
164164

165165
testGPRLimits("VGPR", true, test);
166166
}
167+
168+
static const char *printSubReg(const TargetRegisterInfo &TRI, unsigned SubReg) {
169+
return SubReg ? TRI.getSubRegIndexName(SubReg) : "<none>";
170+
}
171+
172+
/// Exercises TargetRegisterInfo::reverseComposeSubRegIndices on AMDGPU:
/// first against a set of hand-picked index pairs, then exhaustively by
/// round-tripping every pair of non-null sub-register indices through
/// composeSubRegIndices.
TEST(AMDGPU, TestReverseComposeSubRegIndices) {
  auto TM = createAMDGPUTargetMachine("amdgcn-amd-", "gfx900", "");
  // No target machine could be created; there is nothing to check.
  if (!TM)
    return;
  GCNSubtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
                  std::string(TM->getTargetFeatureString()), *TM);

  const SIRegisterInfo *TRI = ST.getRegisterInfo();

// Checks that reverseComposeSubRegIndices(A, B) == Expect and, on failure,
// prints "A, B => actual, *expected" using sub-register index names.
// Deliberately no ';' after `while (0)`: the caller's own ';' terminates the
// statement, which keeps the macro safe inside unbraced if/else.
#define EXPECT_SUBREG_EQ(A, B, Expect)                                         \
  do {                                                                         \
    unsigned Reversed = TRI->reverseComposeSubRegIndices(A, B);                \
    EXPECT_EQ(Reversed, Expect)                                                \
        << printSubReg(*TRI, A) << ", " << printSubReg(*TRI, B) << " => "      \
        << printSubReg(*TRI, Reversed) << ", *" << printSubReg(*TRI, Expect);  \
  } while (0)

  // The null sub-register index behaves as the identity on either side.
  EXPECT_SUBREG_EQ(AMDGPU::NoSubRegister, AMDGPU::sub0, AMDGPU::sub0);
  EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::NoSubRegister, AMDGPU::sub0);

  EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub0, AMDGPU::sub0);

  EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub1);
  EXPECT_SUBREG_EQ(AMDGPU::sub1, AMDGPU::sub0, AMDGPU::NoSubRegister);

  EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1, AMDGPU::sub0, AMDGPU::sub0);
  EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1);

  EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub0_sub1,
                   AMDGPU::sub0_sub1);
  EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2_sub3,
                   AMDGPU::sub0_sub1_sub2_sub3);

  EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2,
                   AMDGPU::sub1_sub2);
  EXPECT_SUBREG_EQ(AMDGPU::sub1_sub2, AMDGPU::sub0_sub1_sub2_sub3,
                   AMDGPU::NoSubRegister);

  EXPECT_SUBREG_EQ(AMDGPU::sub1_sub2_sub3, AMDGPU::sub0_sub1_sub2_sub3,
                   AMDGPU::NoSubRegister);
  EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3,
                   AMDGPU::sub1_sub2_sub3);

  // Pairs for which no reverse composition is expected, in either order.
  EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub30, AMDGPU::NoSubRegister);
  EXPECT_SUBREG_EQ(AMDGPU::sub30, AMDGPU::sub0, AMDGPU::NoSubRegister);

  EXPECT_SUBREG_EQ(AMDGPU::sub0, AMDGPU::sub31, AMDGPU::NoSubRegister);
  EXPECT_SUBREG_EQ(AMDGPU::sub31, AMDGPU::sub0, AMDGPU::NoSubRegister);

  EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1, AMDGPU::sub30, AMDGPU::NoSubRegister);
  EXPECT_SUBREG_EQ(AMDGPU::sub30, AMDGPU::sub0_sub1, AMDGPU::NoSubRegister);

  EXPECT_SUBREG_EQ(AMDGPU::sub0_sub1, AMDGPU::sub30_sub31,
                   AMDGPU::NoSubRegister);
  EXPECT_SUBREG_EQ(AMDGPU::sub30_sub31, AMDGPU::sub0_sub1,
                   AMDGPU::NoSubRegister);

// The macro is local to this test; do not leak it into the rest of the file.
#undef EXPECT_SUBREG_EQ

  // Exhaustive round trip over every pair of non-null sub-register indices
  // (index 0 is the null index, so iteration starts at 1).
  for (unsigned SubIdx0 = 1, LastSubReg = TRI->getNumSubRegIndices();
       SubIdx0 != LastSubReg; ++SubIdx0) {
    for (unsigned SubIdx1 = 1; SubIdx1 != LastSubReg; ++SubIdx1) {
      // Forward then reverse: whenever compose(SubIdx0, SubIdx1) exists,
      // reversing it from SubIdx0 must give back SubIdx1.
      if (unsigned ForwardCompose =
              TRI->composeSubRegIndices(SubIdx0, SubIdx1)) {
        unsigned ReverseComposed =
            TRI->reverseComposeSubRegIndices(SubIdx0, ForwardCompose);
        EXPECT_EQ(ReverseComposed, SubIdx1)
            << "compose(" << printSubReg(*TRI, SubIdx0) << ", "
            << printSubReg(*TRI, SubIdx1)
            << ") = " << printSubReg(*TRI, ForwardCompose)
            << ", reverseCompose gave " << printSubReg(*TRI, ReverseComposed);
      }

      // Reverse then forward: whenever reverseCompose(SubIdx0, SubIdx1) is
      // non-null, composing it onto SubIdx0 must give back SubIdx1.
      if (unsigned ReverseCompose =
              TRI->reverseComposeSubRegIndices(SubIdx0, SubIdx1)) {
        unsigned Recompose = TRI->composeSubRegIndices(SubIdx0, ReverseCompose);
        EXPECT_EQ(Recompose, SubIdx1)
            << "reverseCompose(" << printSubReg(*TRI, SubIdx0) << ", "
            << printSubReg(*TRI, SubIdx1)
            << ") = " << printSubReg(*TRI, ReverseCompose)
            << ", compose gave " << printSubReg(*TRI, Recompose);
      }
    }
  }
}

llvm/utils/TableGen/RegisterInfoEmitter.cpp

+51-3
Original file line numberDiff line numberDiff line change
@@ -680,8 +680,6 @@ static bool combine(const CodeGenSubRegIndex *Idx,
680680
void RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS,
681681
StringRef ClassName) {
682682
const auto &SubRegIndices = RegBank.getSubRegIndices();
683-
OS << "unsigned " << ClassName
684-
<< "::composeSubRegIndicesImpl(unsigned IdxA, unsigned IdxB) const {\n";
685683

686684
// Many sub-register indexes are composition-compatible, meaning that
687685
//
@@ -713,7 +711,10 @@ void RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS,
713711
RowMap.push_back(Found);
714712
}
715713

716-
// Output the row map if there is multiple rows.
714+
OS << "unsigned " << ClassName
715+
<< "::composeSubRegIndicesImpl(unsigned IdxA, unsigned IdxB) const {\n";
716+
717+
// Output the row map if there are multiple rows.
717718
if (Rows.size() > 1) {
718719
OS << " static const " << getMinimalTypeForRange(Rows.size(), 32)
719720
<< " RowMap[" << SubRegIndicesSize << "] = {\n ";
@@ -743,6 +744,51 @@ void RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS,
743744
else
744745
OS << " return Rows[0][IdxB];\n";
745746
OS << "}\n\n";
747+
748+
// Generate the reverse case.
749+
//
750+
// FIXME: This is the brute force approach. Compress the table similar to the
751+
// forward case.
752+
OS << "unsigned " << ClassName
753+
<< "::reverseComposeSubRegIndicesImpl(unsigned IdxA, unsigned IdxB) const "
754+
"{\n";
755+
OS << " static const " << getMinimalTypeForRange(SubRegIndicesSize + 1, 32)
756+
<< " Table[" << SubRegIndicesSize << "][" << SubRegIndicesSize
757+
<< "] = {\n";
758+
759+
// Find values X where composeSubRegIndices(A, X) == B.
760+
for (const auto &IdxA : SubRegIndices) {
761+
OS << " { ";
762+
763+
SmallVectorImpl<const CodeGenSubRegIndex *> &Row =
764+
Rows[RowMap[IdxA.EnumValue - 1]];
765+
for (const auto &IdxB : SubRegIndices) {
766+
const CodeGenSubRegIndex *FoundReverse = nullptr;
767+
768+
for (unsigned i = 0, e = SubRegIndicesSize; i != e; ++i) {
769+
const CodeGenSubRegIndex *This = &SubRegIndices[i];
770+
const CodeGenSubRegIndex *Composed = Row[i];
771+
if (Composed == &IdxB) {
772+
if (FoundReverse && FoundReverse != This) // Not unique
773+
break;
774+
FoundReverse = This;
775+
}
776+
}
777+
778+
if (FoundReverse) {
779+
OS << FoundReverse->getQualifiedName() << ", ";
780+
} else {
781+
OS << "0, ";
782+
}
783+
}
784+
OS << "},\n";
785+
}
786+
787+
OS << " };\n\n";
788+
OS << " --IdxA; assert(IdxA < " << SubRegIndicesSize << ");\n"
789+
<< " --IdxB; assert(IdxB < " << SubRegIndicesSize << ");\n";
790+
OS << " return Table[IdxA][IdxB];\n";
791+
OS << " }\n\n";
746792
}
747793

748794
void RegisterInfoEmitter::emitComposeSubRegIndexLaneMask(raw_ostream &OS,
@@ -1113,6 +1159,8 @@ void RegisterInfoEmitter::runTargetHeader(raw_ostream &OS) {
11131159
<< " unsigned PC = 0, unsigned HwMode = 0);\n";
11141160
if (!RegBank.getSubRegIndices().empty()) {
11151161
OS << " unsigned composeSubRegIndicesImpl"
1162+
<< "(unsigned, unsigned) const override;\n"
1163+
<< " unsigned reverseComposeSubRegIndicesImpl"
11161164
<< "(unsigned, unsigned) const override;\n"
11171165
<< " LaneBitmask composeSubRegIndexLaneMaskImpl"
11181166
<< "(unsigned, LaneBitmask) const override;\n"

0 commit comments

Comments
 (0)