From 3a8b95f70f9ba5ac3a206921aa3aa471fbcb1a3e Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Tue, 3 Jun 2025 22:25:00 -0700 Subject: [PATCH 1/3] [LLVM][TableGen][DecoderEmitter] Add option to use lambdas in decodeToMCInst Add option `use-lambda-in-decode-to-mcinst` to use a table of lambdas instead of a switch case in the generated `decodeToMCInst` function. When the number of switch cases in this function is large, the generated code takes a long time to compile in release builds. Using a table of lambdas instead improves the compile time significantly (~3x speedup in compiling the code in a downstream target). This option will allow targets to opt into this mode if they desire for better build times. Tested with `check-llvm-mc` with the option enabled by default. --- llvm/test/TableGen/DecoderEmitterLambda.td | 84 ++++++++++++++++++++++ llvm/utils/TableGen/DecoderEmitter.cpp | 55 +++++++++++--- 2 files changed, 131 insertions(+), 8 deletions(-) create mode 100644 llvm/test/TableGen/DecoderEmitterLambda.td diff --git a/llvm/test/TableGen/DecoderEmitterLambda.td b/llvm/test/TableGen/DecoderEmitterLambda.td new file mode 100644 index 0000000000000..4926c8d7def66 --- /dev/null +++ b/llvm/test/TableGen/DecoderEmitterLambda.td @@ -0,0 +1,84 @@ +// RUN: llvm-tblgen -gen-disassembler -use-lambda-in-decode-to-mcinst -I %p/../../include %s | FileCheck %s + +include "llvm/Target/Target.td" + +def archInstrInfo : InstrInfo { } + +def arch : Target { + let InstructionSet = archInstrInfo; +} + +let Namespace = "arch" in { + def R0 : Register<"r0">; + def R1 : Register<"r1">; + def R2 : Register<"r2">; + def R3 : Register<"r3">; +} +def Regs : RegisterClass<"Regs", [i32], 32, (add R0, R1, R2, R3)>; + +class TestInstruction : Instruction { + let Size = 1; + let OutOperandList = (outs); + field bits<8> Inst; + field bits<8> SoftFail = 0; +} + +// Define instructions to generate 4 cases in decodeToMCInst. +// Lower 2 bits define the number of operands. 
Each register operand +// needs 2 bits to encode. + +// An instruction with no inputs. Encoded with lower 2 bits = 0 and upper +// 6 bits = 0 as well. +def Inst0 : TestInstruction { + let Inst = 0x0; + let InOperandList = (ins); + let AsmString = "Inst0"; +} + +// An instruction with a single input. Encoded with lower 2 bits = 1 and the +// single input in bits 2-3. +def Inst1 : TestInstruction { + bits<2> r0; + let Inst{1-0} = 1; + let Inst{3-2} = r0; + let InOperandList = (ins Regs:$r0); + let AsmString = "Inst1"; +} + +// An instruction with two inputs. Encoded with lower 2 bits = 2 and the +// inputs in bits 2-3 and 4-5. +def Inst2 : TestInstruction { + bits<2> r0; + bits<2> r1; + let Inst{1-0} = 2; + let Inst{3-2} = r0; + let Inst{5-4} = r1; + let InOperandList = (ins Regs:$r0, Regs:$r1); + let AsmString = "Inst2"; +} + +// An instruction with three inputs. Encoded with lower 2 bits = 3 and the +// inputs in bits 2-3 and 4-5 and 6-7. +def Inst3 : TestInstruction { + bits<2> r0; + bits<2> r1; + bits<2> r2; + let Inst{1-0} = 3; + let Inst{3-2} = r0; + let Inst{5-4} = r1; + let Inst{7-6} = r2; + let InOperandList = (ins Regs:$r0, Regs:$r1, Regs:$r2); + let AsmString = "Inst3"; +} + +// CHECK-LABEL: decodeToMCInst +// CHECK: decodeLambda0 = +// CHECK: decodeLambda1 = +// CHECK: decodeLambda2 = +// CHECK: decodeLambda3 = +// CHECK: decodeLambdaTable[] +// CHECK-NEXT: decodeLambda0 +// CHECK-NEXT: decodeLambda1 +// CHECK-NEXT: decodeLambda2 +// CHECK-NEXT: decodeLambda3 +// CHECK: return decodeLambdaTable[Idx] diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp index 2e8ff2aa47d96..824b0ad2a2645 100644 --- a/llvm/utils/TableGen/DecoderEmitter.cpp +++ b/llvm/utils/TableGen/DecoderEmitter.cpp @@ -83,6 +83,13 @@ static cl::opt LargeTable( "in the table instead of the default 16 bits."), cl::init(false), cl::cat(DisassemblerEmitterCat)); +static cl::opt UseLambdaInDecodetoMCInst( + "use-lambda-in-decode-to-mcinst", + 
cl::desc("Use a table of lambdas instead of a switch case in the\n" + "generated `decodeToMCInst` function. Helps improve compile time\n" + "of the generated code."), + cl::init(false), cl::cat(DisassemblerEmitterCat)); + STATISTIC(NumEncodings, "Number of encodings considered"); STATISTIC(NumEncodingsLackingDisasm, "Number of encodings without disassembler info"); @@ -1082,15 +1089,47 @@ void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS, << "using TmpType = " "std::conditional_t::" "value, InsnType, uint64_t>;\n"; - OS << Indent << "TmpType tmp;\n"; - OS << Indent << "switch (Idx) {\n"; - OS << Indent << "default: llvm_unreachable(\"Invalid index!\");\n"; - for (const auto &[Index, Decoder] : enumerate(Decoders)) { - OS << Indent << "case " << Index << ":\n"; - OS << Decoder; - OS << Indent + 2 << "return S;\n"; + + if (UseLambdaInDecodetoMCInst) { + // Emit one lambda for each case first. + for (const auto &[Index, Decoder] : enumerate(Decoders)) { + OS << Indent << "auto decodeLambda" << Index << " = [](DecodeStatus S,\n" + << Indent << " InsnType insn, MCInst &MI,\n" + << Indent << " uint64_t Address, \n" + << Indent << " const MCDisassembler *Decoder,\n" + << Indent << " bool &DecodeComplete) {\n"; + OS << Indent + 2 << "[[maybe_unused]] TmpType tmp;\n"; + OS << Decoder; + OS << Indent + 2 << "return S;\n"; + OS << Indent << "};\n"; + } + // Build a table of lambdas. 
+ + OS << R"( + using LambdaTy = + function_ref; + )"; + OS << Indent << "const static LambdaTy decodeLambdaTable[] = {\n"; + for (size_t Index : llvm::seq(Decoders.size())) + OS << Indent + 2 << "decodeLambda" << Index << ",\n"; + OS << Indent << "};\n"; + OS << Indent << "if (Idx >= " << Decoders.size() << ")\n"; + OS << Indent + 2 << "llvm_unreachable(\"Invalid index!\");\n"; + OS << Indent + << "return decodeLambdaTable[Idx](S, insn, MI, Address, Decoder, " + "DecodeComplete);\n"; + } else { + OS << Indent << "TmpType tmp;\n"; + OS << Indent << "switch (Idx) {\n"; + OS << Indent << "default: llvm_unreachable(\"Invalid index!\");\n"; + for (const auto &[Index, Decoder] : enumerate(Decoders)) { + OS << Indent << "case " << Index << ":\n"; + OS << Decoder; + OS << Indent + 2 << "return S;\n"; + } + OS << Indent << "}\n"; } - OS << Indent << "}\n"; Indent -= 2; OS << Indent << "}\n"; } From e3eb094f00422dd9d273a8423caa8ecbe0a6e2c3 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Sat, 21 Jun 2025 10:15:41 -0700 Subject: [PATCH 2/3] Use a static table of function pointers instead of lambdas --- ...tterLambda.td => DecoderEmitterFnTable.td} | 22 ++--- llvm/utils/TableGen/DecoderEmitter.cpp | 88 ++++++++++--------- 2 files changed, 58 insertions(+), 52 deletions(-) rename llvm/test/TableGen/{DecoderEmitterLambda.td => DecoderEmitterFnTable.td} (81%) diff --git a/llvm/test/TableGen/DecoderEmitterLambda.td b/llvm/test/TableGen/DecoderEmitterFnTable.td similarity index 81% rename from llvm/test/TableGen/DecoderEmitterLambda.td rename to llvm/test/TableGen/DecoderEmitterFnTable.td index 4926c8d7def66..ad21179b5c98c 100644 --- a/llvm/test/TableGen/DecoderEmitterLambda.td +++ b/llvm/test/TableGen/DecoderEmitterFnTable.td @@ -1,4 +1,4 @@ -// RUN: llvm-tblgen -gen-disassembler -use-lambda-in-decode-to-mcinst -I %p/../../include %s | FileCheck %s +// RUN: llvm-tblgen -gen-disassembler -use-fn-table-in-decode-to-mcinst -I %p/../../include %s | FileCheck %s include "llvm/Target/Target.td" 
@@ -71,14 +71,14 @@ def Inst3 : TestInstruction { let AsmString = "Inst3"; } +// CHECK-LABEL: decodeFn0 +// CHECK-LABEL: decodeFn1 +// CHECK-LABEL: decodeFn2 +// CHECK-LABEL: decodeFn3 // CHECK-LABEL: decodeToMCInst -// CHECK: decodeLambda0 = -// CHECK: decodeLambda1 = -// CHECK: decodeLambda2 = -// CHECK: decodeLambda3 = -// CHECK: decodeLambdaTable[] -// CHECK-NEXT: decodeLambda0 -// CHECK-NEXT: decodeLambda1 -// CHECK-NEXT: decodeLambda2 -// CHECK-NEXT: decodeLambda3 -// CHECK: return decodeLambdaTable[Idx] +// CHECK: static constexpr DecodeFnTy decodeFnTable[] +// CHECK-NEXT: decodeFn0, +// CHECK-NEXT: decodeFn1, +// CHECK-NEXT: decodeFn2, +// CHECK-NEXT: decodeFn3, +// CHECK: return decodeFnTable[Idx] diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp index 824b0ad2a2645..af25975f7c7ec 100644 --- a/llvm/utils/TableGen/DecoderEmitter.cpp +++ b/llvm/utils/TableGen/DecoderEmitter.cpp @@ -83,11 +83,12 @@ static cl::opt LargeTable( "in the table instead of the default 16 bits."), cl::init(false), cl::cat(DisassemblerEmitterCat)); -static cl::opt UseLambdaInDecodetoMCInst( - "use-lambda-in-decode-to-mcinst", - cl::desc("Use a table of lambdas instead of a switch case in the\n" - "generated `decodeToMCInst` function. Helps improve compile time\n" - "of the generated code."), +static cl::opt UseFnTableInDecodetoMCInst( + "use-fn-table-in-decode-to-mcinst", + cl::desc( + "Use a table of function pointers instead of a switch case in the\n" + "generated `decodeToMCInst` function. 
Helps improve compile time\n" + "of the generated code."), cl::init(false), cl::cat(DisassemblerEmitterCat)); STATISTIC(NumEncodings, "Number of encodings considered"); @@ -1073,53 +1074,57 @@ void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS, void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders, indent Indent) const { - // The decoder function is just a big switch statement based on the - // input decoder index. - OS << Indent << "template \n"; - OS << Indent << "static DecodeStatus decodeToMCInst(DecodeStatus S," - << " unsigned Idx, InsnType insn, MCInst &MI,\n"; - OS << Indent << " uint64_t " - << "Address, const MCDisassembler *Decoder, bool &DecodeComplete) {\n"; - Indent += 2; - OS << Indent << "DecodeComplete = true;\n"; + // The decoder function is just a big switch statement or a table of function + // pointers based on the input decoder index. + // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits // It would be better for emitBinaryParser to use a 64-bit tmp whenever // possible but fall back to an InsnType-sized tmp for truly large fields. - OS << Indent - << "using TmpType = " - "std::conditional_t::" - "value, InsnType, uint64_t>;\n"; - - if (UseLambdaInDecodetoMCInst) { - // Emit one lambda for each case first. + StringRef TmpTypeDecl = + "using TmpType = std::conditional_t::value, " + "InsnType, uint64_t>;\n"; + StringRef DecodeParams = + "DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const " + "MCDisassembler *Decoder, bool &DecodeComplete"; + + if (UseFnTableInDecodetoMCInst) { + // Emit a function for each case first. 
for (const auto &[Index, Decoder] : enumerate(Decoders)) { - OS << Indent << "auto decodeLambda" << Index << " = [](DecodeStatus S,\n" - << Indent << " InsnType insn, MCInst &MI,\n" - << Indent << " uint64_t Address, \n" - << Indent << " const MCDisassembler *Decoder,\n" - << Indent << " bool &DecodeComplete) {\n"; - OS << Indent + 2 << "[[maybe_unused]] TmpType tmp;\n"; + OS << Indent << "template \n"; + OS << Indent << "DecodeStatus decodeFn" << Index << "(" << DecodeParams + << ") {\n"; + Indent += 2; + OS << Indent << TmpTypeDecl; + OS << Indent << "[[maybe_unused]] TmpType tmp;\n"; OS << Decoder; - OS << Indent + 2 << "return S;\n"; - OS << Indent << "};\n"; + OS << Indent << "return S;\n"; + Indent -= 2; + OS << Indent << "}\n\n"; } - // Build a table of lambdas. + } - OS << R"( - using LambdaTy = - function_ref; - )"; - OS << Indent << "const static LambdaTy decodeLambdaTable[] = {\n"; + OS << Indent << "// Handling " << Decoders.size() << " cases.\n"; + OS << Indent << "template \n"; + OS << Indent << "static DecodeStatus decodeToMCInst(unsigned Idx, " + << DecodeParams << ") {\n"; + Indent += 2; + OS << Indent << "DecodeComplete = true;\n"; + + if (UseFnTableInDecodetoMCInst) { + // Build a table of function pointers. 
+ OS << Indent << "using DecodeFnTy = DecodeStatus (*)(" << DecodeParams + << ");\n"; + OS << Indent << "static constexpr DecodeFnTy decodeFnTable[] = {\n"; for (size_t Index : llvm::seq(Decoders.size())) - OS << Indent + 2 << "decodeLambda" << Index << ",\n"; + OS << Indent + 2 << "decodeFn" << Index << ",\n"; OS << Indent << "};\n"; OS << Indent << "if (Idx >= " << Decoders.size() << ")\n"; OS << Indent + 2 << "llvm_unreachable(\"Invalid index!\");\n"; OS << Indent - << "return decodeLambdaTable[Idx](S, insn, MI, Address, Decoder, " + << "return decodeFnTable[Idx](S, insn, MI, Address, Decoder, " "DecodeComplete);\n"; } else { + OS << Indent << TmpTypeDecl; OS << Indent << "TmpType tmp;\n"; OS << Indent << "switch (Idx) {\n"; OS << Indent << "default: llvm_unreachable(\"Invalid index!\");\n"; @@ -1306,7 +1311,8 @@ std::pair FilterChooser::getDecoderIndex(DecoderSet &Decoders, // FIXME: emitDecoder() function can take a buffer directly rather than // a stream. raw_svector_ostream S(Decoder); - bool HasCompleteDecoder = emitDecoder(S, indent(4), Opc); + indent Indent(UseFnTableInDecodetoMCInst ? 2 : 4); + bool HasCompleteDecoder = emitDecoder(S, Indent, Opc); // Using the full decoder string as the key value here is a bit // heavyweight, but is effective. 
If the string comparisons become a @@ -2410,7 +2416,7 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI, << " makeUp(insn, Len);"; } OS << R"( - S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, DecodeComplete); + S = decodeToMCInst(DecodeIdx, S, insn, MI, Address, DisAsm, DecodeComplete); assert(DecodeComplete); LLVM_DEBUG(dbgs() << Loc << ": OPC_Decode: opcode " << Opc @@ -2432,7 +2438,7 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI, MCInst TmpMI; TmpMI.setOpcode(Opc); bool DecodeComplete; - S = decodeToMCInst(S, DecodeIdx, insn, TmpMI, Address, DisAsm, DecodeComplete); + S = decodeToMCInst(DecodeIdx, S, insn, TmpMI, Address, DisAsm, DecodeComplete); LLVM_DEBUG(dbgs() << Loc << ": OPC_TryDecode: opcode " << Opc << ", using decoder " << DecodeIdx << ": "); From 9ed847b6667eaa485135d222ab44590e1520c553 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Mon, 23 Jun 2025 10:05:33 -0700 Subject: [PATCH 3/3] Review feedback --- llvm/test/TableGen/DecoderEmitterFnTable.td | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/test/TableGen/DecoderEmitterFnTable.td b/llvm/test/TableGen/DecoderEmitterFnTable.td index ad21179b5c98c..7bed18c19a513 100644 --- a/llvm/test/TableGen/DecoderEmitterFnTable.td +++ b/llvm/test/TableGen/DecoderEmitterFnTable.td @@ -71,14 +71,14 @@ def Inst3 : TestInstruction { let AsmString = "Inst3"; } -// CHECK-LABEL: decodeFn0 -// CHECK-LABEL: decodeFn1 -// CHECK-LABEL: decodeFn2 -// CHECK-LABEL: decodeFn3 -// CHECK-LABEL: decodeToMCInst +// CHECK-LABEL: DecodeStatus decodeFn0(DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete) +// CHECK-LABEL: DecodeStatus decodeFn1(DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete) +// CHECK-LABEL: DecodeStatus decodeFn2(DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const 
MCDisassembler *Decoder, bool &DecodeComplete) +// CHECK-LABEL: DecodeStatus decodeFn3(DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete) +// CHECK-LABEL: decodeToMCInst(unsigned Idx, DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete) // CHECK: static constexpr DecodeFnTy decodeFnTable[] // CHECK-NEXT: decodeFn0, // CHECK-NEXT: decodeFn1, // CHECK-NEXT: decodeFn2, // CHECK-NEXT: decodeFn3, -// CHECK: return decodeFnTable[Idx] +// CHECK: return decodeFnTable[Idx](S, insn, MI, Address, Decoder, DecodeComplete)