Skip to content

Commit ccf0c8d

Browse files
authored
[BOLT] Add reading support for Linux kernel exception table (#83100)
Read the Linux kernel exception table and ignore functions with exceptions for now. Proper support requires introducing new control flow, since some instructions that access memory can cause a control-flow change. Because looking at the disassembly or CFG with exception annotations is valuable for code analysis, delay marking functions with exceptions as non-simple until immediately before emitting the code.
1 parent 6f5c4f2 commit ccf0c8d

File tree

2 files changed

+204
-70
lines changed

2 files changed

+204
-70
lines changed

bolt/lib/Rewrite/LinuxKernelRewriter.cpp

Lines changed: 140 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "bolt/Rewrite/MetadataRewriter.h"
1515
#include "bolt/Rewrite/MetadataRewriters.h"
1616
#include "bolt/Utils/CommandLineOpts.h"
17+
#include "llvm/ADT/DenseSet.h"
1718
#include "llvm/Support/BinaryStreamWriter.h"
1819
#include "llvm/Support/CommandLine.h"
1920
#include "llvm/Support/Debug.h"
@@ -27,9 +28,9 @@ using namespace bolt;
2728
namespace opts {
2829

2930
static cl::opt<bool>
30-
PrintORC("print-orc",
31-
cl::desc("print ORC unwind information for instructions"),
32-
cl::init(true), cl::Hidden, cl::cat(BoltCategory));
31+
DumpExceptions("dump-linux-exceptions",
32+
cl::desc("dump Linux kernel exception table"),
33+
cl::init(false), cl::Hidden, cl::cat(BoltCategory));
3334

3435
static cl::opt<bool>
3536
DumpORC("dump-orc", cl::desc("dump raw ORC unwind information (sorted)"),
@@ -40,6 +41,11 @@ static cl::opt<bool> DumpStaticCalls("dump-static-calls",
4041
cl::init(false), cl::Hidden,
4142
cl::cat(BoltCategory));
4243

44+
static cl::opt<bool>
45+
PrintORC("print-orc",
46+
cl::desc("print ORC unwind information for instructions"),
47+
cl::init(true), cl::Hidden, cl::cat(BoltCategory));
48+
4349
} // namespace opts
4450

4551
/// Linux Kernel supports stack unwinding using ORC (oops rewind capability).
@@ -134,6 +140,13 @@ class LinuxKernelRewriter final : public MetadataRewriter {
134140
using StaticCallListType = std::vector<StaticCallInfo>;
135141
StaticCallListType StaticCallEntries;
136142

143+
/// Section containing the Linux exception table.
144+
ErrorOr<BinarySection &> ExceptionsSection = std::errc::bad_address;
145+
static constexpr size_t EXCEPTION_TABLE_ENTRY_SIZE = 12;
146+
147+
/// Functions with exception handling code.
148+
DenseSet<BinaryFunction *> FunctionsWithExceptions;
149+
137150
/// Insert an LKMarker for a given code pointer \p PC from a non-code section
138151
/// \p SectionName.
139152
void insertLKMarker(uint64_t PC, uint64_t SectionOffset,
@@ -143,9 +156,6 @@ class LinuxKernelRewriter final : public MetadataRewriter {
143156
/// Process linux kernel special sections and their relocations.
144157
void processLKSections();
145158

146-
/// Process special linux kernel section, __ex_table.
147-
void processLKExTable();
148-
149159
/// Process special linux kernel section, .pci_fixup.
150160
void processLKPCIFixup();
151161

@@ -174,6 +184,9 @@ class LinuxKernelRewriter final : public MetadataRewriter {
174184
Error readStaticCalls();
175185
Error rewriteStaticCalls();
176186

187+
Error readExceptionTable();
188+
Error rewriteExceptionTable();
189+
177190
/// Mark instructions referenced by kernel metadata.
178191
Error markInstructions();
179192

@@ -192,6 +205,9 @@ class LinuxKernelRewriter final : public MetadataRewriter {
192205
if (Error E = readStaticCalls())
193206
return E;
194207

208+
if (Error E = readExceptionTable())
209+
return E;
210+
195211
return Error::success();
196212
}
197213

@@ -203,6 +219,11 @@ class LinuxKernelRewriter final : public MetadataRewriter {
203219
}
204220

205221
Error preEmitFinalizer() override {
222+
// Since rewriteExceptionTable() can mark functions as non-simple, run it
223+
// before other rewriters that depend on simple/emit status.
224+
if (Error E = rewriteExceptionTable())
225+
return E;
226+
206227
if (Error E = rewriteORCTables())
207228
return E;
208229

@@ -249,77 +270,13 @@ void LinuxKernelRewriter::insertLKMarker(uint64_t PC, uint64_t SectionOffset,
249270
}
250271

251272
void LinuxKernelRewriter::processLKSections() {
252-
processLKExTable();
253273
processLKPCIFixup();
254274
processLKKSymtab();
255275
processLKKSymtab(true);
256276
processLKBugTable();
257277
processLKSMPLocks();
258278
}
259279

260-
/// Process __ex_table section of Linux Kernel.
261-
/// This section contains information regarding kernel level exception
262-
/// handling (https://www.kernel.org/doc/html/latest/x86/exception-tables.html).
263-
/// More documentation is in arch/x86/include/asm/extable.h.
264-
///
265-
/// The section is the list of the following structures:
266-
///
267-
/// struct exception_table_entry {
268-
/// int insn;
269-
/// int fixup;
270-
/// int handler;
271-
/// };
272-
///
273-
void LinuxKernelRewriter::processLKExTable() {
274-
ErrorOr<BinarySection &> SectionOrError =
275-
BC.getUniqueSectionByName("__ex_table");
276-
if (!SectionOrError)
277-
return;
278-
279-
const uint64_t SectionSize = SectionOrError->getSize();
280-
const uint64_t SectionAddress = SectionOrError->getAddress();
281-
assert((SectionSize % 12) == 0 &&
282-
"The size of the __ex_table section should be a multiple of 12");
283-
for (uint64_t I = 0; I < SectionSize; I += 4) {
284-
const uint64_t EntryAddress = SectionAddress + I;
285-
ErrorOr<uint64_t> Offset = BC.getSignedValueAtAddress(EntryAddress, 4);
286-
assert(Offset && "failed reading PC-relative offset for __ex_table");
287-
int32_t SignedOffset = *Offset;
288-
const uint64_t RefAddress = EntryAddress + SignedOffset;
289-
290-
BinaryFunction *ContainingBF =
291-
BC.getBinaryFunctionContainingAddress(RefAddress);
292-
if (!ContainingBF)
293-
continue;
294-
295-
MCSymbol *ReferencedSymbol = ContainingBF->getSymbol();
296-
const uint64_t FunctionOffset = RefAddress - ContainingBF->getAddress();
297-
switch (I % 12) {
298-
default:
299-
llvm_unreachable("bad alignment of __ex_table");
300-
break;
301-
case 0:
302-
// insn
303-
insertLKMarker(RefAddress, I, SignedOffset, true, "__ex_table");
304-
break;
305-
case 4:
306-
// fixup
307-
if (FunctionOffset)
308-
ReferencedSymbol = ContainingBF->addEntryPointAtOffset(FunctionOffset);
309-
BC.addRelocation(EntryAddress, ReferencedSymbol, Relocation::getPC32(), 0,
310-
*Offset);
311-
break;
312-
case 8:
313-
// handler
314-
assert(!FunctionOffset &&
315-
"__ex_table handler entry should point to function start");
316-
BC.addRelocation(EntryAddress, ReferencedSymbol, Relocation::getPC32(), 0,
317-
*Offset);
318-
break;
319-
}
320-
}
321-
}
322-
323280
/// Process .pci_fixup section of Linux Kernel.
324281
/// This section contains a list of entries for different PCI devices and their
325282
/// corresponding hook handler (code pointer where the fixup
@@ -943,6 +900,119 @@ Error LinuxKernelRewriter::rewriteStaticCalls() {
943900
return Error::success();
944901
}
945902

903+
/// Instructions that access user-space memory can cause page faults. These
904+
/// faults will be handled by the kernel and execution will resume at the fixup
905+
/// code location if the address was invalid. The kernel uses the exception
906+
/// table to match the faulting instruction to its fixup. The table consists of
907+
/// the following entries:
908+
///
909+
/// struct exception_table_entry {
910+
/// int insn;
911+
/// int fixup;
912+
/// int data;
913+
/// };
914+
///
915+
/// More info at:
916+
/// https://www.kernel.org/doc/Documentation/x86/exception-tables.txt
917+
Error LinuxKernelRewriter::readExceptionTable() {
918+
ExceptionsSection = BC.getUniqueSectionByName("__ex_table");
919+
if (!ExceptionsSection)
920+
return Error::success();
921+
922+
if (ExceptionsSection->getSize() % EXCEPTION_TABLE_ENTRY_SIZE)
923+
return createStringError(errc::executable_format_error,
924+
"exception table size error");
925+
926+
const uint64_t SectionAddress = ExceptionsSection->getAddress();
927+
DataExtractor DE(ExceptionsSection->getContents(),
928+
BC.AsmInfo->isLittleEndian(),
929+
BC.AsmInfo->getCodePointerSize());
930+
DataExtractor::Cursor Cursor(0);
931+
uint32_t EntryID = 0;
932+
while (Cursor && Cursor.tell() < ExceptionsSection->getSize()) {
933+
const uint64_t InstAddress =
934+
SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
935+
const uint64_t FixupAddress =
936+
SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
937+
const uint64_t Data = DE.getU32(Cursor);
938+
939+
// Consume the status of the cursor.
940+
if (!Cursor)
941+
return createStringError(errc::executable_format_error,
942+
"out of bounds while reading exception table");
943+
944+
++EntryID;
945+
946+
if (opts::DumpExceptions) {
947+
BC.outs() << "Exception Entry: " << EntryID << '\n';
948+
BC.outs() << "\tInsn: 0x" << Twine::utohexstr(InstAddress) << '\n'
949+
<< "\tFixup: 0x" << Twine::utohexstr(FixupAddress) << '\n'
950+
<< "\tData: 0x" << Twine::utohexstr(Data) << '\n';
951+
}
952+
953+
MCInst *Inst = nullptr;
954+
MCSymbol *FixupLabel = nullptr;
955+
956+
BinaryFunction *InstBF = BC.getBinaryFunctionContainingAddress(InstAddress);
957+
if (InstBF && BC.shouldEmit(*InstBF)) {
958+
Inst = InstBF->getInstructionAtOffset(InstAddress - InstBF->getAddress());
959+
if (!Inst)
960+
return createStringError(errc::executable_format_error,
961+
"no instruction at address 0x%" PRIx64
962+
" in exception table",
963+
InstAddress);
964+
BC.MIB->addAnnotation(*Inst, "ExceptionEntry", EntryID);
965+
FunctionsWithExceptions.insert(InstBF);
966+
}
967+
968+
if (!InstBF && opts::Verbosity) {
969+
BC.outs() << "BOLT-INFO: no function matches instruction at 0x"
970+
<< Twine::utohexstr(InstAddress)
971+
<< " referenced by Linux exception table\n";
972+
}
973+
974+
BinaryFunction *FixupBF =
975+
BC.getBinaryFunctionContainingAddress(FixupAddress);
976+
if (FixupBF && BC.shouldEmit(*FixupBF)) {
977+
const uint64_t Offset = FixupAddress - FixupBF->getAddress();
978+
if (!FixupBF->getInstructionAtOffset(Offset))
979+
return createStringError(errc::executable_format_error,
980+
"no instruction at fixup address 0x%" PRIx64
981+
" in exception table",
982+
FixupAddress);
983+
FixupLabel = Offset ? FixupBF->addEntryPointAtOffset(Offset)
984+
: FixupBF->getSymbol();
985+
if (Inst)
986+
BC.MIB->addAnnotation(*Inst, "Fixup", FixupLabel->getName());
987+
FunctionsWithExceptions.insert(FixupBF);
988+
}
989+
990+
if (!FixupBF && opts::Verbosity) {
991+
BC.outs() << "BOLT-INFO: no function matches fixup code at 0x"
992+
<< Twine::utohexstr(FixupAddress)
993+
<< " referenced by Linux exception table\n";
994+
}
995+
}
996+
997+
BC.outs() << "BOLT-INFO: parsed "
998+
<< ExceptionsSection->getSize() / EXCEPTION_TABLE_ENTRY_SIZE
999+
<< " exception table entries\n";
1000+
1001+
return Error::success();
1002+
}
1003+
1004+
/// Depending on the value of CONFIG_BUILDTIME_TABLE_SORT, the kernel expects
1005+
/// the exception table to be sorted. Hence we have to sort it after code
1006+
/// reordering.
1007+
Error LinuxKernelRewriter::rewriteExceptionTable() {
1008+
// Disable output of functions with exceptions before rewrite support is
1009+
// added.
1010+
for (BinaryFunction *BF : FunctionsWithExceptions)
1011+
BF->setSimple(false);
1012+
1013+
return Error::success();
1014+
}
1015+
9461016
} // namespace
9471017

9481018
std::unique_ptr<MetadataRewriter>

bolt/test/X86/linux-exceptions.s

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# REQUIRES: system-linux
2+
3+
## Check that BOLT correctly parses the Linux kernel exception table.
4+
5+
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
6+
# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \
7+
# RUN: -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr
8+
9+
## Verify exception bindings to instructions.
10+
11+
# RUN: llvm-bolt %t.exe --print-normalized -o %t.out --keep-nops=0 \
12+
# RUN: --bolt-info=0 | FileCheck %s
13+
14+
## Verify the bindings again on the rewritten binary with nops removed.
15+
16+
# RUN: llvm-bolt %t.out -o %t.out.1 --print-normalized | FileCheck %s
17+
18+
# CHECK: BOLT-INFO: Linux kernel binary detected
19+
# CHECK: BOLT-INFO: parsed 2 exception table entries
20+
21+
.text
22+
.globl _start
23+
.type _start, %function
24+
_start:
25+
# CHECK: Binary Function "_start"
26+
nop
27+
.L0:
28+
mov (%rdi), %rax
29+
# CHECK: mov
30+
# CHECK-SAME: ExceptionEntry: 1 # Fixup: [[FIXUP:[a-zA-Z0-9_]+]]
31+
nop
32+
.L1:
33+
mov (%rsi), %rax
34+
# CHECK: mov
35+
# CHECK-SAME: ExceptionEntry: 2 # Fixup: [[FIXUP]]
36+
nop
37+
ret
38+
.LF0:
39+
# CHECK: Secondary Entry Point: [[FIXUP]]
40+
jmp foo
41+
.size _start, .-_start
42+
43+
.globl foo
44+
.type foo, %function
45+
foo:
46+
ret
47+
.size foo, .-foo
48+
49+
50+
## Exception table.
51+
.section __ex_table,"a",@progbits
52+
.align 4
53+
54+
.long .L0 - . # instruction
55+
.long .LF0 - . # fixup
56+
.long 0 # data
57+
58+
.long .L1 - . # instruction
59+
.long .LF0 - . # fixup
60+
.long 0 # data
61+
62+
## Fake Linux Kernel sections.
63+
.section __ksymtab,"a",@progbits
64+
.section __ksymtab_gpl,"a",@progbits

0 commit comments

Comments
 (0)