Skip to content

Commit a4900f0

Browse files
authored
[BOLT] Abort on out-of-section symbols in GOT (#100801)
This patch aborts BOLT execution if it finds out-of-section (section end) symbol in GOT table. In order to handle such situations properly in future, we would need to have an arch-dependent way to analyze relocations or its sequences, e.g., for ARM it would probably be ADRP + LDR analysis in order to get GOT entry address. Currently, it is also challenging because GOT-related relocation symbols are replaced to __BOLT_got_zero. Anyway, it seems to be quite a rare case, which seems to be only? related to static binaries. For the most part, it seems that it should be handled on the linker stage, since static binary should not have GOT table at all. LLD linker with relaxations enabled would replace instruction addresses from GOT directly to target symbols, which eliminates the problem. Anyway, in order to achieve detection of such cases, this patch fixes a few things in BOLT: 1. For the end symbols, we're now using the section provided by ELF binary. Previously it would be tied with a wrong section found by symbol address. 2. The end symbols would have limited registration we would only add them in name->data GlobalSymbols map, since using address->data BinaryDataMap map would likely be impossible due to address duality of such symbols. 3. The outdated BD->getSection (currently returning refence, not pointer) check in postProcessSymbolTable is replaced by getSize check in order to allow zero-sized top-level symbols if they are located in zero-sized sections. For the most part, such things could only be found in tests, but I don't see a reason not to handle such cases. 4. Updated section-end-sym test and removed x86_64 requirement since there is no reason for this (tested on aarch64 linux) The test was provided by peterwaller-arm (thank you) in #100096 and slightly modified by me.
1 parent 097ddd3 commit a4900f0

File tree

6 files changed

+98
-16
lines changed

6 files changed

+98
-16
lines changed

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -911,7 +911,8 @@ class BinaryContext {
911911
/// of \p Flags.
912912
MCSymbol *registerNameAtAddress(StringRef Name, uint64_t Address,
913913
uint64_t Size, uint16_t Alignment,
914-
unsigned Flags = 0);
914+
unsigned Flags = 0,
915+
BinarySection *Section = NULL);
915916

916917
/// Return BinaryData registered at a given \p Address or nullptr if no
917918
/// global symbol was registered at the location.

bolt/lib/Core/BinaryContext.cpp

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1056,18 +1056,28 @@ void BinaryContext::adjustCodePadding() {
10561056
MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
10571057
uint64_t Size,
10581058
uint16_t Alignment,
1059-
unsigned Flags) {
1059+
unsigned Flags,
1060+
BinarySection *Section) {
10601061
// Register the name with MCContext.
10611062
MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
1063+
BinaryData *BD;
1064+
1065+
// Register out of section symbols only in GlobalSymbols map
1066+
if (Section && Section->getEndAddress() == Address) {
1067+
BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
1068+
*Section, Flags);
1069+
GlobalSymbols[Name] = BD;
1070+
return Symbol;
1071+
}
10621072

10631073
auto GAI = BinaryDataMap.find(Address);
1064-
BinaryData *BD;
10651074
if (GAI == BinaryDataMap.end()) {
10661075
ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
1067-
BinarySection &Section =
1068-
SectionOrErr ? SectionOrErr.get() : absoluteSection();
1076+
BinarySection &SectionRef = Section ? *Section
1077+
: SectionOrErr ? SectionOrErr.get()
1078+
: absoluteSection();
10691079
BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
1070-
Section, Flags);
1080+
SectionRef, Flags);
10711081
GAI = BinaryDataMap.emplace(Address, BD).first;
10721082
GlobalSymbols[Name] = BD;
10731083
updateObjectNesting(GAI);
@@ -1402,7 +1412,7 @@ void BinaryContext::postProcessSymbolTable() {
14021412
if ((BD->getName().starts_with("SYMBOLat") ||
14031413
BD->getName().starts_with("DATAat")) &&
14041414
!BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1405-
BD->getSection()) {
1415+
BD->getSection().getSize()) {
14061416
this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD
14071417
<< "\n";
14081418
Valid = false;

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -955,13 +955,13 @@ void RewriteInstance::discoverFileObjects() {
955955
uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
956956
uint64_t SymbolAlignment = Symbol.getAlignment();
957957

958-
auto registerName = [&](uint64_t FinalSize) {
958+
auto registerName = [&](uint64_t FinalSize, BinarySection *Section = NULL) {
959959
// Register names even if it's not a function, e.g. for an entry point.
960960
BC->registerNameAtAddress(UniqueName, SymbolAddress, FinalSize,
961-
SymbolAlignment, SymbolFlags);
961+
SymbolAlignment, SymbolFlags, Section);
962962
if (!AlternativeName.empty())
963963
BC->registerNameAtAddress(AlternativeName, SymbolAddress, FinalSize,
964-
SymbolAlignment, SymbolFlags);
964+
SymbolAlignment, SymbolFlags, Section);
965965
};
966966

967967
section_iterator Section =
@@ -986,12 +986,25 @@ void RewriteInstance::discoverFileObjects() {
986986
<< " for function\n");
987987

988988
if (SymbolAddress == Section->getAddress() + Section->getSize()) {
989+
ErrorOr<BinarySection &> SectionOrError =
990+
BC->getSectionForAddress(Section->getAddress());
991+
992+
// Skip symbols from invalid sections
993+
if (!SectionOrError) {
994+
BC->errs() << "BOLT-WARNING: " << UniqueName << " (0x"
995+
<< Twine::utohexstr(SymbolAddress)
996+
<< ") does not have any section\n";
997+
continue;
998+
}
999+
9891000
assert(SymbolSize == 0 &&
9901001
"unexpect non-zero sized symbol at end of section");
991-
LLVM_DEBUG(
992-
dbgs()
993-
<< "BOLT-DEBUG: rejecting as symbol points to end of its section\n");
994-
registerName(SymbolSize);
1002+
LLVM_DEBUG({
1003+
dbgs() << "BOLT-DEBUG: rejecting as symbol " << UniqueName
1004+
<< " points to end of " << SectionOrError->getName()
1005+
<< " section\n";
1006+
});
1007+
registerName(SymbolSize, &SectionOrError.get());
9951008
continue;
9961009
}
9971010

@@ -2623,6 +2636,30 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
26232636
}
26242637
}
26252638

2639+
if (Relocation::isGOT(RType) && !Relocation::isTLS(RType)) {
2640+
auto exitOnGotEndSymol = [&](StringRef Name) {
2641+
BC->errs() << "BOLT-ERROR: GOT table contains currently unsupported "
2642+
"section end symbol "
2643+
<< Name << "\n";
2644+
exit(1);
2645+
};
2646+
2647+
if (SymbolIter != InputFile->symbol_end() && ReferencedSection) {
2648+
if (cantFail(SymbolIter->getAddress()) ==
2649+
ReferencedSection->getEndAddress())
2650+
exitOnGotEndSymol(cantFail(SymbolIter->getName()));
2651+
} else {
2652+
// If no section and symbol are provided by relocation, try to find the
2653+
// symbol by its name, including the possibility that the symbol is local.
2654+
BinaryData *BD = BC->getBinaryDataByName(SymbolName);
2655+
if (!BD && NR.getUniquifiedNameCount(SymbolName) == 1)
2656+
BD = BC->getBinaryDataByName(NR.getUniqueName(SymbolName, 1));
2657+
2658+
if ((BD && BD->getAddress() == BD->getSection().getEndAddress()))
2659+
exitOnGotEndSymol(BD->getName());
2660+
}
2661+
}
2662+
26262663
if (!ReferencedSection)
26272664
ReferencedSection = BC->getSectionForAddress(SymbolAddress);
26282665

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
SECTIONS {
2+
PROVIDE (__executable_start = SEGMENT_START("text-segment", 0x400000)); . = SEGMENT_START("text-segment", 0x400000) + SIZEOF_HEADERS;
3+
.data : { *(.data) *(.array) }
4+
.text : { *(.text) }
5+
.got : { *(.got) *(.igot) }
6+
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
2+
# RUN: %s -o %t.o
3+
# RUN: %clang %cflags -nostartfiles -nodefaultlibs -static -Wl,--no-relax \
4+
# RUN: -Wl,-q -Wl,-T %S/Inputs/got_end_of_section_symbol.lld_script \
5+
# RUN: %t.o -o %t.exe
6+
# RUN: not llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s
7+
8+
# CHECK: BOLT-ERROR: GOT table contains currently unsupported section end
9+
# CHECK-SAME: symbol array_end
10+
11+
.section .array, "a", @progbits
12+
.globl array_start
13+
.globl array_end
14+
array_start:
15+
.word 0
16+
array_end:
17+
18+
.section .text
19+
.globl _start
20+
.type _start, %function
21+
_start:
22+
adrp x1, #:got:array_start
23+
ldr x1, [x1, #:got_lo12:array_start]
24+
adrp x0, #:got:array_end
25+
ldr x0, [x0, #:got_lo12:array_end]
26+
adrp x2, #:got:_start
27+
ldr x2, [x2, #:got_lo12:_start]
28+
ret

bolt/test/X86/section-end-sym.s

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
## Check that BOLT doesn't consider end-of-section symbols (e.g., _etext) as
22
## functions.
33

4-
# REQUIRES: x86_64-linux, asserts
4+
# REQUIRES: system-linux, asserts
55

66
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t.o
77
# RUN: ld.lld %t.o -o %t.exe -q
88
# RUN: llvm-bolt %t.exe -o %t.null --print-cfg --debug-only=bolt 2>&1 \
99
# RUN: | FileCheck %s
1010

1111
# CHECK: considering symbol etext for function
12-
# CHECK-NEXT: rejecting as symbol points to end of its section
12+
# CHECK-NEXT: rejecting as symbol etext points to end of .text section
1313
# CHECK-NOT: Binary Function "etext{{.*}}" after building cfg
1414

1515

0 commit comments

Comments
 (0)