-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[BOLT] Abort on out-of-section symbols in GOT #100801
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-bolt Author: Vladislav Khmelevsky (yota9) Changes
Full diff: https://github.com/llvm/llvm-project/pull/100801.diff 5 Files Affected:
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 73932c4ca2fb3..42ab171706827 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -890,7 +890,8 @@ class BinaryContext {
/// of \p Flags.
MCSymbol *registerNameAtAddress(StringRef Name, uint64_t Address,
uint64_t Size, uint16_t Alignment,
- unsigned Flags = 0);
+ unsigned Flags = 0,
+ BinarySection *Section = NULL);
/// Return BinaryData registered at a given \p Address or nullptr if no
/// global symbol was registered at the location.
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 0a1f1bb9e0d20..648c74a4ed048 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1054,18 +1054,28 @@ void BinaryContext::adjustCodePadding() {
MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
uint64_t Size,
uint16_t Alignment,
- unsigned Flags) {
+ unsigned Flags,
+ BinarySection *Section) {
// Register the name with MCContext.
MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
+ BinaryData *BD;
+
+ // Register out of section symbols only in GlobalSymbols map
+ if (Section && Section->getEndAddress() == Address) {
+ BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
+ *Section, Flags);
+ GlobalSymbols[Name] = BD;
+ return Symbol;
+ }
auto GAI = BinaryDataMap.find(Address);
- BinaryData *BD;
if (GAI == BinaryDataMap.end()) {
ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
- BinarySection &Section =
- SectionOrErr ? SectionOrErr.get() : absoluteSection();
+ BinarySection &SectionRef = Section ? *Section
+ : SectionOrErr ? SectionOrErr.get()
+ : absoluteSection();
BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
- Section, Flags);
+ SectionRef, Flags);
GAI = BinaryDataMap.emplace(Address, BD).first;
GlobalSymbols[Name] = BD;
updateObjectNesting(GAI);
@@ -1399,7 +1409,7 @@ void BinaryContext::postProcessSymbolTable() {
if ((BD->getName().starts_with("SYMBOLat") ||
BD->getName().starts_with("DATAat")) &&
!BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
- BD->getSection()) {
+ BD->getSection().getSize()) {
this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD
<< "\n";
Valid = false;
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 32562ccb6b345..e41abe489c875 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -954,13 +954,13 @@ void RewriteInstance::discoverFileObjects() {
uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
uint64_t SymbolAlignment = Symbol.getAlignment();
- auto registerName = [&](uint64_t FinalSize) {
+ auto registerName = [&](uint64_t FinalSize, BinarySection *Section = NULL) {
// Register names even if it's not a function, e.g. for an entry point.
BC->registerNameAtAddress(UniqueName, SymbolAddress, FinalSize,
- SymbolAlignment, SymbolFlags);
+ SymbolAlignment, SymbolFlags, Section);
if (!AlternativeName.empty())
BC->registerNameAtAddress(AlternativeName, SymbolAddress, FinalSize,
- SymbolAlignment, SymbolFlags);
+ SymbolAlignment, SymbolFlags, Section);
};
section_iterator Section =
@@ -985,12 +985,25 @@ void RewriteInstance::discoverFileObjects() {
<< " for function\n");
if (SymbolAddress == Section->getAddress() + Section->getSize()) {
+ ErrorOr<BinarySection &> SectionOrError =
+ BC->getSectionForAddress(Section->getAddress());
+
+ // Skip symbols from invalid sections
+ if (!SectionOrError) {
+ BC->errs() << "BOLT-WARNING: " << UniqueName << " (0x"
+ << Twine::utohexstr(SymbolAddress)
+ << ") does not have any section\n";
+ continue;
+ }
+
assert(SymbolSize == 0 &&
"unexpect non-zero sized symbol at end of section");
- LLVM_DEBUG(
- dbgs()
- << "BOLT-DEBUG: rejecting as symbol points to end of its section\n");
- registerName(SymbolSize);
+ LLVM_DEBUG({
+ dbgs() << "BOLT-DEBUG: rejecting as symbol " << UniqueName
+ << " points to end of " << SectionOrError->getName()
+ << " section\n";
+ });
+ registerName(SymbolSize, &SectionOrError.get());
continue;
}
@@ -1432,7 +1445,9 @@ void RewriteInstance::registerFragments() {
// of the last local symbol.
ELFSymbolRef LocalSymEnd = ELF64LEFile->toSymbolRef(SymTab, SymTab->sh_info);
- for (auto &[ParentName, BF] : AmbiguousFragments) {
+ for (auto &Fragment : AmbiguousFragments) {
+ const StringRef &ParentName = Fragment.first;
+ BinaryFunction *BF = Fragment.second;
const uint64_t Address = BF->getAddress();
// Get fragment's own symbol
@@ -2557,6 +2572,16 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
return;
}
+ if (Relocation::isGOT(RType) && !Relocation::isTLS(RType)) {
+ BinaryData *BD = BC->getBinaryDataByName(SymbolName);
+ if (BD && BD->getAddress() == BD->getSection().getEndAddress()) {
+ BC->errs() << "BOLT-ERROR: GOT table contains currently unsupported "
+ "section end symbol "
+ << BD->getName() << "\n";
+ exit(1);
+ }
+ }
+
const uint64_t Address = SymbolAddress + Addend;
LLVM_DEBUG({
diff --git a/bolt/test/AArch64/Inputs/got_end_of_section_symbol.lld_script b/bolt/test/AArch64/Inputs/got_end_of_section_symbol.lld_script
new file mode 100644
index 0000000000000..2ad4169bbcc60
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/got_end_of_section_symbol.lld_script
@@ -0,0 +1,6 @@
+SECTIONS {
+ PROVIDE (__executable_start = SEGMENT_START("text-segment", 0x400000)); . = SEGMENT_START("text-segment", 0x400000) + SIZEOF_HEADERS;
+ .data : { *(.data) *(.array) }
+ .text : { *(.text) }
+ .got : { *(.got) *(.igot) }
+}
diff --git a/bolt/test/AArch64/got_end_of_section_symbol.s b/bolt/test/AArch64/got_end_of_section_symbol.s
new file mode 100644
index 0000000000000..c203214fe3fbe
--- /dev/null
+++ b/bolt/test/AArch64/got_end_of_section_symbol.s
@@ -0,0 +1,28 @@
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
+# RUN: %s -o %t.o
+# RUN: %clang %cflags -nostartfiles -nodefaultlibs -static -Wl,--no-relax \
+# RUN: -Wl,-q -Wl,-T %S/Inputs/got_end_of_section_symbol.lld_script \
+# RUN: %t.o -o %t.exe
+# RUN: not llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s
+
+# CHECK: BOLT-ERROR: GOT table contains currently unsupported section end
+# CHECK-SAME: symbol array_end
+
+.section .array, "a", @progbits
+.globl array_start
+.globl array_end
+array_start:
+ .word 0
+array_end:
+
+.section .text
+.globl _start
+.type exitOk, %function
+_start:
+ adrp x1, #:got:array_start
+ ldr x1, [x1, #:got_lo12:array_start]
+ adrp x0, #:got:array_end
+ ldr x0, [x0, #:got_lo12:array_end]
+ adrp x2, #:got:_start
+ ldr x2, [x2, #:got_lo12:_start]
+ ret
|
Updated section-end-sym test and removed x86_64 requirement added by @omjavaid as there is no reason for this. I've tested it on aarch64 linux and have no problem with this test. |
b6b4898
to
31d0a10
Compare
This patch aborts BOLT execution if it finds an out-of-section (section end) symbol in the GOT table. In order to handle such situations properly in the future, we would need to have an arch-dependent way to analyze relocations or their sequences, e.g., for ARM it would probably be ADRP + LDR analysis in order to get the GOT entry address. Currently, it is also challenging because GOT-related relocation symbols are replaced with __BOLT_got_zero. Anyway, it seems to be quite a rare case, which seems to be related only (?) to static binaries. For the most part, it seems that it should be handled at the linker stage, since a static binary should not have a GOT table at all. The LLD linker with relaxations enabled would replace instruction addresses from the GOT directly with target symbols, which eliminates the problem. Anyway, in order to achieve detection of such cases, this patch fixes a few things in BOLT: 1. For the end symbols, we're now using the section provided by the ELF binary. Previously it would be tied to a wrong section found by symbol address. 2. The end symbols would have limited registration: we would only add them in the name->data GlobalSymbols map, since using the address->data BinaryDataMap map would likely be impossible due to the address duality of such symbols. 3. The outdated BD->getSection (currently returning a reference, not a pointer) check in postProcessSymbolTable is replaced by a getSize check in order to allow zero-sized top-level symbols if they are located in zero-sized sections. For the most part, such things could only be found in tests, but I don't see a reason not to handle such cases. 4. Updated the section-end-sym test and removed the x86_64 requirement since there is no reason for this (tested on aarch64 linux). The test was provided by peterwaller-arm (thank you) in llvm#100096 and slightly modified by me.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks! I have also tested this on the previously malformed static binaries reported in #100096 and can confirm that this correctly aborts on them.
Thanks. If there are no objections from other reviewers, I will merge it in 24 hours. |
Add R_AARCH64_LD64_GOT*_LO15 to the list of supported relocations. However, because JITLink doesn't create a GOT section when used with BOLT, and there is no common VK_LO15 relocation for AArch64, this relocation is not recorded for further processing. Since BOLT doesn't move the GOT table, it's OK that the instruction keeps its preserved imm value rather than an expression. But this level of support is needed during relocation processing, e.g. to abort on out-of-section GOT symbols (llvm#100801).
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/92/builds/4167 Here is the relevant piece of the build log for the reference:
|
Reverted in:
as it breaks buildbot. |
Any updates on this? To my understanding (TMU), it remains reverted, correct? Are there any plans to get it merged again? |
It looks like the issue #100096 is specific to BFD? With GOLD, AArch64MCCodeEmitter crashes with:
Maybe that could be handled here as well? LLD seems to work fine. |
This patch aborts BOLT execution if it finds out-of-section (section
end) symbol in GOT table. In order to handle such situations properly in
future, we would need to have an arch-dependent way to analyze
relocations or its sequences, e.g., for ARM it would probably be ADRP +
LDR analysis in order to get GOT entry address. Currently, it is also
challenging because GOT-related relocation symbols are replaced with
__BOLT_got_zero. Anyway, it seems to be quite a rare case, which seems
to be related only (?) to static binaries. For the most part, it seems that
it should be handled on the linker stage, since static binary should not
have GOT table at all. LLD linker with relaxations enabled would replace
instruction addresses from GOT directly to target symbols, which
eliminates the problem.
Anyway, in order to achieve detection of such cases, this patch fixes a
few things in BOLT:
1. For the end symbols, we're now using the section provided by ELF
binary. Previously it would be tied with a wrong section found by symbol
address.
2. The end symbols would have limited registration: we would only
add them in name->data GlobalSymbols map, since using address->data
BinaryDataMap map would likely be impossible due to address duality of
such symbols.
3. The outdated BD->getSection (currently returning reference, not
pointer) check in postProcessSymbolTable is replaced by getSize check in
order to allow zero-sized top-level symbols if they are located in
zero-sized sections. For the most part, such things could only be found
in tests, but I don't see a reason not to handle such cases.
4. Updated section-end-sym test and removed x86_64 requirement since
there is no reason for this (tested on aarch64 linux)
The test was provided by peterwaller-arm (thank you) in #100096 and
slightly modified by me.