diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 5fb32a1ea6784..d2c1709b7eaff 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -911,7 +911,12 @@ class BinaryContext {
   /// of \p Flags.
+  /// If \p Section is given and \p Address is the end address of that
+  /// section, the symbol is only recorded by name (in GlobalSymbols).
+  /// Otherwise a non-null \p Section is used for newly created data instead
+  /// of the section looked up by \p Address.
   MCSymbol *registerNameAtAddress(StringRef Name, uint64_t Address,
                                   uint64_t Size, uint16_t Alignment,
-                                  unsigned Flags = 0);
+                                  unsigned Flags = 0,
+                                  BinarySection *Section = nullptr);
 
   /// Return BinaryData registered at a given \p Address or nullptr if no
   /// global symbol was registered at the location.
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index e86075e69c05d..fbde42e822283 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1056,18 +1056,28 @@ void BinaryContext::adjustCodePadding() {
 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
                                                uint64_t Size,
                                                uint16_t Alignment,
-                                               unsigned Flags) {
+                                               unsigned Flags,
+                                               BinarySection *Section) {
   // Register the name with MCContext.
   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
+  BinaryData *BD;
+
+  // Register end-of-section symbols only in the GlobalSymbols map.
+  if (Section && Section->getEndAddress() == Address) {
+    BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
+                        *Section, Flags);
+    GlobalSymbols[Name] = BD;
+    return Symbol;
+  }
 
   auto GAI = BinaryDataMap.find(Address);
-  BinaryData *BD;
   if (GAI == BinaryDataMap.end()) {
     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
-    BinarySection &Section =
-        SectionOrErr ? SectionOrErr.get() : absoluteSection();
+    BinarySection &SectionRef = Section        ? *Section
+                                : SectionOrErr ? SectionOrErr.get()
+                                               : absoluteSection();
     BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
-                        Section, Flags);
+                        SectionRef, Flags);
     GAI = BinaryDataMap.emplace(Address, BD).first;
     GlobalSymbols[Name] = BD;
     updateObjectNesting(GAI);
@@ -1402,7 +1412,7 @@ void BinaryContext::postProcessSymbolTable() {
     if ((BD->getName().starts_with("SYMBOLat") ||
          BD->getName().starts_with("DATAat")) &&
         !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
-        BD->getSection()) {
+        BD->getSection().getSize()) {
       this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD
                    << "\n";
       Valid = false;
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 33ebae3b6e6de..043487d007fd3 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -955,13 +955,14 @@ void RewriteInstance::discoverFileObjects() {
     uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
     uint64_t SymbolAlignment = Symbol.getAlignment();
 
-    auto registerName = [&](uint64_t FinalSize) {
+    auto registerName = [&](uint64_t FinalSize,
+                            BinarySection *Section = nullptr) {
       // Register names even if it's not a function, e.g. for an entry point.
       BC->registerNameAtAddress(UniqueName, SymbolAddress, FinalSize,
-                                SymbolAlignment, SymbolFlags);
+                                SymbolAlignment, SymbolFlags, Section);
       if (!AlternativeName.empty())
         BC->registerNameAtAddress(AlternativeName, SymbolAddress, FinalSize,
-                                  SymbolAlignment, SymbolFlags);
+                                  SymbolAlignment, SymbolFlags, Section);
     };
 
     section_iterator Section =
@@ -986,12 +987,25 @@ void RewriteInstance::discoverFileObjects() {
                       << " for function\n");
 
     if (SymbolAddress == Section->getAddress() + Section->getSize()) {
+      ErrorOr<BinarySection &> SectionOrError =
+          BC->getSectionForAddress(Section->getAddress());
+
+      // Skip symbols from invalid sections.
+      if (!SectionOrError) {
+        BC->errs() << "BOLT-WARNING: " << UniqueName << " (0x"
+                   << Twine::utohexstr(SymbolAddress)
+                   << ") does not have any section\n";
+        continue;
+      }
+
       assert(SymbolSize == 0 &&
              "unexpect non-zero sized symbol at end of section");
-      LLVM_DEBUG(
-          dbgs()
-          << "BOLT-DEBUG: rejecting as symbol points to end of its section\n");
-      registerName(SymbolSize);
+      LLVM_DEBUG({
+        dbgs() << "BOLT-DEBUG: rejecting as symbol " << UniqueName
+               << " points to end of " << SectionOrError->getName()
+               << " section\n";
+      });
+      registerName(SymbolSize, &SectionOrError.get());
       continue;
     }
 
@@ -2614,6 +2628,30 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
     }
   }
 
+  if (Relocation::isGOT(RType) && !Relocation::isTLS(RType)) {
+    auto exitOnGotEndSymbol = [&](StringRef Name) {
+      BC->errs() << "BOLT-ERROR: GOT table contains currently unsupported "
+                    "section end symbol "
+                 << Name << "\n";
+      exit(1);
+    };
+
+    if (SymbolIter != InputFile->symbol_end() && ReferencedSection) {
+      if (cantFail(SymbolIter->getAddress()) ==
+          ReferencedSection->getEndAddress())
+        exitOnGotEndSymbol(cantFail(SymbolIter->getName()));
+    } else {
+      // If no section and symbol are provided by relocation, try to find the
+      // symbol by its name, including the possibility that the symbol is local.
+      BinaryData *BD = BC->getBinaryDataByName(SymbolName);
+      if (!BD && NR.getUniquifiedNameCount(SymbolName) == 1)
+        BD = BC->getBinaryDataByName(NR.getUniqueName(SymbolName, 1));
+
+      if (BD && BD->getAddress() == BD->getSection().getEndAddress())
+        exitOnGotEndSymbol(BD->getName());
+    }
+  }
+
   if (!ReferencedSection)
     ReferencedSection = BC->getSectionForAddress(SymbolAddress);
 
diff --git a/bolt/test/AArch64/Inputs/got_end_of_section_symbol.lld_script b/bolt/test/AArch64/Inputs/got_end_of_section_symbol.lld_script
new file mode 100644
index 0000000000000..2ad4169bbcc60
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/got_end_of_section_symbol.lld_script
@@ -0,0 +1,6 @@
+SECTIONS {
+  PROVIDE (__executable_start = SEGMENT_START("text-segment", 0x400000)); . = SEGMENT_START("text-segment", 0x400000) + SIZEOF_HEADERS;
+  .data : { *(.data) *(.array) }
+  .text : { *(.text) }
+  .got : { *(.got) *(.igot) }
+}
diff --git a/bolt/test/AArch64/got_end_of_section_symbol.s b/bolt/test/AArch64/got_end_of_section_symbol.s
new file mode 100644
index 0000000000000..1f3732c280f7b
--- /dev/null
+++ b/bolt/test/AArch64/got_end_of_section_symbol.s
@@ -0,0 +1,28 @@
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
+# RUN:   %s -o %t.o
+# RUN: %clang %cflags -nostartfiles -nodefaultlibs -static -Wl,--no-relax \
+# RUN:   -Wl,-q -Wl,-T %S/Inputs/got_end_of_section_symbol.lld_script \
+# RUN:   %t.o -o %t.exe
+# RUN: not llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s
+
+# CHECK: BOLT-ERROR: GOT table contains currently unsupported section end
+# CHECK-SAME: symbol array_end
+
+.section .array, "a", @progbits
+.globl array_start
+.globl array_end
+array_start:
+  .word 0
+array_end:
+
+.section .text
+.globl _start
+.type _start, %function
+_start:
+  adrp x1, #:got:array_start
+  ldr x1, [x1, #:got_lo12:array_start]
+  adrp x0, #:got:array_end
+  ldr x0, [x0, #:got_lo12:array_end]
+  adrp x2, #:got:_start
+  ldr x2, [x2, #:got_lo12:_start]
+  ret
diff --git a/bolt/test/X86/section-end-sym.s b/bolt/test/X86/section-end-sym.s
index 545cf37263da5..29ff6e05118aa 100644
--- a/bolt/test/X86/section-end-sym.s
+++ b/bolt/test/X86/section-end-sym.s
@@ -1,7 +1,7 @@
 ## Check that BOLT doesn't consider end-of-section symbols (e.g., _etext) as
 ## functions.
 
-# REQUIRES: x86_64-linux, asserts
+# REQUIRES: system-linux, asserts
 
 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t.o
 # RUN: ld.lld %t.o -o %t.exe -q
@@ -9,7 +9,7 @@
 # RUN:   | FileCheck %s
 
 # CHECK: considering symbol etext for function
-# CHECK-NEXT: rejecting as symbol points to end of its section
+# CHECK-NEXT: rejecting as symbol etext points to end of .text section
 # CHECK-NOT: Binary Function "etext{{.*}}" after building cfg
 
 