From fb24028754e838127c0a08ec1cc7cd1340e5c7df Mon Sep 17 00:00:00 2001 From: xupengying Date: Wed, 18 Dec 2024 11:44:48 +0800 Subject: [PATCH] feat: move the BPSectionOrder from MachO to Common for reuse in ELF later --- lld/Common/BPSectionOrdererBase.cpp | 374 ++++++++++++++++ lld/Common/CMakeLists.txt | 2 + lld/MachO/BPSectionOrderer.cpp | 416 +----------------- lld/MachO/BPSectionOrderer.h | 125 +++++- lld/MachO/CMakeLists.txt | 1 - lld/include/lld/Common/BPSectionOrdererBase.h | 80 ++++ 6 files changed, 595 insertions(+), 403 deletions(-) create mode 100644 lld/Common/BPSectionOrdererBase.cpp create mode 100644 lld/include/lld/Common/BPSectionOrdererBase.h diff --git a/lld/Common/BPSectionOrdererBase.cpp b/lld/Common/BPSectionOrdererBase.cpp new file mode 100644 index 0000000000000..7c5874f0d0413 --- /dev/null +++ b/lld/Common/BPSectionOrdererBase.cpp @@ -0,0 +1,374 @@ +//===- BPSectionOrdererBase.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lld/Common/BPSectionOrdererBase.h" +#include "lld/Common/ErrorHandler.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ProfileData/InstrProfReader.h" +#include "llvm/Support/BalancedPartitioning.h" +#include "llvm/Support/TimeProfiler.h" +#include "llvm/Support/VirtualFileSystem.h" + +#define DEBUG_TYPE "bp-section-orderer" + +using namespace llvm; +using namespace lld; + +using UtilityNodes = SmallVector; + +static SmallVector> getUnsForCompression( + ArrayRef sections, + const DenseMap §ionToIdx, + ArrayRef sectionIdxs, + DenseMap> *duplicateSectionIdxs, + BPFunctionNode::UtilityNodeT &maxUN) { + TimeTraceScope timeScope("Build nodes for compression"); + + SmallVector>> sectionHashes; + sectionHashes.reserve(sectionIdxs.size()); + SmallVector hashes; + + for (unsigned sectionIdx : sectionIdxs) { + const auto *isec = sections[sectionIdx]; + isec->getSectionHashes(hashes, sectionToIdx); + sectionHashes.emplace_back(sectionIdx, std::move(hashes)); + hashes.clear(); + } + + DenseMap hashFrequency; + for (auto &[sectionIdx, hashes] : sectionHashes) + for (auto hash : hashes) + ++hashFrequency[hash]; + + if (duplicateSectionIdxs) { + // Merge sections that are nearly identical + SmallVector>> newSectionHashes; + DenseMap wholeHashToSectionIdx; + for (auto &[sectionIdx, hashes] : sectionHashes) { + uint64_t wholeHash = 0; + for (auto hash : hashes) + if (hashFrequency[hash] > 5) + wholeHash ^= hash; + auto [it, wasInserted] = + wholeHashToSectionIdx.insert(std::make_pair(wholeHash, sectionIdx)); + if (wasInserted) { + newSectionHashes.emplace_back(sectionIdx, hashes); + } else { + (*duplicateSectionIdxs)[it->getSecond()].push_back(sectionIdx); + } + } + sectionHashes = newSectionHashes; + + // Recompute hash 
frequencies + hashFrequency.clear(); + for (auto &[sectionIdx, hashes] : sectionHashes) + for (auto hash : hashes) + ++hashFrequency[hash]; + } + + // Filter rare and common hashes and assign each a unique utility node that + // doesn't conflict with the trace utility nodes + DenseMap hashToUN; + for (auto &[hash, frequency] : hashFrequency) { + if (frequency <= 1 || frequency * 2 > sectionHashes.size()) + continue; + hashToUN[hash] = ++maxUN; + } + + SmallVector> sectionUns; + for (auto &[sectionIdx, hashes] : sectionHashes) { + UtilityNodes uns; + for (auto &hash : hashes) { + auto it = hashToUN.find(hash); + if (it != hashToUN.end()) + uns.push_back(it->second); + } + sectionUns.emplace_back(sectionIdx, uns); + } + return sectionUns; +} + +llvm::DenseMap +BPSectionBase::reorderSectionsByBalancedPartitioning( + size_t &highestAvailablePriority, llvm::StringRef profilePath, + bool forFunctionCompression, bool forDataCompression, + bool compressionSortStartupFunctions, bool verbose, + SmallVector> &inputSections) { + TimeTraceScope timeScope("Setup Balanced Partitioning"); + SmallVector sections; + DenseMap sectionToIdx; + StringMap> symbolToSectionIdxs; + + // Process input sections + for (const auto &isec : inputSections) { + if (!isec->hasValidData()) + continue; + + unsigned sectionIdx = sections.size(); + sectionToIdx.try_emplace(isec->getSection(), sectionIdx); + sections.emplace_back(isec.get()); + for (auto &sym : isec->getSymbols()) + symbolToSectionIdxs[sym->getName()].insert(sectionIdx); + } + StringMap> rootSymbolToSectionIdxs; + for (auto &entry : symbolToSectionIdxs) { + StringRef name = entry.getKey(); + auto §ionIdxs = entry.getValue(); + name = BPSectionBase::getRootSymbol(name); + rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(), + sectionIdxs.end()); + if (auto resolvedLinkageName = + sections[*sectionIdxs.begin()]->getResolvedLinkageName(name)) + rootSymbolToSectionIdxs[resolvedLinkageName.value()].insert( + sectionIdxs.begin(), 
sectionIdxs.end()); + } + + BPFunctionNode::UtilityNodeT maxUN = 0; + DenseMap startupSectionIdxUNs; + // Used to define the initial order for startup functions. + DenseMap sectionIdxToTimestamp; + std::unique_ptr reader; + if (!profilePath.empty()) { + auto fs = vfs::getRealFileSystem(); + auto readerOrErr = InstrProfReader::create(profilePath, *fs); + lld::checkError(readerOrErr.takeError()); + + reader = std::move(readerOrErr.get()); + for (auto &entry : *reader) { + // Read all entries + (void)entry; + } + auto &traces = reader->getTemporalProfTraces(); + + DenseMap sectionIdxToFirstUN; + for (size_t traceIdx = 0; traceIdx < traces.size(); traceIdx++) { + uint64_t currentSize = 0, cutoffSize = 1; + size_t cutoffTimestamp = 1; + auto &trace = traces[traceIdx].FunctionNameRefs; + for (size_t timestamp = 0; timestamp < trace.size(); timestamp++) { + auto [Filename, ParsedFuncName] = getParsedIRPGOName( + reader->getSymtab().getFuncOrVarName(trace[timestamp])); + ParsedFuncName = BPSectionBase::getRootSymbol(ParsedFuncName); + + auto sectionIdxsIt = rootSymbolToSectionIdxs.find(ParsedFuncName); + if (sectionIdxsIt == rootSymbolToSectionIdxs.end()) + continue; + auto §ionIdxs = sectionIdxsIt->getValue(); + // If the same symbol is found in multiple sections, they might be + // identical, so we arbitrarily use the size from the first section. + currentSize += sections[*sectionIdxs.begin()]->getSize(); + + // Since BalancedPartitioning is sensitive to the initial order, we need + // to explicitly define it to be ordered by earliest timestamp. 
+ for (unsigned sectionIdx : sectionIdxs) { + auto [it, wasInserted] = + sectionIdxToTimestamp.try_emplace(sectionIdx, timestamp); + if (!wasInserted) + it->getSecond() = std::min(it->getSecond(), timestamp); + } + + if (timestamp >= cutoffTimestamp || currentSize >= cutoffSize) { + ++maxUN; + cutoffSize = 2 * currentSize; + cutoffTimestamp = 2 * cutoffTimestamp; + } + for (unsigned sectionIdx : sectionIdxs) + sectionIdxToFirstUN.try_emplace(sectionIdx, maxUN); + } + for (auto &[sectionIdx, firstUN] : sectionIdxToFirstUN) + for (auto un = firstUN; un <= maxUN; ++un) + startupSectionIdxUNs[sectionIdx].push_back(un); + ++maxUN; + sectionIdxToFirstUN.clear(); + } + } + + SmallVector sectionIdxsForFunctionCompression, + sectionIdxsForDataCompression; + for (unsigned sectionIdx = 0; sectionIdx < sections.size(); sectionIdx++) { + if (startupSectionIdxUNs.count(sectionIdx)) + continue; + const auto *isec = sections[sectionIdx]; + if (isec->isCodeSection()) { + if (forFunctionCompression) + sectionIdxsForFunctionCompression.push_back(sectionIdx); + } else { + if (forDataCompression) + sectionIdxsForDataCompression.push_back(sectionIdx); + } + } + + if (compressionSortStartupFunctions) { + SmallVector startupIdxs; + for (auto &[sectionIdx, uns] : startupSectionIdxUNs) + startupIdxs.push_back(sectionIdx); + auto unsForStartupFunctionCompression = + getUnsForCompression(sections, sectionToIdx, startupIdxs, + /*duplicateSectionIdxs=*/nullptr, maxUN); + for (auto &[sectionIdx, compressionUns] : + unsForStartupFunctionCompression) { + auto &uns = startupSectionIdxUNs[sectionIdx]; + uns.append(compressionUns); + llvm::sort(uns); + uns.erase(std::unique(uns.begin(), uns.end()), uns.end()); + } + } + + // Map a section index (order directly) to a list of duplicate section indices + // (not ordered directly). 
+ DenseMap> duplicateSectionIdxs; + auto unsForFunctionCompression = getUnsForCompression( + sections, sectionToIdx, sectionIdxsForFunctionCompression, + &duplicateSectionIdxs, maxUN); + auto unsForDataCompression = getUnsForCompression( + sections, sectionToIdx, sectionIdxsForDataCompression, + &duplicateSectionIdxs, maxUN); + + std::vector nodesForStartup, nodesForFunctionCompression, + nodesForDataCompression; + for (auto &[sectionIdx, uns] : startupSectionIdxUNs) + nodesForStartup.emplace_back(sectionIdx, uns); + for (auto &[sectionIdx, uns] : unsForFunctionCompression) + nodesForFunctionCompression.emplace_back(sectionIdx, uns); + for (auto &[sectionIdx, uns] : unsForDataCompression) + nodesForDataCompression.emplace_back(sectionIdx, uns); + + // Use the first timestamp to define the initial order for startup nodes. + llvm::sort(nodesForStartup, [§ionIdxToTimestamp](auto &L, auto &R) { + return std::make_pair(sectionIdxToTimestamp[L.Id], L.Id) < + std::make_pair(sectionIdxToTimestamp[R.Id], R.Id); + }); + // Sort compression nodes by their Id (which is the section index) because the + // input linker order tends to be not bad. 
+ llvm::sort(nodesForFunctionCompression, + [](auto &L, auto &R) { return L.Id < R.Id; }); + llvm::sort(nodesForDataCompression, + [](auto &L, auto &R) { return L.Id < R.Id; }); + + { + TimeTraceScope timeScope("Balanced Partitioning"); + BalancedPartitioningConfig config; + BalancedPartitioning bp(config); + bp.run(nodesForStartup); + bp.run(nodesForFunctionCompression); + bp.run(nodesForDataCompression); + } + + unsigned numStartupSections = 0; + unsigned numCodeCompressionSections = 0; + unsigned numDuplicateCodeSections = 0; + unsigned numDataCompressionSections = 0; + unsigned numDuplicateDataSections = 0; + SetVector orderedSections; + // Order startup functions, + for (auto &node : nodesForStartup) { + const auto *isec = sections[node.Id]; + if (orderedSections.insert(isec)) + ++numStartupSections; + } + // then functions for compression, + for (auto &node : nodesForFunctionCompression) { + const auto *isec = sections[node.Id]; + if (orderedSections.insert(isec)) + ++numCodeCompressionSections; + + auto It = duplicateSectionIdxs.find(node.Id); + if (It == duplicateSectionIdxs.end()) + continue; + for (auto dupSecIdx : It->getSecond()) { + const auto *dupIsec = sections[dupSecIdx]; + if (orderedSections.insert(dupIsec)) + ++numDuplicateCodeSections; + } + } + // then data for compression. 
+ for (auto &node : nodesForDataCompression) { + const auto *isec = sections[node.Id]; + if (orderedSections.insert(isec)) + ++numDataCompressionSections; + auto It = duplicateSectionIdxs.find(node.Id); + if (It == duplicateSectionIdxs.end()) + continue; + for (auto dupSecIdx : It->getSecond()) { + const auto *dupIsec = sections[dupSecIdx]; + if (orderedSections.insert(dupIsec)) + ++numDuplicateDataSections; + } + } + + if (verbose) { + unsigned numTotalOrderedSections = + numStartupSections + numCodeCompressionSections + + numDuplicateCodeSections + numDataCompressionSections + + numDuplicateDataSections; + dbgs() + << "Ordered " << numTotalOrderedSections + << " sections using balanced partitioning:\n Functions for startup: " + << numStartupSections + << "\n Functions for compression: " << numCodeCompressionSections + << "\n Duplicate functions: " << numDuplicateCodeSections + << "\n Data for compression: " << numDataCompressionSections + << "\n Duplicate data: " << numDuplicateDataSections << "\n"; + + if (!profilePath.empty()) { + // Evaluate this function order for startup + StringMap> symbolToPageNumbers; + const uint64_t pageSize = (1 << 14); + uint64_t currentAddress = 0; + for (const auto *isec : orderedSections) { + for (auto &sym : isec->getSymbols()) { + uint64_t startAddress = currentAddress + sym->getValue().value_or(0); + uint64_t endAddress = startAddress + sym->getSize().value_or(0); + uint64_t firstPage = startAddress / pageSize; + // I think the kernel might pull in a few pages when one it touched, + // so it might be more accurate to force lastPage to be aligned by + // 4? 
+ uint64_t lastPage = endAddress / pageSize; + StringRef rootSymbol = sym->getName(); + rootSymbol = BPSectionBase::getRootSymbol(rootSymbol); + symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage); + if (auto resolvedLinkageName = + isec->getResolvedLinkageName(rootSymbol)) + symbolToPageNumbers.try_emplace(resolvedLinkageName.value(), + firstPage, lastPage); + } + currentAddress += isec->getSize(); + } + + // The area under the curve F where F(t) is the total number of page + // faults at step t. + unsigned area = 0; + for (auto &trace : reader->getTemporalProfTraces()) { + SmallSet touchedPages; + for (unsigned step = 0; step < trace.FunctionNameRefs.size(); step++) { + auto traceId = trace.FunctionNameRefs[step]; + auto [Filename, ParsedFuncName] = + getParsedIRPGOName(reader->getSymtab().getFuncOrVarName(traceId)); + ParsedFuncName = BPSectionBase::getRootSymbol(ParsedFuncName); + auto it = symbolToPageNumbers.find(ParsedFuncName); + if (it != symbolToPageNumbers.end()) { + auto &[firstPage, lastPage] = it->getValue(); + for (uint64_t i = firstPage; i <= lastPage; i++) + touchedPages.insert(i); + } + area += touchedPages.size(); + } + } + dbgs() << "Total area under the page fault curve: " << (float)area + << "\n"; + } + } + + DenseMap sectionPriorities; + for (const auto *isec : orderedSections) + sectionPriorities[isec] = --highestAvailablePriority; + return sectionPriorities; +} \ No newline at end of file diff --git a/lld/Common/CMakeLists.txt b/lld/Common/CMakeLists.txt index 4f503d04f7844..43e91b85821db 100644 --- a/lld/Common/CMakeLists.txt +++ b/lld/Common/CMakeLists.txt @@ -24,6 +24,7 @@ set_source_files_properties("${version_inc}" add_lld_library(lldCommon Args.cpp + BPSectionOrdererBase.cpp CommonLinkerContext.cpp DriverDispatcher.cpp DWARF.cpp @@ -47,6 +48,7 @@ add_lld_library(lldCommon Demangle MC Option + ProfileData Support Target TargetParser diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp index 
5db2242a35ef2..0ffbf16007fda 100644 --- a/lld/MachO/BPSectionOrderer.cpp +++ b/lld/MachO/BPSectionOrderer.cpp @@ -1,4 +1,4 @@ -//===- BPSectionOrderer.cpp--------------------------------------*- C++ -*-===// +//===- BPSectionOrderer.cpp -----------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -8,428 +8,42 @@ #include "BPSectionOrderer.h" #include "InputSection.h" -#include "lld/Common/ErrorHandler.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ProfileData/InstrProfReader.h" -#include "llvm/Support/BalancedPartitioning.h" -#include "llvm/Support/TimeProfiler.h" -#include "llvm/Support/VirtualFileSystem.h" -#include "llvm/Support/xxhash.h" #define DEBUG_TYPE "bp-section-orderer" + using namespace llvm; using namespace lld::macho; -using UtilityNodes = SmallVector; - -/// Symbols can be appended with "(.__uniq.xxxx)?.llvm.yyyy" where "xxxx" and -/// "yyyy" are numbers that could change between builds. We need to use the root -/// symbol name before this suffix so these symbols can be matched with profiles -/// which may have different suffixes. 
-static StringRef getRootSymbol(StringRef Name) { - auto [P0, S0] = Name.rsplit(".llvm."); - auto [P1, S1] = P0.rsplit(".__uniq."); - return P1; -} - -static uint64_t getRelocHash(StringRef kind, uint64_t sectionIdx, - uint64_t offset, uint64_t addend) { - return xxHash64((kind + ": " + Twine::utohexstr(sectionIdx) + " + " + - Twine::utohexstr(offset) + " + " + Twine::utohexstr(addend)) - .str()); -} - -static uint64_t -getRelocHash(const Reloc &reloc, - const DenseMap §ionToIdx) { - auto *isec = reloc.getReferentInputSection(); - std::optional sectionIdx; - auto sectionIdxIt = sectionToIdx.find(isec); - if (sectionIdxIt != sectionToIdx.end()) - sectionIdx = sectionIdxIt->getSecond(); - std::string kind; - if (isec) - kind = ("Section " + Twine(static_cast(isec->kind()))).str(); - if (auto *sym = reloc.referent.dyn_cast()) { - kind += (" Symbol " + Twine(static_cast(sym->kind()))).str(); - if (auto *d = dyn_cast(sym)) - return getRelocHash(kind, sectionIdx.value_or(0), d->value, reloc.addend); - } - return getRelocHash(kind, sectionIdx.value_or(0), 0, reloc.addend); -} - -/// Given \p sectionIdxs, a list of section indexes, return a list of utility -/// nodes for each section index. If \p duplicateSectionIdx is provided, -/// populate it with nearly identical sections. Increment \p maxUN to be the -/// largest utility node we have used so far. 
-static SmallVector> getUnsForCompression( - ArrayRef sections, - const DenseMap §ionToIdx, - ArrayRef sectionIdxs, - DenseMap> *duplicateSectionIdxs, - BPFunctionNode::UtilityNodeT &maxUN) { - TimeTraceScope timeScope("Build nodes for compression"); - - SmallVector>> sectionHashes; - sectionHashes.reserve(sectionIdxs.size()); - SmallVector hashes; - for (unsigned sectionIdx : sectionIdxs) { - const auto *isec = sections[sectionIdx]; - constexpr unsigned windowSize = 4; - - for (size_t i = 0; i < isec->data.size(); i++) { - auto window = isec->data.drop_front(i).take_front(windowSize); - hashes.push_back(xxHash64(window)); - } - for (const auto &r : isec->relocs) { - if (r.length == 0 || r.referent.isNull() || r.offset >= isec->data.size()) - continue; - uint64_t relocHash = getRelocHash(r, sectionToIdx); - uint32_t start = (r.offset < windowSize) ? 0 : r.offset - windowSize + 1; - for (uint32_t i = start; i < r.offset + r.length; i++) { - auto window = isec->data.drop_front(i).take_front(windowSize); - hashes.push_back(xxHash64(window) + relocHash); - } - } - - llvm::sort(hashes); - hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end()); - - sectionHashes.emplace_back(sectionIdx, hashes); - hashes.clear(); - } - - DenseMap hashFrequency; - for (auto &[sectionIdx, hashes] : sectionHashes) - for (auto hash : hashes) - ++hashFrequency[hash]; - - if (duplicateSectionIdxs) { - // Merge section that are nearly identical - SmallVector>> newSectionHashes; - DenseMap wholeHashToSectionIdx; - for (auto &[sectionIdx, hashes] : sectionHashes) { - uint64_t wholeHash = 0; - for (auto hash : hashes) - if (hashFrequency[hash] > 5) - wholeHash ^= hash; - auto [it, wasInserted] = - wholeHashToSectionIdx.insert(std::make_pair(wholeHash, sectionIdx)); - if (wasInserted) { - newSectionHashes.emplace_back(sectionIdx, hashes); - } else { - (*duplicateSectionIdxs)[it->getSecond()].push_back(sectionIdx); - } - } - sectionHashes = newSectionHashes; - - // Recompute hash 
frequencies - hashFrequency.clear(); - for (auto &[sectionIdx, hashes] : sectionHashes) - for (auto hash : hashes) - ++hashFrequency[hash]; - } - - // Filter rare and common hashes and assign each a unique utility node that - // doesn't conflict with the trace utility nodes - DenseMap hashToUN; - for (auto &[hash, frequency] : hashFrequency) { - if (frequency <= 1 || frequency * 2 > sectionHashes.size()) - continue; - hashToUN[hash] = ++maxUN; - } - - SmallVector> sectionUns; - for (auto &[sectionIdx, hashes] : sectionHashes) { - UtilityNodes uns; - for (auto &hash : hashes) { - auto it = hashToUN.find(hash); - if (it != hashToUN.end()) - uns.push_back(it->second); - } - sectionUns.emplace_back(sectionIdx, uns); - } - return sectionUns; -} - DenseMap lld::macho::runBalancedPartitioning( size_t &highestAvailablePriority, StringRef profilePath, bool forFunctionCompression, bool forDataCompression, bool compressionSortStartupFunctions, bool verbose) { - SmallVector sections; - DenseMap sectionToIdx; - StringMap> symbolToSectionIdxs; + SmallVector> sections; for (const auto *file : inputFiles) { for (auto *sec : file->sections) { for (auto &subsec : sec->subsections) { auto *isec = subsec.isec; if (!isec || isec->data.empty() || !isec->data.data()) continue; - unsigned sectionIdx = sections.size(); - sectionToIdx.try_emplace(isec, sectionIdx); - sections.push_back(isec); - for (Symbol *sym : isec->symbols) - if (auto *d = dyn_cast_or_null(sym)) - symbolToSectionIdxs[d->getName()].insert(sectionIdx); - } - } - } - - StringMap> rootSymbolToSectionIdxs; - for (auto &entry : symbolToSectionIdxs) { - StringRef name = entry.getKey(); - auto §ionIdxs = entry.getValue(); - name = getRootSymbol(name); - rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(), - sectionIdxs.end()); - // Linkage names can be prefixed with "_" or "l_" on Mach-O. See - // Mangler::getNameWithPrefix() for details. 
- if (name.consume_front("_") || name.consume_front("l_")) - rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(), - sectionIdxs.end()); - } - - BPFunctionNode::UtilityNodeT maxUN = 0; - DenseMap startupSectionIdxUNs; - // Used to define the initial order for startup functions. - DenseMap sectionIdxToTimestamp; - std::unique_ptr reader; - if (!profilePath.empty()) { - auto fs = vfs::getRealFileSystem(); - auto readerOrErr = InstrProfReader::create(profilePath, *fs); - lld::checkError(readerOrErr.takeError()); - - reader = std::move(readerOrErr.get()); - for (auto &entry : *reader) { - // Read all entries - (void)entry; - } - auto &traces = reader->getTemporalProfTraces(); - - DenseMap sectionIdxToFirstUN; - for (size_t traceIdx = 0; traceIdx < traces.size(); traceIdx++) { - uint64_t currentSize = 0, cutoffSize = 1; - size_t cutoffTimestamp = 1; - auto &trace = traces[traceIdx].FunctionNameRefs; - for (size_t timestamp = 0; timestamp < trace.size(); timestamp++) { - auto [Filename, ParsedFuncName] = getParsedIRPGOName( - reader->getSymtab().getFuncOrVarName(trace[timestamp])); - ParsedFuncName = getRootSymbol(ParsedFuncName); - - auto sectionIdxsIt = rootSymbolToSectionIdxs.find(ParsedFuncName); - if (sectionIdxsIt == rootSymbolToSectionIdxs.end()) - continue; - auto §ionIdxs = sectionIdxsIt->getValue(); - // If the same symbol is found in multiple sections, they might be - // identical, so we arbitrarily use the size from the first section. - currentSize += sections[*sectionIdxs.begin()]->getSize(); - - // Since BalancedPartitioning is sensitive to the initial order, we need - // to explicitly define it to be ordered by earliest timestamp. 
- for (unsigned sectionIdx : sectionIdxs) { - auto [it, wasInserted] = - sectionIdxToTimestamp.try_emplace(sectionIdx, timestamp); - if (!wasInserted) - it->getSecond() = std::min(it->getSecond(), timestamp); - } - - if (timestamp >= cutoffTimestamp || currentSize >= cutoffSize) { - ++maxUN; - cutoffSize = 2 * currentSize; - cutoffTimestamp = 2 * cutoffTimestamp; - } - for (unsigned sectionIdx : sectionIdxs) - sectionIdxToFirstUN.try_emplace(sectionIdx, maxUN); + sections.emplace_back( + std::make_unique(isec, sections.size())); } - for (auto &[sectionIdx, firstUN] : sectionIdxToFirstUN) - for (auto un = firstUN; un <= maxUN; ++un) - startupSectionIdxUNs[sectionIdx].push_back(un); - ++maxUN; - sectionIdxToFirstUN.clear(); } } - SmallVector sectionIdxsForFunctionCompression, - sectionIdxsForDataCompression; - for (unsigned sectionIdx = 0; sectionIdx < sections.size(); sectionIdx++) { - if (startupSectionIdxUNs.count(sectionIdx)) - continue; - const auto *isec = sections[sectionIdx]; - if (isCodeSection(isec)) { - if (forFunctionCompression) - sectionIdxsForFunctionCompression.push_back(sectionIdx); - } else { - if (forDataCompression) - sectionIdxsForDataCompression.push_back(sectionIdx); - } - } + auto reorderedSections = BPSectionBase::reorderSectionsByBalancedPartitioning( + highestAvailablePriority, profilePath, forFunctionCompression, + forDataCompression, compressionSortStartupFunctions, verbose, sections); - if (compressionSortStartupFunctions) { - SmallVector startupIdxs; - for (auto &[sectionIdx, uns] : startupSectionIdxUNs) - startupIdxs.push_back(sectionIdx); - auto unsForStartupFunctionCompression = - getUnsForCompression(sections, sectionToIdx, startupIdxs, - /*duplicateSectionIdxs=*/nullptr, maxUN); - for (auto &[sectionIdx, compressionUns] : - unsForStartupFunctionCompression) { - auto &uns = startupSectionIdxUNs[sectionIdx]; - uns.append(compressionUns); - llvm::sort(uns); - uns.erase(std::unique(uns.begin(), uns.end()), uns.end()); + DenseMap 
result; + for (const auto &[sec, priority] : reorderedSections) { + if (auto *machoSection = dyn_cast(sec)) { + result.try_emplace( + static_cast(machoSection->getSection()), + priority); } } - - // Map a section index (order directly) to a list of duplicate section indices - // (not ordered directly). - DenseMap> duplicateSectionIdxs; - auto unsForFunctionCompression = getUnsForCompression( - sections, sectionToIdx, sectionIdxsForFunctionCompression, - &duplicateSectionIdxs, maxUN); - auto unsForDataCompression = getUnsForCompression( - sections, sectionToIdx, sectionIdxsForDataCompression, - &duplicateSectionIdxs, maxUN); - - std::vector nodesForStartup, nodesForFunctionCompression, - nodesForDataCompression; - for (auto &[sectionIdx, uns] : startupSectionIdxUNs) - nodesForStartup.emplace_back(sectionIdx, uns); - for (auto &[sectionIdx, uns] : unsForFunctionCompression) - nodesForFunctionCompression.emplace_back(sectionIdx, uns); - for (auto &[sectionIdx, uns] : unsForDataCompression) - nodesForDataCompression.emplace_back(sectionIdx, uns); - - // Use the first timestamp to define the initial order for startup nodes. - llvm::sort(nodesForStartup, [§ionIdxToTimestamp](auto &L, auto &R) { - return std::make_pair(sectionIdxToTimestamp[L.Id], L.Id) < - std::make_pair(sectionIdxToTimestamp[R.Id], R.Id); - }); - // Sort compression nodes by their Id (which is the section index) because the - // input linker order tends to be not bad. 
- llvm::sort(nodesForFunctionCompression, - [](auto &L, auto &R) { return L.Id < R.Id; }); - llvm::sort(nodesForDataCompression, - [](auto &L, auto &R) { return L.Id < R.Id; }); - - { - TimeTraceScope timeScope("Balanced Partitioning"); - BalancedPartitioningConfig config; - BalancedPartitioning bp(config); - bp.run(nodesForStartup); - bp.run(nodesForFunctionCompression); - bp.run(nodesForDataCompression); - } - - unsigned numStartupSections = 0; - unsigned numCodeCompressionSections = 0; - unsigned numDuplicateCodeSections = 0; - unsigned numDataCompressionSections = 0; - unsigned numDuplicateDataSections = 0; - SetVector orderedSections; - // Order startup functions, - for (auto &node : nodesForStartup) { - const auto *isec = sections[node.Id]; - if (orderedSections.insert(isec)) - ++numStartupSections; - } - // then functions for compression, - for (auto &node : nodesForFunctionCompression) { - const auto *isec = sections[node.Id]; - if (orderedSections.insert(isec)) - ++numCodeCompressionSections; - - auto It = duplicateSectionIdxs.find(node.Id); - if (It == duplicateSectionIdxs.end()) - continue; - for (auto dupSecIdx : It->getSecond()) { - const auto *dupIsec = sections[dupSecIdx]; - if (orderedSections.insert(dupIsec)) - ++numDuplicateCodeSections; - } - } - // then data for compression. 
- for (auto &node : nodesForDataCompression) { - const auto *isec = sections[node.Id]; - if (orderedSections.insert(isec)) - ++numDataCompressionSections; - auto It = duplicateSectionIdxs.find(node.Id); - if (It == duplicateSectionIdxs.end()) - continue; - for (auto dupSecIdx : It->getSecond()) { - const auto *dupIsec = sections[dupSecIdx]; - if (orderedSections.insert(dupIsec)) - ++numDuplicateDataSections; - } - } - - if (verbose) { - unsigned numTotalOrderedSections = - numStartupSections + numCodeCompressionSections + - numDuplicateCodeSections + numDataCompressionSections + - numDuplicateDataSections; - dbgs() - << "Ordered " << numTotalOrderedSections - << " sections using balanced partitioning:\n Functions for startup: " - << numStartupSections - << "\n Functions for compression: " << numCodeCompressionSections - << "\n Duplicate functions: " << numDuplicateCodeSections - << "\n Data for compression: " << numDataCompressionSections - << "\n Duplicate data: " << numDuplicateDataSections << "\n"; - - if (!profilePath.empty()) { - // Evaluate this function order for startup - StringMap> symbolToPageNumbers; - const uint64_t pageSize = (1 << 14); - uint64_t currentAddress = 0; - for (const auto *isec : orderedSections) { - for (Symbol *sym : isec->symbols) { - if (auto *d = dyn_cast_or_null(sym)) { - uint64_t startAddress = currentAddress + d->value; - uint64_t endAddress = startAddress + d->size; - uint64_t firstPage = startAddress / pageSize; - // I think the kernel might pull in a few pages when one it touched, - // so it might be more accurate to force lastPage to be aligned by - // 4? 
- uint64_t lastPage = endAddress / pageSize; - StringRef rootSymbol = d->getName(); - rootSymbol = getRootSymbol(rootSymbol); - symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage); - if (rootSymbol.consume_front("_") || rootSymbol.consume_front("l_")) - symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage); - } - } - - currentAddress += isec->getSize(); - } - - // The area under the curve F where F(t) is the total number of page - // faults at step t. - unsigned area = 0; - for (auto &trace : reader->getTemporalProfTraces()) { - SmallSet touchedPages; - for (unsigned step = 0; step < trace.FunctionNameRefs.size(); step++) { - auto traceId = trace.FunctionNameRefs[step]; - auto [Filename, ParsedFuncName] = - getParsedIRPGOName(reader->getSymtab().getFuncOrVarName(traceId)); - ParsedFuncName = getRootSymbol(ParsedFuncName); - auto it = symbolToPageNumbers.find(ParsedFuncName); - if (it != symbolToPageNumbers.end()) { - auto &[firstPage, lastPage] = it->getValue(); - for (uint64_t i = firstPage; i <= lastPage; i++) - touchedPages.insert(i); - } - area += touchedPages.size(); - } - } - dbgs() << "Total area under the page fault curve: " << (float)area - << "\n"; - } - } - - DenseMap sectionPriorities; - for (const auto *isec : orderedSections) - sectionPriorities[isec] = --highestAvailablePriority; - return sectionPriorities; + return result; } diff --git a/lld/MachO/BPSectionOrderer.h b/lld/MachO/BPSectionOrderer.h index cefd0ceb10e56..29b20c781c6b0 100644 --- a/lld/MachO/BPSectionOrderer.h +++ b/lld/MachO/BPSectionOrderer.h @@ -1,4 +1,4 @@ -//===- BPSectionOrderer.h ---------------------------------------*- C++ -*-===// +//===- BPSectionOrderer.h -------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -14,6 +14,10 @@
 #ifndef LLD_MACHO_BPSECTION_ORDERER_H
 #define LLD_MACHO_BPSECTION_ORDERER_H
 
+#include "InputSection.h"
+#include "Relocations.h"
+#include "Symbols.h"
+#include "lld/Common/BPSectionOrdererBase.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringRef.h"
 
@@ -21,6 +25,150 @@ namespace lld::macho {
 
 class InputSection;
 
+/// Mach-O adapter that exposes a linker `Symbol` through the `BPSymbol`
+/// interface used by the common section orderer.
+class BPSymbolMacho : public BPSymbol {
+  const Symbol *sym;
+
+public:
+  explicit BPSymbolMacho(const Symbol *s) : sym(s) {}
+
+  // Dereferences `sym` unconditionally, unlike asDefined() which accepts null.
+  llvm::StringRef getName() const override { return sym->getName(); }
+
+  /// Returns the wrapped symbol as a `Defined`, or nullptr if it is null or of
+  /// another kind.
+  const Defined *asDefined() const {
+    return llvm::dyn_cast_or_null<Defined>(sym);
+  }
+
+  /// Returns `Defined::value`, or std::nullopt if the symbol is not `Defined`.
+  std::optional<uint64_t> getValue() const override {
+    if (auto *d = asDefined())
+      return d->value;
+    return {};
+  }
+
+  /// Returns `Defined::size`, or std::nullopt if the symbol is not `Defined`.
+  std::optional<uint64_t> getSize() const override {
+    if (auto *d = asDefined())
+      return d->size;
+    return {};
+  }
+
+  const Symbol *getSymbol() const { return sym; }
+};
+
+/// Mach-O adapter that exposes an `InputSection` through the `BPSectionBase`
+/// interface used by the common section orderer.
+class BPSectionMacho : public BPSectionBase {
+  const InputSection *isec;
+  uint64_t sectionIdx;
+
+public:
+  explicit BPSectionMacho(const InputSection *sec, uint64_t sectionIdx)
+      : isec(sec), sectionIdx(sectionIdx) {}
+
+  const void *getSection() const override { return isec; }
+
+  uint64_t getSize() const override { return isec->getSize(); }
+
+  uint64_t getSectionIdx() const { return sectionIdx; }
+
+  bool isCodeSection() const override { return macho::isCodeSection(isec); }
+
+  bool hasValidData() const override {
+    return isec && !isec->data.empty() && isec->data.data();
+  }
+
+  /// Returns one `BPSymbolMacho` per `Defined` symbol of the section; null and
+  /// non-`Defined` entries are skipped.
+  SmallVector<std::unique_ptr<BPSymbol>> getSymbols() const override {
+    SmallVector<std::unique_ptr<BPSymbol>> symbols;
+    for (auto *sym : isec->symbols)
+      if (auto *d = llvm::dyn_cast_or_null<Defined>(sym))
+        symbols.emplace_back(std::make_unique<BPSymbolMacho>(d));
+    return symbols;
+  }
+
+  // Linkage names can be prefixed with "_" or "l_" on Mach-O. See
+  // Mangler::getNameWithPrefix() for details.
+  // Returns the name without that prefix, or std::nullopt if it has neither.
+  std::optional<llvm::StringRef>
+  getResolvedLinkageName(llvm::StringRef name) const override {
+    if (name.consume_front("_") || name.consume_front("l_"))
+      return name;
+    return {};
+  }
+
+  /// Appends the section's content and relocation hashes to `hashes`, then
+  /// sorts `hashes` and removes duplicates.
+  void getSectionHashes(llvm::SmallVectorImpl<uint64_t> &hashes,
+                        const llvm::DenseMap<const void *, uint64_t>
+                            &sectionToIdx) const override {
+    constexpr unsigned windowSize = 4;
+
+    // Calculate content hashes: one hash per window of up to `windowSize`
+    // bytes starting at every offset (windows near the end are shorter).
+    size_t dataSize = isec->data.size();
+    for (size_t i = 0; i < dataSize; i++) {
+      auto window = isec->data.drop_front(i).take_front(windowSize);
+      hashes.push_back(xxHash64(window));
+    }
+
+    // Calculate relocation hashes
+    for (const auto &r : isec->relocs) {
+      if (r.length == 0 || r.referent.isNull() || r.offset >= dataSize)
+        continue;
+
+      uint64_t relocHash = getRelocHash(r, sectionToIdx);
+      size_t start = (r.offset < windowSize) ? 0 : r.offset - windowSize + 1;
+      // Never step past the end of the data: ArrayRef::drop_front(N) requires
+      // N <= size().
+      size_t end =
+          std::min<size_t>(static_cast<size_t>(r.offset) + r.length, dataSize);
+      for (size_t i = start; i < end; i++) {
+        auto window = isec->data.drop_front(i).take_front(windowSize);
+        hashes.push_back(xxHash64(window) + relocHash);
+      }
+    }
+
+    llvm::sort(hashes);
+    hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());
+  }
+
+  // NOTE(review): accepts every BPSectionBase; presumably safe only while the
+  // Mach-O link creates no other subclass -- confirm against callers.
+  static bool classof(const BPSectionBase *s) { return true; }
+
+private:
+  /// Hashes a relocation from the kinds of its referent section and symbol,
+  /// the referent section's index in `sectionToIdx` (0 if absent), the
+  /// referent's value when it is a `Defined` symbol (0 otherwise) and the
+  /// addend.
+  static uint64_t
+  getRelocHash(const Reloc &reloc,
+               const llvm::DenseMap<const void *, uint64_t> &sectionToIdx) {
+    auto *isec = reloc.getReferentInputSection();
+    std::optional<uint64_t> sectionIdx;
+    if (auto it = sectionToIdx.find(isec); it != sectionToIdx.end())
+      sectionIdx = it->second;
+    std::string kind;
+    if (isec)
+      kind = ("Section " + Twine(isec->kind())).str();
+
+    if (auto *sym = reloc.referent.dyn_cast<Symbol *>()) {
+      kind += (" Symbol " + Twine(sym->kind())).str();
+      if (auto *d = llvm::dyn_cast<Defined>(sym)) {
+        return BPSectionBase::getRelocHash(kind, sectionIdx.value_or(0),
+                                           d->value, reloc.addend);
+      }
+    }
+    return BPSectionBase::getRelocHash(kind, sectionIdx.value_or(0), 0,
+                                       reloc.addend);
+  }
+};
+
 /// Run Balanced Partitioning to find the optimal function and data order to
 /// improve startup time and compressed size.
///
diff --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt
index ecf6ce609e59f..c778fcf7b6fff 100644
--- a/lld/MachO/CMakeLists.txt
+++ b/lld/MachO/CMakeLists.txt
@@ -50,7 +50,6 @@ add_lld_library(lldMachO
   Object
   Option
   Passes
-  ProfileData
   Support
   TargetParser
   TextAPI
diff --git a/lld/include/lld/Common/BPSectionOrdererBase.h b/lld/include/lld/Common/BPSectionOrdererBase.h
new file mode 100644
index 0000000000000..6f483aff464aa
--- /dev/null
+++ b/lld/include/lld/Common/BPSectionOrdererBase.h
@@ -0,0 +1,90 @@
+//===- BPSectionOrdererBase.h ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common interfaces which may be used by
+// BPSectionOrderer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_COMMON_BP_SECTION_ORDERER_BASE_H
+#define LLD_COMMON_BP_SECTION_ORDERER_BASE_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/xxhash.h"
+#include <memory>
+#include <optional>
+
+namespace lld {
+
+/// Format-independent view of a symbol, implemented per object format (e.g.
+/// BPSymbolMacho).
+class BPSymbol {
+
+public:
+  virtual ~BPSymbol() = default;
+  virtual llvm::StringRef getName() const = 0;
+  virtual std::optional<uint64_t> getValue() const = 0;
+  virtual std::optional<uint64_t> getSize() const = 0;
+};
+
+/// Format-independent view of an input section, implemented per object format
+/// (e.g. BPSectionMacho).
+class BPSectionBase {
+public:
+  virtual ~BPSectionBase() = default;
+  virtual uint64_t getSize() const = 0;
+  virtual bool hasValidData() const = 0;
+  virtual bool isCodeSection() const = 0;
+  virtual llvm::SmallVector<std::unique_ptr<BPSymbol>> getSymbols() const = 0;
+  virtual const void *getSection() const = 0;
+  /// Fills \p hashes with the hashes describing this section. \p sectionToIdx
+  /// maps section pointers to indices (presumably getSection() values; confirm).
+  virtual void getSectionHashes(
+      llvm::SmallVectorImpl<uint64_t> &hashes,
+      const llvm::DenseMap<const void *, uint64_t> &sectionToIdx) const = 0;
+  virtual std::optional<llvm::StringRef>
+  getResolvedLinkageName(llvm::StringRef name) const = 0;
+
+  /// Symbols can be appended with "(.__uniq.xxxx)?.llvm.yyyy" where "xxxx" and
+  /// "yyyy" are numbers that could change between builds. We need to use the
+  /// root symbol name before this suffix so these symbols can be matched with
+  /// profiles which may have different suffixes.
+  /// For example, "foo.__uniq.123.llvm.456" yields "foo"; a name with neither
+  /// suffix is returned unchanged.
+  static llvm::StringRef getRootSymbol(llvm::StringRef Name) {
+    llvm::StringRef Root = Name.rsplit(".llvm.").first;
+    return Root.rsplit(".__uniq.").first;
+  }
+
+  /// Returns the xxHash64 of the string
+  /// "<kind>: <sectionIdx> + <offset> + <addend>", where the three numbers are
+  /// formatted with Twine::utohexstr().
+  static uint64_t getRelocHash(llvm::StringRef kind, uint64_t sectionIdx,
+                               uint64_t offset, uint64_t addend) {
+    return llvm::xxHash64((kind + ": " + llvm::Twine::utohexstr(sectionIdx) +
+                           " + " + llvm::Twine::utohexstr(offset) + " + " +
+                           llvm::Twine::utohexstr(addend))
+                              .str());
+  }
+
+  /// Reorders sections using balanced partitioning algorithm based on profile
+  /// data.
+  static llvm::DenseMap<const BPSectionBase *, size_t>
+  reorderSectionsByBalancedPartitioning(
+      size_t &highestAvailablePriority, llvm::StringRef profilePath,
+      bool forFunctionCompression, bool forDataCompression,
+      bool compressionSortStartupFunctions, bool verbose,
+      llvm::SmallVector<std::unique_ptr<BPSectionBase>> &inputSections);
+};
+
+} // namespace lld
+
+#endif