Skip to content

[lld-macho] Refactor BPSectionOrderer with CRTP. NFC #124482

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions lld/Common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ set_source_files_properties("${version_inc}"

add_lld_library(lldCommon
Args.cpp
BPSectionOrdererBase.cpp
CommonLinkerContext.cpp
DriverDispatcher.cpp
DWARF.cpp
Expand All @@ -48,7 +47,6 @@ add_lld_library(lldCommon
Demangle
MC
Option
ProfileData
Support
Target
TargetParser
Expand Down
134 changes: 118 additions & 16 deletions lld/MachO/BPSectionOrderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,39 +8,141 @@

#include "BPSectionOrderer.h"
#include "InputSection.h"
#include "Relocations.h"
#include "Symbols.h"
#include "lld/Common/BPSectionOrdererBase.inc"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StableHashing.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/xxhash.h"

#define DEBUG_TYPE "bp-section-orderer"

using namespace llvm;
using namespace lld::macho;

namespace {
// Forward declaration so the traits specialization below can name the type
// before the struct itself is defined.
struct BPOrdererMachO;
}
// Traits specialization for the Mach-O orderer: a "Section" is a
// macho::InputSection and a "Symbol" is a macho::Symbol.
// NOTE(review): presumably consumed by lld::BPOrderer<BPOrdererMachO> to
// obtain its Section/Symbol types -- confirm against BPSectionOrdererBase.inc.
template <> struct lld::BPOrdererTraits<struct BPOrdererMachO> {
using Section = macho::InputSection;
using Symbol = macho::Symbol;
};
namespace {
/// Mach-O callbacks for the CRTP base lld::BPOrderer.
///
/// Every member is a static function operating on the Section / Symbol types
/// used by this orderer; the struct itself carries no state.
struct BPOrdererMachO : lld::BPOrderer<BPOrdererMachO> {
  /// Returns the size reported by \p sec.
  static uint64_t getSize(const Section &sec) { return sec.getSize(); }

  /// Returns whether macho::isCodeSection() classifies \p sec as code.
  static bool isCodeSection(const Section &sec) {
    return macho::isCodeSection(&sec);
  }

  /// Returns the entries of \p sec.symbols that are non-null Defined symbols.
  static SmallVector<Symbol *, 0> getSymbols(const Section &sec) {
    SmallVector<Symbol *, 0> symbols;
    for (auto *sym : sec.symbols)
      if (auto *d = llvm::dyn_cast_or_null<Defined>(sym))
        symbols.emplace_back(d);
    return symbols;
  }

  // Linkage names can be prefixed with "_" or "l_" on Mach-O. See
  // Mangler::getNameWithPrefix() for details.
  /// Returns \p name with a leading "_" or "l_" removed, or std::nullopt when
  /// neither prefix is present.
  static std::optional<StringRef> getResolvedLinkageName(llvm::StringRef name) {
    if (name.consume_front("_") || name.consume_front("l_"))
      return name;
    return {};
  }

  /// Appends content and relocation hashes for \p sec to \p hashes, then sorts
  /// \p hashes and removes duplicates.
  ///
  /// \param sec          Section whose data and relocations are hashed.
  /// \param hashes       Output vector; existing entries are kept and take
  ///                     part in the final sort/unique.
  /// \param sectionToIdx Section-pointer-to-index map forwarded to
  ///                     getRelocHash().
  static void
  getSectionHashes(const Section &sec, llvm::SmallVectorImpl<uint64_t> &hashes,
                   const llvm::DenseMap<const void *, uint64_t> &sectionToIdx) {
    constexpr unsigned windowSize = 4;

    // Calculate content hashes: k-mers and the last k-1 bytes.
    ArrayRef<uint8_t> data = sec.data;
    if (data.size() >= windowSize)
      for (size_t i = 0; i <= data.size() - windowSize; ++i)
        hashes.push_back(llvm::support::endian::read32le(data.data() + i));
    for (uint8_t byte : data.take_back(windowSize - 1))
      hashes.push_back(byte);

    // Calculate relocation hashes
    for (const auto &r : sec.relocs) {
      // NOTE(review): r.length is used as a byte count here. If Reloc::length
      // stores log2 of the width (as Mach-O's r_length field does), both this
      // check and the loop bound below should use (1 << r.length) -- confirm.
      if (r.length == 0 || r.referent.isNull() || r.offset >= data.size())
        continue;

      uint64_t relocHash = getRelocHash(r, sectionToIdx);
      uint32_t start = (r.offset < windowSize) ? 0 : r.offset - windowSize + 1;
      // Only r.offset is checked against data.size() above, so
      // r.offset + r.length may run past the end of the data.
      // ArrayRef::drop_front(N) requires N <= size(), so clamp the bound.
      // Position data.size() (an empty window) is still visited so the hashes
      // produced for in-range relocations are unchanged.
      const size_t end = std::min<size_t>(
          static_cast<size_t>(r.offset) + r.length, data.size() + 1);
      for (size_t i = start; i < end; i++) {
        auto window = data.drop_front(i).take_front(windowSize);
        hashes.push_back(xxh3_64bits(window) ^ relocHash);
      }
    }

    llvm::sort(hashes);
    hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());
  }

  /// Returns the name of \p sym.
  static llvm::StringRef getSymName(const Symbol &sym) { return sym.getName(); }

  /// Returns the value of \p sym when it is a Defined symbol, otherwise 0.
  static uint64_t getSymValue(const Symbol &sym) {
    if (auto *d = dyn_cast<Defined>(&sym))
      return d->value;
    return 0;
  }

  /// Returns the size of \p sym when it is a Defined symbol, otherwise 0.
  static uint64_t getSymSize(const Symbol &sym) {
    if (auto *d = dyn_cast<Defined>(&sym))
      return d->size;
    return 0;
  }

private:
  /// Builds a hash describing the target of \p reloc from: the referent
  /// section's kind (all-ones when there is no referent section), the symbol
  /// kind and value when the referent is a symbol, the referent section's
  /// index in \p sectionToIdx (0 when absent), and the relocation addend.
  static uint64_t
  getRelocHash(const Reloc &reloc,
               const llvm::DenseMap<const void *, uint64_t> &sectionToIdx) {
    auto *isec = reloc.getReferentInputSection();
    std::optional<uint64_t> sectionIdx;
    if (auto it = sectionToIdx.find(isec); it != sectionToIdx.end())
      sectionIdx = it->second;
    uint64_t kind = -1, value = 0;
    if (isec)
      kind = uint64_t(isec->kind());

    if (auto *sym = reloc.referent.dyn_cast<Symbol *>()) {
      // Fold the symbol kind into the low byte next to the section kind.
      kind = (kind << 8) | uint8_t(sym->kind());
      if (auto *d = llvm::dyn_cast<Defined>(sym))
        value = d->value;
    }
    return llvm::stable_hash_combine(kind, sectionIdx.value_or(0), value,
                                     reloc.addend);
  }
};
} // namespace

/// Walks every subsection of every file in inputFiles, keeps the ones with
/// non-empty data as candidate sections, and computes a priority for each
/// from the given profile and compression options.
///
/// NOTE(review): this listing is a rendered diff in which removed and added
/// lines are interleaved without +/- markers (`sections` is declared twice,
/// there are two consecutive `if` guards, and two return paths). Confirm which
/// lines belong to the post-change function before relying on it.
DenseMap<const InputSection *, int> lld::macho::runBalancedPartitioning(
StringRef profilePath, bool forFunctionCompression, bool forDataCompression,
bool compressionSortStartupFunctions, bool verbose) {

SmallVector<std::unique_ptr<BPSectionBase>> sections;
// Collect candidate sections and associated symbols.
SmallVector<InputSection *> sections;
// Maps a root symbol name (and its resolved linkage name, when there is one)
// to the indices of the sections that contain such a symbol.
DenseMap<CachedHashStringRef, DenseSet<unsigned>> rootSymbolToSectionIdxs;
for (const auto *file : inputFiles) {
for (auto *sec : file->sections) {
for (auto &subsec : sec->subsections) {
auto *isec = subsec.isec;
if (!isec || isec->data.empty() || !isec->data.data())
if (!isec || isec->data.empty())
continue;
sections.emplace_back(std::make_unique<BPSectionMacho>(isec));
// Index the section will have once it is appended on the next line.
size_t idx = sections.size();
sections.emplace_back(isec);
for (auto *sym : BPOrdererMachO::getSymbols(*isec)) {
auto rootName = getRootSymbol(sym->getName());
rootSymbolToSectionIdxs[CachedHashStringRef(rootName)].insert(idx);
// Also register the name with its "_" / "l_" prefix removed, if it has one.
if (auto linkageName =
BPOrdererMachO::getResolvedLinkageName(rootName))
rootSymbolToSectionIdxs[CachedHashStringRef(*linkageName)].insert(
idx);
}
}
}
}

auto reorderedSections = BPSectionBase::reorderSectionsByBalancedPartitioning(
profilePath, forFunctionCompression, forDataCompression,
compressionSortStartupFunctions, verbose, sections);

DenseMap<const InputSection *, int> result;
for (const auto &[sec, priority] : reorderedSections) {
result.try_emplace(
static_cast<const InputSection *>(
static_cast<const BPSectionMacho *>(sec)->getSection()),
priority);
}
return result;
// Hand the collected sections and the symbol-to-section map to the CRTP
// orderer, which produces the section-to-priority map.
return BPOrdererMachO::computeOrder(profilePath, forFunctionCompression,
forDataCompression,
compressionSortStartupFunctions, verbose,
sections, rootSymbolToSectionIdxs);
}
118 changes: 1 addition & 117 deletions lld/MachO/BPSectionOrderer.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,134 +14,18 @@
#ifndef LLD_MACHO_BPSECTION_ORDERER_H
#define LLD_MACHO_BPSECTION_ORDERER_H

#include "InputSection.h"
#include "Relocations.h"
#include "Symbols.h"
#include "lld/Common/BPSectionOrdererBase.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StableHashing.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/xxhash.h"

namespace lld::macho {

class InputSection;

class BPSymbolMacho : public BPSymbol {
const Symbol *sym;

public:
explicit BPSymbolMacho(const Symbol *s) : sym(s) {}

llvm::StringRef getName() const override { return sym->getName(); }

const Defined *asDefined() const {
return llvm::dyn_cast_or_null<Defined>(sym);
}

std::optional<uint64_t> getValue() const override {
if (auto *d = asDefined())
return d->value;
return {};
}

std::optional<uint64_t> getSize() const override {
if (auto *d = asDefined())
return d->size;
return {};
}

const Symbol *getSymbol() const { return sym; }
};

/// BPSectionBase implementation backed by a Mach-O InputSection pointer.
class BPSectionMacho : public BPSectionBase {
  const InputSection *isec;

public:
  explicit BPSectionMacho(const InputSection *sec) : isec(sec) {}

  /// Returns the wrapped InputSection as an untyped pointer.
  const void *getSection() const override { return isec; }

  /// Returns the size reported by the wrapped section.
  uint64_t getSize() const override { return isec->getSize(); }

  /// Returns whether macho::isCodeSection() classifies the section as code.
  bool isCodeSection() const override { return macho::isCodeSection(isec); }

  /// Wraps every non-null Defined symbol of the section in a BPSymbolMacho.
  SmallVector<std::unique_ptr<BPSymbol>> getSymbols() const override {
    SmallVector<std::unique_ptr<BPSymbol>> symbols;
    for (auto *sym : isec->symbols)
      if (auto *d = llvm::dyn_cast_or_null<Defined>(sym))
        symbols.emplace_back(std::make_unique<BPSymbolMacho>(d));
    return symbols;
  }

  // Linkage names can be prefixed with "_" or "l_" on Mach-O. See
  // Mangler::getNameWithPrefix() for details.
  /// Returns \p name with a leading "_" or "l_" removed, or std::nullopt when
  /// neither prefix is present.
  std::optional<StringRef>
  getResolvedLinkageName(llvm::StringRef name) const override {
    if (name.consume_front("_") || name.consume_front("l_"))
      return name;
    return {};
  }

  /// Appends content and relocation hashes for the section to \p hashes, then
  /// sorts \p hashes and removes duplicates.
  ///
  /// \param hashes       Output vector; existing entries are kept and take
  ///                     part in the final sort/unique.
  /// \param sectionToIdx Section-pointer-to-index map forwarded to
  ///                     getRelocHash().
  void getSectionHashes(llvm::SmallVectorImpl<uint64_t> &hashes,
                        const llvm::DenseMap<const void *, uint64_t>
                            &sectionToIdx) const override {
    constexpr unsigned windowSize = 4;

    // Calculate content hashes: k-mers and the last k-1 bytes.
    ArrayRef<uint8_t> data = isec->data;
    if (data.size() >= windowSize)
      for (size_t i = 0; i <= data.size() - windowSize; ++i)
        hashes.push_back(llvm::support::endian::read32le(data.data() + i));
    for (uint8_t byte : data.take_back(windowSize - 1))
      hashes.push_back(byte);

    // Calculate relocation hashes
    for (const auto &r : isec->relocs) {
      // NOTE(review): r.length is used as a byte count here. If Reloc::length
      // stores log2 of the width (as Mach-O's r_length field does), both this
      // check and the loop bound below should use (1 << r.length) -- confirm.
      if (r.length == 0 || r.referent.isNull() || r.offset >= data.size())
        continue;

      uint64_t relocHash = getRelocHash(r, sectionToIdx);
      uint32_t start = (r.offset < windowSize) ? 0 : r.offset - windowSize + 1;
      // Only r.offset is checked against data.size() above, so
      // r.offset + r.length may run past the end of the data.
      // ArrayRef::drop_front(N) requires N <= size(), so clamp the bound.
      // Position data.size() (an empty window) is still visited so the hashes
      // produced for in-range relocations are unchanged.
      const size_t end = std::min<size_t>(
          static_cast<size_t>(r.offset) + r.length, data.size() + 1);
      for (size_t i = start; i < end; i++) {
        auto window = data.drop_front(i).take_front(windowSize);
        hashes.push_back(xxh3_64bits(window) ^ relocHash);
      }
    }

    llvm::sort(hashes);
    hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());
  }

private:
  /// Builds a hash describing the target of \p reloc from: the referent
  /// section's kind (all-ones when there is no referent section), the symbol
  /// kind and value when the referent is a symbol, the referent section's
  /// index in \p sectionToIdx (0 when absent), and the relocation addend.
  static uint64_t
  getRelocHash(const Reloc &reloc,
               const llvm::DenseMap<const void *, uint64_t> &sectionToIdx) {
    auto *isec = reloc.getReferentInputSection();
    std::optional<uint64_t> sectionIdx;
    if (auto it = sectionToIdx.find(isec); it != sectionToIdx.end())
      sectionIdx = it->second;
    uint64_t kind = -1, value = 0;
    if (isec)
      kind = uint64_t(isec->kind());

    if (auto *sym = reloc.referent.dyn_cast<Symbol *>()) {
      // Fold the symbol kind into the low byte next to the section kind.
      kind = (kind << 8) | uint8_t(sym->kind());
      if (auto *d = llvm::dyn_cast<Defined>(sym))
        value = d->value;
    }
    return llvm::stable_hash_combine(kind, sectionIdx.value_or(0), value,
                                     reloc.addend);
  }
};

/// Run Balanced Partitioning to find the optimal function and data order to
/// improve startup time and compressed size.
///
/// It is important that .subsections_via_symbols is used to ensure functions
/// and data are in their own sections and thus can be reordered.
llvm::DenseMap<const lld::macho::InputSection *, int>
llvm::DenseMap<const InputSection *, int>
runBalancedPartitioning(llvm::StringRef profilePath,
bool forFunctionCompression, bool forDataCompression,
bool compressionSortStartupFunctions, bool verbose);
Expand Down
1 change: 1 addition & 0 deletions lld/MachO/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ add_lld_library(lldMachO
Object
Option
Passes
ProfileData
Support
TargetParser
TextAPI
Expand Down
70 changes: 0 additions & 70 deletions lld/include/lld/Common/BPSectionOrdererBase.h

This file was deleted.

Loading
Loading