Skip to content

[ELF] Add BPSectionOrderer options #120514

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Feb 3, 2025
95 changes: 95 additions & 0 deletions lld/ELF/BPSectionOrderer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
//===- BPSectionOrderer.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "BPSectionOrderer.h"
#include "InputFiles.h"
#include "InputSection.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "lld/Common/BPSectionOrdererBase.inc"
#include "llvm/Support/Endian.h"

using namespace llvm;
using namespace lld::elf;

namespace {
// Forward declaration so the traits specialization below can name the ELF
// orderer before the struct itself is defined.
struct BPOrdererELF;
}
// Type bindings for the ELF orderer: it works on elf::InputSectionBase
// sections and elf::Defined symbols.
template <> struct lld::BPOrdererTraits<struct BPOrdererELF> {
using Section = elf::InputSectionBase;
using Defined = elf::Defined;
};
namespace {
struct BPOrdererELF : lld::BPOrderer<BPOrdererELF> {
DenseMap<const InputSectionBase *, Defined *> secToSym;

// Size of `sec`, as reported by the section's own getSize().
static uint64_t getSize(const Section &sec) {
  return sec.getSize();
}
// A section is treated as code when its flags include SHF_EXECINSTR.
static bool isCodeSection(const Section &sec) {
  const auto execBit = sec.flags & llvm::ELF::SHF_EXECINSTR;
  return execBit != 0;
}
// Looks up `sec` in secToSym and returns the symbol recorded for it as a
// one-element array, or an empty array when the section has no entry.
// NOTE(review): the returned ArrayRef presumably views the value stored inside
// secToSym, so it should not be kept across a mutation of the map -- confirm.
ArrayRef<Defined *> getSymbols(const Section &sec) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The previous getSymbols iterated over all symbols in sec.file, which could be very slow. I rewrote this to use a member variable.

auto it = secToSym.find(&sec);
if (it == secToSym.end())
return {};
return ArrayRef(it->second);
}

static void
getSectionHashes(const Section &sec, llvm::SmallVectorImpl<uint64_t> &hashes,
const llvm::DenseMap<const void *, uint64_t> &sectionToIdx) {
constexpr unsigned windowSize = 4;

// Calculate content hashes: k-mers and the last k-1 bytes.
ArrayRef<uint8_t> data = sec.content();
if (data.size() >= windowSize)
for (size_t i = 0; i <= data.size() - windowSize; ++i)
hashes.push_back(llvm::support::endian::read32le(data.data() + i));
for (uint8_t byte : data.take_back(windowSize - 1))
hashes.push_back(byte);

llvm::sort(hashes);
hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());
}

// Name of `sym`, as returned by Defined::getName().
static StringRef getSymName(const Defined &sym) {
  return sym.getName();
}
// The `value` field of `sym`.
static uint64_t getSymValue(const Defined &sym) {
  return sym.value;
}
// The `size` field of `sym`.
static uint64_t getSymSize(const Defined &sym) {
  return sym.size;
}
};
} // namespace

// Collects the candidate sections for balanced partitioning and hands them to
// BPOrderer::computeOrder, returning whatever map it produces.
//
// A section becomes a candidate the first time a Defined symbol that lives in
// it is seen; that symbol is remembered in BPOrdererELF::secToSym. Global
// symbols from ctx.symtab are visited before each object file's local symbols,
// so a global wins over a local that points at the same section.
//
// The boolean parameters and `profilePath` are forwarded to computeOrder
// unchanged.
DenseMap<const InputSectionBase *, int> elf::runBalancedPartitioning(
    Ctx &ctx, StringRef profilePath, bool forFunctionCompression,
    bool forDataCompression, bool compressionSortStartupFunctions,
    bool verbose) {
  // Collect candidate sections and associated symbols.
  SmallVector<InputSectionBase *> sections;
  DenseMap<CachedHashStringRef, DenseSet<unsigned>> rootSymbolToSectionIdxs;
  BPOrdererELF orderer;

  auto addSection = [&](Symbol &sym) {
    auto *d = dyn_cast<Defined>(&sym);
    if (!d)
      return;
    auto *sec = dyn_cast_or_null<InputSectionBase>(d->section);
    // Skip symbols that are not attached to an input section, empty sections,
    // and sections whose content has no backing bytes: getSectionHashes reads
    // through content().data(), so a null data pointer with a non-zero size
    // must never reach it.
    // NOTE(review): presumably this is the SHT_NOBITS (.bss) case -- confirm
    // against InputSectionBase's content() implementation.
    // The emptiness checks come before try_emplace so that skipped sections
    // are not recorded in secToSym; try_emplace itself rejects sections that
    // were already recorded through an earlier symbol.
    if (!sec || sec->size == 0 || !sec->content().data() ||
        !orderer.secToSym.try_emplace(sec, d).second)
      return;
    // The section's index in `sections` is its identity for computeOrder.
    rootSymbolToSectionIdxs[CachedHashStringRef(getRootSymbol(sym.getName()))]
        .insert(sections.size());
    sections.emplace_back(sec);
  };

  for (Symbol *sym : ctx.symtab->getSymbols())
    addSection(*sym);
  for (ELFFileBase *file : ctx.objectFiles)
    for (Symbol *sym : file->getLocalSymbols())
      addSection(*sym);
  return orderer.computeOrder(profilePath, forFunctionCompression,
                              forDataCompression,
                              compressionSortStartupFunctions, verbose,
                              sections, rootSymbolToSectionIdxs);
}
37 changes: 37 additions & 0 deletions lld/ELF/BPSectionOrderer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
//===- BPSectionOrderer.h -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// This file uses Balanced Partitioning to order sections to improve startup
/// time and compressed size.
///
//===----------------------------------------------------------------------===//

#ifndef LLD_ELF_BPSECTION_ORDERER_H
#define LLD_ELF_BPSECTION_ORDERER_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"

namespace lld::elf {
struct Ctx;
class InputSectionBase;

/// Run Balanced Partitioning to find the optimal function and data order to
/// improve startup time and compressed size.
///
/// It is important that -ffunction-sections and -fdata-sections compiler flags
/// are used to ensure functions and data are in their own sections and thus
/// can be reordered.
///
/// \param ctx linker context; its symbol table and object files are scanned
///        for candidate sections.
/// \param profilePath path of the profile used for startup ordering. The
///        caller in Writer.cpp passes an empty string when startup sorting is
///        not enabled.
/// \param forFunctionCompression order code sections for compression.
/// \param forDataCompression order non-code sections for compression.
/// \param compressionSortStartupFunctions forwarded to the orderer; set from
///        --bp-compression-sort-startup-functions.
/// \param verbose forwarded to the orderer; set from
///        --verbose-bp-section-orderer.
/// \returns a map from input section to an integer, assigned by the caller to
///          its section-order map. NOTE(review): the exact meaning of the
///          integer is defined by BPOrderer::computeOrder -- confirm there.
llvm::DenseMap<const InputSectionBase *, int>
runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
bool forFunctionCompression, bool forDataCompression,
bool compressionSortStartupFunctions, bool verbose);

} // namespace lld::elf

#endif
2 changes: 2 additions & 0 deletions lld/ELF/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ add_lld_library(lldELF
Arch/X86.cpp
Arch/X86_64.cpp
ARMErrataFix.cpp
BPSectionOrderer.cpp
CallGraphSort.cpp
DWARF.cpp
Driver.cpp
Expand Down Expand Up @@ -72,6 +73,7 @@ add_lld_library(lldELF
Object
Option
Passes
ProfileData
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is needed to fix BUILD_SHARED_LIBS=on builds

Support
TargetParser
TransformUtils
Expand Down
6 changes: 6 additions & 0 deletions lld/ELF/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,12 @@ struct Config {
bool armBe8 = false;
BsymbolicKind bsymbolic = BsymbolicKind::None;
CGProfileSortKind callGraphProfileSort;
// Value of --irpgo-profile; empty when the option was not given.
llvm::StringRef irpgoProfilePath;
// Set by --bp-startup-sort=function.
bool bpStartupFunctionSort = false;
// Set by --bp-compression-sort-startup-functions (off by default and with the
// --no- form).
bool bpCompressionSortStartupFunctions = false;
// Set by --bp-compression-sort=function and --bp-compression-sort=both.
bool bpFunctionOrderForCompression = false;
// Set by --bp-compression-sort=data and --bp-compression-sort=both.
bool bpDataOrderForCompression = false;
// Set by --verbose-bp-section-orderer.
bool bpVerboseSectionOrderer = false;
bool checkSections;
bool checkDynamicRelocs;
std::optional<llvm::DebugCompressionType> compressDebugSections;
Expand Down
48 changes: 48 additions & 0 deletions lld/ELF/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1118,6 +1118,53 @@ static CGProfileSortKind getCGProfileSortKind(Ctx &ctx,
return CGProfileSortKind::None;
}

// Reads the balanced-partitioning options from `args` into ctx.arg and reports
// invalid values or invalid option combinations through ErrAlways.
//
// Options handled: --bp-compression-sort=, --bp-startup-sort=,
// --[no-]bp-compression-sort-startup-functions, --verbose-bp-section-orderer
// and --irpgo-profile.
static void parseBPOrdererOptions(Ctx &ctx, opt::InputArgList &args) {
  // Any sort mode other than "none" cannot be combined with
  // --call-graph-ordering-file. The ordering-file option is only queried when
  // the mode is not "none".
  auto conflictsWithOrderingFile = [&args](StringRef mode) {
    return mode != "none" && args.hasArg(OPT_call_graph_ordering_file);
  };

  if (auto *compressionArg = args.getLastArg(OPT_bp_compression_sort)) {
    const StringRef mode = compressionArg->getValue();
    const bool isBoth = mode == "both";
    const bool sortFunctions = isBoth || mode == "function";
    const bool sortData = isBoth || mode == "data";

    if (sortFunctions)
      ctx.arg.bpFunctionOrderForCompression = true;
    if (sortData)
      ctx.arg.bpDataOrderForCompression = true;
    if (!sortFunctions && !sortData && mode != "none")
      ErrAlways(ctx) << compressionArg->getSpelling()
                     << ": expected [none|function|data|both]";

    if (conflictsWithOrderingFile(mode))
      ErrAlways(ctx) << "--bp-compression-sort is incompatible with "
                        "--call-graph-ordering-file";
  }

  if (auto *startupArg = args.getLastArg(OPT_bp_startup_sort)) {
    const StringRef mode = startupArg->getValue();

    if (mode == "function")
      ctx.arg.bpStartupFunctionSort = true;
    else if (mode != "none")
      ErrAlways(ctx) << startupArg->getSpelling()
                     << ": expected [none|function]";

    if (conflictsWithOrderingFile(mode))
      ErrAlways(ctx) << "--bp-startup-sort=function is incompatible with "
                        "--call-graph-ordering-file";
  }

  ctx.arg.bpCompressionSortStartupFunctions =
      args.hasFlag(OPT_bp_compression_sort_startup_functions,
                   OPT_no_bp_compression_sort_startup_functions, false);
  ctx.arg.bpVerboseSectionOrderer = args.hasArg(OPT_verbose_bp_section_orderer);

  ctx.arg.irpgoProfilePath = args.getLastArgValue(OPT_irpgo_profile);
  // With a profile path present, nothing further needs validating.
  if (!ctx.arg.irpgoProfilePath.empty())
    return;

  // Both profile-driven features are unusable without --irpgo-profile.
  if (ctx.arg.bpStartupFunctionSort)
    ErrAlways(ctx) << "--bp-startup-sort=function must be used with "
                      "--irpgo-profile";
  if (ctx.arg.bpCompressionSortStartupFunctions)
    ErrAlways(ctx)
        << "--bp-compression-sort-startup-functions must be used with "
           "--irpgo-profile";
}

static DebugCompressionType getCompressionType(Ctx &ctx, StringRef s,
StringRef option) {
DebugCompressionType type = StringSwitch<DebugCompressionType>(s)
Expand Down Expand Up @@ -1259,6 +1306,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
ctx.arg.bsymbolic = BsymbolicKind::All;
}
ctx.arg.callGraphProfileSort = getCGProfileSortKind(ctx, args);
parseBPOrdererOptions(ctx, args);
ctx.arg.checkSections =
args.hasFlag(OPT_check_sections, OPT_no_check_sections, true);
ctx.arg.chroot = args.getLastArgValue(OPT_chroot);
Expand Down
13 changes: 13 additions & 0 deletions lld/ELF/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,19 @@ def call_graph_profile_sort: JJ<"call-graph-profile-sort=">,
def : FF<"no-call-graph-profile-sort">, Alias<call_graph_profile_sort>, AliasArgs<["none"]>,
Flags<[HelpHidden]>;

// --irpgo-profile=<file>: profile input for --bp-startup-sort=. The help text
// uses the same "temporal profile" wording as --bp-startup-sort=.
defm irpgo_profile: EEq<"irpgo-profile",
    "Read a temporal profile file for use with --bp-startup-sort=">;
// --bp-compression-sort=<mode>: the driver accepts none, function, data or
// both and rejects any other value.
def bp_compression_sort: JJ<"bp-compression-sort=">, MetaVarName<"[none,function,data,both]">,
HelpText<"Improve Lempel-Ziv compression by grouping similar sections together, resulting in a smaller compressed app size">;
// --bp-startup-sort=<mode>: the driver accepts none or function; "function"
// additionally requires --irpgo-profile.
def bp_startup_sort: JJ<"bp-startup-sort=">, MetaVarName<"[none,function]">,
HelpText<"Utilize a temporal profile file to reduce page faults during program startup">;

// Auxiliary options related to balanced partitioning.
// --[no-]bp-compression-sort-startup-functions: off by default; the driver
// reports an error when it is enabled without --irpgo-profile.
defm bp_compression_sort_startup_functions: BB<"bp-compression-sort-startup-functions",
    "When --irpgo-profile is specified, prioritize function similarity for compression in addition to startup time", "">;
// --verbose-bp-section-orderer: flag without a value; the driver stores its
// presence in bpVerboseSectionOrderer.
def verbose_bp_section_orderer: FF<"verbose-bp-section-orderer">,
HelpText<"Print information on balanced partitioning">;

// --chroot doesn't have a help text because it is an internal option.
def chroot: Separate<["--"], "chroot">;

Expand Down
13 changes: 12 additions & 1 deletion lld/ELF/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "Writer.h"
#include "AArch64ErrataFix.h"
#include "ARMErrataFix.h"
#include "BPSectionOrderer.h"
#include "CallGraphSort.h"
#include "Config.h"
#include "InputFiles.h"
Expand Down Expand Up @@ -1082,8 +1083,18 @@ static void maybeShuffle(Ctx &ctx,
// that don't appear in the order file.
static DenseMap<const InputSectionBase *, int> buildSectionOrder(Ctx &ctx) {
DenseMap<const InputSectionBase *, int> sectionOrder;
if (!ctx.arg.callGraphProfile.empty())
if (ctx.arg.bpStartupFunctionSort || ctx.arg.bpFunctionOrderForCompression ||
ctx.arg.bpDataOrderForCompression) {
TimeTraceScope timeScope("Balanced Partitioning Section Orderer");
sectionOrder = runBalancedPartitioning(
ctx, ctx.arg.bpStartupFunctionSort ? ctx.arg.irpgoProfilePath : "",
ctx.arg.bpFunctionOrderForCompression,
ctx.arg.bpDataOrderForCompression,
ctx.arg.bpCompressionSortStartupFunctions,
ctx.arg.bpVerboseSectionOrderer);
} else if (!ctx.arg.callGraphProfile.empty()) {
sectionOrder = computeCallGraphProfileOrder(ctx);
}

if (ctx.arg.symbolOrderingFile.empty())
return sectionOrder;
Expand Down
7 changes: 6 additions & 1 deletion lld/include/lld/Common/BPSectionOrdererBase.inc
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ template <class D> struct BPOrderer {
const DenseMap<CachedHashStringRef, DenseSet<unsigned>>
&rootSymbolToSectionIdxs)
-> llvm::DenseMap<const Section *, int>;

// Default implementation: ignores `name` and returns an empty optional.
// NOTE(review): presumably a format-specific orderer can provide its own
// version that returns a resolved name -- confirm against the callers.
static std::optional<StringRef> getResolvedLinkageName(llvm::StringRef name) {
  return std::nullopt;
}
};
} // namespace lld

Expand Down Expand Up @@ -97,10 +101,11 @@ static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
// Merge sections that are nearly identical
SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> newSectionHashes;
DenseMap<uint64_t, unsigned> wholeHashToSectionIdx;
unsigned threshold = sectionHashes.size() > 10000 ? 5 : 0;
for (auto &[sectionIdx, hashes] : sectionHashes) {
uint64_t wholeHash = 0;
for (auto hash : hashes)
if (hashFrequency[hash] > 5)
if (hashFrequency[hash] > threshold)
wholeHash ^= hash;
auto [it, wasInserted] =
wholeHashToSectionIdx.insert(std::make_pair(wholeHash, sectionIdx));
Expand Down
Loading