Skip to content

Commit 0154dce

Browse files
Colibrow and MaskRay authored
[ELF] Add BPSectionOrderer options (#120514)
Add new ELF linker options for profile-guided section ordering optimizations: - `--irpgo-profile=<file>`: Read IRPGO profile data for use with startup and compression optimizations - `--bp-startup-sort={none,function}`: Order sections based on profile data to improve startup time - `--bp-compression-sort={none,function,data,both}`: Order sections using balanced partitioning to improve compressed size - `--bp-compression-sort-startup-functions`: Additionally optimize startup functions for compression - `--verbose-bp-section-orderer`: Print statistics about balanced partitioning section ordering Thanks to the @ellishg, @thevinster, and their team's work. --------- Co-authored-by: Fangrui Song <[email protected]>
1 parent f6578c3 commit 0154dce

10 files changed

+658
-2
lines changed

lld/ELF/BPSectionOrderer.cpp

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
//===- BPSectionOrderer.cpp -----------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "BPSectionOrderer.h"
10+
#include "InputFiles.h"
11+
#include "InputSection.h"
12+
#include "SymbolTable.h"
13+
#include "Symbols.h"
14+
#include "lld/Common/BPSectionOrdererBase.inc"
15+
#include "llvm/Support/Endian.h"
16+
17+
using namespace llvm;
18+
using namespace lld::elf;
19+
20+
namespace {
21+
struct BPOrdererELF;
22+
}
23+
// Specialization of lld::BPOrdererTraits for BPOrdererELF: exposes the ELF
// section and symbol types under the names Section and Defined.
template <> struct lld::BPOrdererTraits<struct BPOrdererELF> {
24+
using Section = elf::InputSectionBase;
25+
using Defined = elf::Defined;
26+
};
27+
namespace {
28+
// ELF implementation of the balanced-partitioning orderer. It derives from
// lld::BPOrderer<BPOrdererELF> (CRTP) and supplies the section and symbol
// accessors declared below.
struct BPOrdererELF : lld::BPOrderer<BPOrdererELF> {
29+
// Maps a section to the single Defined symbol recorded for it.
DenseMap<const InputSectionBase *, Defined *> secToSym;
30+
31+
// Returns sec.getSize().
static uint64_t getSize(const Section &sec) { return sec.getSize(); }
32+
// Returns true when the section's flags contain SHF_EXECINSTR.
static bool isCodeSection(const Section &sec) {
33+
return sec.flags & llvm::ELF::SHF_EXECINSTR;
34+
}
35+
// Returns a one-element view of the symbol stored in secToSym for `sec`, or
// an empty view when the section has no entry. The view refers to the map's
// stored value rather than to a copy.
ArrayRef<Defined *> getSymbols(const Section &sec) {
36+
auto it = secToSym.find(&sec);
37+
if (it == secToSym.end())
38+
return {};
39+
return ArrayRef(it->second);
40+
}
41+
42+
// Appends content-derived hashes of `sec` to `hashes`, then sorts and
// de-duplicates the whole `hashes` vector. `sectionToIdx` is not read here.
static void
43+
getSectionHashes(const Section &sec, llvm::SmallVectorImpl<uint64_t> &hashes,
44+
const llvm::DenseMap<const void *, uint64_t> &sectionToIdx) {
45+
// Width in bytes of the sliding window; matches the 32-bit read below.
constexpr unsigned windowSize = 4;
46+
47+
// Calculate content hashes: k-mers and the last k-1 bytes.
48+
ArrayRef<uint8_t> data = sec.content();
49+
// The size check keeps the unsigned `data.size() - windowSize` from wrapping
// for contents shorter than one window.
if (data.size() >= windowSize)
50+
for (size_t i = 0; i <= data.size() - windowSize; ++i)
51+
// Each overlapping 4-byte window is read as a little-endian 32-bit value.
hashes.push_back(llvm::support::endian::read32le(data.data() + i));
52+
// The trailing windowSize - 1 bytes are added individually.
for (uint8_t byte : data.take_back(windowSize - 1))
53+
hashes.push_back(byte);
54+
55+
// Sort first so std::unique can collapse equal neighbours.
llvm::sort(hashes);
56+
hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());
57+
}
58+
59+
// Thin accessors for a symbol's name, value and size fields.
static StringRef getSymName(const Defined &sym) { return sym.getName(); }
60+
static uint64_t getSymValue(const Defined &sym) { return sym.value; }
61+
static uint64_t getSymSize(const Defined &sym) { return sym.size; }
62+
};
63+
} // namespace
64+
65+
// Collects one (section, symbol) pair for every non-empty InputSectionBase
// that a Defined symbol points at, then forwards those candidates together
// with the caller's options to BPOrdererELF::computeOrder and returns its
// result unchanged.
DenseMap<const InputSectionBase *, int> elf::runBalancedPartitioning(
66+
Ctx &ctx, StringRef profilePath, bool forFunctionCompression,
67+
bool forDataCompression, bool compressionSortStartupFunctions,
68+
bool verbose) {
69+
// Collect candidate sections and associated symbols.
70+
SmallVector<InputSectionBase *> sections;
71+
// Root symbol name -> indices into `sections`.
DenseMap<CachedHashStringRef, DenseSet<unsigned>> rootSymbolToSectionIdxs;
72+
BPOrdererELF orderer;
73+
74+
// Records the section of `sym` as a candidate. Nothing is recorded when the
// symbol is not Defined, has no InputSectionBase, the section size is zero,
// or the section already has an entry in secToSym (the first symbol seen for
// a section is the one kept).
auto addSection = [&](Symbol &sym) {
75+
auto *d = dyn_cast<Defined>(&sym);
76+
if (!d)
77+
return;
78+
auto *sec = dyn_cast_or_null<InputSectionBase>(d->section);
79+
// try_emplace both tests for an existing entry and inserts the new one.
if (!sec || sec->size == 0 || !orderer.secToSym.try_emplace(sec, d).second)
80+
return;
81+
// sections.size() is the index the section gets from the emplace_back below.
rootSymbolToSectionIdxs[CachedHashStringRef(getRootSymbol(sym.getName()))]
82+
.insert(sections.size());
83+
sections.emplace_back(sec);
84+
};
85+
86+
// Symbols from the symbol table are visited first, then each object file's
// local symbols.
for (Symbol *sym : ctx.symtab->getSymbols())
87+
addSection(*sym);
88+
for (ELFFileBase *file : ctx.objectFiles)
89+
for (Symbol *sym : file->getLocalSymbols())
90+
addSection(*sym);
91+
return orderer.computeOrder(profilePath, forFunctionCompression,
92+
forDataCompression,
93+
compressionSortStartupFunctions, verbose,
94+
sections, rootSymbolToSectionIdxs);
95+
}

lld/ELF/BPSectionOrderer.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
//===- BPSectionOrderer.h -------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
///
9+
/// This file uses Balanced Partitioning to order sections to improve startup
10+
/// time and compressed size.
11+
///
12+
//===----------------------------------------------------------------------===//
13+
14+
#ifndef LLD_ELF_BPSECTION_ORDERER_H
15+
#define LLD_ELF_BPSECTION_ORDERER_H
16+
17+
#include "llvm/ADT/DenseMap.h"
18+
#include "llvm/ADT/StringRef.h"
19+
20+
namespace lld::elf {
21+
struct Ctx;
22+
class InputSectionBase;
23+
24+
/// Run Balanced Partitioning to find the optimal function and data order to
25+
/// improve startup time and compressed size.
26+
///
27+
/// It is important that -ffunction-sections and -fdata-sections compiler flags
28+
/// are used to ensure functions and data are in their own sections and thus
29+
/// can be reordered.
30+
llvm::DenseMap<const InputSectionBase *, int>
31+
runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
32+
bool forFunctionCompression, bool forDataCompression,
33+
bool compressionSortStartupFunctions, bool verbose);
34+
35+
} // namespace lld::elf
36+
37+
#endif

lld/ELF/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ add_lld_library(lldELF
3737
Arch/X86.cpp
3838
Arch/X86_64.cpp
3939
ARMErrataFix.cpp
40+
BPSectionOrderer.cpp
4041
CallGraphSort.cpp
4142
DWARF.cpp
4243
Driver.cpp
@@ -72,6 +73,7 @@ add_lld_library(lldELF
7273
Object
7374
Option
7475
Passes
76+
ProfileData
7577
Support
7678
TargetParser
7779
TransformUtils

lld/ELF/Config.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,12 @@ struct Config {
264264
bool armBe8 = false;
265265
BsymbolicKind bsymbolic = BsymbolicKind::None;
266266
CGProfileSortKind callGraphProfileSort;
267+
llvm::StringRef irpgoProfilePath;
268+
bool bpStartupFunctionSort = false;
269+
bool bpCompressionSortStartupFunctions = false;
270+
bool bpFunctionOrderForCompression = false;
271+
bool bpDataOrderForCompression = false;
272+
bool bpVerboseSectionOrderer = false;
267273
bool checkSections;
268274
bool checkDynamicRelocs;
269275
std::optional<llvm::DebugCompressionType> compressDebugSections;

lld/ELF/Driver.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1118,6 +1118,53 @@ static CGProfileSortKind getCGProfileSortKind(Ctx &ctx,
11181118
return CGProfileSortKind::None;
11191119
}
11201120

1121+
// Reads the balanced-partitioning options from `args` into ctx.arg
// (bpFunctionOrderForCompression, bpDataOrderForCompression,
// bpStartupFunctionSort, bpCompressionSortStartupFunctions,
// bpVerboseSectionOrderer, irpgoProfilePath) and emits a diagnostic through
// ErrAlways for unrecognized values and unsupported option combinations.
static void parseBPOrdererOptions(Ctx &ctx, opt::InputArgList &args) {
1122+
// Only the last occurrence of the option is considered.
if (auto *arg = args.getLastArg(OPT_bp_compression_sort)) {
1123+
StringRef s = arg->getValue();
1124+
if (s == "function") {
1125+
ctx.arg.bpFunctionOrderForCompression = true;
1126+
} else if (s == "data") {
1127+
ctx.arg.bpDataOrderForCompression = true;
1128+
} else if (s == "both") {
1129+
ctx.arg.bpFunctionOrderForCompression = true;
1130+
ctx.arg.bpDataOrderForCompression = true;
1131+
} else if (s != "none") {
1132+
ErrAlways(ctx) << arg->getSpelling()
1133+
<< ": expected [none|function|data|both]";
1134+
}
1135+
// Checked for every value other than "none", so an unrecognized value also
// reaches this diagnostic when the ordering-file option is present.
if (s != "none" && args.hasArg(OPT_call_graph_ordering_file))
1136+
ErrAlways(ctx) << "--bp-compression-sort is incompatible with "
1137+
"--call-graph-ordering-file";
1138+
}
1139+
// Same handling for the startup sort, which accepts only none/function.
if (auto *arg = args.getLastArg(OPT_bp_startup_sort)) {
1140+
StringRef s = arg->getValue();
1141+
if (s == "function") {
1142+
ctx.arg.bpStartupFunctionSort = true;
1143+
} else if (s != "none") {
1144+
ErrAlways(ctx) << arg->getSpelling() << ": expected [none|function]";
1145+
}
1146+
if (s != "none" && args.hasArg(OPT_call_graph_ordering_file))
1147+
ErrAlways(ctx) << "--bp-startup-sort=function is incompatible with "
1148+
"--call-graph-ordering-file";
1149+
}
1150+
1151+
// Positive/negative flag pair; false when neither form is given.
ctx.arg.bpCompressionSortStartupFunctions =
1152+
args.hasFlag(OPT_bp_compression_sort_startup_functions,
1153+
OPT_no_bp_compression_sort_startup_functions, false);
1154+
ctx.arg.bpVerboseSectionOrderer = args.hasArg(OPT_verbose_bp_section_orderer);
1155+
1156+
ctx.arg.irpgoProfilePath = args.getLastArgValue(OPT_irpgo_profile);
1157+
// With an empty profile path, the two settings below are rejected.
if (ctx.arg.irpgoProfilePath.empty()) {
1158+
if (ctx.arg.bpStartupFunctionSort)
1159+
ErrAlways(ctx) << "--bp-startup-sort=function must be used with "
1160+
"--irpgo-profile";
1161+
if (ctx.arg.bpCompressionSortStartupFunctions)
1162+
ErrAlways(ctx)
1163+
<< "--bp-compression-sort-startup-functions must be used with "
1164+
"--irpgo-profile";
1165+
}
1166+
}
1167+
11211168
static DebugCompressionType getCompressionType(Ctx &ctx, StringRef s,
11221169
StringRef option) {
11231170
DebugCompressionType type = StringSwitch<DebugCompressionType>(s)
@@ -1259,6 +1306,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
12591306
ctx.arg.bsymbolic = BsymbolicKind::All;
12601307
}
12611308
ctx.arg.callGraphProfileSort = getCGProfileSortKind(ctx, args);
1309+
parseBPOrdererOptions(ctx, args);
12621310
ctx.arg.checkSections =
12631311
args.hasFlag(OPT_check_sections, OPT_no_check_sections, true);
12641312
ctx.arg.chroot = args.getLastArgValue(OPT_chroot);

lld/ELF/Options.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,19 @@ def call_graph_profile_sort: JJ<"call-graph-profile-sort=">,
141141
def : FF<"no-call-graph-profile-sort">, Alias<call_graph_profile_sort>, AliasArgs<["none"]>,
142142
Flags<[HelpHidden]>;
143143

144+
defm irpgo_profile: EEq<"irpgo-profile",
145+
"Read a temporal profile file for use with --bp-startup-sort=">;
146+
def bp_compression_sort: JJ<"bp-compression-sort=">, MetaVarName<"[none,function,data,both]">,
147+
HelpText<"Improve Lempel-Ziv compression by grouping similar sections together, resulting in a smaller compressed app size">;
148+
def bp_startup_sort: JJ<"bp-startup-sort=">, MetaVarName<"[none,function]">,
149+
HelpText<"Utilize a temporal profile file to reduce page faults during program startup">;
150+
151+
// Auxiliary options related to balanced partition
152+
defm bp_compression_sort_startup_functions: BB<"bp-compression-sort-startup-functions",
153+
"When --irpgo-profile is specified, prioritize function similarity for compression in addition to startup time", "">;
154+
def verbose_bp_section_orderer: FF<"verbose-bp-section-orderer">,
155+
HelpText<"Print information on balanced partitioning">;
156+
144157
// --chroot doesn't have a help text because it is an internal option.
145158
def chroot: Separate<["--"], "chroot">;
146159

lld/ELF/Writer.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "Writer.h"
1010
#include "AArch64ErrataFix.h"
1111
#include "ARMErrataFix.h"
12+
#include "BPSectionOrderer.h"
1213
#include "CallGraphSort.h"
1314
#include "Config.h"
1415
#include "InputFiles.h"
@@ -1082,8 +1083,18 @@ static void maybeShuffle(Ctx &ctx,
10821083
// that don't appear in the order file.
10831084
static DenseMap<const InputSectionBase *, int> buildSectionOrder(Ctx &ctx) {
10841085
DenseMap<const InputSectionBase *, int> sectionOrder;
1085-
if (!ctx.arg.callGraphProfile.empty())
1086+
if (ctx.arg.bpStartupFunctionSort || ctx.arg.bpFunctionOrderForCompression ||
1087+
ctx.arg.bpDataOrderForCompression) {
1088+
TimeTraceScope timeScope("Balanced Partitioning Section Orderer");
1089+
sectionOrder = runBalancedPartitioning(
1090+
ctx, ctx.arg.bpStartupFunctionSort ? ctx.arg.irpgoProfilePath : "",
1091+
ctx.arg.bpFunctionOrderForCompression,
1092+
ctx.arg.bpDataOrderForCompression,
1093+
ctx.arg.bpCompressionSortStartupFunctions,
1094+
ctx.arg.bpVerboseSectionOrderer);
1095+
} else if (!ctx.arg.callGraphProfile.empty()) {
10861096
sectionOrder = computeCallGraphProfileOrder(ctx);
1097+
}
10871098

10881099
if (ctx.arg.symbolOrderingFile.empty())
10891100
return sectionOrder;

lld/include/lld/Common/BPSectionOrdererBase.inc

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ template <class D> struct BPOrderer {
6363
const DenseMap<CachedHashStringRef, DenseSet<unsigned>>
6464
&rootSymbolToSectionIdxs)
6565
-> llvm::DenseMap<const Section *, int>;
66+
67+
std::optional<StringRef> static getResolvedLinkageName(llvm::StringRef name) {
68+
return {};
69+
}
6670
};
6771
} // namespace lld
6872

@@ -97,10 +101,11 @@ static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
97101
// Merge sections that are nearly identical
98102
SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> newSectionHashes;
99103
DenseMap<uint64_t, unsigned> wholeHashToSectionIdx;
104+
unsigned threshold = sectionHashes.size() > 10000 ? 5 : 0;
100105
for (auto &[sectionIdx, hashes] : sectionHashes) {
101106
uint64_t wholeHash = 0;
102107
for (auto hash : hashes)
103-
if (hashFrequency[hash] > 5)
108+
if (hashFrequency[hash] > threshold)
104109
wholeHash ^= hash;
105110
auto [it, wasInserted] =
106111
wholeHashToSectionIdx.insert(std::make_pair(wholeHash, sectionIdx));

0 commit comments

Comments
 (0)