Skip to content

Commit 9cc9efc

Browse files
HaohaiWenWen Haohaijh7370
authored
[lld][COFF] Remove duplicate strtab entries (#141197)
String table size is too big for large binary when symbol table is enabled. Some strings in strtab is same so it can be reused. This patch revives 9ffeaaa authored by mstorsjo with the prioritized string table builder to fix debug section name issue (see 4d2eda2 for more details). --------- Co-authored-by: Wen Haohai <[email protected]> Co-authored-by: James Henderson <[email protected]>
1 parent 0fa0c3c commit 9cc9efc

File tree

4 files changed

+97
-26
lines changed

4 files changed

+97
-26
lines changed

lld/COFF/Writer.cpp

Lines changed: 31 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "llvm/ADT/STLExtras.h"
2525
#include "llvm/ADT/StringSet.h"
2626
#include "llvm/BinaryFormat/COFF.h"
27+
#include "llvm/MC/StringTableBuilder.h"
2728
#include "llvm/Support/Endian.h"
2829
#include "llvm/Support/FileOutputBuffer.h"
2930
#include "llvm/Support/Parallel.h"
@@ -201,7 +202,8 @@ struct ChunkRange {
201202
class Writer {
202203
public:
203204
Writer(COFFLinkerContext &c)
204-
: buffer(c.e.outputBuffer), delayIdata(c), ctx(c) {}
205+
: buffer(c.e.outputBuffer), strtab(StringTableBuilder::WinCOFF),
206+
delayIdata(c), ctx(c) {}
205207
void run();
206208

207209
private:
@@ -281,7 +283,7 @@ class Writer {
281283

282284
std::unique_ptr<FileOutputBuffer> &buffer;
283285
std::map<PartialSectionKey, PartialSection *> partialSections;
284-
std::vector<char> strtab;
286+
StringTableBuilder strtab;
285287
std::vector<llvm::object::coff_symbol16> outputSymtab;
286288
std::vector<ECCodeMapEntry> codeMap;
287289
IdataContents idata;
@@ -1434,14 +1436,6 @@ void Writer::assignOutputSectionIndices() {
14341436
sc->setOutputSectionIdx(mc->getOutputSectionIdx());
14351437
}
14361438

1437-
size_t Writer::addEntryToStringTable(StringRef str) {
1438-
assert(str.size() > COFF::NameSize);
1439-
size_t offsetOfEntry = strtab.size() + 4; // +4 for the size field
1440-
strtab.insert(strtab.end(), str.begin(), str.end());
1441-
strtab.push_back('\0');
1442-
return offsetOfEntry;
1443-
}
1444-
14451439
std::optional<coff_symbol16> Writer::createSymbol(Defined *def) {
14461440
coff_symbol16 sym;
14471441
switch (def->kind()) {
@@ -1482,7 +1476,8 @@ std::optional<coff_symbol16> Writer::createSymbol(Defined *def) {
14821476
StringRef name = def->getName();
14831477
if (name.size() > COFF::NameSize) {
14841478
sym.Name.Offset.Zeroes = 0;
1485-
sym.Name.Offset.Offset = addEntryToStringTable(name);
1479+
sym.Name.Offset.Offset = 0; // Filled in later.
1480+
strtab.add(name);
14861481
} else {
14871482
memset(sym.Name.ShortName, 0, COFF::NameSize);
14881483
memcpy(sym.Name.ShortName, name.data(), name.size());
@@ -1514,6 +1509,7 @@ void Writer::createSymbolAndStringTable() {
15141509
// solution where discardable sections have long names preserved and
15151510
// non-discardable sections have their names truncated, to ensure that any
15161511
// section which is mapped at runtime also has its name mapped at runtime.
1512+
SmallVector<OutputSection *> longNameSections;
15171513
for (OutputSection *sec : ctx.outputSections) {
15181514
if (sec->name.size() <= COFF::NameSize)
15191515
continue;
@@ -1525,9 +1521,13 @@ void Writer::createSymbolAndStringTable() {
15251521
<< " is longer than 8 characters and will use a non-standard string "
15261522
"table";
15271523
}
1528-
sec->setStringTableOff(addEntryToStringTable(sec->name));
1524+
// Put the section name in the begin of strtab so that its offset is less
1525+
// than Max7DecimalOffset otherwise lldb/gdb will not read it.
1526+
strtab.add(sec->name, /*Priority=*/UINT8_MAX);
1527+
longNameSections.push_back(sec);
15291528
}
15301529

1530+
std::vector<std::pair<size_t, StringRef>> longNameSymbols;
15311531
if (ctx.config.writeSymtab) {
15321532
for (ObjFile *file : ctx.objFileInstances) {
15331533
for (Symbol *b : file->getSymbols()) {
@@ -1542,15 +1542,22 @@ void Writer::createSymbolAndStringTable() {
15421542
continue;
15431543
}
15441544

1545-
if (std::optional<coff_symbol16> sym = createSymbol(d))
1545+
if (std::optional<coff_symbol16> sym = createSymbol(d)) {
1546+
if (d->getName().size() > COFF::NameSize)
1547+
longNameSymbols.emplace_back(outputSymtab.size(), d->getName());
15461548
outputSymtab.push_back(*sym);
1549+
}
15471550

15481551
if (auto *dthunk = dyn_cast<DefinedImportThunk>(d)) {
15491552
if (!dthunk->wrappedSym->writtenToSymtab) {
15501553
dthunk->wrappedSym->writtenToSymtab = true;
15511554
if (std::optional<coff_symbol16> sym =
1552-
createSymbol(dthunk->wrappedSym))
1555+
createSymbol(dthunk->wrappedSym)) {
1556+
if (d->getName().size() > COFF::NameSize)
1557+
longNameSymbols.emplace_back(outputSymtab.size(),
1558+
dthunk->wrappedSym->getName());
15531559
outputSymtab.push_back(*sym);
1560+
}
15541561
}
15551562
}
15561563
}
@@ -1560,11 +1567,19 @@ void Writer::createSymbolAndStringTable() {
15601567
if (outputSymtab.empty() && strtab.empty())
15611568
return;
15621569

1570+
strtab.finalize();
1571+
for (OutputSection *sec : longNameSections)
1572+
sec->setStringTableOff(strtab.getOffset(sec->name));
1573+
for (auto P : longNameSymbols) {
1574+
coff_symbol16 &sym = outputSymtab[P.first];
1575+
sym.Name.Offset.Offset = strtab.getOffset(P.second);
1576+
}
1577+
15631578
// We position the symbol table to be adjacent to the end of the last section.
15641579
uint64_t fileOff = fileSize;
15651580
pointerToSymbolTable = fileOff;
15661581
fileOff += outputSymtab.size() * sizeof(coff_symbol16);
1567-
fileOff += 4 + strtab.size();
1582+
fileOff += strtab.getSize();
15681583
fileSize = alignTo(fileOff, ctx.config.fileAlign);
15691584
}
15701585

@@ -1945,9 +1960,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
19451960
// Create the string table, it follows immediately after the symbol table.
19461961
// The first 4 bytes is length including itself.
19471962
buf = reinterpret_cast<uint8_t *>(&symbolTable[numberOfSymbols]);
1948-
write32le(buf, strtab.size() + 4);
1949-
if (!strtab.empty())
1950-
memcpy(buf + 4, strtab.data(), strtab.size());
1963+
strtab.write(buf);
19511964
}
19521965

19531966
void Writer::openFile(StringRef path) {

lld/test/COFF/strtab.s

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# RUN: llvm-mc -triple=x86_64-windows-msvc %s -filetype=obj -o %t.obj
2+
# RUN: lld-link -out:%t.exe -entry:main %t.obj -debug:dwarf
3+
# RUN: llvm-readobj --string-table %t.exe | FileCheck %s
4+
5+
# CHECK: StringTable {
6+
# CHECK-NEXT: Length: 87
7+
# CHECK-NEXT: [ 4] .debug_abbrev
8+
# CHECK-NEXT: [ 12] .debug_line
9+
# CHECK-NEXT: [ 1e] long_name_symbolz
10+
# CHECK-NEXT: [ 30] .debug_abbrez
11+
# CHECK-NEXT: [ 3e] __impl_long_name_symbolA
12+
# CHECK-NEXT: }
13+
14+
15+
.global main
16+
.text
17+
main:
18+
long_name_symbolz:
19+
long_name_symbolA:
20+
__impl_long_name_symbolA:
21+
name_symbolA:
22+
.debug_abbrez:
23+
ret
24+
25+
.section .debug_abbrev,"dr"
26+
.byte 0
27+
28+
.section .debug_line,"dr"
29+
.byte 0

llvm/include/llvm/MC/StringTableBuilder.h

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ class StringTableBuilder {
3838
};
3939

4040
private:
41+
// Only non-zero priority will be recorded.
42+
DenseMap<CachedHashStringRef, uint8_t> StringPriorityMap;
4143
DenseMap<CachedHashStringRef, size_t> StringIndexMap;
4244
size_t Size = 0;
4345
Kind K;
@@ -51,11 +53,16 @@ class StringTableBuilder {
5153
LLVM_ABI StringTableBuilder(Kind K, Align Alignment = Align(1));
5254
LLVM_ABI ~StringTableBuilder();
5355

54-
/// Add a string to the builder. Returns the position of S in the
55-
/// table. The position will be changed if finalize is used.
56-
/// Can only be used before the table is finalized.
57-
LLVM_ABI size_t add(CachedHashStringRef S);
58-
size_t add(StringRef S) { return add(CachedHashStringRef(S)); }
56+
/// Add a string to the builder. Returns the position of S in the table. The
57+
/// position will be changed if finalize is used. Can only be used before the
58+
/// table is finalized. Priority is only useful with reordering. Strings with
59+
/// the same priority will be put together. Strings with higher priority are
60+
/// placed closer to the begin of string table. When adding same string with
61+
/// different priority, the maximum priority win.
62+
LLVM_ABI size_t add(CachedHashStringRef S, uint8_t Priority = 0);
63+
size_t add(StringRef S, uint8_t Priority = 0) {
64+
return add(CachedHashStringRef(S), Priority);
65+
}
5966

6067
/// Analyze the strings and build the final table. No more strings can
6168
/// be added after this point.
@@ -78,6 +85,7 @@ class StringTableBuilder {
7885
bool contains(StringRef S) const { return contains(CachedHashStringRef(S)); }
7986
bool contains(CachedHashStringRef S) const { return StringIndexMap.count(S); }
8087

88+
bool empty() const { return StringIndexMap.empty(); }
8189
size_t getSize() const { return Size; }
8290
LLVM_ABI void clear();
8391

llvm/lib/MC/StringTableBuilder.cpp

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,13 +138,31 @@ void StringTableBuilder::finalizeInOrder() {
138138
void StringTableBuilder::finalizeStringTable(bool Optimize) {
139139
Finalized = true;
140140

141-
if (Optimize) {
141+
if (Optimize && StringIndexMap.size()) {
142142
std::vector<StringPair *> Strings;
143143
Strings.reserve(StringIndexMap.size());
144144
for (StringPair &P : StringIndexMap)
145145
Strings.push_back(&P);
146146

147-
multikeySort(Strings, 0);
147+
size_t RangeBegin = 0;
148+
MutableArrayRef<StringPair *> StringsRef(Strings);
149+
if (StringPriorityMap.size()) {
150+
llvm::sort(Strings,
151+
[&](const StringPair *LHS, const StringPair *RHS) -> bool {
152+
return StringPriorityMap.lookup(LHS->first) >
153+
StringPriorityMap.lookup(RHS->first);
154+
});
155+
uint8_t RangePriority = StringPriorityMap.lookup(Strings[0]->first);
156+
for (size_t I = 1, E = Strings.size(); I != E && RangePriority; ++I) {
157+
uint8_t Priority = StringPriorityMap.lookup(Strings[I]->first);
158+
if (Priority != RangePriority) {
159+
multikeySort(StringsRef.slice(RangeBegin, I - RangeBegin), 0);
160+
RangePriority = Priority;
161+
RangeBegin = I;
162+
}
163+
}
164+
}
165+
multikeySort(StringsRef.slice(RangeBegin), 0);
148166
initSize();
149167

150168
StringRef Previous;
@@ -199,11 +217,14 @@ size_t StringTableBuilder::getOffset(CachedHashStringRef S) const {
199217
return I->second;
200218
}
201219

202-
size_t StringTableBuilder::add(CachedHashStringRef S) {
220+
size_t StringTableBuilder::add(CachedHashStringRef S, uint8_t Priority) {
203221
if (K == WinCOFF)
204222
assert(S.size() > COFF::NameSize && "Short string in COFF string table!");
205223

206224
assert(!isFinalized());
225+
if (Priority)
226+
StringPriorityMap[S] = std::max(Priority, StringPriorityMap[S]);
227+
207228
auto P = StringIndexMap.try_emplace(S);
208229
if (P.second) {
209230
size_t Start = alignTo(Size, Alignment);

0 commit comments

Comments
 (0)