Skip to content

Commit 594eae5

Browse files
committed
cmd/link: compress DWARF sections in ELF binaries
Forked from CL 111895.

The trickiest part of this is that the binary layout code (blk, elfshbits, and various other things) assumes a constant offset between symbols' and sections' file locations and their virtual addresses. Compression, of course, breaks this constant offset. But we need to assign virtual addresses to everything before compression in order to resolve relocations before compression. As a result, compression needs to re-compute the "address" of the DWARF sections and symbols based on their compressed size. Luckily, these are at the end of the file, so this doesn't perturb any other sections or symbols. (And there is, of course, a surprising amount of code that assumes the DWARF segment comes last, so what's one more place?)

Relevant benchmarks:

name        old time/op     new time/op     delta
StdCmd      10.3s ± 2%      10.8s ± 1%      +5.43%  (p=0.000 n=30+30)

name        old text-bytes  new text-bytes  delta
HelloSize   746kB ± 0%      746kB ± 0%      ~       (all equal)
CmdGoSize   8.41MB ± 0%     8.41MB ± 0%     ~       (all equal)
[Geo mean]  2.50MB          2.50MB          +0.00%

name        old data-bytes  new data-bytes  delta
HelloSize   10.6kB ± 0%     10.6kB ± 0%     ~       (all equal)
CmdGoSize   252kB ± 0%      252kB ± 0%      ~       (all equal)
[Geo mean]  51.5kB          51.5kB          +0.00%

name        old bss-bytes   new bss-bytes   delta
HelloSize   125kB ± 0%      125kB ± 0%      ~       (all equal)
CmdGoSize   145kB ± 0%      145kB ± 0%      ~       (all equal)
[Geo mean]  135kB           135kB           +0.00%

name        old exe-bytes   new exe-bytes   delta
HelloSize   1.60MB ± 0%     1.05MB ± 0%     -34.39%  (p=0.000 n=30+30)
CmdGoSize   16.5MB ± 0%     11.3MB ± 0%     -31.76%  (p=0.000 n=30+30)
[Geo mean]  5.14MB          3.44MB          -33.08%

Fixes #11799.
Updates #6853.

Change-Id: I64197afe4c01a237523a943088051ee056331c6f
Reviewed-on: https://go-review.googlesource.com/118276
Run-TryBot: Heschi Kreinick <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Austin Clements <[email protected]>
Reviewed-by: Ian Lance Taylor <[email protected]>
1 parent b397f63 commit 594eae5

File tree

5 files changed

+116
-18
lines changed

5 files changed

+116
-18
lines changed

src/cmd/dist/buildtool.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ var bootstrapDirs = []string{
8080
"cmd/link/internal/s390x",
8181
"cmd/link/internal/sym",
8282
"cmd/link/internal/x86",
83+
"compress/flate",
84+
"compress/zlib",
8385
"cmd/link/internal/wasm",
8486
"container/heap",
8587
"debug/dwarf",

src/cmd/link/internal/ld/data.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,13 @@
3232
package ld
3333

3434
import (
35+
"bytes"
3536
"cmd/internal/gcprog"
3637
"cmd/internal/objabi"
3738
"cmd/internal/sys"
3839
"cmd/link/internal/sym"
40+
"compress/zlib"
41+
"encoding/binary"
3942
"fmt"
4043
"log"
4144
"os"
@@ -679,6 +682,10 @@ func blk(ctxt *Link, syms []*sym.Symbol, addr, size int64, pad []byte) {
679682
}
680683
}
681684

685+
// This doesn't distinguish the memory size from the file
686+
// size, and it lays out the file based on Symbol.Value, which
687+
// is the virtual address. DWARF compression changes file sizes,
688+
// so dwarfcompress will fix this up later if necessary.
682689
eaddr := addr + size
683690
for _, s := range syms {
684691
if s.Attr.SubSymbol() {
@@ -2154,3 +2161,44 @@ func (ctxt *Link) AddTramp(s *sym.Symbol) {
21542161
ctxt.Logf("trampoline %s inserted\n", s)
21552162
}
21562163
}
2164+
2165+
// compressSyms compresses syms and returns the contents of the
2166+
// compressed section. If the section would get larger, it returns nil.
2167+
func compressSyms(ctxt *Link, syms []*sym.Symbol) []byte {
2168+
var total int64
2169+
for _, sym := range syms {
2170+
total += sym.Size
2171+
}
2172+
2173+
var buf bytes.Buffer
2174+
buf.Write([]byte("ZLIB"))
2175+
var sizeBytes [8]byte
2176+
binary.BigEndian.PutUint64(sizeBytes[:], uint64(total))
2177+
buf.Write(sizeBytes[:])
2178+
2179+
z := zlib.NewWriter(&buf)
2180+
for _, sym := range syms {
2181+
if _, err := z.Write(sym.P); err != nil {
2182+
log.Fatalf("compression failed: %s", err)
2183+
}
2184+
for i := sym.Size - int64(len(sym.P)); i > 0; {
2185+
b := zeros[:]
2186+
if i < int64(len(b)) {
2187+
b = b[:i]
2188+
}
2189+
n, err := z.Write(b)
2190+
if err != nil {
2191+
log.Fatalf("compression failed: %s", err)
2192+
}
2193+
i -= int64(n)
2194+
}
2195+
}
2196+
if err := z.Close(); err != nil {
2197+
log.Fatalf("compression failed: %s", err)
2198+
}
2199+
if int64(buf.Len()) >= total {
2200+
// Compression didn't save any space.
2201+
return nil
2202+
}
2203+
return buf.Bytes()
2204+
}

src/cmd/link/internal/ld/dwarf.go

Lines changed: 64 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1908,23 +1908,14 @@ func dwarfaddshstrings(ctxt *Link, shstrtab *sym.Symbol) {
19081908
return
19091909
}
19101910

1911-
Addstring(shstrtab, ".debug_abbrev")
1912-
Addstring(shstrtab, ".debug_frame")
1913-
Addstring(shstrtab, ".debug_info")
1914-
Addstring(shstrtab, ".debug_loc")
1915-
Addstring(shstrtab, ".debug_line")
1916-
Addstring(shstrtab, ".debug_pubnames")
1917-
Addstring(shstrtab, ".debug_pubtypes")
1918-
Addstring(shstrtab, ".debug_gdb_scripts")
1919-
Addstring(shstrtab, ".debug_ranges")
1920-
if ctxt.LinkMode == LinkExternal {
1921-
Addstring(shstrtab, elfRelType+".debug_info")
1922-
Addstring(shstrtab, elfRelType+".debug_loc")
1923-
Addstring(shstrtab, elfRelType+".debug_line")
1924-
Addstring(shstrtab, elfRelType+".debug_frame")
1925-
Addstring(shstrtab, elfRelType+".debug_pubnames")
1926-
Addstring(shstrtab, elfRelType+".debug_pubtypes")
1927-
Addstring(shstrtab, elfRelType+".debug_ranges")
1911+
secs := []string{"abbrev", "frame", "info", "loc", "line", "pubnames", "pubtypes", "gdb_scripts", "ranges"}
1912+
for _, sec := range secs {
1913+
Addstring(shstrtab, ".debug_"+sec)
1914+
if ctxt.LinkMode == LinkExternal {
1915+
Addstring(shstrtab, elfRelType+".debug_"+sec)
1916+
} else {
1917+
Addstring(shstrtab, ".zdebug_"+sec)
1918+
}
19281919
}
19291920
}
19301921

@@ -1937,6 +1928,7 @@ func dwarfaddelfsectionsyms(ctxt *Link) {
19371928
if ctxt.LinkMode != LinkExternal {
19381929
return
19391930
}
1931+
19401932
s := ctxt.Syms.Lookup(".debug_info", 0)
19411933
putelfsectionsym(ctxt.Out, s, s.Sect.Elfsect.(*ElfShdr).shnum)
19421934
s = ctxt.Syms.Lookup(".debug_abbrev", 0)
@@ -1954,3 +1946,58 @@ func dwarfaddelfsectionsyms(ctxt *Link) {
19541946
putelfsectionsym(ctxt.Out, s, s.Sect.Elfsect.(*ElfShdr).shnum)
19551947
}
19561948
}
1949+
1950+
// dwarfcompress compresses the DWARF sections. This must happen after
1951+
// relocations are applied. After this, dwarfp will contain a
1952+
// different (new) set of symbols, and sections may have been replaced.
1953+
func dwarfcompress(ctxt *Link) {
1954+
if !ctxt.IsELF || ctxt.LinkMode == LinkExternal {
1955+
return
1956+
}
1957+
1958+
var start int
1959+
var newDwarfp []*sym.Symbol
1960+
Segdwarf.Sections = Segdwarf.Sections[:0]
1961+
for i, s := range dwarfp {
1962+
// Find the boundaries between sections and compress
1963+
// the whole section once we've found the last of its
1964+
// symbols.
1965+
if i+1 >= len(dwarfp) || s.Sect != dwarfp[i+1].Sect {
1966+
s1 := compressSyms(ctxt, dwarfp[start:i+1])
1967+
if s1 == nil {
1968+
// Compression didn't help.
1969+
newDwarfp = append(newDwarfp, dwarfp[start:i+1]...)
1970+
Segdwarf.Sections = append(Segdwarf.Sections, s.Sect)
1971+
} else {
1972+
compressedSegName := ".zdebug_" + s.Sect.Name[len(".debug_"):]
1973+
sect := addsection(ctxt.Arch, &Segdwarf, compressedSegName, 04)
1974+
sect.Length = uint64(len(s1))
1975+
newSym := ctxt.Syms.Lookup(compressedSegName, 0)
1976+
newSym.P = s1
1977+
newSym.Size = int64(len(s1))
1978+
newSym.Sect = sect
1979+
newDwarfp = append(newDwarfp, newSym)
1980+
}
1981+
start = i + 1
1982+
}
1983+
}
1984+
dwarfp = newDwarfp
1985+
1986+
// Re-compute the locations of the compressed DWARF symbols
1987+
// and sections, since the layout of these within the file is
1988+
// based on Section.Vaddr and Symbol.Value.
1989+
pos := Segdwarf.Vaddr
1990+
var prevSect *sym.Section
1991+
for _, s := range dwarfp {
1992+
s.Value = int64(pos)
1993+
if s.Sect != prevSect {
1994+
s.Sect.Vaddr = uint64(s.Value)
1995+
prevSect = s.Sect
1996+
}
1997+
if s.Sub != nil {
1998+
log.Fatalf("%s: unexpected sub-symbols", s)
1999+
}
2000+
pos += uint64(s.Size)
2001+
}
2002+
Segdwarf.Length = pos - Segdwarf.Vaddr
2003+
}

src/cmd/link/internal/ld/elf.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1261,7 +1261,7 @@ func elfshbits(linkmode LinkMode, sect *sym.Section) *ElfShdr {
12611261
sh.flags |= SHF_TLS
12621262
sh.type_ = SHT_NOBITS
12631263
}
1264-
if strings.HasPrefix(sect.Name, ".debug") {
1264+
if strings.HasPrefix(sect.Name, ".debug") || strings.HasPrefix(sect.Name, ".zdebug") {
12651265
sh.flags = 0
12661266
}
12671267

src/cmd/link/internal/ld/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ func Main(arch *sys.Arch, theArch Arch) {
226226
ctxt.dodata()
227227
order := ctxt.address()
228228
ctxt.reloc()
229+
dwarfcompress(ctxt)
229230
ctxt.layout(order)
230231
thearch.Asmb(ctxt)
231232
ctxt.undef()

0 commit comments

Comments
 (0)