Skip to content

Commit 738739f

Browse files
committed
cmd/link: implement trampolines for ppc64le with ext linking
When using golang on ppc64le there have been issues when building executables that generate extremely large text sections. This is due to the call instruction and the limitation on the offset field, which is smaller than on most platforms. If the size of the call target offset is too big for the offset field in the call instruction, then link errors can occur. The original solution to this problem in golang was to split the text section when it became too large, allowing the external (GNU) linker to insert the necessary stub to handle the long call. That worked fine until another size limit for the program size was hit, where a plt_branch was created instead of a long branch. In that case the plt_branch code sequence expects r2 to contain the address of the TOC, but when golang creates dynamic executables by default (-buildmode=exe) r2 does not always contain the address of the TOC and as a result when building programs that reach this extremely large size, a runtime SEGV or SIGILL can occur due to branching to a bad address. When using internal linking, trampolines are generated to handle the long calls but the text sections are not split. With this change, text sections will still be split appropriately with external linking but if the buildmode being used does not maintain r2 as the TOC address, then trampolines will be created for those calls. Fixes #20497 Change-Id: If5400b0f86c2c08e106b332be6db0b259b07d93d Reviewed-on: https://go-review.googlesource.com/45130 Run-TryBot: Lynn Boger <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Cherry Zhang <[email protected]>
1 parent df0892c commit 738739f

File tree

2 files changed

+84
-27
lines changed

2 files changed

+84
-27
lines changed

src/cmd/link/internal/ld/data.go

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -328,18 +328,36 @@ func isRuntimeDepPkg(pkg string) bool {
328328
return strings.HasPrefix(pkg, "runtime/internal/") && !strings.HasSuffix(pkg, "_test")
329329
}
330330

331+
// Estimate the max size needed to hold any new trampolines created for this function. This
332+
// is used to determine when the section can be split if it becomes too large, to ensure that
333+
// the trampolines are in the same section as the function that uses them.
334+
func maxSizeTrampolinesPPC64(s *Symbol, isTramp bool) uint64 {
335+
// If Thearch.Trampoline is nil, then trampoline support is not available on this arch.
336+
// A trampoline does not need any dependent trampolines.
337+
if Thearch.Trampoline == nil || isTramp {
338+
return 0
339+
}
340+
341+
n := uint64(0)
342+
for ri := range s.R {
343+
r := &s.R[ri]
344+
if r.Type.IsDirectJump() {
345+
n++
346+
}
347+
}
348+
// Trampolines in ppc64 are 4 instructions.
349+
return n * 16
350+
}
351+
331352
// detect too-far jumps in function s, and add trampolines if necessary
332-
// ARM supports trampoline insertion for internal and external linking
333-
// PPC64 & PPC64LE support trampoline insertion for internal linking only
353+
// ARM, PPC64 & PPC64LE support trampoline insertion for internal and external linking
354+
// On PPC64 & PPC64LE the text sections might be split but will still insert trampolines
355+
// where necessary.
334356
func trampoline(ctxt *Link, s *Symbol) {
335357
if Thearch.Trampoline == nil {
336358
return // no need or no support of trampolines on this arch
337359
}
338360

339-
if Linkmode == LinkExternal && SysArch.Family == sys.PPC64 {
340-
return
341-
}
342-
343361
for ri := range s.R {
344362
r := &s.R[ri]
345363
if !r.Type.IsDirectJump() {
@@ -2055,14 +2073,14 @@ func (ctxt *Link) textaddress() {
20552073
sect.Vaddr = va
20562074
ntramps := 0
20572075
for _, sym := range ctxt.Textp {
2058-
sect, n, va = assignAddress(ctxt, sect, n, sym, va)
2076+
sect, n, va = assignAddress(ctxt, sect, n, sym, va, false)
20592077

20602078
trampoline(ctxt, sym) // resolve jumps, may add trampolines if jump too far
20612079

20622080
// lay down trampolines after each function
20632081
for ; ntramps < len(ctxt.tramps); ntramps++ {
20642082
tramp := ctxt.tramps[ntramps]
2065-
sect, n, va = assignAddress(ctxt, sect, n, tramp, va)
2083+
sect, n, va = assignAddress(ctxt, sect, n, tramp, va, true)
20662084
}
20672085
}
20682086

@@ -2088,7 +2106,7 @@ func (ctxt *Link) textaddress() {
20882106
// assigns address for a text symbol, returns (possibly new) section, its number, and the address
20892107
// Note: once we have trampoline insertion support for external linking, this function
20902108
// will not need to create new text sections, and so no need to return sect and n.
2091-
func assignAddress(ctxt *Link, sect *Section, n int, sym *Symbol, va uint64) (*Section, int, uint64) {
2109+
func assignAddress(ctxt *Link, sect *Section, n int, sym *Symbol, va uint64, isTramp bool) (*Section, int, uint64) {
20922110
sym.Sect = sect
20932111
if sym.Type&SSUB != 0 {
20942112
return sect, n, va
@@ -2117,7 +2135,7 @@ func assignAddress(ctxt *Link, sect *Section, n int, sym *Symbol, va uint64) (*S
21172135

21182136
// Only break at outermost syms.
21192137

2120-
if SysArch.InFamily(sys.PPC64) && sym.Outer == nil && Iself && Linkmode == LinkExternal && va-sect.Vaddr+funcsize > 0x1c00000 {
2138+
if SysArch.InFamily(sys.PPC64) && sym.Outer == nil && Iself && Linkmode == LinkExternal && va-sect.Vaddr+funcsize+maxSizeTrampolinesPPC64(sym, isTramp) > 0x1c00000 {
21212139

21222140
// Set the length for the previous text section
21232141
sect.Length = va - sect.Vaddr

src/cmd/link/internal/ppc64/asm.go

Lines changed: 56 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -522,13 +522,22 @@ func archrelocaddr(ctxt *ld.Link, r *ld.Reloc, s *ld.Symbol, val *int64) int {
522522
// resolve direct jump relocation r in s, and add trampoline if necessary
523523
func trampoline(ctxt *ld.Link, r *ld.Reloc, s *ld.Symbol) {
524524

525+
// Trampolines are created if the branch offset is too large and the linker cannot insert a call stub to handle it.
526+
// For internal linking, trampolines are always created for long calls.
527+
// For external linking, the linker can insert a call stub to handle a long call, but depends on having the TOC address in
528+
// r2. For those build modes with external linking where the TOC address is not maintained in r2, trampolines must be created.
529+
if ld.Linkmode == ld.LinkExternal && (ctxt.DynlinkingGo() || ld.Buildmode == ld.BuildmodeCArchive || ld.Buildmode == ld.BuildmodeCShared || ld.Buildmode == ld.BuildmodePIE) {
530+
// No trampolines needed since r2 contains the TOC
531+
return
532+
}
533+
525534
t := ld.Symaddr(r.Sym) + r.Add - (s.Value + int64(r.Off))
526535
switch r.Type {
527536
case objabi.R_CALLPOWER:
528537

529538
// If branch offset is too far then create a trampoline.
530539

531-
if int64(int32(t<<6)>>6) != t || (*ld.FlagDebugTramp > 1 && s.File != r.Sym.File) {
540+
if (ld.Linkmode == ld.LinkExternal && s.Sect != r.Sym.Sect) || (ld.Linkmode == ld.LinkInternal && int64(int32(t<<6)>>6) != t) || (*ld.FlagDebugTramp > 1 && s.File != r.Sym.File) {
532541
var tramp *ld.Symbol
533542
for i := 0; ; i++ {
534543

@@ -552,26 +561,20 @@ func trampoline(ctxt *ld.Link, r *ld.Reloc, s *ld.Symbol) {
552561

553562
t = ld.Symaddr(tramp) + r.Add - (s.Value + int64(r.Off))
554563

555-
// If the offset of the trampoline that has been found is within range, use it.
556-
if int64(int32(t<<6)>>6) == t {
564+
// With internal linking, the trampoline can be used if it is not too far.
565+
// With external linking, the trampoline must be in this section for it to be reused.
566+
if (ld.Linkmode == ld.LinkInternal && int64(int32(t<<6)>>6) == t) || (ld.Linkmode == ld.LinkExternal && s.Sect == tramp.Sect) {
557567
break
558568
}
559569
}
560570
if tramp.Type == 0 {
561-
ctxt.AddTramp(tramp)
562-
tramp.Size = 16 // 4 instructions
563-
tramp.P = make([]byte, tramp.Size)
564-
t = ld.Symaddr(r.Sym) + r.Add
565-
f := t & 0xffff0000
566-
o1 := uint32(0x3fe00000 | (f >> 16)) // lis r31,trampaddr hi (r31 is temp reg)
567-
f = t & 0xffff
568-
o2 := uint32(0x63ff0000 | f) // ori r31,trampaddr lo
569-
o3 := uint32(0x7fe903a6) // mtctr
570-
o4 := uint32(0x4e800420) // bctr
571-
ld.SysArch.ByteOrder.PutUint32(tramp.P, o1)
572-
ld.SysArch.ByteOrder.PutUint32(tramp.P[4:], o2)
573-
ld.SysArch.ByteOrder.PutUint32(tramp.P[8:], o3)
574-
ld.SysArch.ByteOrder.PutUint32(tramp.P[12:], o4)
571+
if ctxt.DynlinkingGo() || ld.Buildmode == ld.BuildmodeCArchive || ld.Buildmode == ld.BuildmodeCShared || ld.Buildmode == ld.BuildmodePIE {
572+
// Should have returned for above cases
573+
ld.Errorf(s, "unexpected trampoline for shared or dynamic linking\n")
574+
} else {
575+
ctxt.AddTramp(tramp)
576+
gentramp(tramp, r.Sym, int64(r.Add))
577+
}
575578
}
576579
r.Sym = tramp
577580
r.Add = 0 // This was folded into the trampoline target address
@@ -582,6 +585,42 @@ func trampoline(ctxt *ld.Link, r *ld.Reloc, s *ld.Symbol) {
582585
}
583586
}
584587

588+
func gentramp(tramp, target *ld.Symbol, offset int64) {
589+
// Used for default build mode for an executable
590+
// Address of the call target is generated using
591+
// relocation and doesn't depend on r2 (TOC).
592+
tramp.Size = 16 // 4 instructions
593+
tramp.P = make([]byte, tramp.Size)
594+
t := ld.Symaddr(target) + offset
595+
o1 := uint32(0x3fe00000) // lis r31,targetaddr hi
596+
o2 := uint32(0x3bff0000) // addi r31,targetaddr lo
597+
// With external linking, the target address must be
598+
// relocated using LO and HA
599+
if ld.Linkmode == ld.LinkExternal {
600+
tr := ld.Addrel(tramp)
601+
tr.Off = 0
602+
tr.Type = objabi.R_ADDRPOWER
603+
tr.Siz = 8 // generates 2 relocations: HA + LO
604+
tr.Sym = target
605+
tr.Add = offset
606+
} else {
607+
// adjustment needed if lo has sign bit set
608+
// when using addi to compute address
609+
val := uint32((t & 0xffff0000) >> 16)
610+
if t&0x8000 != 0 {
611+
val += 1
612+
}
613+
o1 |= val // hi part of addr
614+
o2 |= uint32(t & 0xffff) // lo part of addr
615+
}
616+
o3 := uint32(0x7fe903a6) // mtctr r31
617+
o4 := uint32(0x4e800420) // bctr
618+
ld.SysArch.ByteOrder.PutUint32(tramp.P, o1)
619+
ld.SysArch.ByteOrder.PutUint32(tramp.P[4:], o2)
620+
ld.SysArch.ByteOrder.PutUint32(tramp.P[8:], o3)
621+
ld.SysArch.ByteOrder.PutUint32(tramp.P[12:], o4)
622+
}
623+
585624
func archreloc(ctxt *ld.Link, r *ld.Reloc, s *ld.Symbol, val *int64) int {
586625
if ld.Linkmode == ld.LinkExternal {
587626
switch r.Type {

0 commit comments

Comments
 (0)