Skip to content

Commit e25f46e

Browse files
committed
cmd/link: faster algorithm for nosplit stack checking, better errors
The linker performs a global analysis of all nosplit call chains to check they fit in the stack space ensured by splittable functions. That analysis has two problems right now:

1. It's inefficient. It performs a top-down analysis, starting with every nosplit function and the nosplit stack limit and walking *down* the call graph to compute how much stack remains at every call. As a result, it visits the same functions over and over, often with different remaining stack depths. This approach is historical: this check was originally written in C and this approach avoided the need for any interesting data structures.

2. If some call chain is over the limit, it only reports a single call chain. As a result, if the check does fail, you often wind up playing whack-a-mole by guessing where the problem is in the one chain, trying to reduce the stack size, and then seeing if the link works or reports a different path.

This CL completely rewrites the nosplit stack check. It now uses a bottom-up analysis, computing the maximum stack height required by every function's call tree. This visits every function exactly once, making it much more efficient. It uses slightly more heap space for intermediate storage, but still very little in the scheme of the overall link.

For example, when linking cmd/go, the new algorithm virtually eliminates the time spent in this pass, and reduces overall link time:

           │   before    │                after                │
           │   sec/op    │   sec/op     vs base                │
Dostkcheck   7.926m ± 4%   1.831m ± 6%  -76.90% (p=0.000 n=20)
TotalTime    301.3m ± 1%   296.4m ± 3%   -1.62% (p=0.040 n=20)

           │    before    │                 after                  │
           │     B/op     │     B/op       vs base                 │
Dostkcheck   40.00Ki ± 0%   212.15Ki ± 0%  +430.37% (p=0.000 n=20)

Most of this time is spent analyzing the runtime, so for larger binaries, the total time saved is roughly the same, and proportionally less of the overall link.

If the new implementation finds an error, it redoes the analysis, switching to preferring quality of error reporting over performance.
For error reporting, it computes stack depths top-down (like the old algorithm), and reports *all* paths that are over the stack limit, presented as a tree for compactness. For example, this is the output from a simple test case from test/nosplit with two over-limit paths from f1:

    main.f1: nosplit stack overflow
    main.f1
        grows 768 bytes, calls main.f2
            grows 56 bytes, calls main.f4
                grows 48 bytes
                80 bytes over limit
        grows 768 bytes, calls main.f3
            grows 104 bytes
            80 bytes over limit

While we're here, we do a few nice cleanups:

- We add a debug output flag, which will be useful for understanding what our nosplit chains look like and which ones are close to running over.

- We move the implementation out of the fog of lib.go to its own file.

- The implementation is generally more Go-like and less C-like.

Change-Id: If1ab31197f5215475559b93695c44a01bd16e276
Reviewed-on: https://go-review.googlesource.com/c/go/+/398176
Run-TryBot: Austin Clements <[email protected]>
Reviewed-by: Than McIntosh <[email protected]>
Reviewed-by: Cherry Mui <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
1 parent 7a06243 commit e25f46e

File tree

7 files changed

+575
-221
lines changed

7 files changed

+575
-221
lines changed

src/cmd/link/internal/ld/lib.go

Lines changed: 0 additions & 218 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ import (
3434
"bytes"
3535
"cmd/internal/bio"
3636
"cmd/internal/goobj"
37-
"cmd/internal/obj"
3837
"cmd/internal/objabi"
3938
"cmd/internal/sys"
4039
"cmd/link/internal/loadelf"
@@ -2343,223 +2342,6 @@ func addsection(ldr *loader.Loader, arch *sys.Arch, seg *sym.Segment, name strin
23432342
return sect
23442343
}
23452344

2346-
type chain struct {
2347-
sym loader.Sym
2348-
up *chain
2349-
limit int // limit on entry to sym
2350-
}
2351-
2352-
func callsize(ctxt *Link) int {
2353-
if ctxt.Arch.HasLR {
2354-
return 0
2355-
}
2356-
return ctxt.Arch.RegSize
2357-
}
2358-
2359-
type stkChk struct {
2360-
ldr *loader.Loader
2361-
ctxt *Link
2362-
morestack loader.Sym
2363-
done loader.Bitmap
2364-
}
2365-
2366-
// Walk the call tree and check that there is always enough stack space
2367-
// for the call frames, especially for a chain of nosplit functions.
2368-
func (ctxt *Link) dostkcheck() {
2369-
ldr := ctxt.loader
2370-
sc := stkChk{
2371-
ldr: ldr,
2372-
ctxt: ctxt,
2373-
morestack: ldr.Lookup("runtime.morestack", 0),
2374-
done: loader.MakeBitmap(ldr.NSym()),
2375-
}
2376-
2377-
// Every splitting function ensures that there are at least StackLimit
2378-
// bytes available below SP when the splitting prologue finishes.
2379-
// If the splitting function calls F, then F begins execution with
2380-
// at least StackLimit - callsize() bytes available.
2381-
// Check that every function behaves correctly with this amount
2382-
// of stack, following direct calls in order to piece together chains
2383-
// of non-splitting functions.
2384-
var ch chain
2385-
ch.limit = objabi.StackLimit - callsize(ctxt)
2386-
if buildcfg.GOARCH == "arm64" {
2387-
// need extra 8 bytes below SP to save FP
2388-
ch.limit -= 8
2389-
}
2390-
2391-
// Check every function, but do the nosplit functions in a first pass,
2392-
// to make the printed failure chains as short as possible.
2393-
for _, s := range ctxt.Textp {
2394-
if ldr.IsNoSplit(s) {
2395-
ch.sym = s
2396-
sc.check(&ch, 0)
2397-
}
2398-
}
2399-
2400-
for _, s := range ctxt.Textp {
2401-
if !ldr.IsNoSplit(s) {
2402-
ch.sym = s
2403-
sc.check(&ch, 0)
2404-
}
2405-
}
2406-
}
2407-
2408-
func (sc *stkChk) check(up *chain, depth int) int {
2409-
limit := up.limit
2410-
s := up.sym
2411-
ldr := sc.ldr
2412-
ctxt := sc.ctxt
2413-
2414-
// Don't duplicate work: only need to consider each
2415-
// function at top of safe zone once.
2416-
top := limit == objabi.StackLimit-callsize(ctxt)
2417-
if top {
2418-
if sc.done.Has(s) {
2419-
return 0
2420-
}
2421-
sc.done.Set(s)
2422-
}
2423-
2424-
if depth > 500 {
2425-
sc.ctxt.Errorf(s, "nosplit stack check too deep")
2426-
sc.broke(up, 0)
2427-
return -1
2428-
}
2429-
2430-
if ldr.AttrExternal(s) {
2431-
// external function.
2432-
// should never be called directly.
2433-
// only diagnose the direct caller.
2434-
// TODO(mwhudson): actually think about this.
2435-
// TODO(khr): disabled for now. Calls to external functions can only happen on the g0 stack.
2436-
// See the trampolines in src/runtime/sys_darwin_$ARCH.go.
2437-
//if depth == 1 && ldr.SymType(s) != sym.SXREF && !ctxt.DynlinkingGo() &&
2438-
// ctxt.BuildMode != BuildModeCArchive && ctxt.BuildMode != BuildModePIE && ctxt.BuildMode != BuildModeCShared && ctxt.BuildMode != BuildModePlugin {
2439-
// Errorf(s, "call to external function")
2440-
//}
2441-
return -1
2442-
}
2443-
info := ldr.FuncInfo(s)
2444-
if !info.Valid() { // external function. see above.
2445-
return -1
2446-
}
2447-
2448-
if limit < 0 {
2449-
sc.broke(up, limit)
2450-
return -1
2451-
}
2452-
2453-
// morestack looks like it calls functions,
2454-
// but it switches the stack pointer first.
2455-
if s == sc.morestack {
2456-
return 0
2457-
}
2458-
2459-
var ch chain
2460-
ch.up = up
2461-
2462-
if !ldr.IsNoSplit(s) {
2463-
// Ensure we have enough stack to call morestack.
2464-
ch.limit = limit - callsize(ctxt)
2465-
ch.sym = sc.morestack
2466-
if sc.check(&ch, depth+1) < 0 {
2467-
return -1
2468-
}
2469-
if !top {
2470-
return 0
2471-
}
2472-
// Raise limit to allow frame.
2473-
locals := info.Locals()
2474-
limit = objabi.StackLimit + int(locals) + int(ctxt.Arch.FixedFrameSize)
2475-
}
2476-
2477-
// Walk through sp adjustments in function, consuming relocs.
2478-
relocs := ldr.Relocs(s)
2479-
var ch1 chain
2480-
pcsp := obj.NewPCIter(uint32(ctxt.Arch.MinLC))
2481-
ri := 0
2482-
for pcsp.Init(ldr.Data(ldr.Pcsp(s))); !pcsp.Done; pcsp.Next() {
2483-
// pcsp.value is in effect for [pcsp.pc, pcsp.nextpc).
2484-
2485-
// Check stack size in effect for this span.
2486-
if int32(limit)-pcsp.Value < 0 {
2487-
sc.broke(up, int(int32(limit)-pcsp.Value))
2488-
return -1
2489-
}
2490-
2491-
// Process calls in this span.
2492-
for ; ri < relocs.Count(); ri++ {
2493-
r := relocs.At(ri)
2494-
if uint32(r.Off()) >= pcsp.NextPC {
2495-
break
2496-
}
2497-
t := r.Type()
2498-
switch {
2499-
case t.IsDirectCall():
2500-
ch.limit = int(int32(limit) - pcsp.Value - int32(callsize(ctxt)))
2501-
ch.sym = r.Sym()
2502-
if sc.check(&ch, depth+1) < 0 {
2503-
return -1
2504-
}
2505-
2506-
// Indirect call. Assume it is a call to a splitting function,
2507-
// so we have to make sure it can call morestack.
2508-
// Arrange the data structures to report both calls, so that
2509-
// if there is an error, stkprint shows all the steps involved.
2510-
case t == objabi.R_CALLIND:
2511-
ch.limit = int(int32(limit) - pcsp.Value - int32(callsize(ctxt)))
2512-
ch.sym = 0
2513-
ch1.limit = ch.limit - callsize(ctxt) // for morestack in called prologue
2514-
ch1.up = &ch
2515-
ch1.sym = sc.morestack
2516-
if sc.check(&ch1, depth+2) < 0 {
2517-
return -1
2518-
}
2519-
}
2520-
}
2521-
}
2522-
2523-
return 0
2524-
}
2525-
2526-
func (sc *stkChk) broke(ch *chain, limit int) {
2527-
sc.ctxt.Errorf(ch.sym, "nosplit stack overflow")
2528-
sc.print(ch, limit)
2529-
}
2530-
2531-
func (sc *stkChk) print(ch *chain, limit int) {
2532-
ldr := sc.ldr
2533-
ctxt := sc.ctxt
2534-
var name string
2535-
if ch.sym != 0 {
2536-
name = fmt.Sprintf("%s<%d>", ldr.SymName(ch.sym), ldr.SymVersion(ch.sym))
2537-
if ldr.IsNoSplit(ch.sym) {
2538-
name += " (nosplit)"
2539-
}
2540-
} else {
2541-
name = "function pointer"
2542-
}
2543-
2544-
if ch.up == nil {
2545-
// top of chain. ch.sym != 0.
2546-
if ldr.IsNoSplit(ch.sym) {
2547-
fmt.Printf("\t%d\tassumed on entry to %s\n", ch.limit, name)
2548-
} else {
2549-
fmt.Printf("\t%d\tguaranteed after split check in %s\n", ch.limit, name)
2550-
}
2551-
} else {
2552-
sc.print(ch.up, ch.limit+callsize(ctxt))
2553-
if !ctxt.Arch.HasLR {
2554-
fmt.Printf("\t%d\ton entry to %s\n", ch.limit, name)
2555-
}
2556-
}
2557-
2558-
if ch.limit != limit {
2559-
fmt.Printf("\t%d\tafter %s uses %d\n", limit, name, ch.limit-limit)
2560-
}
2561-
}
2562-
25632345
func usage() {
25642346
fmt.Fprintf(os.Stderr, "usage: link [options] main.o\n")
25652347
objabi.Flagprint(os.Stderr)

src/cmd/link/internal/ld/main.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ var (
9393
flagInterpreter = flag.String("I", "", "use `linker` as ELF dynamic linker")
9494
FlagDebugTramp = flag.Int("debugtramp", 0, "debug trampolines")
9595
FlagDebugTextSize = flag.Int("debugtextsize", 0, "debug text section max size")
96+
flagDebugNosplit = flag.Bool("debugnosplit", false, "dump nosplit call graph")
9697
FlagStrictDups = flag.Int("strictdups", 0, "sanity check duplicate symbol contents during object file reading (1=warn 2=err).")
9798
FlagRound = flag.Int("R", -1, "set address rounding `quantum`")
9899
FlagTextAddr = flag.Int64("T", -1, "set text segment `address`")
@@ -283,8 +284,8 @@ func Main(arch *sys.Arch, theArch Arch) {
283284
bench.Start("callgraph")
284285
ctxt.callgraph()
285286

286-
bench.Start("dostkcheck")
287-
ctxt.dostkcheck()
287+
bench.Start("doStackCheck")
288+
ctxt.doStackCheck()
288289

289290
bench.Start("mangleTypeSym")
290291
ctxt.mangleTypeSym()

0 commit comments

Comments
 (0)