Skip to content

Commit b851e51

Browse files
runtime: don't crash on mlock failure
Instead, note that mlock has failed, start trying the mitigation of touching the signal stack before sending a preemption signal, and, if the program crashes, mention the possible problem and a wiki page describing the issue (https://golang.org/wiki/LinuxKernelSignalVectorBug).

Tested on a kernel in the buggy version range, but with the patch, by using `ulimit -l 0`.

Fixes #37436

Change-Id: I072aadb2101496dffd655e442fa5c367dad46ce8
Reviewed-on: https://go-review.googlesource.com/c/go/+/223121
Run-TryBot: Ian Lance Taylor <[email protected]>
Reviewed-by: Austin Clements <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
1 parent dcc5c24 commit b851e51

File tree

3 files changed

+52
-8
lines changed

3 files changed

+52
-8
lines changed

src/runtime/os_linux.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
package runtime
66

77
import (
8+
"runtime/internal/atomic"
89
"runtime/internal/sys"
910
"unsafe"
1011
)
@@ -479,7 +480,21 @@ func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32
479480
func getpid() int
480481
func tgkill(tgid, tid, sig int)
481482

483+
// touchStackBeforeSignal stores an errno value. If non-zero, it means
484+
// that we should touch the signal stack before sending a signal.
485+
// This is used on systems that have a bug when the signal stack must
486+
// be faulted in. See #35777 and #37436.
487+
//
488+
// This is accessed atomically as it is set and read in different threads.
489+
//
490+
// TODO(austin): Remove this after Go 1.15 when we remove the
491+
// mlockGsignal workaround.
492+
var touchStackBeforeSignal uint32
493+
482494
// signalM sends a signal to mp.
483495
func signalM(mp *m, sig int) {
496+
if atomic.Load(&touchStackBeforeSignal) != 0 {
497+
atomic.Cas((*uint32)(unsafe.Pointer(mp.gsignal.stack.hi-4)), 0, 0)
498+
}
484499
tgkill(getpid(), int(mp.procid), sig)
485500
}

src/runtime/os_linux_x86.go

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
package runtime
99

10+
import "runtime/internal/atomic"
11+
1012
//go:noescape
1113
func uname(utsname *new_utsname) int
1214

@@ -58,17 +60,34 @@ func osArchInit() {
5860
if m0.gsignal != nil {
5961
throw("gsignal quirk too late")
6062
}
63+
throwReportQuirk = throwBadKernel
6164
}
6265
}
6366

6467
func mlockGsignal(gsignal *g) {
65-
if err := mlock(gsignal.stack.hi-physPageSize, physPageSize); err < 0 {
66-
printlock()
67-
println("runtime: mlock of signal stack failed:", -err)
68-
if err == -_ENOMEM {
69-
println("runtime: increase the mlock limit (ulimit -l) or")
70-
}
71-
println("runtime: update your kernel to 5.3.15+, 5.4.2+, or 5.5+")
72-
throw("mlock failed")
68+
if atomic.Load(&touchStackBeforeSignal) != 0 {
69+
// mlock has already failed, don't try again.
70+
return
71+
}
72+
73+
// This mlock call may fail, but we don't report the failure.
74+
// Instead, if something goes badly wrong, we rely on signalM
75+
// and throwBadKernel to do further mitigation and to report a problem
76+
// to the user if mitigation fails. This is because many
77+
// systems have a limit on the total mlock size, and many kernels
78+
// that appear to have bad versions are actually patched to avoid the
79+
// bug described above. We want Go 1.14 to run on those systems.
80+
// See #37436.
81+
if errno := mlock(gsignal.stack.hi-physPageSize, physPageSize); errno < 0 {
82+
atomic.Store(&touchStackBeforeSignal, uint32(-errno))
83+
}
84+
}
85+
86+
// throwBadKernel is called, via throwReportQuirk, by throw.
87+
func throwBadKernel() {
88+
if errno := atomic.Load(&touchStackBeforeSignal); errno != 0 {
89+
println("runtime: note: your Linux kernel may be buggy")
90+
println("runtime: note: see https://golang.org/wiki/LinuxKernelSignalVectorBug")
91+
println("runtime: note: mlock workaround for kernel bug failed with errno", errno)
7392
}
7493
}

src/runtime/panic.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1281,6 +1281,12 @@ func startpanic_m() bool {
12811281
}
12821282
}
12831283

1284+
// throwReportQuirk, if non-nil, is called by throw after dumping the stacks.
1285+
//
1286+
// TODO(austin): Remove this after Go 1.15 when we remove the
1287+
// mlockGsignal workaround.
1288+
var throwReportQuirk func()
1289+
12841290
var didothers bool
12851291
var deadlock mutex
12861292

@@ -1327,6 +1333,10 @@ func dopanic_m(gp *g, pc, sp uintptr) bool {
13271333

13281334
printDebugLog()
13291335

1336+
if throwReportQuirk != nil {
1337+
throwReportQuirk()
1338+
}
1339+
13301340
return docrash
13311341
}
13321342

0 commit comments

Comments
 (0)