Skip to content

Commit 370682a

Browse files
committed
runtime: disable preemption in startm
startm contains a critical section from when it takes ownership of a P (either on function entry or call to pidleput) until it wakes the M receiving the P. If preempted in this critical section, the owned P is left in limbo. If preempted for a GC stop, there will be nothing to stop the owned P and STW will wait forever. golang.org/cl/232298 introduced the first call to startm that is not on the system stack (via a wakep call), introducing the possibility of preemption. Disable preemption in startm to ensure this remains non-preemptible. Since we're not always on the system stack anymore, we also need to be careful in allocm. Updates #42237 Change-Id: Icb95eef9eb262121856485316098331beea045da Reviewed-on: https://go-review.googlesource.com/c/go/+/267257 Reviewed-by: Austin Clements <[email protected]> Reviewed-by: Michael Knyszek <[email protected]> Trust: Michael Pratt <[email protected]>
1 parent 06538fa commit 370682a

File tree

1 file changed

+42
-8
lines changed

1 file changed

+42
-8
lines changed

src/runtime/proc.go

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1665,7 +1665,12 @@ func allocm(_p_ *p, fn func(), id int64) *m {
16651665
freem = next
16661666
continue
16671667
}
1668-
stackfree(freem.g0.stack)
1668+
// stackfree must be on the system stack, but allocm is
1669+
// reachable off the system stack transitively from
1670+
// startm.
1671+
systemstack(func() {
1672+
stackfree(freem.g0.stack)
1673+
})
16691674
freem = freem.freelink
16701675
}
16711676
sched.freem = newList
@@ -2192,8 +2197,30 @@ func mspinning() {
21922197
// May run with m.p==nil, so write barriers are not allowed.
21932198
// If spinning is set, the caller has incremented nmspinning and startm will
21942199
// either decrement nmspinning or set m.spinning in the newly started M.
2200+
//
2201+
// Callers passing a non-nil P must call from a non-preemptible context. See
2202+
// comment on acquirem below.
2203+
//
2204+
// Must not have write barriers because this may be called without a P.
21952205
//go:nowritebarrierrec
21962206
func startm(_p_ *p, spinning bool) {
2207+
// Disable preemption.
2208+
//
2209+
// Every owned P must have an owner that will eventually stop it in the
2210+
// event of a GC stop request. startm takes transient ownership of a P
2211+
// (either from argument or pidleget below) and transfers ownership to
2212+
// a started M, which will be responsible for performing the stop.
2213+
//
2214+
// Preemption must be disabled during this transient ownership,
2215+
// otherwise the P this is running on may enter GC stop while still
2216+
// holding the transient P, leaving that P in limbo and deadlocking the
2217+
// STW.
2218+
//
2219+
// Callers passing a non-nil P must already be in non-preemptible
2220+
// context, otherwise such preemption could occur on function entry to
2221+
// startm. Callers passing a nil P may be preemptible, so we must
2222+
// disable preemption before acquiring a P from pidleget below.
2223+
mp := acquirem()
21972224
lock(&sched.lock)
21982225
if _p_ == nil {
21992226
_p_ = pidleget()
@@ -2206,11 +2233,12 @@ func startm(_p_ *p, spinning bool) {
22062233
throw("startm: negative nmspinning")
22072234
}
22082235
}
2236+
releasem(mp)
22092237
return
22102238
}
22112239
}
2212-
mp := mget()
2213-
if mp == nil {
2240+
nmp := mget()
2241+
if nmp == nil {
22142242
// No M is available, we must drop sched.lock and call newm.
22152243
// However, we already own a P to assign to the M.
22162244
//
@@ -2232,22 +2260,28 @@ func startm(_p_ *p, spinning bool) {
22322260
fn = mspinning
22332261
}
22342262
newm(fn, _p_, id)
2263+
// Ownership transfer of _p_ committed by start in newm.
2264+
// Preemption is now safe.
2265+
releasem(mp)
22352266
return
22362267
}
22372268
unlock(&sched.lock)
2238-
if mp.spinning {
2269+
if nmp.spinning {
22392270
throw("startm: m is spinning")
22402271
}
2241-
if mp.nextp != 0 {
2272+
if nmp.nextp != 0 {
22422273
throw("startm: m has p")
22432274
}
22442275
if spinning && !runqempty(_p_) {
22452276
throw("startm: p has runnable gs")
22462277
}
22472278
// The caller incremented nmspinning, so set m.spinning in the new M.
2248-
mp.spinning = spinning
2249-
mp.nextp.set(_p_)
2250-
notewakeup(&mp.park)
2279+
nmp.spinning = spinning
2280+
nmp.nextp.set(_p_)
2281+
notewakeup(&nmp.park)
2282+
// Ownership transfer of _p_ committed by wakeup. Preemption is now
2283+
// safe.
2284+
releasem(mp)
22512285
}
22522286

22532287
// Hands off P from syscall or locked M.

0 commit comments

Comments
 (0)