Skip to content

Commit d80ab3e

Browse files
runtime: wake netpoller when dropping P, don't sleep too long in sysmon
When dropping a P, if it has any timers, and if some thread is sleeping in the netpoller, wake the netpoller to run the P's timers. This mitigates races between the netpoller deciding how long to sleep and a new timer being added. In sysmon, if all P's are idle, check the timers to decide how long to sleep. This avoids oversleeping if no thread is using the netpoller. This can happen in particular if some threads use runtime.LockOSThread, as those threads do not block in the netpoller. Also, print the number of timers per P for GODEBUG=scheddetail=1. Before this CL, TestLockedDeadlock2 would fail about 1% of the time. With this CL, I ran it 150,000 times with no failures. Updates #6239 Updates #27707 Fixes #35274 Fixes #35288 Change-Id: I7e5193e6c885e567f0b1ee023664aa3e2902fcd1 Reviewed-on: https://go-review.googlesource.com/c/go/+/204800 Run-TryBot: Ian Lance Taylor <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Michael Knyszek <[email protected]>
1 parent 5a7c571 commit d80ab3e

File tree

2 files changed

+47
-23
lines changed

2 files changed

+47
-23
lines changed

src/runtime/proc.go

+26-23
Original file line numberDiff line numberDiff line change
@@ -1964,6 +1964,9 @@ func handoffp(_p_ *p) {
19641964
startm(_p_, false)
19651965
return
19661966
}
1967+
if when := nobarrierWakeTime(_p_); when != 0 {
1968+
wakeNetPoller(when)
1969+
}
19671970
pidleput(_p_)
19681971
unlock(&sched.lock)
19691972
}
@@ -4448,32 +4451,33 @@ func sysmon() {
44484451
delay = 10 * 1000
44494452
}
44504453
usleep(delay)
4454+
now := nanotime()
44514455
if debug.schedtrace <= 0 && (sched.gcwaiting != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs)) {
44524456
lock(&sched.lock)
44534457
if atomic.Load(&sched.gcwaiting) != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs) {
4454-
atomic.Store(&sched.sysmonwait, 1)
4455-
unlock(&sched.lock)
4456-
// Make wake-up period small enough
4457-
// for the sampling to be correct.
4458-
maxsleep := forcegcperiod / 2
4459-
shouldRelax := true
4460-
if osRelaxMinNS > 0 {
4461-
next := timeSleepUntil()
4462-
now := nanotime()
4463-
if next-now < osRelaxMinNS {
4464-
shouldRelax = false
4458+
next := timeSleepUntil()
4459+
if next > now {
4460+
atomic.Store(&sched.sysmonwait, 1)
4461+
unlock(&sched.lock)
4462+
// Make wake-up period small enough
4463+
// for the sampling to be correct.
4464+
sleep := forcegcperiod / 2
4465+
if next-now < sleep {
4466+
sleep = next - now
44654467
}
4468+
shouldRelax := sleep >= osRelaxMinNS
4469+
if shouldRelax {
4470+
osRelax(true)
4471+
}
4472+
notetsleep(&sched.sysmonnote, sleep)
4473+
if shouldRelax {
4474+
osRelax(false)
4475+
}
4476+
now = nanotime()
4477+
lock(&sched.lock)
4478+
atomic.Store(&sched.sysmonwait, 0)
4479+
noteclear(&sched.sysmonnote)
44664480
}
4467-
if shouldRelax {
4468-
osRelax(true)
4469-
}
4470-
notetsleep(&sched.sysmonnote, maxsleep)
4471-
if shouldRelax {
4472-
osRelax(false)
4473-
}
4474-
lock(&sched.lock)
4475-
atomic.Store(&sched.sysmonwait, 0)
4476-
noteclear(&sched.sysmonnote)
44774481
idle = 0
44784482
delay = 20
44794483
}
@@ -4485,7 +4489,6 @@ func sysmon() {
44854489
}
44864490
// poll network if not polled for more than 10ms
44874491
lastpoll := int64(atomic.Load64(&sched.lastpoll))
4488-
now := nanotime()
44894492
if netpollinited() && lastpoll != 0 && lastpoll+10*1000*1000 < now {
44904493
atomic.Cas64(&sched.lastpoll, uint64(lastpoll), uint64(now))
44914494
list := netpoll(0) // non-blocking - returns list of goroutines
@@ -4691,7 +4694,7 @@ func schedtrace(detailed bool) {
46914694
if mp != nil {
46924695
id = mp.id
46934696
}
4694-
print(" P", i, ": status=", _p_.status, " schedtick=", _p_.schedtick, " syscalltick=", _p_.syscalltick, " m=", id, " runqsize=", t-h, " gfreecnt=", _p_.gFree.n, "\n")
4697+
print(" P", i, ": status=", _p_.status, " schedtick=", _p_.schedtick, " syscalltick=", _p_.syscalltick, " m=", id, " runqsize=", t-h, " gfreecnt=", _p_.gFree.n, " timerslen=", len(_p_.timers), "\n")
46954698
} else {
46964699
// In non-detailed mode format lengths of per-P run queues as:
46974700
// [len1 len2 len3 len4]

src/runtime/time.go

+21
Original file line numberDiff line numberDiff line change
@@ -1024,6 +1024,27 @@ func addAdjustedTimers(pp *p, moved []*timer) {
10241024
}
10251025
}
10261026

1027+
// nobarrierWakeTime looks at P's timers and returns the time when we
1028+
// should wake up the netpoller. It returns 0 if there are no timers.
1029+
// This function is invoked when dropping a P, and must run without
1030+
// any write barriers. Therefore, if there are any timers that needs
1031+
// to be moved earlier, it conservatively returns the current time.
1032+
// The netpoller M will wake up and adjust timers before sleeping again.
1033+
//go:nowritebarrierrec
1034+
func nobarrierWakeTime(pp *p) int64 {
1035+
lock(&pp.timersLock)
1036+
ret := int64(0)
1037+
if len(pp.timers) > 0 {
1038+
if atomic.Load(&pp.adjustTimers) > 0 {
1039+
ret = nanotime()
1040+
} else {
1041+
ret = pp.timers[0].when
1042+
}
1043+
}
1044+
unlock(&pp.timersLock)
1045+
return ret
1046+
}
1047+
10271048
// runtimer examines the first timer in timers. If it is ready based on now,
10281049
// it runs the timer and removes or updates it.
10291050
// Returns 0 if it ran a timer, -1 if there are no more timers, or the time

0 commit comments

Comments
 (0)