Skip to content

Commit c9d88ea

Browse files
committed
runtime: traceAcquire and traceRelease across all P steals
Currently there are a few places where a P can get stolen where the runtime doesn't traceAcquire and traceRelease across the steal itself. What can happen then is the following scenario: - Thread 1 enters a syscall and writes an event about it. - Thread 2 steals Thread 1's P. - Thread 1 exits the syscall and writes one or more events about it. - Tracing ends (trace.gen is set to 0). - Thread 2 checks to see if it should write an event for the P it just stole, sees that tracing is disabled, and doesn't. This results in broken traces, because there's a missing ProcSteal event. The parser always waits for a ProcSteal to advance a GoSyscallEndBlocked event, and in this case, it never comes. Fixes #65181. Change-Id: I437629499bb7669bf7fe2fc6fc4f64c53002916b Reviewed-on: https://go-review.googlesource.com/c/go/+/560235 Reviewed-by: Michael Pratt <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent 829f2ce commit c9d88ea

File tree

1 file changed

+15
-6
lines changed

1 file changed

+15
-6
lines changed

src/runtime/proc.go

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4404,8 +4404,8 @@ func entersyscall_gcwait() {
44044404
pp := gp.m.oldp.ptr()
44054405

44064406
lock(&sched.lock)
4407+
trace := traceAcquire()
44074408
if sched.stopwait > 0 && atomic.Cas(&pp.status, _Psyscall, _Pgcstop) {
4408-
trace := traceAcquire()
44094409
if trace.ok() {
44104410
if goexperiment.ExecTracer2 {
44114411
// This is a steal in the new tracer. While it's very likely
@@ -4428,6 +4428,8 @@ func entersyscall_gcwait() {
44284428
if sched.stopwait--; sched.stopwait == 0 {
44294429
notewakeup(&sched.stopnote)
44304430
}
4431+
} else if trace.ok() {
4432+
traceRelease(trace)
44314433
}
44324434
unlock(&sched.lock)
44334435
}
@@ -4605,12 +4607,19 @@ func exitsyscallfast(oldp *p) bool {
46054607
}
46064608

46074609
// Try to re-acquire the last P.
4610+
trace := traceAcquire()
46084611
if oldp != nil && oldp.status == _Psyscall && atomic.Cas(&oldp.status, _Psyscall, _Pidle) {
46094612
// There's a cpu for us, so we can run.
46104613
wirep(oldp)
4611-
exitsyscallfast_reacquired()
4614+
exitsyscallfast_reacquired(trace)
4615+
if trace.ok() {
4616+
traceRelease(trace)
4617+
}
46124618
return true
46134619
}
4620+
if trace.ok() {
4621+
traceRelease(trace)
4622+
}
46144623

46154624
// Try to get any other idle P.
46164625
if sched.pidle != 0 {
@@ -4646,10 +4655,9 @@ func exitsyscallfast(oldp *p) bool {
46464655
// syscall.
46474656
//
46484657
//go:nosplit
4649-
func exitsyscallfast_reacquired() {
4658+
func exitsyscallfast_reacquired(trace traceLocker) {
46504659
gp := getg()
46514660
if gp.m.syscalltick != gp.m.p.ptr().syscalltick {
4652-
trace := traceAcquire()
46534661
if trace.ok() {
46544662
// The p was retaken and then enter into syscall again (since gp.m.syscalltick has changed).
46554663
// traceGoSysBlock for this syscall was already emitted,
@@ -4666,7 +4674,6 @@ func exitsyscallfast_reacquired() {
46664674
// Denote completion of the current syscall.
46674675
trace.GoSysExit(true)
46684676
}
4669-
traceRelease(trace)
46704677
})
46714678
}
46724679
gp.m.p.ptr().syscalltick++
@@ -6146,8 +6153,8 @@ func retake(now int64) uint32 {
61466153
// Otherwise the M from which we retake can exit the syscall,
61476154
// increment nmidle and report deadlock.
61486155
incidlelocked(-1)
6156+
trace := traceAcquire()
61496157
if atomic.Cas(&pp.status, s, _Pidle) {
6150-
trace := traceAcquire()
61516158
if trace.ok() {
61526159
trace.GoSysBlock(pp)
61536160
trace.ProcSteal(pp, false)
@@ -6156,6 +6163,8 @@ func retake(now int64) uint32 {
61566163
n++
61576164
pp.syscalltick++
61586165
handoffp(pp)
6166+
} else if trace.ok() {
6167+
traceRelease(trace)
61596168
}
61606169
incidlelocked(1)
61616170
lock(&allpLock)

0 commit comments

Comments
 (0)