Skip to content

Commit 61dd3a4

Browse files
committed
startm: support callers that already hold sched.lock; add a trace event to detect a pending crash
1 parent 3246a5f commit 61dd3a4

File tree

2 files changed

+45
-24
lines changed

2 files changed

+45
-24
lines changed

src/runtime/proc.go

Lines changed: 43 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2406,7 +2406,7 @@ func mspinning() {
24062406
// Must not have write barriers because this may be called without a P.
24072407
//
24082408
//go:nowritebarrierrec
2409-
func startm(pp *p, spinning bool) {
2409+
func startm(pp *p, spinning bool, lockheld bool) {
24102410
// Disable preemption.
24112411
//
24122412
// Every owned P must have an owner that will eventually stop it in the
@@ -2424,9 +2424,12 @@ func startm(pp *p, spinning bool) {
24242424
// startm. Callers passing a nil P may be preemptible, so we must
24252425
// disable preemption before acquiring a P from pidleget below.
24262426
mp := acquirem()
2427-
pushEventTrace("startm acquiring sched lock")
2428-
lock(&sched.lock)
2429-
pushEventTrace("startm acquired sched lock")
2427+
2428+
if !lockheld {
2429+
pushEventTrace("startm acquiring sched lock")
2430+
lock(&sched.lock)
2431+
pushEventTrace("startm acquired sched lock")
2432+
}
24302433
if pp == nil {
24312434
if spinning {
24322435
// TODO(prattmic): All remaining calls to this function
@@ -2436,9 +2439,11 @@ func startm(pp *p, spinning bool) {
24362439
}
24372440
pp, _ = pidleget(0)
24382441
if pp == nil {
2439-
pushEventTrace("mcommoninit releasing sched lock (1)")
2440-
unlock(&sched.lock)
2441-
pushEventTrace("mcommoninit released sched lock (1)")
2442+
if !lockheld {
2443+
pushEventTrace("mcommoninit releasing sched lock (1)")
2444+
unlock(&sched.lock)
2445+
pushEventTrace("mcommoninit released sched lock (1)")
2446+
}
24422447
releasem(mp)
24432448
return
24442449
}
@@ -2458,9 +2463,11 @@ func startm(pp *p, spinning bool) {
24582463
// new M will eventually run the scheduler to execute any
24592464
// queued G's.
24602465
id := mReserveID()
2461-
pushEventTrace("mcommoninit releasing sched lock (2)")
2462-
unlock(&sched.lock)
2463-
pushEventTrace("mcommoninit released sched lock (2)")
2466+
if !lockheld {
2467+
pushEventTrace("mcommoninit releasing sched lock (2)")
2468+
unlock(&sched.lock)
2469+
pushEventTrace("mcommoninit released sched lock (2)")
2470+
}
24642471

24652472
var fn func()
24662473
if spinning {
@@ -2473,9 +2480,11 @@ func startm(pp *p, spinning bool) {
24732480
releasem(mp)
24742481
return
24752482
}
2476-
pushEventTrace("mcommoninit releasing sched lock (3)")
2477-
unlock(&sched.lock)
2478-
pushEventTrace("mcommoninit released sched lock (3)")
2483+
if !lockheld {
2484+
pushEventTrace("mcommoninit releasing sched lock (3)")
2485+
unlock(&sched.lock)
2486+
pushEventTrace("mcommoninit released sched lock (3)")
2487+
}
24792488
if nmp.spinning {
24802489
throw("startm: m is spinning")
24812490
}
@@ -2504,24 +2513,24 @@ func handoffp(pp *p) {
25042513

25052514
// if it has local work, start it straight away
25062515
if !runqempty(pp) || sched.runqsize != 0 {
2507-
startm(pp, false)
2516+
startm(pp, false, false)
25082517
return
25092518
}
25102519
// if there's trace work to do, start it straight away
25112520
if (trace.enabled || trace.shutdown) && traceReaderAvailable() != nil {
2512-
startm(pp, false)
2521+
startm(pp, false, false)
25132522
return
25142523
}
25152524
// if it has GC work, start it straight away
25162525
if gcBlackenEnabled != 0 && gcMarkWorkAvailable(pp) {
2517-
startm(pp, false)
2526+
startm(pp, false, false)
25182527
return
25192528
}
25202529
// no local work, check that there are no spinning/idle M's,
25212530
// otherwise our help is not required
25222531
if sched.nmspinning.Load()+sched.npidle.Load() == 0 && sched.nmspinning.CompareAndSwap(0, 1) { // TODO: fast atomic
25232532
sched.needspinning.Store(0)
2524-
startm(pp, true)
2533+
startm(pp, true, false)
25252534
return
25262535
}
25272536
pushEventTrace("handoffp acquiring sched lock")
@@ -2549,7 +2558,7 @@ func handoffp(pp *p) {
25492558
pushEventTrace("mcommoninit releasing sched lock (2)")
25502559
unlock(&sched.lock)
25512560
pushEventTrace("mcommoninit released sched lock (2)")
2552-
startm(pp, false)
2561+
startm(pp, false, false)
25532562
return
25542563
}
25552564
// If this is the last running P and nobody is polling network,
@@ -2558,7 +2567,7 @@ func handoffp(pp *p) {
25582567
pushEventTrace("mcommoninit releasing sched lock (3)")
25592568
unlock(&sched.lock)
25602569
pushEventTrace("mcommoninit released sched lock (3)")
2561-
startm(pp, false)
2570+
startm(pp, false, false)
25622571
return
25632572
}
25642573

@@ -2615,7 +2624,7 @@ func wakep() {
26152624
unlock(&sched.lock)
26162625
pushEventTrace("wakep released sched lock (2)")
26172626

2618-
startm(pp, true)
2627+
startm(pp, true, false)
26192628

26202629
releasem(mp)
26212630
}
@@ -2957,6 +2966,7 @@ top:
29572966
throw("findrunnable: wrong p")
29582967
}
29592968
now = pidleput(pp, now)
2969+
sched.crashPending.Store(true)
29602970
pushEventTrace("findRunnable releasing sched lock (6)")
29612971
unlock(&sched.lock)
29622972
pushEventTrace("findRunnable released sched lock (6)")
@@ -3076,13 +3086,16 @@ top:
30763086
stopm()
30773087
goto top
30783088
}
3089+
30793090
pushEventTrace("findRunnable acquiring sched lock (4)")
30803091
lock(&sched.lock)
30813092
pushEventTrace("findRunnable acquired sched lock (4)")
30823093
pp, _ := pidleget(now)
30833094
pushEventTrace("findRunnable releasing sched lock (7)")
30843095
unlock(&sched.lock)
30853096
pushEventTrace("findRunnable released sched lock (7)")
3097+
sched.crashPending.Store(false)
3098+
30863099
if pp == nil {
30873100
injectglist(&list)
30883101
} else {
@@ -3416,10 +3429,17 @@ func injectglist(glist *gList) {
34163429
break
34173430
}
34183431

3432+
if sched.crashPending.Load() {
3433+
pushEventTrace("[checkdead] should have crashed")
3434+
sched.crashPending.Store(false)
3435+
}
3436+
3437+
startm(pp, false, true)
3438+
34193439
pushEventTrace("injectglist releasing sched lock (2)")
34203440
unlock(&sched.lock)
34213441
pushEventTrace("injectglist released sched lock (2)")
3422-
startm(pp, false)
3442+
34233443
releasem(mp)
34243444
}
34253445
}
@@ -5647,7 +5667,7 @@ func sysmon() {
56475667
// See issue 42515 and
56485668
// https://gnats.netbsd.org/cgi-bin/query-pr-single.pl?number=50094.
56495669
if next := timeSleepUntil(); next < now {
5650-
startm(nil, false)
5670+
startm(nil, false, false)
56515671
}
56525672
}
56535673
if scavenger.sysmonWake.Load() != 0 {
@@ -5932,7 +5952,7 @@ func schedEnableUser(enable bool) {
59325952
unlock(&sched.lock)
59335953
pushEventTrace("schedEnableUser released sched lock (2)")
59345954
for ; n != 0 && sched.npidle.Load() != 0; n-- {
5935-
startm(nil, false)
5955+
startm(nil, false, false)
59365956
}
59375957
} else {
59385958
pushEventTrace("schedEnableUser releasing sched lock (3)")

src/runtime/runtime2.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -862,7 +862,8 @@ type schedt struct {
862862
// with a waitreason of the form waitReasonSync{RW,}Mutex{R,}Lock.
863863
totalMutexWaitTime atomic.Int64
864864

865-
eventTrace *circbuf.CircularBuffer[*eventTraceElement]
865+
eventTrace *circbuf.CircularBuffer[*eventTraceElement]
866+
crashPending atomic.Bool
866867
}
867868

868869
// Values for the flags field of a sigTabT.

0 commit comments

Comments
 (0)