Commit 0b31a46

rhysh authored and gopherbot committed
runtime: profile contended lock calls

Add runtime-internal locks to the mutex contention profile. Store up to
one call stack responsible for lock contention on the M, until it's safe
to contribute its value to the mprof table. Try to use that limited local
storage space for a relatively large source of contention, and attribute
any contention in stacks we're not able to store to a sentinel
_LostContendedLock function.

Avoid ballooning lock contention while manipulating the mprof table by
attributing to that sentinel function any lock contention experienced
while reporting lock contention.

Guard collecting real call stacks with GODEBUG=profileruntimelocks=1,
since the available data has mixed semantics; we can easily capture an
M's own wait time, but we'd prefer for the profile entry of each critical
section to describe how long it made the other Ms wait. It's too late in
the Go 1.22 cycle to make the required changes to futex-based locks. When
not enabled, attribute the time to the sentinel function instead.

Fixes #57071

Change-Id: I3eee0ccbfc20f333b56f20d8725dfd7f3a526b41
Reviewed-on: https://go-review.googlesource.com/c/go/+/528657
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Michael Pratt <[email protected]>
Auto-Submit: Rhys Hiltner <[email protected]>
Reviewed-by: Than McIntosh <[email protected]>
1 parent 3cdc2a1 · commit 0b31a46

11 files changed: +627 −23 lines
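Before the per-file diffs, a simplified, self-contained sketch of the bookkeeping strategy the commit message describes. The type and field names here are hypothetical, not the runtime's real mLockProfile state: each M holds at most one pending call stack, prefers to keep the larger source of contention, and folds everything else into the _LostContendedLock sentinel.

package sketch

// pendingContention is a hypothetical stand-in for the per-M state the
// commit message describes: space for one call stack, plus a running
// total for contention that could not be stored.
type pendingContention struct {
	stack  [32]uintptr // the single call stack this M can hold
	nstk   int         // number of valid entries in stack
	cycles int64       // contention attributed to the stored stack
	lost   int64       // contention to report as _LostContendedLock
}

// record keeps the larger source of contention and spills the smaller
// one to the sentinel, mirroring "try to use that limited local storage
// space for a relatively large source of contention".
func (p *pendingContention) record(stack []uintptr, cycles int64) {
	if cycles <= p.cycles {
		p.lost += cycles
		return
	}
	p.lost += p.cycles
	p.nstk = copy(p.stack[:], stack)
	p.cycles = cycles
}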

src/runtime/debug.go (+11)

@@ -52,6 +52,17 @@ func NumCgoCall() int64 {
 	return n
 }
 
+func totalMutexWaitTimeNanos() int64 {
+	total := sched.totalMutexWaitTime.Load()
+
+	total += sched.totalRuntimeLockWaitTime.Load()
+	for mp := (*m)(atomic.Loadp(unsafe.Pointer(&allm))); mp != nil; mp = mp.alllink {
+		total += mp.mLockProfile.waitTime.Load()
+	}
+
+	return total
+}
+
 // NumGoroutine returns the number of goroutines that currently exist.
 func NumGoroutine() int {
 	return int(gcount())
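This new helper becomes the source for the /sync/mutex/wait/total:seconds metric (see the src/runtime/metrics.go hunk below). A small consumer-side example against the public runtime/metrics API, whose value after this commit also reflects contention on runtime-internal locks:

package main

import (
	"fmt"
	"runtime/metrics"
)

func main() {
	// Read the metric that totalMutexWaitTimeNanos now feeds.
	samples := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
	metrics.Read(samples)
	if samples[0].Value.Kind() == metrics.KindFloat64 {
		fmt.Printf("total mutex wait: %fs\n", samples[0].Value.Float64())
	}
}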

src/runtime/export_test.go (+21)

@@ -1340,6 +1340,27 @@ func PageCachePagesLeaked() (leaked uintptr) {
 	return
 }
 
+type Mutex = mutex
+
+var Lock = lock
+var Unlock = unlock
+
+func MutexContended(l *mutex) bool {
+	switch atomic.Loaduintptr(&l.key) {
+	case 0: // unlocked
+		return false
+	case 1: // locked
+		return false
+	default: // an M is sleeping
+		return true
+	}
+}
+
+func SemRootLock(addr *uint32) *mutex {
+	root := semtable.rootFor(addr)
+	return &root.lock
+}
+
 var Semacquire = semacquire
 var Semrelease1 = semrelease1
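These hooks exist so the runtime's own tests can drive a real runtime-internal mutex. A rough sketch of the kind of check they enable; it only compiles inside the runtime package's test suite, where export_test.go identifiers are visible, and the helper below is illustrative rather than an actual test from this commit:

package runtime_test

import "runtime"

// uncontendedIsQuiet checks the easy half of MutexContended's contract:
// with no other M sleeping on the lock, it should report false.
func uncontendedIsQuiet() bool {
	var mu runtime.Mutex
	runtime.Lock(&mu)
	contended := runtime.MutexContended(&mu)
	runtime.Unlock(&mu)
	return !contended // expect true: we held it without any waiter
}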

src/runtime/extern.go (+8)

@@ -145,6 +145,14 @@ It is a comma-separated list of name=val pairs setting these named variables:
 	risk in that scenario. Currently not supported on Windows, plan9 or js/wasm. Setting this
 	option for some applications can produce large traces, so use with care.
 
+	profileruntimelocks: setting profileruntimelocks=1 includes call stacks related to
+	contention on runtime-internal locks in the "mutex" profile, subject to the
+	MutexProfileFraction setting. The call stacks will correspond to the unlock call that
+	released the lock. But instead of the value corresponding to the amount of contention that
+	call stack caused, it corresponds to the amount of time the caller of unlock had to wait
+	in its original call to lock. A future release is expected to align those and remove this
+	setting.
+
 	invalidptr: invalidptr=1 (the default) causes the garbage collector and stack
 	copier to crash the program if an invalid pointer value (for example, 1)
 	is found in a pointer-typed location. Setting invalidptr=0 disables this check.
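Putting the documented knobs together: a sketch of a program that opts in to the new call stacks. It assumes the process was started with GODEBUG=profileruntimelocks=1 in its environment; only public APIs are used.

package main

import (
	"os"
	"runtime"
	"runtime/pprof"
)

func main() {
	// Runtime-internal locks honor the same sampling fraction as sync
	// mutexes; 1 samples every contention event (fine for a demo).
	runtime.SetMutexProfileFraction(1)

	// ... run the workload whose lock contention should be profiled ...

	// With GODEBUG=profileruntimelocks=1, runtime-internal contention in
	// this profile carries real unlock-site call stacks instead of the
	// _LostContendedLock sentinel.
	f, err := os.Create("mutex.pprof")
	if err != nil {
		panic(err)
	}
	defer f.Close()
	if err := pprof.Lookup("mutex").WriteTo(f, 0); err != nil {
		panic(err)
	}
}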

src/runtime/lock_futex.go (+6)

@@ -71,6 +71,8 @@ func lock2(l *mutex) {
 	// its wakeup call.
 	wait := v
 
+	timer := &lockTimer{lock: l}
+	timer.begin()
 	// On uniprocessors, no point spinning.
 	// On multiprocessors, spin for ACTIVE_SPIN attempts.
 	spin := 0
@@ -82,6 +84,7 @@ func lock2(l *mutex) {
 		for i := 0; i < spin; i++ {
 			for l.key == mutex_unlocked {
 				if atomic.Cas(key32(&l.key), mutex_unlocked, wait) {
+					timer.end()
 					return
 				}
 			}
@@ -92,6 +95,7 @@ func lock2(l *mutex) {
 		for i := 0; i < passive_spin; i++ {
 			for l.key == mutex_unlocked {
 				if atomic.Cas(key32(&l.key), mutex_unlocked, wait) {
+					timer.end()
 					return
 				}
 			}
@@ -101,6 +105,7 @@ func lock2(l *mutex) {
 		// Sleep.
 		v = atomic.Xchg(key32(&l.key), mutex_sleeping)
 		if v == mutex_unlocked {
+			timer.end()
 			return
 		}
 		wait = mutex_sleeping
@@ -122,6 +127,7 @@ func unlock2(l *mutex) {
 	}
 
 	gp := getg()
+	gp.m.mLockProfile.recordUnlock(l)
 	gp.m.locks--
 	if gp.m.locks < 0 {
 		throw("runtime·unlock: lock count")

src/runtime/lock_sema.go (+4)

@@ -48,6 +48,8 @@ func lock2(l *mutex) {
 	}
 	semacreate(gp.m)
 
+	timer := &lockTimer{lock: l}
+	timer.begin()
 	// On uniprocessor's, no point spinning.
 	// On multiprocessors, spin for ACTIVE_SPIN attempts.
 	spin := 0
@@ -60,6 +62,7 @@ Loop:
 		if v&locked == 0 {
 			// Unlocked. Try to lock.
 			if atomic.Casuintptr(&l.key, v, v|locked) {
+				timer.end()
 				return
 			}
 			i = 0
@@ -119,6 +122,7 @@ func unlock2(l *mutex) {
 			}
 		}
 	}
+	gp.m.mLockProfile.recordUnlock(l)
 	gp.m.locks--
 	if gp.m.locks < 0 {
 		throw("runtime·unlock: lock count")

src/runtime/metrics.go (+1 −1)

@@ -470,7 +470,7 @@ func initMetrics() {
 		"/sync/mutex/wait/total:seconds": {
 			compute: func(_ *statAggregate, out *metricValue) {
 				out.kind = metricKindFloat64
-				out.scalar = float64bits(nsToSec(sched.totalMutexWaitTime.Load()))
+				out.scalar = float64bits(nsToSec(totalMutexWaitTimeNanos()))
 			},
 		},
 	}
