Commit 39a5ee5

runtime: decouple consistent stats from mcache and allow P-less update
This change modifies the consistent stats implementation to keep the per-P
sequence counter on each P instead of each mcache. A valid mcache is not
available everywhere that we want to call e.g. allocSpan, as per issue
#42339. By decoupling these two, we can add a mechanism to allow contexts
without a P to update stats consistently.

In this CL, we achieve that with a mutex. In practice, it will be very rare
for an M to update these stats without a P. Furthermore, the stats reader
also only needs to hold the mutex across the update to "gen" since once
that changes, writers are free to continue updating the new stats
generation. Contention could thus only arise between writers without a P,
and as mentioned earlier, those should be rare.

A nice side-effect of this change is that the consistent stats acquire and
release API becomes simpler.

Fixes #42339.

Change-Id: Ied74ab256f69abd54b550394c8ad7c4c40a5fe34
Reviewed-on: https://go-review.googlesource.com/c/go/+/267158
Run-TryBot: Michael Knyszek <[email protected]>
Trust: Michael Knyszek <[email protected]>
Reviewed-by: Michael Pratt <[email protected]>
1 parent ac766e3 commit 39a5ee5
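
The scheme the commit message describes can be made concrete with a small, self-contained sketch (plain Go, not the runtime code): writers that hold a "P" bracket their update with a per-P sequence number that is odd while they write, writers without a P fall back to a mutex, and the reader holds that mutex only while it rotates gen, before spinning on the sequence numbers. The names here (ring, add, read, a sync.Mutex standing in for noPLock) are invented for illustration.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

const numP = 4 // stand-in for GOMAXPROCS

// ring is a toy version of the scheme: three delta slots indexed by gen, a
// per-P sequence number that is odd while that P is writing, and a mutex
// that only P-less writers and the reader ever take.
type ring struct {
	deltas  [3]int64
	gen     uint32
	seq     [numP]uint32
	noPLock sync.Mutex
}

// add records a delta. p >= 0 models a writer running with a P (the common,
// lock-free path); p < 0 models an M with no P, which serializes on noPLock.
func (r *ring) add(p int, v int64) {
	if p >= 0 {
		atomic.AddUint32(&r.seq[p], 1) // now odd: writing
		atomic.AddInt64(&r.deltas[atomic.LoadUint32(&r.gen)%3], v)
		atomic.AddUint32(&r.seq[p], 1) // back to even: done
		return
	}
	r.noPLock.Lock()
	atomic.AddInt64(&r.deltas[atomic.LoadUint32(&r.gen)%3], v)
	r.noPLock.Unlock()
}

// read rotates gen and returns a consistent total. The mutex is held only
// across the rotation, so P-less writers are blocked only for that instant.
func (r *ring) read() int64 {
	curr := atomic.LoadUint32(&r.gen)
	prev := (curr + 2) % 3 // the generation before last

	r.noPLock.Lock()
	atomic.StoreUint32(&r.gen, (curr+1)%3) // move writers to the next slot
	r.noPLock.Unlock()

	// Wait for writers with a P to finish with the old generation.
	for i := range r.seq {
		for atomic.LoadUint32(&r.seq[i])%2 != 0 {
		}
	}

	// Fold the generation before last into the snapshot and zero it so a
	// later read can rotate gen onto it, then read out the total.
	atomic.AddInt64(&r.deltas[curr], atomic.SwapInt64(&r.deltas[prev], 0))
	return atomic.LoadInt64(&r.deltas[curr])
}

func main() {
	var r ring
	var wg sync.WaitGroup
	for i := 0; i < 100; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			r.add(i%numP-1, 1) // every fourth writer runs "without a P"
		}(i)
	}
	wg.Wait()
	fmt.Println(r.read()) // 100
}

The payoff of the design is visible in read: only the rare P-less writers can ever block, while the common path stays a pair of atomic increments around an atomic add.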

File tree

7 files changed, +88 -80 lines changed


src/runtime/mcache.go

Lines changed: 6 additions & 10 deletions
@@ -50,10 +50,6 @@ type mcache struct {
 	// in this mcache are stale and need to the flushed so they
 	// can be swept. This is done in acquirep.
 	flushGen uint32
-
-	// statsSeq is a counter indicating whether this P is currently
-	// writing any stats. Its value is even when not, odd when it is.
-	statsSeq uint32
 }
 
 // A gclink is a node in a linked list of blocks, like mlink,
@@ -178,9 +174,9 @@ func (c *mcache) refill(spc spanClass) {
 
 	// Assume all objects from this span will be allocated in the
 	// mcache. If it gets uncached, we'll adjust this.
-	stats := memstats.heapStats.acquire(c)
+	stats := memstats.heapStats.acquire()
 	atomic.Xadduintptr(&stats.smallAllocCount[spc.sizeclass()], uintptr(s.nelems)-uintptr(s.allocCount))
-	memstats.heapStats.release(c)
+	memstats.heapStats.release()
 
 	// Update heap_live with the same assumption.
 	usedBytes := uintptr(s.allocCount) * s.elemsize
@@ -229,10 +225,10 @@ func (c *mcache) allocLarge(size uintptr, needzero bool, noscan bool) *mspan {
 	if s == nil {
 		throw("out of memory")
 	}
-	stats := memstats.heapStats.acquire(c)
+	stats := memstats.heapStats.acquire()
 	atomic.Xadduintptr(&stats.largeAlloc, npages*pageSize)
 	atomic.Xadduintptr(&stats.largeAllocCount, 1)
-	memstats.heapStats.release(c)
+	memstats.heapStats.release()
 
 	// Update heap_live and revise pacing if needed.
 	atomic.Xadd64(&memstats.heap_live, int64(npages*pageSize))
@@ -263,9 +259,9 @@ func (c *mcache) releaseAll() {
 		if s != &emptymspan {
 			// Adjust nsmallalloc in case the span wasn't fully allocated.
 			n := uintptr(s.nelems) - uintptr(s.allocCount)
-			stats := memstats.heapStats.acquire(c)
+			stats := memstats.heapStats.acquire()
 			atomic.Xadduintptr(&stats.smallAllocCount[spanClass(i).sizeclass()], -n)
-			memstats.heapStats.release(c)
+			memstats.heapStats.release()
 			if s.sweepgen != sg+1 {
 				// refill conservatively counted unallocated slots in heap_live.
 				// Undo this.

src/runtime/mgcscavenge.go

Lines changed: 2 additions & 6 deletions
@@ -733,14 +733,10 @@ func (p *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr
 	atomic.Xadd64(&memstats.heap_released, nbytes)
 
 	// Update consistent accounting too.
-	c := getMCache()
-	if c == nil {
-		throw("scavengeRangeLocked called without a P or outside bootstrapping")
-	}
-	stats := memstats.heapStats.acquire(c)
+	stats := memstats.heapStats.acquire()
 	atomic.Xaddint64(&stats.committed, -nbytes)
 	atomic.Xaddint64(&stats.released, nbytes)
-	memstats.heapStats.release(c)
+	memstats.heapStats.release()
 
 	return addr
 }

src/runtime/mgcsweep.go

Lines changed: 4 additions & 6 deletions
@@ -339,8 +339,6 @@ func (s *mspan) sweep(preserve bool) bool {
 	spc := s.spanclass
 	size := s.elemsize
 
-	c := _g_.m.p.ptr().mcache
-
 	// The allocBits indicate which unmarked objects don't need to be
 	// processed since they were free at the end of the last GC cycle
 	// and were not allocated since then.
@@ -505,9 +503,9 @@ func (s *mspan) sweep(preserve bool) bool {
 		// wasn't totally filled, but then swept, still has all of its
 		// free slots zeroed.
 		s.needzero = 1
-		stats := memstats.heapStats.acquire(c)
+		stats := memstats.heapStats.acquire()
 		atomic.Xadduintptr(&stats.smallFreeCount[spc.sizeclass()], uintptr(nfreed))
-		memstats.heapStats.release(c)
+		memstats.heapStats.release()
 	}
 	if !preserve {
 		// The caller may not have removed this span from whatever
@@ -552,10 +550,10 @@ func (s *mspan) sweep(preserve bool) bool {
 		} else {
 			mheap_.freeSpan(s)
 		}
-		stats := memstats.heapStats.acquire(c)
+		stats := memstats.heapStats.acquire()
 		atomic.Xadduintptr(&stats.largeFreeCount, 1)
 		atomic.Xadduintptr(&stats.largeFree, size)
-		memstats.heapStats.release(c)
+		memstats.heapStats.release()
 		return true
 	}
 
src/runtime/mheap.go

Lines changed: 6 additions & 21 deletions
@@ -1246,12 +1246,7 @@ HaveSpan:
 		memstats.heap_sys.add(-int64(nbytes))
 	}
 	// Update consistent stats.
-	c := getMCache()
-	if c == nil {
-		// TODO(mknyszek): Remove this and handle this case to fix #42339.
-		throw("allocSpan called without P or outside bootstrapping")
-	}
-	stats := memstats.heapStats.acquire(c)
+	stats := memstats.heapStats.acquire()
 	atomic.Xaddint64(&stats.committed, int64(scav))
 	atomic.Xaddint64(&stats.released, -int64(scav))
 	switch typ {
@@ -1264,7 +1259,7 @@ HaveSpan:
 	case spanAllocWorkBuf:
 		atomic.Xaddint64(&stats.inWorkBufs, int64(nbytes))
 	}
-	memstats.heapStats.release(c)
+	memstats.heapStats.release()
 
 	// Publish the span in various locations.
 
@@ -1344,14 +1339,9 @@ func (h *mheap) grow(npage uintptr) bool {
 		// size which is always > physPageSize, so its safe to
 		// just add directly to heap_released.
 		atomic.Xadd64(&memstats.heap_released, int64(asize))
-		c := getMCache()
-		if c == nil {
-			// TODO(mknyszek): Remove this and handle this case to fix #42339.
-			throw("grow called without P or outside bootstrapping")
-		}
-		stats := memstats.heapStats.acquire(c)
+		stats := memstats.heapStats.acquire()
 		atomic.Xaddint64(&stats.released, int64(asize))
-		memstats.heapStats.release(c)
+		memstats.heapStats.release()
 
 		// Recalculate nBase.
 		// We know this won't overflow, because sysAlloc returned
@@ -1447,12 +1437,7 @@ func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) {
 		memstats.heap_sys.add(int64(nbytes))
 	}
 	// Update consistent stats.
-	c := getMCache()
-	if c == nil {
-		// TODO(mknyszek): Remove this and handle this case to fix #42339.
-		throw("freeSpanLocked called without a P or outside bootstrapping")
-	}
-	stats := memstats.heapStats.acquire(c)
+	stats := memstats.heapStats.acquire()
 	switch typ {
 	case spanAllocHeap:
 		atomic.Xaddint64(&stats.inHeap, -int64(nbytes))
@@ -1463,7 +1448,7 @@ func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) {
 	case spanAllocWorkBuf:
 		atomic.Xaddint64(&stats.inWorkBufs, -int64(nbytes))
 	}
-	memstats.heapStats.release(c)
+	memstats.heapStats.release()
 
 	// Mark the space as free.
 	h.pages.free(s.base(), s.npages)

src/runtime/mstats.go

Lines changed: 60 additions & 35 deletions
@@ -158,7 +158,7 @@ type mstats struct {
 	// heapStats is a set of statistics
 	heapStats consistentHeapStats
 
-	_ uint32 // ensure gcPauseDist is aligned
+	// _ uint32 // ensure gcPauseDist is aligned
 
 	// gcPauseDist represents the distribution of all GC-related
 	// application pauses in the runtime.
@@ -818,10 +818,11 @@ type consistentHeapStats struct {
 	// Writers always atomically update the delta at index gen.
 	//
 	// Readers operate by rotating gen (0 -> 1 -> 2 -> 0 -> ...)
-	// and synchronizing with writers by observing each mcache's
-	// statsSeq field. If the reader observes a P (to which the
-	// mcache is bound) not writing, it can be sure that it will
-	// pick up the new gen value the next time it writes.
+	// and synchronizing with writers by observing each P's
+	// statsSeq field. If the reader observes a P not writing,
+	// it can be sure that it will pick up the new gen value the
+	// next time it writes.
+	//
 	// The reader then takes responsibility by clearing space
 	// in the ring buffer for the next reader to rotate gen to
 	// that space (i.e. it merges in values from index (gen-2) mod 3
@@ -830,7 +831,7 @@ type consistentHeapStats struct {
 	// Note that this means only one reader can be reading at a time.
 	// There is no way for readers to synchronize.
 	//
-	// This process is why we need ring buffer of size 3 instead
+	// This process is why we need a ring buffer of size 3 instead
 	// of 2: one is for the writers, one contains the most recent
 	// data, and the last one is clear so writers can begin writing
 	// to it the moment gen is updated.
@@ -840,24 +841,34 @@ type consistentHeapStats struct {
 	// are writing, and can take on the value of 0, 1, or 2.
 	// This value is updated atomically.
 	gen uint32
+
+	// noPLock is intended to provide mutual exclusion for updating
+	// stats when no P is available. It does not block other writers
+	// with a P, only other writers without a P and the reader. Because
+	// stats are usually updated when a P is available, contention on
+	// this lock should be minimal.
+	noPLock mutex
 }
 
 // acquire returns a heapStatsDelta to be updated. In effect,
 // it acquires the shard for writing. release must be called
-// as soon as the relevant deltas are updated. c must be
-// a valid mcache not being used by any other thread.
+// as soon as the relevant deltas are updated.
 //
 // The returned heapStatsDelta must be updated atomically.
 //
-// Note however, that this is unsafe to call concurrently
-// with other writers and there must be only one writer
-// at a time.
-func (m *consistentHeapStats) acquire(c *mcache) *heapStatsDelta {
-	seq := atomic.Xadd(&c.statsSeq, 1)
-	if seq%2 == 0 {
-		// Should have been incremented to odd.
-		print("runtime: seq=", seq, "\n")
-		throw("bad sequence number")
+// The caller's P must not change between acquire and
+// release. This also means that the caller should not
+// acquire a P or release its P in between.
+func (m *consistentHeapStats) acquire() *heapStatsDelta {
+	if pp := getg().m.p.ptr(); pp != nil {
+		seq := atomic.Xadd(&pp.statsSeq, 1)
+		if seq%2 == 0 {
+			// Should have been incremented to odd.
+			print("runtime: seq=", seq, "\n")
+			throw("bad sequence number")
+		}
+	} else {
+		lock(&m.noPLock)
 	}
 	gen := atomic.Load(&m.gen) % 3
 	return &m.stats[gen]
@@ -868,14 +879,19 @@ func (m *consistentHeapStats) acquire(c *mcache) *heapStatsDelta {
 // acquire must no longer be accessed or modified after
 // release is called.
 //
-// The mcache passed here must be the same as the one
-// passed to acquire.
-func (m *consistentHeapStats) release(c *mcache) {
-	seq := atomic.Xadd(&c.statsSeq, 1)
-	if seq%2 != 0 {
-		// Should have been incremented to even.
-		print("runtime: seq=", seq, "\n")
-		throw("bad sequence number")
+// The caller's P must not change between acquire and
+// release. This also means that the caller should not
+// acquire a P or release its P in between.
+func (m *consistentHeapStats) release() {
+	if pp := getg().m.p.ptr(); pp != nil {
+		seq := atomic.Xadd(&pp.statsSeq, 1)
+		if seq%2 != 0 {
+			// Should have been incremented to even.
+			print("runtime: seq=", seq, "\n")
+			throw("bad sequence number")
+		}
+	} else {
+		unlock(&m.noPLock)
 	}
 }
 
@@ -916,25 +932,33 @@ func (m *consistentHeapStats) read(out *heapStatsDelta) {
 	// so it doesn't change out from under us.
 	mp := acquirem()
 
+	// Get the current generation. We can be confident that this
+	// will not change since read is serialized and is the only
+	// one that modifies currGen.
+	currGen := atomic.Load(&m.gen)
+	prevGen := currGen - 1
+	if currGen == 0 {
+		prevGen = 2
+	}
+
+	// Prevent writers without a P from writing while we update gen.
+	lock(&m.noPLock)
+
 	// Rotate gen, effectively taking a snapshot of the state of
 	// these statistics at the point of the exchange by moving
 	// writers to the next set of deltas.
 	//
 	// This exchange is safe to do because we won't race
 	// with anyone else trying to update this value.
-	currGen := atomic.Load(&m.gen)
 	atomic.Xchg(&m.gen, (currGen+1)%3)
-	prevGen := currGen - 1
-	if currGen == 0 {
-		prevGen = 2
-	}
+
+	// Allow P-less writers to continue. They'll be writing to the
+	// next generation now.
+	unlock(&m.noPLock)
+
 	for _, p := range allp {
-		c := p.mcache
-		if c == nil {
-			continue
-		}
 		// Spin until there are no more writers.
-		for atomic.Load(&c.statsSeq)%2 != 0 {
+		for atomic.Load(&p.statsSeq)%2 != 0 {
 		}
 	}
 
@@ -951,5 +975,6 @@ func (m *consistentHeapStats) read(out *heapStatsDelta) {
 
 	// Finally, copy out the complete delta.
 	*out = m.stats[currGen]
+
 	releasem(mp)
 }
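
As a side note on the read path above, the generation bookkeeping reduces to a few lines of index arithmetic. A standalone sketch (the name rotate is invented for illustration; this is not runtime code):

package main

import "fmt"

// rotate returns the generation writers should use next, plus the
// generation before last, whose deltas the reader folds into the
// snapshot and then clears for a future rotation.
func rotate(currGen uint32) (nextGen, prevGen uint32) {
	nextGen = (currGen + 1) % 3
	prevGen = currGen - 1
	if currGen == 0 {
		prevGen = 2
	}
	return
}

func main() {
	// Walk the ring: one slot is being written, one holds the snapshot
	// being read, and one stays clear for the next rotation.
	for gen := uint32(0); gen < 3; gen++ {
		next, prev := rotate(gen)
		fmt.Printf("currGen=%d -> writers move to %d, reader merges and clears %d\n", gen, next, prev)
	}
}

Each rotation leaves one slot being written, one holding the snapshot just read, and one cleared for the next rotation, which is why a ring of two would not suffice.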

src/runtime/proc.go

Lines changed: 4 additions & 0 deletions
@@ -577,6 +577,10 @@ func schedinit() {
 	lockInit(&trace.lock, lockRankTrace)
 	lockInit(&cpuprof.lock, lockRankCpuprof)
 	lockInit(&trace.stackTab.lock, lockRankTraceStackTab)
+	// Enforce that this lock is always a leaf lock.
+	// All of this lock's critical sections should be
+	// extremely short.
+	lockInit(&memstats.heapStats.noPLock, lockRankLeafRank)
 
 	// raceinit must be the first call to race detector.
 	// In particular, it must be done before mallocinit below calls racemapshadow.

src/runtime/runtime2.go

Lines changed: 6 additions & 2 deletions
@@ -654,8 +654,8 @@ type p struct {
 	timerModifiedEarliest uint64
 
 	// Per-P GC state
-	gcAssistTime         int64 // Nanoseconds in assistAlloc
-	gcFractionalMarkTime int64 // Nanoseconds in fractional mark worker (atomic)
+	gcAssistTime         int64 // Nanoseconds in assistAlloc
+	gcFractionalMarkTime int64 // Nanoseconds in fractional mark worker (atomic)
 
 	// gcMarkWorkerMode is the mode for the next mark worker to run in.
 	// That is, this is used to communicate with the worker goroutine
@@ -679,6 +679,10 @@ type p struct {
 
 	runSafePointFn uint32 // if 1, run sched.safePointFn at next safe point
 
+	// statsSeq is a counter indicating whether this P is currently
+	// writing any stats. Its value is even when not, odd when it is.
+	statsSeq uint32
+
 	// Lock for timers. We normally access the timers while running
 	// on this P, but the scheduler can also do it from a different P.
 	timersLock mutex
