Commit 79781e8
runtime: move malloc stats into consistentHeapStats
This change moves the mcache-local malloc stats into the
consistentHeapStats structure so the malloc stats can be managed
consistently with the memory stats. The one exception here is
tinyAllocs, for which moving that into the global stats would incur
several atomic writes on the fast path. Microbenchmarks for just one
CPU core have shown a 50% loss in throughput. Since the tiny
allocation count isn't exposed anyway and is always blindly added to
both allocs and frees, let that stay inconsistent and flush the tiny
allocation count every so often.

Change-Id: I2a4b75f209c0e659b9c0db081a3287bf227c10ca
Reviewed-on: https://go-review.googlesource.com/c/go/+/247039
Run-TryBot: Michael Knyszek <[email protected]>
TryBot-Result: Go Bot <[email protected]>
Trust: Michael Knyszek <[email protected]>
Reviewed-by: Michael Pratt <[email protected]>
1 parent f77a902 commit 79781e8
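The trade-off described in the commit message — a plain, P-local counter on the allocation fast path, drained into the global stats only occasionally — can be sketched in isolation. The following standalone Go program is illustrative only: pLocalStats, globalStats, bump, and flush are invented names, and in the runtime the drain happens on refill of the tiny span class and in releaseAll rather than by an explicit call.

package main

import (
	"fmt"
	"sync/atomic"
)

// pLocalStats models the mcache-local fast path: tinyAllocs is a plain
// (non-atomic) counter because only the owning P writes it.
type pLocalStats struct {
	tinyAllocs uint64
}

// globalStats models the consistent global view. Flushes use atomics
// because many Ps may flush concurrently.
type globalStats struct {
	tinyallocs uint64
}

// bump is the fast path: a single non-atomic increment.
func (l *pLocalStats) bump() { l.tinyAllocs++ }

// flush drains the local counter into the global one "every so often".
func (l *pLocalStats) flush(g *globalStats) {
	atomic.AddUint64(&g.tinyallocs, l.tinyAllocs)
	l.tinyAllocs = 0
}

func main() {
	var g globalStats
	var l pLocalStats
	for i := 0; i < 1000; i++ {
		l.bump()
	}
	l.flush(&g)
	fmt.Println(atomic.LoadUint64(&g.tinyallocs)) // 1000
}

The point of the split is that the fast path pays for one ordinary increment instead of an atomic read-modify-write; the commit message attributes the measured 50% single-core throughput loss to exactly those extra atomics.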

File tree: 6 files changed, +90 −109 lines

src/runtime/export_test.go

Lines changed: 13 additions & 24 deletions
@@ -337,33 +337,22 @@ func ReadMemStatsSlow() (base, slow MemStats) {
 		}
 	}
 
-	// Add in frees. readmemstats_m flushed the cached stats, so
-	// these are up-to-date.
-	var tinyAllocs, largeFree, smallFree uint64
-	for _, p := range allp {
-		c := p.mcache
-		if c == nil {
-			continue
-		}
-		// Collect large allocation stats.
-		largeFree += uint64(c.largeFree)
-		slow.Frees += uint64(c.largeFreeCount)
-
-		// Collect tiny allocation stats.
-		tinyAllocs += uint64(c.tinyAllocCount)
-
-		// Collect per-sizeclass stats.
-		for i := 0; i < _NumSizeClasses; i++ {
-			slow.Frees += uint64(c.smallFreeCount[i])
-			bySize[i].Frees += uint64(c.smallFreeCount[i])
-			bySize[i].Mallocs += uint64(c.smallFreeCount[i])
-			smallFree += uint64(c.smallFreeCount[i]) * uint64(class_to_size[i])
-		}
+	// Add in frees by just reading the stats for those directly.
+	var m heapStatsDelta
+	memstats.heapStats.unsafeRead(&m)
+
+	// Collect per-sizeclass free stats.
+	var smallFree uint64
+	for i := 0; i < _NumSizeClasses; i++ {
+		slow.Frees += uint64(m.smallFreeCount[i])
+		bySize[i].Frees += uint64(m.smallFreeCount[i])
+		bySize[i].Mallocs += uint64(m.smallFreeCount[i])
+		smallFree += uint64(m.smallFreeCount[i]) * uint64(class_to_size[i])
 	}
-	slow.Frees += tinyAllocs
+	slow.Frees += memstats.tinyallocs + uint64(m.largeFreeCount)
 	slow.Mallocs += slow.Frees
 
-	slow.TotalAlloc = slow.Alloc + largeFree + smallFree
+	slow.TotalAlloc = slow.Alloc + uint64(m.largeFree) + smallFree
 
 	for i := range slow.BySize {
 		slow.BySize[i].Mallocs = bySize[i].Mallocs
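To see how the new code assembles free counts from a single consistent snapshot, here is a standalone sketch with invented numbers; snapshot is a trimmed stand-in for heapStatsDelta, and classToSize and the tinyallocs value stand in for class_to_size and memstats.tinyallocs.

package main

import "fmt"

// snapshot is a trimmed-down stand-in for the heapStatsDelta that
// unsafeRead fills in; the field values below are made up.
type snapshot struct {
	smallFreeCount [3]uint64 // frees per size class
	largeFreeCount uint64    // large-object frees
	largeFree      uint64    // bytes freed for large objects
}

func main() {
	classToSize := [3]uint64{8, 16, 32}
	m := snapshot{
		smallFreeCount: [3]uint64{10, 5, 2},
		largeFreeCount: 1,
		largeFree:      65536,
	}
	tinyallocs := uint64(7)

	// Mirror the updated loop: every small free counts once per object
	// and contributes objects*size bytes to smallFree.
	var frees, smallFree uint64
	for i := range m.smallFreeCount {
		frees += m.smallFreeCount[i]
		smallFree += m.smallFreeCount[i] * classToSize[i]
	}
	// Tiny allocs are blindly counted as both allocs and frees, so they
	// are folded in here, alongside large-object frees.
	frees += tinyallocs + m.largeFreeCount
	fmt.Println(frees, smallFree) // 25 224
}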

src/runtime/malloc.go

Lines changed: 1 addition & 1 deletion
@@ -1028,7 +1028,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
 			// The object fits into existing tiny block.
 			x = unsafe.Pointer(c.tiny + off)
 			c.tinyoffset = off + size
-			c.tinyAllocCount++
+			c.tinyAllocs++
 			mp.mallocing = 0
 			releasem(mp)
 			return x

src/runtime/mcache.go

Lines changed: 23 additions & 47 deletions
@@ -32,30 +32,19 @@ type mcache struct {
 	// tiny is a heap pointer. Since mcache is in non-GC'd memory,
 	// we handle it by clearing it in releaseAll during mark
 	// termination.
+	//
+	// tinyAllocs is the number of tiny allocations performed
+	// by the P that owns this mcache.
 	tiny       uintptr
 	tinyoffset uintptr
+	tinyAllocs uintptr
 
 	// The rest is not accessed on every malloc.
 
 	alloc [numSpanClasses]*mspan // spans to allocate from, indexed by spanClass
 
 	stackcache [_NumStackOrders]stackfreelist
 
-	// Allocator stats (source-of-truth).
-	// Only the P that owns this mcache may write to these
-	// variables, so it's safe for that P to read non-atomically.
-	//
-	// When read with stats from other mcaches and with the world
-	// stopped, the result will accurately reflect the state of the
-	// application.
-	tinyAllocCount  uintptr                  // number of tiny allocs not counted in other stats
-	largeAlloc      uintptr                  // bytes allocated for large objects
-	largeAllocCount uintptr                  // number of large object allocations
-	smallAllocCount [_NumSizeClasses]uintptr // number of allocs for small objects
-	largeFree       uintptr                  // bytes freed for large objects (>maxSmallSize)
-	largeFreeCount  uintptr                  // number of frees for large objects (>maxSmallSize)
-	smallFreeCount  [_NumSizeClasses]uintptr // number of frees for small objects (<=maxSmallSize)
-
 	// flushGen indicates the sweepgen during which this mcache
 	// was last flushed. If flushGen != mheap_.sweepgen, the spans
 	// in this mcache are stale and need to the flushed so they
@@ -117,7 +106,7 @@ func allocmcache() *mcache {
 // In some cases there is no way to simply release
 // resources, such as statistics, so donate them to
 // a different mcache (the recipient).
-func freemcache(c *mcache, recipient *mcache) {
+func freemcache(c *mcache) {
 	systemstack(func() {
 		c.releaseAll()
 		stackcache_clear(c)
@@ -128,8 +117,6 @@ func freemcache(c *mcache, recipient *mcache) {
 		// gcworkbuffree(c.gcworkbuf)
 
 		lock(&mheap_.lock)
-		// Donate anything else that's left.
-		c.donate(recipient)
 		mheap_.cachealloc.free(unsafe.Pointer(c))
 		unlock(&mheap_.lock)
 	})
@@ -158,31 +145,6 @@ func getMCache() *mcache {
 	return c
 }
 
-// donate flushes data and resources which have no global
-// pool to another mcache.
-func (c *mcache) donate(d *mcache) {
-	// scanAlloc is handled separately because it's not
-	// like these stats -- it's used for GC pacing.
-	d.largeAlloc += c.largeAlloc
-	c.largeAlloc = 0
-	d.largeAllocCount += c.largeAllocCount
-	c.largeAllocCount = 0
-	for i := range c.smallAllocCount {
-		d.smallAllocCount[i] += c.smallAllocCount[i]
-		c.smallAllocCount[i] = 0
-	}
-	d.largeFree += c.largeFree
-	c.largeFree = 0
-	d.largeFreeCount += c.largeFreeCount
-	c.largeFreeCount = 0
-	for i := range c.smallFreeCount {
-		d.smallFreeCount[i] += c.smallFreeCount[i]
-		c.smallFreeCount[i] = 0
-	}
-	d.tinyAllocCount += c.tinyAllocCount
-	c.tinyAllocCount = 0
-}
-
 // refill acquires a new span of span class spc for c. This span will
 // have at least one free object. The current span in c must be full.
 //
@@ -219,12 +181,20 @@ func (c *mcache) refill(spc spanClass) {
 
 	// Assume all objects from this span will be allocated in the
 	// mcache. If it gets uncached, we'll adjust this.
-	c.smallAllocCount[spc.sizeclass()] += uintptr(s.nelems) - uintptr(s.allocCount)
+	stats := memstats.heapStats.acquire(c)
+	atomic.Xadduintptr(&stats.smallAllocCount[spc.sizeclass()], uintptr(s.nelems)-uintptr(s.allocCount))
+	memstats.heapStats.release(c)
 
 	// Update heap_live with the same assumption.
 	usedBytes := uintptr(s.allocCount) * s.elemsize
 	atomic.Xadd64(&memstats.heap_live, int64(s.npages*pageSize)-int64(usedBytes))
 
+	// Flush tinyAllocs.
+	if spc == tinySpanClass {
+		atomic.Xadd64(&memstats.tinyallocs, int64(c.tinyAllocs))
+		c.tinyAllocs = 0
+	}
+
 	// While we're here, flush scanAlloc, since we have to call
 	// revise anyway.
 	atomic.Xadd64(&memstats.heap_scan, int64(c.scanAlloc))
@@ -262,8 +232,10 @@ func (c *mcache) allocLarge(size uintptr, needzero bool, noscan bool) *mspan {
 	if s == nil {
 		throw("out of memory")
 	}
-	c.largeAlloc += npages * pageSize
-	c.largeAllocCount++
+	stats := memstats.heapStats.acquire(c)
+	atomic.Xadduintptr(&stats.largeAlloc, npages*pageSize)
+	atomic.Xadduintptr(&stats.largeAllocCount, 1)
+	memstats.heapStats.release(c)
 
 	// Update heap_live and revise pacing if needed.
 	atomic.Xadd64(&memstats.heap_live, int64(npages*pageSize))
@@ -294,7 +266,9 @@ func (c *mcache) releaseAll() {
 		if s != &emptymspan {
 			// Adjust nsmallalloc in case the span wasn't fully allocated.
 			n := uintptr(s.nelems) - uintptr(s.allocCount)
-			c.smallAllocCount[spanClass(i).sizeclass()] -= n
+			stats := memstats.heapStats.acquire(c)
+			atomic.Xadduintptr(&stats.smallAllocCount[spanClass(i).sizeclass()], -n)
+			memstats.heapStats.release(c)
 			if s.sweepgen != sg+1 {
 				// refill conservatively counted unallocated slots in heap_live.
 				// Undo this.
@@ -313,6 +287,8 @@ func (c *mcache) releaseAll() {
 	// Clear tinyalloc pool.
 	c.tiny = 0
 	c.tinyoffset = 0
+	atomic.Xadd64(&memstats.tinyallocs, int64(c.tinyAllocs))
+	c.tinyAllocs = 0
 
 	// Updated heap_scan and possible heap_live.
 	if gcBlackenEnabled != 0 {
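The acquire/release pairs introduced above bracket atomic writes into one shard of the consistent stats. A toy version of that shape — not the runtime's implementation, which ties shards to Ps and uses a generation counter so readers can take a consistent snapshot — might look like the following; shardedStats, statShard, and the round-robin acquire are all invented for illustration.

package main

import (
	"fmt"
	"sync/atomic"
)

// statShard is one writer slot; fields are updated only with atomics.
type statShard struct {
	smallAllocCount uint64
}

// shardedStats spreads writers across shards so stat updates don't
// contend on a single cache line.
type shardedStats struct {
	shards [4]statShard
	next   uint32
}

// acquire hands the caller a shard to write to. The runtime picks the
// shard from the current P; this sketch just round-robins.
func (s *shardedStats) acquire() *statShard {
	i := atomic.AddUint32(&s.next, 1) % uint32(len(s.shards))
	return &s.shards[i]
}

// release marks the write complete. The real consistentHeapStats does
// bookkeeping here so readers see consistent values; the sketch needs
// no work because its writes are already atomic.
func (s *shardedStats) release() {}

// read sums all shards, the moral equivalent of unsafeRead.
func (s *shardedStats) read() uint64 {
	var total uint64
	for i := range s.shards {
		total += atomic.LoadUint64(&s.shards[i].smallAllocCount)
	}
	return total
}

func main() {
	var s shardedStats
	for i := 0; i < 10; i++ {
		sh := s.acquire()
		atomic.AddUint64(&sh.smallAllocCount, 1)
		s.release()
	}
	fmt.Println(s.read()) // 10
}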

src/runtime/mgcsweep.go

Lines changed: 7 additions & 3 deletions
@@ -503,7 +503,9 @@ func (s *mspan) sweep(preserve bool) bool {
 		// wasn't totally filled, but then swept, still has all of its
 		// free slots zeroed.
 		s.needzero = 1
-		c.smallFreeCount[spc.sizeclass()] += uintptr(nfreed)
+		stats := memstats.heapStats.acquire(c)
+		atomic.Xadduintptr(&stats.smallFreeCount[spc.sizeclass()], uintptr(nfreed))
+		memstats.heapStats.release(c)
 	}
 	if !preserve {
 		// The caller may not have removed this span from whatever
@@ -548,8 +550,10 @@ func (s *mspan) sweep(preserve bool) bool {
 		} else {
 			mheap_.freeSpan(s)
 		}
-		c.largeFreeCount++
-		c.largeFree += size
+		stats := memstats.heapStats.acquire(c)
+		atomic.Xadduintptr(&stats.largeFreeCount, 1)
+		atomic.Xadduintptr(&stats.largeFree, size)
+		memstats.heapStats.release(c)
 		return true
 	}
 
src/runtime/mstats.go

Lines changed: 45 additions & 33 deletions
@@ -612,48 +612,36 @@ func updatememstats() {
 	memstats.total_alloc = 0
 	memstats.nmalloc = 0
 	memstats.nfree = 0
-	memstats.tinyallocs = 0
 	for i := 0; i < len(memstats.by_size); i++ {
 		memstats.by_size[i].nmalloc = 0
 		memstats.by_size[i].nfree = 0
 	}
-
-	// Collect allocation stats. This is safe and consistent
-	// because the world is stopped.
-	var smallFree, totalAlloc, totalFree uint64
-	for _, p := range allp {
-		c := p.mcache
-		if c == nil {
-			continue
-		}
-		// Collect large allocation stats.
-		memstats.nmalloc += uint64(c.largeAllocCount)
-		totalAlloc += uint64(c.largeAlloc)
-		totalFree += uint64(c.largeFree)
-		memstats.nfree += uint64(c.largeFreeCount)
-
-		// Collect tiny allocation stats.
-		memstats.tinyallocs += uint64(c.tinyAllocCount)
-
-		// Collect per-sizeclass stats.
-		for i := 0; i < _NumSizeClasses; i++ {
-			// Malloc stats.
-			memstats.nmalloc += uint64(c.smallAllocCount[i])
-			memstats.by_size[i].nmalloc += uint64(c.smallAllocCount[i])
-			totalAlloc += uint64(c.smallAllocCount[i]) * uint64(class_to_size[i])
-
-			// Free stats.
-			memstats.nfree += uint64(c.smallFreeCount[i])
-			memstats.by_size[i].nfree += uint64(c.smallFreeCount[i])
-			smallFree += uint64(c.smallFreeCount[i]) * uint64(class_to_size[i])
-		}
-	}
 	// Collect consistent stats, which are the source-of-truth in the some cases.
 	var consStats heapStatsDelta
 	memstats.heapStats.unsafeRead(&consStats)
 
-	totalFree += smallFree
+	// Collect large allocation stats.
+	totalAlloc := uint64(consStats.largeAlloc)
+	memstats.nmalloc += uint64(consStats.largeAllocCount)
+	totalFree := uint64(consStats.largeFree)
+	memstats.nfree += uint64(consStats.largeFreeCount)
+
+	// Collect per-sizeclass stats.
+	for i := 0; i < _NumSizeClasses; i++ {
+		// Malloc stats.
+		a := uint64(consStats.smallAllocCount[i])
+		totalAlloc += a * uint64(class_to_size[i])
+		memstats.nmalloc += a
+		memstats.by_size[i].nmalloc = a
+
+		// Free stats.
+		f := uint64(consStats.smallFreeCount[i])
+		totalFree += f * uint64(class_to_size[i])
+		memstats.nfree += f
+		memstats.by_size[i].nfree = f
+	}
 
+	// Account for tiny allocations.
 	memstats.nfree += memstats.tinyallocs
 	memstats.nmalloc += memstats.tinyallocs
 
@@ -752,12 +740,25 @@ func (s *sysMemStat) add(n int64) {
 // that need to be updated together in order for them to be kept
 // consistent with one another.
 type heapStatsDelta struct {
+	// Memory stats.
 	committed       int64 // byte delta of memory committed
 	released        int64 // byte delta of released memory generated
 	inHeap          int64 // byte delta of memory placed in the heap
 	inStacks        int64 // byte delta of memory reserved for stacks
 	inWorkBufs      int64 // byte delta of memory reserved for work bufs
 	inPtrScalarBits int64 // byte delta of memory reserved for unrolled GC prog bits
+
+	// Allocator stats.
+	largeAlloc      uintptr                  // bytes allocated for large objects
+	largeAllocCount uintptr                  // number of large object allocations
+	smallAllocCount [_NumSizeClasses]uintptr // number of allocs for small objects
+	largeFree       uintptr                  // bytes freed for large objects (>maxSmallSize)
+	largeFreeCount  uintptr                  // number of frees for large objects (>maxSmallSize)
+	smallFreeCount  [_NumSizeClasses]uintptr // number of frees for small objects (<=maxSmallSize)
+
+	// Add a uint32 to ensure this struct is a multiple of 8 bytes in size.
+	// Only necessary on 32-bit platforms.
+	// _ [(sys.PtrSize / 4) % 2]uint32
 }
 
 // merge adds in the deltas from b into a.
@@ -768,6 +769,17 @@ func (a *heapStatsDelta) merge(b *heapStatsDelta) {
 	a.inStacks += b.inStacks
 	a.inWorkBufs += b.inWorkBufs
 	a.inPtrScalarBits += b.inPtrScalarBits
+
+	a.largeAlloc += b.largeAlloc
+	a.largeAllocCount += b.largeAllocCount
+	for i := range b.smallAllocCount {
+		a.smallAllocCount[i] += b.smallAllocCount[i]
+	}
+	a.largeFree += b.largeFree
+	a.largeFreeCount += b.largeFreeCount
+	for i := range b.smallFreeCount {
+		a.smallFreeCount[i] += b.smallFreeCount[i]
+	}
 }
 
 // consistentHeapStats represents a set of various memory statistics
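The merge additions above fold one heapStatsDelta into another field by field, including the per-size-class arrays. A minimal standalone sketch of the same shape, where delta and numSizeClasses are invented stand-ins for heapStatsDelta and _NumSizeClasses:

package main

import "fmt"

const numSizeClasses = 3 // stand-in for _NumSizeClasses

// delta mirrors the shape of heapStatsDelta: scalar counters plus
// per-size-class arrays, merged element by element.
type delta struct {
	largeAlloc      uint64
	smallAllocCount [numSizeClasses]uint64
}

// merge adds b into a, the same field-by-field accumulation the commit
// adds to heapStatsDelta.merge.
func (a *delta) merge(b *delta) {
	a.largeAlloc += b.largeAlloc
	for i := range b.smallAllocCount {
		a.smallAllocCount[i] += b.smallAllocCount[i]
	}
}

func main() {
	x := delta{largeAlloc: 4096, smallAllocCount: [numSizeClasses]uint64{1, 2, 3}}
	y := delta{largeAlloc: 8192, smallAllocCount: [numSizeClasses]uint64{4, 5, 6}}
	x.merge(&y)
	fmt.Println(x.largeAlloc, x.smallAllocCount) // 12288 [5 7 9]
}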

src/runtime/proc.go

Lines changed: 1 addition & 1 deletion
@@ -4550,7 +4550,7 @@ func (pp *p) destroy() {
 		pp.mspancache.len = 0
 		pp.pcache.flush(&mheap_.pages)
 	})
-	freemcache(pp.mcache, allp[0].mcache)
+	freemcache(pp.mcache)
 	pp.mcache = nil
 	gfpurge(pp)
 	traceProcFree(pp)
