Skip to content

Commit 27ee719

Browse files
rauls5382 authored and minux committed
pprof: improve sampling for heap profiling
The current heap sampling introduces some bias that interferes with unsampling, producing unexpected heap profiles. The solution is to use a Poisson process to generate the sampling points, using the formulas described at https://en.wikipedia.org/wiki/Poisson_process

This fixes #12620

Change-Id: If2400809ed3c41de504dd6cff06be14e476ff96c
Reviewed-on: https://go-review.googlesource.com/14590
Reviewed-by: Keith Randall <[email protected]>
Reviewed-by: Minux Ma <[email protected]>
Run-TryBot: Minux Ma <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
1 parent 0357c38 commit 27ee719

File tree

9 files changed

+362
-29
lines changed

9 files changed

+362
-29
lines changed

src/runtime/export_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ var Xadduintptr = xadduintptr
2626

2727
var FuncPC = funcPC
2828

29+
var Fastlog2 = fastlog2
30+
2931
type LFNode struct {
3032
Next uint64
3133
Pushcnt uintptr

src/runtime/fastlog2.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// Copyright 2015 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package runtime
6+
7+
import "unsafe"
8+
9+
// fastlog2 implements a fast approximation to the base 2 log of a
10+
// float64. This is used to compute a geometric distribution for heap
11+
// sampling, without introducing dependences into package math. This
12+
// uses a very rough approximation using the float64 exponent and the
13+
// first 25 bits of the mantissa. The top 5 bits of the mantissa are
14+
// used to load limits from a table of constants and the rest are used
15+
// to scale linearly between them.
16+
func fastlog2(x float64) float64 {
17+
const fastlogScaleBits = 20
18+
const fastlogScaleRatio = 1.0 / (1 << fastlogScaleBits)
19+
20+
xBits := float64bits(x)
21+
// Extract the exponent from the IEEE float64, and index a constant
22+
// table with the first 10 bits from the mantissa.
23+
xExp := int64((xBits>>52)&0x7FF) - 1023
24+
xManIndex := (xBits >> (52 - fastlogNumBits)) % (1 << fastlogNumBits)
25+
xManScale := (xBits >> (52 - fastlogNumBits - fastlogScaleBits)) % (1 << fastlogScaleBits)
26+
27+
low, high := fastlog2Table[xManIndex], fastlog2Table[xManIndex+1]
28+
return float64(xExp) + low + (high-low)*float64(xManScale)*fastlogScaleRatio
29+
}
30+
31+
// float64bits returns the IEEE 754 binary representation of f.
32+
// Taken from math.Float64bits to avoid dependences into package math.
33+
func float64bits(f float64) uint64 {
	// Reinterpret the eight bytes of f as a uint64; no numeric
	// conversion of the value takes place.
	raw := (*uint64)(unsafe.Pointer(&f))
	return *raw
}

src/runtime/fastlog2_test.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// Copyright 2015 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package runtime_test
6+
7+
import (
8+
"math"
9+
"runtime"
10+
"testing"
11+
)
12+
13+
func TestFastLog2(t *testing.T) {
14+
// Compute the euclidean distance between math.Log2 and the FastLog2
15+
// implementation over the range of interest for heap sampling.
16+
const randomBitCount = 26
17+
var e float64
18+
for i := 1; i < 1<<randomBitCount; i++ {
19+
l, fl := math.Log2(float64(i)), runtime.Fastlog2(float64(i))
20+
d := l - fl
21+
e += d * d
22+
}
23+
e = math.Sqrt(e)
24+
25+
if e > 1.0 {
26+
t.Fatalf("imprecision on fastlog2 implementation, want <=1.0, got %f", e)
27+
}
28+
}

src/runtime/fastlog2table.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// AUTO-GENERATED by mkfastlog2table.go
2+
// Run go generate from src/runtime to update.
3+
// See mkfastlog2table.go for comments.
4+
5+
package runtime
6+
7+
// fastlogNumBits is the number of leading mantissa bits that fastlog2
// uses to index fastlog2Table.
const fastlogNumBits = 5
8+
9+
// fastlog2Table[i] holds log2(1 + i/(1<<fastlogNumBits)) for
// i = 0 .. 1<<fastlogNumBits. The extra final entry (log2(2) == 1) lets
// fastlog2 read entry index+1 for the last interval.
var fastlog2Table = [1<<fastlogNumBits + 1]float64{
10+
0,
11+
0.0443941193584535,
12+
0.08746284125033943,
13+
0.12928301694496647,
14+
0.16992500144231248,
15+
0.2094533656289499,
16+
0.24792751344358555,
17+
0.28540221886224837,
18+
0.3219280948873623,
19+
0.3575520046180837,
20+
0.39231742277876036,
21+
0.4262647547020979,
22+
0.4594316186372973,
23+
0.4918530963296748,
24+
0.5235619560570128,
25+
0.5545888516776374,
26+
0.5849625007211563,
27+
0.6147098441152082,
28+
0.6438561897747247,
29+
0.6724253419714956,
30+
0.7004397181410922,
31+
0.7279204545631992,
32+
0.7548875021634686,
33+
0.7813597135246596,
34+
0.8073549220576042,
35+
0.8328900141647417,
36+
0.8579809951275721,
37+
0.8826430493618412,
38+
0.9068905956085185,
39+
0.9307373375628862,
40+
0.9541963103868752,
41+
0.9772799234999164,
42+
1,
43+
}

src/runtime/malloc.go

Lines changed: 36 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -792,28 +792,45 @@ func rawmem(size uintptr) unsafe.Pointer {
792792
}
793793

794794
func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
795-
c := mp.mcache
796-
rate := MemProfileRate
797-
if size < uintptr(rate) {
798-
// pick next profile time
799-
// If you change this, also change allocmcache.
800-
if rate > 0x3fffffff { // make 2*rate not overflow
801-
rate = 0x3fffffff
802-
}
803-
next := int32(fastrand1()) % (2 * int32(rate))
804-
// Subtract the "remainder" of the current allocation.
805-
// Otherwise objects that are close in size to sampling rate
806-
// will be under-sampled, because we consistently discard this remainder.
807-
next -= (int32(size) - c.next_sample)
808-
if next < 0 {
809-
next = 0
810-
}
811-
c.next_sample = next
812-
}
813-
795+
mp.mcache.next_sample = nextSample()
814796
mProf_Malloc(x, size)
815797
}
816798

799+
// nextSample returns the next sampling point for heap profiling.
800+
// It produces a random variable with a geometric distribution and
801+
// mean MemProfileRate. This is done by generating a uniformly
802+
// distributed random number and applying the cumulative distribution
803+
// function for an exponential.
804+
func nextSample() int32 {
805+
period := MemProfileRate
806+
807+
// make nextSample not overflow. Maximum possible step is
808+
// -ln(1/(1<<kRandomBitCount)) * period, approximately 20 * period.
809+
switch {
810+
case period > 0x7000000:
811+
period = 0x7000000
812+
case period == 0:
813+
return 0
814+
}
815+
816+
// Let m be the sample rate,
817+
// the probability distribution function is m*exp(-mx), so the CDF is
818+
// p = 1 - exp(-mx), so
819+
// q = 1 - p == exp(-mx)
820+
// log_e(q) = -mx
821+
// -log_e(q)/m = x
822+
// x = -log_e(q) * period
823+
// x = log_2(q) * (-log_e(2)) * period ; Using log_2 for efficiency
824+
const randomBitCount = 26
825+
q := uint32(fastrand1())%(1<<randomBitCount) + 1
826+
qlog := fastlog2(float64(q)) - randomBitCount
827+
if qlog > 0 {
828+
qlog = 0
829+
}
830+
const minusLog2 = -0.6931471805599453 // -ln(2)
831+
return int32(qlog*(minusLog2*float64(period))) + 1
832+
}
833+
817834
type persistentAlloc struct {
818835
base unsafe.Pointer
819836
off uintptr

src/runtime/mcache.go

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -69,16 +69,7 @@ func allocmcache() *mcache {
6969
for i := 0; i < _NumSizeClasses; i++ {
7070
c.alloc[i] = &emptymspan
7171
}
72-
73-
// Set first allocation sample size.
74-
rate := MemProfileRate
75-
if rate > 0x3fffffff { // make 2*rate not overflow
76-
rate = 0x3fffffff
77-
}
78-
if rate != 0 {
79-
c.next_sample = int32(int(fastrand1()) % (2 * rate))
80-
}
81-
72+
c.next_sample = nextSample()
8273
return c
8374
}
8475

src/runtime/mkfastlog2table.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// Copyright 2015 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
// +build ignore
6+
7+
// fastlog2Table contains log2 approximations for 5 binary digits.
8+
// This is used to implement fastlog2, which is used for heap sampling.
9+
10+
package main
11+
12+
import (
13+
"bytes"
14+
"fmt"
15+
"io/ioutil"
16+
"log"
17+
"math"
18+
)
19+
20+
func main() {
21+
var buf bytes.Buffer
22+
23+
fmt.Fprintln(&buf, "// AUTO-GENERATED by mkfastlog2table.go")
24+
fmt.Fprintln(&buf, "// Run go generate from src/runtime to update.")
25+
fmt.Fprintln(&buf, "// See mkfastlog2table.go for comments.")
26+
fmt.Fprintln(&buf)
27+
fmt.Fprintln(&buf, "package runtime")
28+
fmt.Fprintln(&buf)
29+
fmt.Fprintln(&buf, "const fastlogNumBits =", fastlogNumBits)
30+
fmt.Fprintln(&buf)
31+
32+
fmt.Fprintln(&buf, "var fastlog2Table = [1<<fastlogNumBits + 1]float64{")
33+
table := computeTable()
34+
for _, t := range table {
35+
fmt.Fprintf(&buf, "\t%v,\n", t)
36+
}
37+
fmt.Fprintln(&buf, "}")
38+
39+
if err := ioutil.WriteFile("fastlog2table.go", buf.Bytes(), 0644); err != nil {
40+
log.Fatalln(err)
41+
}
42+
}
43+
44+
// fastlogNumBits is the number of binary digits covered by the table;
// its value is also written into the generated file.
const fastlogNumBits = 5

// computeTable returns the 1<<fastlogNumBits + 1 values
// log2(1 + i/(1<<fastlogNumBits)) for i = 0 .. 1<<fastlogNumBits,
// in increasing order of i.
func computeTable() []float64 {
	const intervals = 1 << fastlogNumBits

	table := make([]float64, intervals+1)
	for i := range table {
		table[i] = math.Log2(1.0 + float64(i)/intervals)
	}
	return table
}

src/runtime/runtime.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import _ "unsafe" // for go:linkname
88

99
//go:generate go run wincallback.go
1010
//go:generate go run mkduff.go
11+
//go:generate go run mkfastlog2table.go
1112

1213
var ticks struct {
1314
lock mutex

0 commit comments

Comments
 (0)