Commit 0ae9c3b

felixge authored and prattmic committed
runtime/pprof: fix block profile bias
Block profiles were biased towards infrequent long events over frequent
short events. This fix corrects the bias by aggregating shorter events
as longer, but less frequent, events in the profiles. As a result, their
cumulative duration is accurately represented in the profile without
skewing their sample mean (duration/count).

Credit to @dvyukov for suggesting to adjust the count in the
saveblockevent function.

Fixes #44192

Change-Id: I71a99d7f6ebdb2d484d44890a2517863cceb4004
Reviewed-on: https://go-review.googlesource.com/c/go/+/299991
Trust: Michael Pratt <[email protected]>
Trust: Ian Lance Taylor <[email protected]>
Run-TryBot: Michael Pratt <[email protected]>
TryBot-Result: Go Bot <[email protected]>
Reviewed-by: Michael Pratt <[email protected]>
1 parent 8e0023b commit 0ae9c3b
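To see the bias in concrete (hypothetical) numbers: with blockprofilerate set to 1,000 cycles, 100,000 events of 100 cycles each were sampled with probability about 100/1000, so the old code recorded roughly 10,000 events totalling roughly 1,000,000 cycles, a tenth of the true 10,000,000 cycles, while a single long event of 10,000,000 cycles was always recorded in full. With this fix, each sampled short event is instead recorded as count += rate/cycles (10) and cycles += rate (1,000), so in expectation both the count (about 100,000) and the cumulative duration (about 10,000,000 cycles) match what actually happened, and the per-event mean cycles/count stays 100.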

2 files changed, +95 −10 lines changed

src/runtime/mprof.go

Lines changed: 25 additions & 10 deletions
@@ -133,7 +133,7 @@ func (a *memRecordCycle) add(b *memRecordCycle) {
 // A blockRecord is the bucket data for a bucket of type blockProfile,
 // which is used in blocking and mutex profiles.
 type blockRecord struct {
-	count  int64
+	count  float64
 	cycles int64
 }

@@ -398,20 +398,23 @@ func blockevent(cycles int64, skip int) {
 	if cycles <= 0 {
 		cycles = 1
 	}
-	if blocksampled(cycles) {
-		saveblockevent(cycles, skip+1, blockProfile)
+
+	rate := int64(atomic.Load64(&blockprofilerate))
+	if blocksampled(cycles, rate) {
+		saveblockevent(cycles, rate, skip+1, blockProfile)
 	}
 }

-func blocksampled(cycles int64) bool {
-	rate := int64(atomic.Load64(&blockprofilerate))
+// blocksampled returns true for all events where cycles >= rate. Shorter
+// events have a cycles/rate random chance of returning true.
+func blocksampled(cycles, rate int64) bool {
 	if rate <= 0 || (rate > cycles && int64(fastrand())%rate > cycles) {
 		return false
 	}
 	return true
 }

-func saveblockevent(cycles int64, skip int, which bucketType) {
+func saveblockevent(cycles, rate int64, skip int, which bucketType) {
 	gp := getg()
 	var nstk int
 	var stk [maxStack]uintptr
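To make the new blocksampled contract concrete with hypothetical numbers: at a blockprofilerate of 1,000 cycles, an event that blocked for 100 cycles is kept with probability about 100/1000 = 10%, while any event of 1,000 cycles or more is always kept.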
@@ -422,8 +425,15 @@ func saveblockevent(cycles int64, skip int, which bucketType) {
 	}
 	lock(&proflock)
 	b := stkbucket(which, 0, stk[:nstk], true)
-	b.bp().count++
-	b.bp().cycles += cycles
+
+	if which == blockProfile && cycles < rate {
+		// Remove sampling bias, see discussion on http://golang.org/cl/299991.
+		b.bp().count += float64(rate) / float64(cycles)
+		b.bp().cycles += rate
+	} else {
+		b.bp().count++
+		b.bp().cycles += cycles
+	}
 	unlock(&proflock)
 }
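Taken together, the sampling test and the correction above keep the profile unbiased in expectation. A minimal, self-contained sketch of that argument, in ordinary Go with math/rand standing in for the runtime's fastrand (the rate and event constants are hypothetical, not from this commit):

// Sketch only: simulates the sampling and correction logic above outside
// the runtime.
package main

import (
	"fmt"
	"math/rand"
)

func main() {
	const (
		rate   = 1000   // stand-in for blockprofilerate, in cycles
		events = 100000 // number of simulated short blocking events
		cycles = 100    // duration of each simulated event, in cycles
	)
	var count float64 // fractional event count, as in blockRecord
	var total int64   // accumulated cycles
	for i := 0; i < events; i++ {
		// blocksampled: a short event is kept with probability ~cycles/rate.
		if rate > cycles && rand.Int63()%rate > cycles {
			continue
		}
		// saveblockevent: scale the kept event back up so both totals
		// are unbiased in expectation.
		if cycles < rate {
			count += float64(rate) / float64(cycles)
			total += rate
		} else {
			count++
			total += cycles
		}
	}
	// Both estimates should land within a few percent of the true values.
	fmt.Printf("count=%.0f (true %d), cycles=%d (true %d)\n",
		count, events, total, int64(events)*cycles)
}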

@@ -454,7 +464,7 @@ func mutexevent(cycles int64, skip int) {
 	// TODO(pjw): measure impact of always calling fastrand vs using something
 	// like malloc.go:nextSample()
 	if rate > 0 && int64(fastrand())%rate == 0 {
-		saveblockevent(cycles, skip+1, mutexProfile)
+		saveblockevent(cycles, rate, skip+1, mutexProfile)
 	}
 }

@@ -656,7 +666,12 @@ func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
 	for b := bbuckets; b != nil; b = b.allnext {
 		bp := b.bp()
 		r := &p[0]
-		r.Count = bp.count
+		r.Count = int64(bp.count)
+		// Prevent callers from having to worry about division by zero errors.
+		// See discussion on http://golang.org/cl/299991.
+		if r.Count == 0 {
+			r.Count = 1
+		}
 		r.Cycles = bp.cycles
 		if raceenabled {
 			racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(), funcPC(BlockProfile))
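The clamp is defensive: Count is now truncated from a float64, and callers typically derive a mean duration by dividing Cycles by Count. A hypothetical caller-side helper (not part of the runtime API) that leans on the clamp:

package main

import (
	"fmt"
	"runtime"
)

// meanBlockCycles reports the average cycles per blocking event in one
// record. The division needs no zero check only because BlockProfile
// clamps Count to at least 1.
func meanBlockCycles(r runtime.BlockProfileRecord) int64 {
	return r.Cycles / r.Count
}

func main() {
	// Synthetic record for illustration; real ones come from runtime.BlockProfile.
	r := runtime.BlockProfileRecord{Count: 2, Cycles: 3000}
	fmt.Println(meanBlockCycles(r)) // prints 1500
}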

src/runtime/pprof/pprof_test.go

Lines changed: 70 additions & 0 deletions
@@ -14,6 +14,7 @@ import (
 	"internal/profile"
 	"internal/testenv"
 	"io"
+	"math"
 	"math/big"
 	"os"
 	"os/exec"
@@ -24,6 +25,7 @@ import (
 	"sync/atomic"
 	"testing"
 	"time"
+	_ "unsafe"
 )

 func cpuHogger(f func(x int) int, y *int, dur time.Duration) {
@@ -895,6 +897,74 @@ func blockCond() {
 	mu.Unlock()
 }

+// See http://golang.org/cl/299991.
+func TestBlockProfileBias(t *testing.T) {
+	rate := int(1000) // arbitrary value
+	runtime.SetBlockProfileRate(rate)
+	defer runtime.SetBlockProfileRate(0)
+
+	// simulate blocking events
+	blockFrequentShort(rate)
+	blockInfrequentLong(rate)
+
+	var w bytes.Buffer
+	Lookup("block").WriteTo(&w, 0)
+	p, err := profile.Parse(&w)
+	if err != nil {
+		t.Fatalf("failed to parse profile: %v", err)
+	}
+	t.Logf("parsed proto: %s", p)
+
+	il := float64(-1) // blockInfrequentLong duration
+	fs := float64(-1) // blockFrequentShort duration
+	for _, s := range p.Sample {
+		for _, l := range s.Location {
+			for _, line := range l.Line {
+				if len(s.Value) < 2 {
+					t.Fatal("block profile has less than 2 sample types")
+				}
+
+				if line.Function.Name == "runtime/pprof.blockInfrequentLong" {
+					il = float64(s.Value[1])
+				} else if line.Function.Name == "runtime/pprof.blockFrequentShort" {
+					fs = float64(s.Value[1])
+				}
+			}
+		}
+	}
+	if il == -1 || fs == -1 {
+		t.Fatal("block profile is missing expected functions")
+	}
+
+	// stddev of bias from 100 runs on local machine multiplied by 10x
+	const threshold = 0.2
+	if bias := (il - fs) / il; math.Abs(bias) > threshold {
+		t.Fatalf("bias: abs(%f) > %f", bias, threshold)
+	} else {
+		t.Logf("bias: abs(%f) < %f", bias, threshold)
+	}
+}
+
+// blockFrequentShort produces 100000 block events with an average duration of
+// rate / 10.
+func blockFrequentShort(rate int) {
+	for i := 0; i < 100000; i++ {
+		blockevent(int64(rate/10), 1)
+	}
+}
+
+// blockInfrequentLong produces 10000 block events with an average duration of
+// rate.
+func blockInfrequentLong(rate int) {
+	for i := 0; i < 10000; i++ {
+		blockevent(int64(rate), 1)
+	}
+}
+
+// Used by TestBlockProfileBias.
+//go:linkname blockevent runtime.blockevent
+func blockevent(cycles int64, skip int)
+
 func TestMutexProfile(t *testing.T) {
 	// Generate mutex profile
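To run just the new test from a Go checkout (standard go test flags; nothing here is specific to this change):

	go test -run TestBlockProfileBias runtime/pprof

The 0.2 threshold is deliberately loose (ten times the standard deviation the author observed across 100 local runs), so the test catches gross bias without being flaky.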
