Commit 877387e
runtime: use buffered write barrier for bulkBarrierPreWrite
This modifies bulkBarrierPreWrite to use the buffered write barrier
instead of the eager write barrier. This reduces the number of system
stack switches and sanity checks by a factor of the buffer size
(currently 256). This affects both typedmemmove and typedmemclr.

Since this is purely a runtime change, it applies to all arches
(unlike the pointer write barrier).

name                 old time/op  new time/op  delta
BulkWriteBarrier-12  7.33ns ± 6%  4.46ns ± 9%  -39.10%  (p=0.000 n=20+19)

Updates #22460.

Change-Id: I6a686a63bbf08be02b9b97250e37163c5a90cdd8
Reviewed-on: https://go-review.googlesource.com/73832
Run-TryBot: Austin Clements <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Rick Hudson <[email protected]>
1 parent 6a5f1e5 commit 877387e

File tree

3 files changed: +53, -6 lines

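The mechanism behind the speedup is easy to see in isolation: the eager barrier pays its full cost (including a system stack switch) on every pointer slot, while the buffered barrier pays it once per buffer of 256 records. The following minimal, self-contained Go sketch models that amortization; toyWBBuf, putFast, and flush here are illustrative stand-ins, not the runtime's actual wbBuf API.

package main

import "fmt"

const bufEntries = 256 // matches the buffer size cited in the commit message

type toyWBBuf struct {
	buf  [2 * bufEntries]uintptr // (old, new) pairs
	next int                     // index of the next free slot
}

// putFast records one (old, new) pair and reports whether the buffer
// still has room; false means the caller must flush.
func (b *toyWBBuf) putFast(old, new uintptr) bool {
	b.buf[b.next] = old
	b.buf[b.next+1] = new
	b.next += 2
	return b.next != len(b.buf)
}

// flush stands in for the expensive per-flush work (system stack
// switch, sanity checks, greying objects) that buffering amortizes.
func (b *toyWBBuf) flush() {
	fmt.Printf("flushing %d buffered barriers\n", b.next/2)
	b.next = 0
}

func main() {
	var b toyWBBuf
	flushes := 0
	for i := 0; i < 1000; i++ {
		if !b.putFast(uintptr(i), uintptr(i+1)) {
			b.flush()
			flushes++
		}
	}
	fmt.Println("total flushes:", flushes)
}

With these numbers, 1000 barriers trigger only 3 full-buffer flushes rather than 1000 eager calls; that amortization is what the BulkWriteBarrier benchmark above is measuring.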

src/runtime/mbarrier.go

Lines changed: 4 additions & 0 deletions

@@ -239,6 +239,10 @@ func writebarrierptr_prewrite(dst *uintptr, src uintptr) {
 
 // typedmemmove copies a value of type t to dst from src.
 // Must be nosplit, see #16026.
+//
+// TODO: Perfect for go:nosplitrec since we can't have a safe point
+// anywhere in the bulk barrier or memmove.
+//
 //go:nosplit
 func typedmemmove(typ *_type, dst, src unsafe.Pointer) {
 	if typ.kind&kindNoPointers == 0 {

src/runtime/mbitmap.go

Lines changed: 16 additions & 5 deletions

@@ -523,12 +523,13 @@ func (h heapBits) setCheckmarked(size uintptr) {
 	atomic.Or8(h.bitp, bitScan<<(heapBitsShift+h.shift))
 }
 
-// bulkBarrierPreWrite executes writebarrierptr_prewrite1
+// bulkBarrierPreWrite executes a write barrier
 // for every pointer slot in the memory range [src, src+size),
 // using pointer/scalar information from [dst, dst+size).
 // This executes the write barriers necessary before a memmove.
 // src, dst, and size must be pointer-aligned.
 // The range [dst, dst+size) must lie within a single object.
+// It does not perform the actual writes.
 //
 // As a special case, src == 0 indicates that this is being used for a
 // memclr. bulkBarrierPreWrite will pass 0 for the src of each write
@@ -578,12 +579,15 @@ func bulkBarrierPreWrite(dst, src, size uintptr) {
 		return
 	}
 
+	buf := &getg().m.p.ptr().wbBuf
 	h := heapBitsForAddr(dst)
 	if src == 0 {
 		for i := uintptr(0); i < size; i += sys.PtrSize {
 			if h.isPointer() {
 				dstx := (*uintptr)(unsafe.Pointer(dst + i))
-				writebarrierptr_prewrite1(dstx, 0)
+				if !buf.putFast(*dstx, 0) {
+					wbBufFlush(nil, 0)
+				}
 			}
 			h = h.next()
 		}
@@ -592,7 +596,9 @@ func bulkBarrierPreWrite(dst, src, size uintptr) {
 			if h.isPointer() {
 				dstx := (*uintptr)(unsafe.Pointer(dst + i))
 				srcx := (*uintptr)(unsafe.Pointer(src + i))
-				writebarrierptr_prewrite1(dstx, *srcx)
+				if !buf.putFast(*dstx, *srcx) {
+					wbBufFlush(nil, 0)
+				}
 			}
 			h = h.next()
 		}
@@ -612,6 +618,7 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
 	bits = addb(bits, word/8)
 	mask := uint8(1) << (word % 8)
 
+	buf := &getg().m.p.ptr().wbBuf
 	for i := uintptr(0); i < size; i += sys.PtrSize {
 		if mask == 0 {
 			bits = addb(bits, 1)
@@ -625,10 +632,14 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
 		if *bits&mask != 0 {
 			dstx := (*uintptr)(unsafe.Pointer(dst + i))
 			if src == 0 {
-				writebarrierptr_prewrite1(dstx, 0)
+				if !buf.putFast(*dstx, 0) {
+					wbBufFlush(nil, 0)
+				}
 			} else {
 				srcx := (*uintptr)(unsafe.Pointer(src + i))
-				writebarrierptr_prewrite1(dstx, *srcx)
+				if !buf.putFast(*dstx, *srcx) {
+					wbBufFlush(nil, 0)
+				}
 			}
 		}
 		mask <<= 1
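The bulkBarrierBitmap hunks keep the existing bit-walk (test a uint8 mask, shift it, and step bits to the next byte when the mask overflows to zero) and only swap the barrier call inside it. As a standalone illustration of that walk, the sketch below uses made-up mask data and no runtime internals:

package main

import "fmt"

func main() {
	// Mask for 16 words, one bit per pointer-sized word; set bits
	// (words 0, 3, and 9) mark the slots that hold pointers.
	bits := []uint8{0b0000_1001, 0b0000_0010}

	word := 0
	mask := uint8(1)
	for i := 0; i < 16; i++ {
		if bits[word]&mask != 0 {
			fmt.Printf("word %d holds a pointer: enqueue a barrier\n", i)
		}
		mask <<= 1
		if mask == 0 { // shifted past bit 7: move to the next byte
			word++
			mask = 1
		}
	}
}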

src/runtime/mwbbuf.go

Lines changed: 33 additions & 1 deletion

@@ -20,6 +20,7 @@
 package runtime
 
 import (
+	"runtime/internal/sys"
 	"unsafe"
 )
 
@@ -94,6 +95,37 @@ func (b *wbBuf) reset() {
 	}
 }
 
+// putFast adds old and new to the write barrier buffer and returns
+// false if a flush is necessary. Callers should use this as:
+//
+//     buf := &getg().m.p.ptr().wbBuf
+//     if !buf.putFast(old, new) {
+//         wbBufFlush(...)
+//     }
+//
+// The arguments to wbBufFlush depend on whether the caller is doing
+// its own cgo pointer checks. If it is, then this can be
+// wbBufFlush(nil, 0). Otherwise, it must pass the slot address and
+// new.
+//
+// Since buf is a per-P resource, the caller must ensure there are no
+// preemption points while buf is in use.
+//
+// It must be nowritebarrierrec because write barriers here would
+// corrupt the write barrier buffer. It (and everything it calls, if
+// it called anything) has to be nosplit to avoid scheduling on to a
+// different P and a different buffer.
+//
+//go:nowritebarrierrec
+//go:nosplit
+func (b *wbBuf) putFast(old, new uintptr) bool {
+	p := (*[2]uintptr)(unsafe.Pointer(b.next))
+	p[0] = old
+	p[1] = new
+	b.next += 2 * sys.PtrSize
+	return b.next != b.end
+}
+
 // wbBufFlush flushes the current P's write barrier buffer to the GC
 // workbufs. It is passed the slot and value of the write barrier that
 // caused the flush so that it can implement cgocheck.
@@ -118,7 +150,7 @@ func wbBufFlush(dst *uintptr, src uintptr) {
 		return
 	}
 
-	if writeBarrier.cgo {
+	if writeBarrier.cgo && dst != nil {
 		// This must be called from the stack that did the
 		// write. It's nosplit all the way down.
 		cgoCheckWriteBarrier(dst, src)
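putFast itself is just two stores through b.next plus a two-word bump; the buffer is full exactly when next reaches end, and the false return at that moment is what tells the caller to flush. The standalone model below mimics that pointer arithmetic. toyBuf and reset are hypothetical stand-ins, and the uintptr round-trip it performs is only really legitimate inside the runtime, so the toy heap-allocates the buffer and relies on it not moving.

package main

import (
	"fmt"
	"unsafe"
)

const ptrSize = unsafe.Sizeof(uintptr(0))

// toyBuf mirrors the wbBuf fields putFast touches: next and end are
// raw addresses into buf.
type toyBuf struct {
	next uintptr // address of the next free slot
	end  uintptr // address one past the last slot
	buf  [8]uintptr // room for four (old, new) pairs
}

func (b *toyBuf) reset() {
	b.next = uintptr(unsafe.Pointer(&b.buf[0]))
	b.end = b.next + uintptr(len(b.buf))*ptrSize
}

// putFast mirrors the diff above: two stores through next, advance by
// two words, and report false exactly when the buffer just filled.
func (b *toyBuf) putFast(old, new uintptr) bool {
	p := (*[2]uintptr)(unsafe.Pointer(b.next))
	p[0] = old
	p[1] = new
	b.next += 2 * ptrSize
	return b.next != b.end
}

func main() {
	b := new(toyBuf) // heap-allocate so the address in next stays valid
	b.reset()
	for i := uintptr(1); ; i++ {
		if !b.putFast(i, i+100) {
			break // full: a real caller would wbBufFlush here
		}
	}
	fmt.Println(b.buf) // [1 101 2 102 3 103 4 104]
}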
