Skip to content

Commit e49e876

Browse files
committed
runtime: process ptr bitmaps one word at a time
[This is a retry of CL 407036 + its revert CL 422394. The only content change is the 1-line change in cmd/internal/obj/objfile.go.]

Read the bitmaps one uintptr at a time instead of one byte at a time.

Performance so far:
- Allocation heavy, no retention: ~30% faster in heapBitsSetType
- Scan heavy, ~no allocation: ~even in scanobject

Change-Id: I04d899e1dbd23e989e9f552cdc1880318779c14c
Reviewed-on: https://go-review.googlesource.com/c/go/+/422635
TryBot-Result: Gopher Robot <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
Run-TryBot: Keith Randall <[email protected]>
Reviewed-by: Michael Knyszek <[email protected]>
1 parent 6a9c674 commit e49e876

File tree

4 files changed

+91
-18
lines changed

4 files changed

+91
-18
lines changed

src/cmd/compile/internal/reflectdata/reflect.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1548,7 +1548,11 @@ func dgcsym(t *types.Type, write bool) (lsym *obj.LSym, useGCProg bool, ptrdata
15481548

15491549
// dgcptrmask emits and returns the symbol containing a pointer mask for type t.
15501550
func dgcptrmask(t *types.Type, write bool) *obj.LSym {
1551-
ptrmask := make([]byte, (types.PtrDataSize(t)/int64(types.PtrSize)+7)/8)
1551+
// Bytes we need for the ptrmask.
1552+
n := (types.PtrDataSize(t)/int64(types.PtrSize) + 7) / 8
1553+
// Runtime wants ptrmasks padded to a multiple of uintptr in size.
1554+
n = (n + int64(types.PtrSize) - 1) &^ (int64(types.PtrSize) - 1)
1555+
ptrmask := make([]byte, n)
15521556
fillptrmask(t, ptrmask)
15531557
p := fmt.Sprintf("runtime.gcbits.%x", ptrmask)
15541558

src/cmd/internal/obj/objfile.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,6 @@ func (w *writer) Sym(s *LSym) {
354354
case strings.HasPrefix(s.Name, "go:string."),
355355
strings.HasPrefix(name, "type:.namedata."),
356356
strings.HasPrefix(name, "type:.importpath."),
357-
strings.HasPrefix(name, "runtime.gcbits."),
358357
strings.HasSuffix(name, ".opendefer"),
359358
strings.HasSuffix(name, ".arginfo0"),
360359
strings.HasSuffix(name, ".arginfo1"),

src/reflect/type.go

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2271,7 +2271,10 @@ func bucketOf(ktyp, etyp *rtype) *rtype {
22712271

22722272
if ktyp.ptrdata != 0 || etyp.ptrdata != 0 {
22732273
nptr := (bucketSize*(1+ktyp.size+etyp.size) + goarch.PtrSize) / goarch.PtrSize
2274-
mask := make([]byte, (nptr+7)/8)
2274+
n := (nptr + 7) / 8
2275+
// Runtime needs pointer masks to be a multiple of uintptr in size.
2276+
n = (n + goarch.PtrSize - 1) &^ (goarch.PtrSize - 1)
2277+
mask := make([]byte, n)
22752278
base := bucketSize / goarch.PtrSize
22762279

22772280
if ktyp.ptrdata != 0 {
@@ -2977,7 +2980,10 @@ func ArrayOf(length int, elem Type) Type {
29772980
// Element is small with pointer mask; array is still small.
29782981
// Create direct pointer mask by turning each 1 bit in elem
29792982
// into length 1 bits in larger mask.
2980-
mask := make([]byte, (array.ptrdata/goarch.PtrSize+7)/8)
2983+
n := (array.ptrdata/goarch.PtrSize + 7) / 8
2984+
// Runtime needs pointer masks to be a multiple of uintptr in size.
2985+
n = (n + goarch.PtrSize - 1) &^ (goarch.PtrSize - 1)
2986+
mask := make([]byte, n)
29812987
emitGCMask(mask, 0, typ, array.len)
29822988
array.gcdata = &mask[0]
29832989

@@ -3146,8 +3152,13 @@ type bitVector struct {
31463152

31473153
// append a bit to the bitmap.
31483154
func (bv *bitVector) append(bit uint8) {
3149-
if bv.n%8 == 0 {
3150-
bv.data = append(bv.data, 0)
3155+
if bv.n%(8*goarch.PtrSize) == 0 {
3156+
// Runtime needs pointer masks to be a multiple of uintptr in size.
3157+
// Since reflect passes bv.data directly to the runtime as a pointer mask,
3158+
// we append a full uintptr of zeros at a time.
3159+
for i := 0; i < goarch.PtrSize; i++ {
3160+
bv.data = append(bv.data, 0)
3161+
}
31513162
}
31523163
bv.data[bv.n/8] |= bit << (bv.n % 8)
31533164
bv.n++

src/runtime/mbitmap.go

Lines changed: 71 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -893,6 +893,19 @@ func (h writeHeapBits) flush(addr, size uintptr) {
893893
}
894894
}
895895

896+
// readUintptr reads the goarch.PtrSize bytes starting at the aligned pointer p into a uintptr.
897+
// The bytes are interpreted in little-endian order; on big-endian hosts the loaded word is byte-swapped.
898+
func readUintptr(p *byte) uintptr {
899+
x := *(*uintptr)(unsafe.Pointer(p))
900+
if goarch.BigEndian {
901+
if goarch.PtrSize == 8 {
902+
return uintptr(sys.Bswap64(uint64(x)))
903+
}
904+
return uintptr(sys.Bswap32(uint32(x)))
905+
}
906+
return x
907+
}
908+
896909
// heapBitsSetType records that the new allocation [x, x+size)
897910
// holds in [x, x+dataSize) one or more values of type typ.
898911
// (The number of values is given by dataSize / typ.size.)
@@ -917,7 +930,7 @@ func (h writeHeapBits) flush(addr, size uintptr) {
917930
// machines, callers must execute a store/store (publication) barrier
918931
// between calling this function and making the object reachable.
919932
func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
920-
const doubleCheck = true // slow but helpful; enable to test modifications to this code
933+
const doubleCheck = false // slow but helpful; enable to test modifications to this code
921934

922935
if doubleCheck && dataSize%typ.size != 0 {
923936
throw("heapBitsSetType: dataSize not a multiple of typ.size")
@@ -995,19 +1008,65 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
9951008
// objects with scalar tails, all but the last tail does have to
9961009
// be initialized, because there is no way to say "skip forward".
9971010

998-
for i := uintptr(0); true; i += typ.size {
999-
p := typ.gcdata
1000-
var j uintptr
1001-
for j = 0; j+8*goarch.PtrSize < typ.ptrdata; j += 8 * goarch.PtrSize {
1002-
h = h.write(uintptr(*p), 8)
1003-
p = add1(p)
1011+
ptrs := typ.ptrdata / goarch.PtrSize
1012+
if typ.size == dataSize { // Single element
1013+
if ptrs <= ptrBits { // Single small element
1014+
m := readUintptr(typ.gcdata)
1015+
h = h.write(m, ptrs)
1016+
} else { // Single large element
1017+
p := typ.gcdata
1018+
for {
1019+
h = h.write(readUintptr(p), ptrBits)
1020+
p = addb(p, ptrBits/8)
1021+
ptrs -= ptrBits
1022+
if ptrs <= ptrBits {
1023+
break
1024+
}
1025+
}
1026+
m := readUintptr(p)
1027+
h = h.write(m, ptrs)
10041028
}
1005-
h = h.write(uintptr(*p), (typ.ptrdata-j)/goarch.PtrSize)
1006-
if i+typ.size == dataSize {
1007-
break // don't need the trailing nonptr bits on the last element.
1029+
} else { // Repeated element
1030+
words := typ.size / goarch.PtrSize // total words, including scalar tail
1031+
if words <= ptrBits { // Repeated small element
1032+
n := dataSize / typ.size
1033+
m := readUintptr(typ.gcdata)
1034+
// Make larger unit to repeat
1035+
for words <= ptrBits/2 {
1036+
if n&1 != 0 {
1037+
h = h.write(m, words)
1038+
}
1039+
n /= 2
1040+
m |= m << words
1041+
ptrs += words
1042+
words *= 2
1043+
if n == 1 {
1044+
break
1045+
}
1046+
}
1047+
for n > 1 {
1048+
h = h.write(m, words)
1049+
n--
1050+
}
1051+
h = h.write(m, ptrs)
1052+
} else { // Repeated large element
1053+
for i := uintptr(0); true; i += typ.size {
1054+
p := typ.gcdata
1055+
j := ptrs
1056+
for j > ptrBits {
1057+
h = h.write(readUintptr(p), ptrBits)
1058+
p = addb(p, ptrBits/8)
1059+
j -= ptrBits
1060+
}
1061+
m := readUintptr(p)
1062+
h = h.write(m, j)
1063+
if i+typ.size == dataSize {
1064+
break // don't need the trailing nonptr bits on the last element.
1065+
}
1066+
// Pad with zeros to the start of the next element.
1067+
h = h.pad(typ.size - typ.ptrdata)
1068+
}
10081069
}
1009-
// Pad with zeros to the start of the next element.
1010-
h = h.pad(typ.size - typ.ptrdata)
10111070
}
10121071
h.flush(x, size)
10131072

0 commit comments

Comments
 (0)