Skip to content

Commit cbafcc5

Browse files
committed
cmd/compile,runtime: implement stack objects
Rework how the compiler+runtime handles stack-allocated variables whose address is taken. Direct references to such variables work as before. References through pointers, however, use a new mechanism. The new mechanism is more precise than the old "ambiguously live" mechanism. It computes liveness at runtime based on the actual references among objects on the stack. Each function records all of its address-taken objects in a FUNCDATA. These are called "stack objects". The runtime then uses that information while scanning a stack to find all of the stack objects on a stack. It then does a mark phase on the stack objects, using all the pointers found on the stack (and ancillary structures, like defer records) as the root set. Only stack objects which are found to be live during this mark phase will be scanned and thus retain any heap objects they point to. A subsequent CL will remove all the "ambiguously live" logic from the compiler, so that the stack object tracing will be required. For this CL, the stack tracing is all redundant with the current ambiguously live logic. Update #22350 Change-Id: Ide19f1f71a5b6ec8c4d54f8f66f0e9a98344772f Reviewed-on: https://go-review.googlesource.com/c/134155 Reviewed-by: Austin Clements <[email protected]>
1 parent 4334966 commit cbafcc5

File tree

13 files changed

+607
-27
lines changed

13 files changed

+607
-27
lines changed

src/cmd/compile/internal/gc/obj.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ func dumpglobls() {
281281
funcsyms = nil
282282
}
283283

284-
// addGCLocals adds gcargs and gclocals symbols to Ctxt.Data.
284+
// addGCLocals adds gcargs, gclocals, gcregs, and stack object symbols to Ctxt.Data.
285285
// It takes care not to add any duplicates.
286286
// Though the object file format handles duplicates efficiently,
287287
// storing only a single copy of the data,
@@ -299,6 +299,9 @@ func addGCLocals() {
299299
Ctxt.Data = append(Ctxt.Data, gcsym)
300300
seen[gcsym.Name] = true
301301
}
302+
if x := s.Func.StackObjects; x != nil {
303+
ggloblsym(x, int32(len(x.P)), obj.RODATA|obj.LOCAL)
304+
}
302305
}
303306
}
304307

src/cmd/compile/internal/gc/pgen.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,26 @@ func compile(fn *Node) {
233233
// Set up the function's LSym early to avoid data races with the assemblers.
234234
fn.Func.initLSym()
235235

236+
// Make sure type syms are declared for all types that might
237+
// be types of stack objects. We need to do this here
238+
// because symbols must be allocated before the parallel
239+
// phase of the compiler.
240+
if fn.Func.lsym != nil { // not func _(){}
241+
for _, n := range fn.Func.Dcl {
242+
switch n.Class() {
243+
case PPARAM, PPARAMOUT, PAUTO:
244+
if livenessShouldTrack(n) && n.Addrtaken() {
245+
dtypesym(n.Type)
246+
// Also make sure we allocate a linker symbol
247+
// for the stack object data, for the same reason.
248+
if fn.Func.lsym.Func.StackObjects == nil {
249+
fn.Func.lsym.Func.StackObjects = lookup(fmt.Sprintf("%s.stkobj", fn.funcname())).Linksym()
250+
}
251+
}
252+
}
253+
}
254+
}
255+
236256
if compilenow() {
237257
compileSSA(fn, 0)
238258
} else {

src/cmd/compile/internal/gc/ssa.go

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"cmd/compile/internal/ssa"
1717
"cmd/compile/internal/types"
1818
"cmd/internal/obj"
19+
"cmd/internal/objabi"
1920
"cmd/internal/src"
2021
"cmd/internal/sys"
2122
)
@@ -4933,13 +4934,59 @@ func (s *SSAGenState) DebugFriendlySetPosFrom(v *ssa.Value) {
49334934
}
49344935
}
49354936

4937+
// byXoffset implements sort.Interface for []*Node using Xoffset as the ordering.
4938+
type byXoffset []*Node
4939+
4940+
func (s byXoffset) Len() int { return len(s) }
4941+
func (s byXoffset) Less(i, j int) bool { return s[i].Xoffset < s[j].Xoffset }
4942+
func (s byXoffset) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
4943+
4944+
func emitStackObjects(e *ssafn, pp *Progs) {
4945+
var vars []*Node
4946+
for _, n := range e.curfn.Func.Dcl {
4947+
if livenessShouldTrack(n) && n.Addrtaken() {
4948+
vars = append(vars, n)
4949+
}
4950+
}
4951+
if len(vars) == 0 {
4952+
return
4953+
}
4954+
4955+
// Sort variables from lowest to highest address.
4956+
sort.Sort(byXoffset(vars))
4957+
4958+
// Populate the stack object data.
4959+
// Format must match runtime/stack.go:stackObjectRecord.
4960+
x := e.curfn.Func.lsym.Func.StackObjects
4961+
off := 0
4962+
off = duintptr(x, off, uint64(len(vars)))
4963+
for _, v := range vars {
4964+
// Note: arguments and return values have non-negative Xoffset,
4965+
// in which case the offset is relative to argp.
4966+
// Locals have a negative Xoffset, in which case the offset is relative to varp.
4967+
off = duintptr(x, off, uint64(v.Xoffset))
4968+
if !typesym(v.Type).Siggen() {
4969+
Fatalf("stack object's type symbol not generated for type %s", v.Type)
4970+
}
4971+
off = dsymptr(x, off, dtypesym(v.Type), 0)
4972+
}
4973+
4974+
// Emit a funcdata pointing at the stack object data.
4975+
p := pp.Prog(obj.AFUNCDATA)
4976+
Addrconst(&p.From, objabi.FUNCDATA_StackObjects)
4977+
p.To.Type = obj.TYPE_MEM
4978+
p.To.Name = obj.NAME_EXTERN
4979+
p.To.Sym = x
4980+
}
4981+
49364982
// genssa appends entries to pp for each instruction in f.
49374983
func genssa(f *ssa.Func, pp *Progs) {
49384984
var s SSAGenState
49394985

49404986
e := f.Frontend().(*ssafn)
49414987

49424988
s.livenessMap = liveness(e, f)
4989+
emitStackObjects(e, pp)
49434990

49444991
// Remember where each block starts.
49454992
s.bstart = make([]*obj.Prog, f.NumBlocks())
@@ -5054,7 +5101,6 @@ func genssa(f *ssa.Func, pp *Progs) {
50545101
}
50555102
}
50565103
}
5057-
50585104
// Emit control flow instructions for block
50595105
var next *ssa.Block
50605106
if i < len(f.Blocks)-1 && Debug['N'] == 0 {

src/cmd/internal/obj/link.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -402,9 +402,10 @@ type FuncInfo struct {
402402
dwarfAbsFnSym *LSym
403403
dwarfIsStmtSym *LSym
404404

405-
GCArgs LSym
406-
GCLocals LSym
407-
GCRegs LSym
405+
GCArgs LSym
406+
GCLocals LSym
407+
GCRegs LSym
408+
StackObjects *LSym
408409
}
409410

410411
// Attribute is a set of symbol attributes.

src/cmd/internal/objabi/funcdata.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ const (
1818
FUNCDATA_LocalsPointerMaps = 1
1919
FUNCDATA_InlTree = 2
2020
FUNCDATA_RegPointerMaps = 3
21+
FUNCDATA_StackObjects = 4
2122

2223
// ArgsSizeUnknown is set in Func.argsize to mark all functions
2324
// whose argument size is unknown (C vararg functions, and

src/reflect/all_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5988,7 +5988,8 @@ func TestFuncLayout(t *testing.T) {
59885988
func verifyGCBits(t *testing.T, typ Type, bits []byte) {
59895989
heapBits := GCBits(New(typ).Interface())
59905990
if !bytes.Equal(heapBits, bits) {
5991-
t.Errorf("heapBits incorrect for %v\nhave %v\nwant %v", typ, heapBits, bits)
5991+
_, _, line, _ := runtime.Caller(1)
5992+
t.Errorf("line %d: heapBits incorrect for %v\nhave %v\nwant %v", line, typ, heapBits, bits)
59925993
}
59935994
}
59945995

src/runtime/funcdata.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#define FUNCDATA_LocalsPointerMaps 1
1717
#define FUNCDATA_InlTree 2
1818
#define FUNCDATA_RegPointerMaps 3
19+
#define FUNCDATA_StackObjects 4
1920

2021
// Pseudo-assembly statements.
2122

src/runtime/mbitmap.go

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1911,6 +1911,20 @@ Run:
19111911
return totalBits
19121912
}
19131913

1914+
// materializeGCProg allocates space for the (1-bit) pointer bitmask
1915+
// for an object of size ptrdata. Then it fills that space with the
1916+
// pointer bitmask specified by the program prog.
1917+
// The bitmask starts at s.startAddr.
1918+
// The result must be deallocated with dematerializeGCProg.
1919+
func materializeGCProg(ptrdata uintptr, prog *byte) *mspan {
1920+
s := mheap_.allocManual((ptrdata/(8*sys.PtrSize)+pageSize-1)/pageSize, &memstats.gc_sys)
1921+
runGCProg(addb(prog, 4), nil, (*byte)(unsafe.Pointer(s.startAddr)), 1)
1922+
return s
1923+
}
1924+
func dematerializeGCProg(s *mspan) {
1925+
mheap_.freeManual(s, &memstats.gc_sys)
1926+
}
1927+
19141928
func dumpGCProg(p *byte) {
19151929
nptr := 0
19161930
for {
@@ -2037,7 +2051,12 @@ func getgcmask(ep interface{}) (mask []byte) {
20372051
_g_ := getg()
20382052
gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
20392053
if frame.fn.valid() {
2040-
locals, _ := getStackMap(&frame, nil, false)
2054+
// TODO: once stack objects are enabled (and their pointers
2055+
// are no longer described by the stack pointermap directly),
2056+
// tests using this will probably need fixing. We might need
2057+
// to loop through the stackobjects and if we're inside one,
2058+
// use the pointermap from that object.
2059+
locals, _, _ := getStackMap(&frame, nil, false)
20412060
if locals.n == 0 {
20422061
return
20432062
}

src/runtime/mgcmark.go

Lines changed: 112 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ func markroot(gcw *gcWork, i uint32) {
169169
case i == fixedRootFinalizers:
170170
for fb := allfin; fb != nil; fb = fb.alllink {
171171
cnt := uintptr(atomic.Load(&fb.cnt))
172-
scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), cnt*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], gcw)
172+
scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), cnt*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], gcw, nil)
173173
}
174174

175175
case i == fixedRootFreeGStacks:
@@ -248,7 +248,7 @@ func markrootBlock(b0, n0 uintptr, ptrmask0 *uint8, gcw *gcWork, shard int) {
248248
}
249249

250250
// Scan this shard.
251-
scanblock(b, n, ptrmask, gcw)
251+
scanblock(b, n, ptrmask, gcw, nil)
252252
}
253253

254254
// markrootFreeGStacks frees stacks of dead Gs.
@@ -349,7 +349,7 @@ func markrootSpans(gcw *gcWork, shard int) {
349349
scanobject(p, gcw)
350350

351351
// The special itself is a root.
352-
scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw)
352+
scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw, nil)
353353
}
354354

355355
unlock(&s.speciallock)
@@ -689,42 +689,136 @@ func scanstack(gp *g, gcw *gcWork) {
689689
// Shrink the stack if not much of it is being used.
690690
shrinkstack(gp)
691691

692+
var state stackScanState
693+
state.stack = gp.stack
694+
695+
if stackTraceDebug {
696+
println("stack trace goroutine", gp.goid)
697+
}
698+
692699
// Scan the saved context register. This is effectively a live
693700
// register that gets moved back and forth between the
694701
// register and sched.ctxt without a write barrier.
695702
if gp.sched.ctxt != nil {
696-
scanblock(uintptr(unsafe.Pointer(&gp.sched.ctxt)), sys.PtrSize, &oneptrmask[0], gcw)
703+
scanblock(uintptr(unsafe.Pointer(&gp.sched.ctxt)), sys.PtrSize, &oneptrmask[0], gcw, &state)
697704
}
698705

699-
// Scan the stack.
700-
var cache pcvalueCache
706+
// Scan the stack. Accumulate a list of stack objects.
701707
scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
702-
scanframeworker(frame, &cache, gcw)
708+
scanframeworker(frame, &state, gcw)
703709
return true
704710
}
705711
gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
706712
tracebackdefers(gp, scanframe, nil)
713+
714+
// Find and scan all reachable stack objects.
715+
state.buildIndex()
716+
for {
717+
p := state.getPtr()
718+
if p == 0 {
719+
break
720+
}
721+
obj := state.findObject(p)
722+
if obj == nil {
723+
continue
724+
}
725+
t := obj.typ
726+
if t == nil {
727+
// We've already scanned this object.
728+
continue
729+
}
730+
obj.setType(nil) // Don't scan it again.
731+
if stackTraceDebug {
732+
println(" live stkobj at", hex(state.stack.lo+uintptr(obj.off)), "of type", t.string())
733+
}
734+
gcdata := t.gcdata
735+
var s *mspan
736+
if t.kind&kindGCProg != 0 {
737+
// This path is pretty unlikely, an object large enough
738+
// to have a GC program allocated on the stack.
739+
// We need some space to unpack the program into a straight
740+
// bitmask, which we allocate/free here.
741+
// TODO: it would be nice if there were a way to run a GC
742+
// program without having to store all its bits. We'd have
743+
// to change from a Lempel-Ziv style program to something else.
744+
// Or we can forbid putting objects on stacks if they require
745+
// a gc program (see issue 27447).
746+
s = materializeGCProg(t.ptrdata, gcdata)
747+
gcdata = (*byte)(unsafe.Pointer(s.startAddr))
748+
}
749+
750+
scanblock(state.stack.lo+uintptr(obj.off), t.ptrdata, gcdata, gcw, &state)
751+
752+
if s != nil {
753+
dematerializeGCProg(s)
754+
}
755+
}
756+
757+
// Deallocate object buffers.
758+
// (Pointer buffers were all deallocated in the loop above.)
759+
for state.head != nil {
760+
x := state.head
761+
state.head = x.next
762+
if stackTraceDebug {
763+
for _, obj := range x.obj[:x.nobj] {
764+
if obj.typ == nil { // reachable
765+
continue
766+
}
767+
println(" dead stkobj at", hex(gp.stack.lo+uintptr(obj.off)), "of type", obj.typ.string())
768+
// Note: not necessarily really dead - only reachable-from-ptr dead.
769+
}
770+
}
771+
x.nobj = 0
772+
putempty((*workbuf)(unsafe.Pointer(x)))
773+
}
774+
if state.buf != nil || state.freeBuf != nil {
775+
throw("remaining pointer buffers")
776+
}
777+
707778
gp.gcscanvalid = true
708779
}
709780

710781
// Scan a stack frame: local variables and function arguments/results.
711782
//go:nowritebarrier
712-
func scanframeworker(frame *stkframe, cache *pcvalueCache, gcw *gcWork) {
783+
func scanframeworker(frame *stkframe, state *stackScanState, gcw *gcWork) {
713784
if _DebugGC > 1 && frame.continpc != 0 {
714785
print("scanframe ", funcname(frame.fn), "\n")
715786
}
716787

717-
locals, args := getStackMap(frame, cache, false)
788+
locals, args, objs := getStackMap(frame, &state.cache, false)
718789

719790
// Scan local variables if stack frame has been allocated.
720791
if locals.n > 0 {
721792
size := uintptr(locals.n) * sys.PtrSize
722-
scanblock(frame.varp-size, size, locals.bytedata, gcw)
793+
scanblock(frame.varp-size, size, locals.bytedata, gcw, state)
723794
}
724795

725796
// Scan arguments.
726797
if args.n > 0 {
727-
scanblock(frame.argp, uintptr(args.n)*sys.PtrSize, args.bytedata, gcw)
798+
scanblock(frame.argp, uintptr(args.n)*sys.PtrSize, args.bytedata, gcw, state)
799+
}
800+
801+
// Add all stack objects to the stack object list.
802+
if frame.varp != 0 {
803+
// varp is 0 for defers, where there are no locals.
804+
// In that case, there can't be a pointer to its args, either.
805+
// (And all args would be scanned above anyway.)
806+
for _, obj := range objs {
807+
off := obj.off
808+
base := frame.varp // locals base pointer
809+
if off >= 0 {
810+
base = frame.argp // arguments and return values base pointer
811+
}
812+
ptr := base + uintptr(off)
813+
if ptr < frame.sp {
814+
// object hasn't been allocated in the frame yet.
815+
continue
816+
}
817+
if stackTraceDebug {
818+
println("stkobj at", hex(ptr), "of type", obj.typ.string())
819+
}
820+
state.addObject(ptr, obj.typ)
821+
}
728822
}
729823
}
730824

@@ -939,8 +1033,9 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 {
9391033
// This is used to scan non-heap roots, so it does not update
9401034
// gcw.bytesMarked or gcw.scanWork.
9411035
//
1036+
// If stk != nil, possible stack pointers are also reported to stk.putPtr.
9421037
//go:nowritebarrier
943-
func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
1038+
func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork, stk *stackScanState) {
9441039
// Use local copies of original parameters, so that a stack trace
9451040
// due to one of the throws below shows the original block
9461041
// base and extent.
@@ -957,10 +1052,12 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
9571052
for j := 0; j < 8 && i < n; j++ {
9581053
if bits&1 != 0 {
9591054
// Same work as in scanobject; see comments there.
960-
obj := *(*uintptr)(unsafe.Pointer(b + i))
961-
if obj != 0 {
962-
if obj, span, objIndex := findObject(obj, b, i); obj != 0 {
1055+
p := *(*uintptr)(unsafe.Pointer(b + i))
1056+
if p != 0 {
1057+
if obj, span, objIndex := findObject(p, b, i); obj != 0 {
9631058
greyobject(obj, b, i, span, gcw, objIndex)
1059+
} else if stk != nil && p >= stk.stack.lo && p < stk.stack.hi {
1060+
stk.putPtr(p)
9641061
}
9651062
}
9661063
}

0 commit comments

Comments
 (0)