Skip to content

Commit fff4f59

Browse files
committed
cmd/compile,runtime: allocate defer records on the stack
When a defer is executed at most once in a function body, we can allocate the defer record for it on the stack instead of on the heap. This should make defers like this (which are very common) faster. This optimization applies to 363 out of the 370 static defer sites in the cmd/go binary. name old time/op new time/op delta Defer-4 52.2ns ± 5% 36.2ns ± 3% -30.70% (p=0.000 n=10+10) Fixes #6980 Update #14939 Change-Id: I697109dd7aeef9e97a9eeba2ef65ff53d3ee1004 Reviewed-on: https://go-review.googlesource.com/c/go/+/171758 Run-TryBot: Keith Randall <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Austin Clements <[email protected]>
1 parent 8343a09 commit fff4f59

File tree

15 files changed

+328
-108
lines changed

15 files changed

+328
-108
lines changed

src/cmd/compile/internal/gc/esc.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -802,6 +802,7 @@ opSwitch:
802802

803803
case ODEFER:
804804
if e.loopdepth == 1 { // top level
805+
n.Esc = EscNever // force stack allocation of defer record (see ssa.go)
805806
break
806807
}
807808
// arguments leak out of scope

src/cmd/compile/internal/gc/escape.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -882,6 +882,7 @@ func (e *Escape) augmentParamHole(k EscHole, where *Node) EscHole {
882882
// non-transient location to avoid arguments from being
883883
// transiently allocated.
884884
if where.Op == ODEFER && e.loopDepth == 1 {
885+
where.Esc = EscNever // force stack allocation of defer record (see ssa.go)
885886
// TODO(mdempsky): Eliminate redundant EscLocation allocs.
886887
return e.teeHole(k, e.newLoc(nil, false).asHole())
887888
}

src/cmd/compile/internal/gc/go.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,7 @@ var (
287287
assertI2I,
288288
assertI2I2,
289289
deferproc,
290+
deferprocStack,
290291
Deferreturn,
291292
Duffcopy,
292293
Duffzero,

src/cmd/compile/internal/gc/reflect.go

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,48 @@ func hiter(t *types.Type) *types.Type {
317317
return hiter
318318
}
319319

320+
// deferstruct makes a runtime._defer structure, with additional space for
321+
// stksize bytes of args.
322+
func deferstruct(stksize int64) *types.Type {
323+
makefield := func(name string, typ *types.Type) *types.Field {
324+
f := types.NewField()
325+
f.Type = typ
326+
// Unlike the global makefield function, this one needs to set Pkg
327+
// because these types might be compared (in SSA CSE sorting).
328+
// TODO: unify this makefield and the global one above.
329+
f.Sym = &types.Sym{Name: name, Pkg: localpkg}
330+
return f
331+
}
332+
argtype := types.NewArray(types.Types[TUINT8], stksize)
333+
argtype.SetNoalg(true)
334+
argtype.Width = stksize
335+
argtype.Align = 1
336+
// These fields must match the ones in runtime/runtime2.go:_defer and
337+
// cmd/compile/internal/gc/ssa.go:(*state).call.
338+
fields := []*types.Field{
339+
makefield("siz", types.Types[TUINT32]),
340+
makefield("started", types.Types[TBOOL]),
341+
makefield("heap", types.Types[TBOOL]),
342+
makefield("sp", types.Types[TUINTPTR]),
343+
makefield("pc", types.Types[TUINTPTR]),
344+
// Note: the types here don't really matter. Defer structures
345+
// are always scanned explicitly during stack copying and GC,
346+
// so we make them uintptr type even though they are real pointers.
347+
makefield("fn", types.Types[TUINTPTR]),
348+
makefield("_panic", types.Types[TUINTPTR]),
349+
makefield("link", types.Types[TUINTPTR]),
350+
makefield("args", argtype),
351+
}
352+
353+
// build struct holding the above fields
354+
s := types.New(TSTRUCT)
355+
s.SetNoalg(true)
356+
s.SetFields(fields)
357+
s.Width = widstruct(s, s, 0, 1)
358+
s.Align = uint8(Widthptr)
359+
return s
360+
}
361+
320362
// f is method type, with receiver.
321363
// return function type, receiver as first argument (or not).
322364
func methodfunc(f *types.Type, receiver *types.Type) *types.Type {

src/cmd/compile/internal/gc/ssa.go

Lines changed: 135 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ func initssaconfig() {
6868
assertI2I = sysfunc("assertI2I")
6969
assertI2I2 = sysfunc("assertI2I2")
7070
deferproc = sysfunc("deferproc")
71+
deferprocStack = sysfunc("deferprocStack")
7172
Deferreturn = sysfunc("deferreturn")
7273
Duffcopy = sysvar("duffcopy") // asm func with special ABI
7374
Duffzero = sysvar("duffzero") // asm func with special ABI
@@ -864,7 +865,11 @@ func (s *state) stmt(n *Node) {
864865
}
865866
}
866867
case ODEFER:
867-
s.call(n.Left, callDefer)
868+
d := callDefer
869+
if n.Esc == EscNever {
870+
d = callDeferStack
871+
}
872+
s.call(n.Left, d)
868873
case OGO:
869874
s.call(n.Left, callGo)
870875

@@ -2859,6 +2864,7 @@ type callKind int8
28592864
const (
28602865
callNormal callKind = iota
28612866
callDefer
2867+
callDeferStack
28622868
callGo
28632869
)
28642870

@@ -3799,74 +3805,132 @@ func (s *state) call(n *Node, k callKind) *ssa.Value {
37993805
rcvr = s.newValue1(ssa.OpIData, types.Types[TUINTPTR], i)
38003806
}
38013807
dowidth(fn.Type)
3802-
stksize := fn.Type.ArgWidth() // includes receiver
3808+
stksize := fn.Type.ArgWidth() // includes receiver, args, and results
38033809

38043810
// Run all assignments of temps.
38053811
// The temps are introduced to avoid overwriting argument
38063812
// slots when arguments themselves require function calls.
38073813
s.stmtList(n.List)
38083814

3809-
// Store arguments to stack, including defer/go arguments and receiver for method calls.
3810-
// These are written in SP-offset order.
3811-
argStart := Ctxt.FixedFrameSize()
3812-
// Defer/go args.
3813-
if k != callNormal {
3814-
// Write argsize and closure (args to newproc/deferproc).
3815-
argsize := s.constInt32(types.Types[TUINT32], int32(stksize))
3816-
addr := s.constOffPtrSP(s.f.Config.Types.UInt32Ptr, argStart)
3817-
s.store(types.Types[TUINT32], addr, argsize)
3818-
addr = s.constOffPtrSP(s.f.Config.Types.UintptrPtr, argStart+int64(Widthptr))
3819-
s.store(types.Types[TUINTPTR], addr, closure)
3820-
stksize += 2 * int64(Widthptr)
3821-
argStart += 2 * int64(Widthptr)
3822-
}
3823-
3824-
// Set receiver (for interface calls).
3825-
if rcvr != nil {
3826-
addr := s.constOffPtrSP(s.f.Config.Types.UintptrPtr, argStart)
3827-
s.store(types.Types[TUINTPTR], addr, rcvr)
3828-
}
3829-
3830-
// Write args.
3831-
t := n.Left.Type
3832-
args := n.Rlist.Slice()
3833-
if n.Op == OCALLMETH {
3834-
f := t.Recv()
3835-
s.storeArg(args[0], f.Type, argStart+f.Offset)
3836-
args = args[1:]
3837-
}
3838-
for i, n := range args {
3839-
f := t.Params().Field(i)
3840-
s.storeArg(n, f.Type, argStart+f.Offset)
3841-
}
3842-
3843-
// call target
38443815
var call *ssa.Value
3845-
switch {
3846-
case k == callDefer:
3847-
call = s.newValue1A(ssa.OpStaticCall, types.TypeMem, deferproc, s.mem())
3848-
case k == callGo:
3849-
call = s.newValue1A(ssa.OpStaticCall, types.TypeMem, newproc, s.mem())
3850-
case closure != nil:
3851-
// rawLoad because loading the code pointer from a
3852-
// closure is always safe, but IsSanitizerSafeAddr
3853-
// can't always figure that out currently, and it's
3854-
// critical that we not clobber any arguments already
3855-
// stored onto the stack.
3856-
codeptr = s.rawLoad(types.Types[TUINTPTR], closure)
3857-
call = s.newValue3(ssa.OpClosureCall, types.TypeMem, codeptr, closure, s.mem())
3858-
case codeptr != nil:
3859-
call = s.newValue2(ssa.OpInterCall, types.TypeMem, codeptr, s.mem())
3860-
case sym != nil:
3861-
call = s.newValue1A(ssa.OpStaticCall, types.TypeMem, sym.Linksym(), s.mem())
3862-
default:
3863-
Fatalf("bad call type %v %v", n.Op, n)
3816+
if k == callDeferStack {
3817+
// Make a defer struct d on the stack.
3818+
t := deferstruct(stksize)
3819+
d := tempAt(n.Pos, s.curfn, t)
3820+
3821+
s.vars[&memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, d, s.mem())
3822+
addr := s.addr(d, false)
3823+
3824+
// Must match reflect.go:deferstruct and src/runtime/runtime2.go:_defer.
3825+
// 0: siz
3826+
s.store(types.Types[TUINT32],
3827+
s.newValue1I(ssa.OpOffPtr, types.Types[TUINT32].PtrTo(), t.FieldOff(0), addr),
3828+
s.constInt32(types.Types[TUINT32], int32(stksize)))
3829+
// 1: started, set in deferprocStack
3830+
// 2: heap, set in deferprocStack
3831+
// 3: sp, set in deferprocStack
3832+
// 4: pc, set in deferprocStack
3833+
// 5: fn
3834+
s.store(closure.Type,
3835+
s.newValue1I(ssa.OpOffPtr, closure.Type.PtrTo(), t.FieldOff(5), addr),
3836+
closure)
3837+
// 6: panic, set in deferprocStack
3838+
// 7: link, set in deferprocStack
3839+
3840+
// Then, store all the arguments of the defer call.
3841+
ft := fn.Type
3842+
off := t.FieldOff(8)
3843+
args := n.Rlist.Slice()
3844+
3845+
// Set receiver (for interface calls). Always a pointer.
3846+
if rcvr != nil {
3847+
p := s.newValue1I(ssa.OpOffPtr, ft.Recv().Type.PtrTo(), off, addr)
3848+
s.store(types.Types[TUINTPTR], p, rcvr)
3849+
}
3850+
// Set receiver (for method calls).
3851+
if n.Op == OCALLMETH {
3852+
f := ft.Recv()
3853+
s.storeArgWithBase(args[0], f.Type, addr, off+f.Offset)
3854+
args = args[1:]
3855+
}
3856+
// Set other args.
3857+
for _, f := range ft.Params().Fields().Slice() {
3858+
s.storeArgWithBase(args[0], f.Type, addr, off+f.Offset)
3859+
args = args[1:]
3860+
}
3861+
3862+
// Call runtime.deferprocStack with pointer to _defer record.
3863+
arg0 := s.constOffPtrSP(types.Types[TUINTPTR], Ctxt.FixedFrameSize())
3864+
s.store(types.Types[TUINTPTR], arg0, addr)
3865+
call = s.newValue1A(ssa.OpStaticCall, types.TypeMem, deferprocStack, s.mem())
3866+
if stksize < int64(Widthptr) {
3867+
// We need room for both the call to deferprocStack and the call to
3868+
// the deferred function.
3869+
stksize = int64(Widthptr)
3870+
}
3871+
call.AuxInt = stksize
3872+
} else {
3873+
// Store arguments to stack, including defer/go arguments and receiver for method calls.
3874+
// These are written in SP-offset order.
3875+
argStart := Ctxt.FixedFrameSize()
3876+
// Defer/go args.
3877+
if k != callNormal {
3878+
// Write argsize and closure (args to newproc/deferproc).
3879+
argsize := s.constInt32(types.Types[TUINT32], int32(stksize))
3880+
addr := s.constOffPtrSP(s.f.Config.Types.UInt32Ptr, argStart)
3881+
s.store(types.Types[TUINT32], addr, argsize)
3882+
addr = s.constOffPtrSP(s.f.Config.Types.UintptrPtr, argStart+int64(Widthptr))
3883+
s.store(types.Types[TUINTPTR], addr, closure)
3884+
stksize += 2 * int64(Widthptr)
3885+
argStart += 2 * int64(Widthptr)
3886+
}
3887+
3888+
// Set receiver (for interface calls).
3889+
if rcvr != nil {
3890+
addr := s.constOffPtrSP(s.f.Config.Types.UintptrPtr, argStart)
3891+
s.store(types.Types[TUINTPTR], addr, rcvr)
3892+
}
3893+
3894+
// Write args.
3895+
t := n.Left.Type
3896+
args := n.Rlist.Slice()
3897+
if n.Op == OCALLMETH {
3898+
f := t.Recv()
3899+
s.storeArg(args[0], f.Type, argStart+f.Offset)
3900+
args = args[1:]
3901+
}
3902+
for i, n := range args {
3903+
f := t.Params().Field(i)
3904+
s.storeArg(n, f.Type, argStart+f.Offset)
3905+
}
3906+
3907+
// call target
3908+
switch {
3909+
case k == callDefer:
3910+
call = s.newValue1A(ssa.OpStaticCall, types.TypeMem, deferproc, s.mem())
3911+
case k == callGo:
3912+
call = s.newValue1A(ssa.OpStaticCall, types.TypeMem, newproc, s.mem())
3913+
case closure != nil:
3914+
// rawLoad because loading the code pointer from a
3915+
// closure is always safe, but IsSanitizerSafeAddr
3916+
// can't always figure that out currently, and it's
3917+
// critical that we not clobber any arguments already
3918+
// stored onto the stack.
3919+
codeptr = s.rawLoad(types.Types[TUINTPTR], closure)
3920+
call = s.newValue3(ssa.OpClosureCall, types.TypeMem, codeptr, closure, s.mem())
3921+
case codeptr != nil:
3922+
call = s.newValue2(ssa.OpInterCall, types.TypeMem, codeptr, s.mem())
3923+
case sym != nil:
3924+
call = s.newValue1A(ssa.OpStaticCall, types.TypeMem, sym.Linksym(), s.mem())
3925+
default:
3926+
Fatalf("bad call type %v %v", n.Op, n)
3927+
}
3928+
call.AuxInt = stksize // Call operations carry the argsize of the callee along with them
38643929
}
3865-
call.AuxInt = stksize // Call operations carry the argsize of the callee along with them
38663930
s.vars[&memVar] = call
38673931

38683932
// Finish block for defers
3869-
if k == callDefer {
3933+
if k == callDefer || k == callDeferStack {
38703934
b := s.endBlock()
38713935
b.Kind = ssa.BlockDefer
38723936
b.SetControl(call)
@@ -4361,17 +4425,27 @@ func (s *state) storeTypePtrs(t *types.Type, left, right *ssa.Value) {
43614425
}
43624426

43634427
func (s *state) storeArg(n *Node, t *types.Type, off int64) {
4428+
s.storeArgWithBase(n, t, s.sp, off)
4429+
}
4430+
4431+
func (s *state) storeArgWithBase(n *Node, t *types.Type, base *ssa.Value, off int64) {
43644432
pt := types.NewPtr(t)
4365-
sp := s.constOffPtrSP(pt, off)
4433+
var addr *ssa.Value
4434+
if base == s.sp {
4435+
// Use special routine that avoids allocation on duplicate offsets.
4436+
addr = s.constOffPtrSP(pt, off)
4437+
} else {
4438+
addr = s.newValue1I(ssa.OpOffPtr, pt, off, base)
4439+
}
43664440

43674441
if !canSSAType(t) {
43684442
a := s.addr(n, false)
4369-
s.move(t, sp, a)
4443+
s.move(t, addr, a)
43704444
return
43714445
}
43724446

43734447
a := s.expr(n)
4374-
s.storeType(t, sp, a, 0, false)
4448+
s.storeType(t, addr, a, 0, false)
43754449
}
43764450

43774451
// slice computes the slice v[i:j:k] and returns ptr, len, and cap of result.

src/runtime/mgcmark.go

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -712,15 +712,31 @@ func scanstack(gp *g, gcw *gcWork) {
712712

713713
// Find additional pointers that point into the stack from the heap.
714714
// Currently this includes defers and panics. See also function copystack.
715+
716+
// Find and trace all defer arguments.
715717
tracebackdefers(gp, scanframe, nil)
718+
719+
// Find and trace other pointers in defer records.
716720
for d := gp._defer; d != nil; d = d.link {
717-
// tracebackdefers above does not scan the func value, which could
718-
// be a stack allocated closure. See issue 30453.
719721
if d.fn != nil {
722+
// tracebackdefers above does not scan the func value, which could
723+
// be a stack allocated closure. See issue 30453.
720724
scanblock(uintptr(unsafe.Pointer(&d.fn)), sys.PtrSize, &oneptrmask[0], gcw, &state)
721725
}
726+
if d.link != nil {
727+
// The link field of a stack-allocated defer record might point
728+
// to a heap-allocated defer record. Keep that heap record live.
729+
scanblock(uintptr(unsafe.Pointer(&d.link)), sys.PtrSize, &oneptrmask[0], gcw, &state)
730+
}
731+
// Retain defer records themselves.
732+
// Defer records might not be reachable from the G through regular heap
733+
// tracing because the defer linked list might weave between the stack and the heap.
734+
if d.heap {
735+
scanblock(uintptr(unsafe.Pointer(&d)), sys.PtrSize, &oneptrmask[0], gcw, &state)
736+
}
722737
}
723738
if gp._panic != nil {
739+
// Panics are always stack allocated.
724740
state.putPtr(uintptr(unsafe.Pointer(gp._panic)))
725741
}
726742

0 commit comments

Comments
 (0)