Skip to content

Commit a4650a2

Browse files
committed
cmd/compile: avoid write barrier in append fast path
When we are writing the result of an append back to the same slice, we don’t need a write barrier on the fast path. This re-implements an optimization that was present in the old backend. Updates #14921 Fixes #14969 Sample code: var x []byte func p() { x = append(x, 1, 2, 3) } Before: "".p t=1 size=224 args=0x0 locals=0x48 0x0000 00000 (append.go:21) TEXT "".p(SB), $72-0 0x0000 00000 (append.go:21) MOVQ (TLS), CX 0x0009 00009 (append.go:21) CMPQ SP, 16(CX) 0x000d 00013 (append.go:21) JLS 199 0x0013 00019 (append.go:21) SUBQ $72, SP 0x0017 00023 (append.go:21) FUNCDATA $0, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0017 00023 (append.go:21) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0017 00023 (append.go:19) MOVQ "".x+16(SB), CX 0x001e 00030 (append.go:19) MOVQ "".x(SB), DX 0x0025 00037 (append.go:19) MOVQ "".x+8(SB), BX 0x002c 00044 (append.go:19) MOVQ BX, "".autotmp_0+64(SP) 0x0031 00049 (append.go:22) LEAQ 3(BX), BP 0x0035 00053 (append.go:22) CMPQ BP, CX 0x0038 00056 (append.go:22) JGT $0, 131 0x003a 00058 (append.go:22) MOVB $1, (DX)(BX*1) 0x003e 00062 (append.go:22) MOVB $2, 1(DX)(BX*1) 0x0043 00067 (append.go:22) MOVB $3, 2(DX)(BX*1) 0x0048 00072 (append.go:22) MOVQ BP, "".x+8(SB) 0x004f 00079 (append.go:22) MOVQ CX, "".x+16(SB) 0x0056 00086 (append.go:22) MOVL runtime.writeBarrier(SB), AX 0x005c 00092 (append.go:22) TESTB AL, AL 0x005e 00094 (append.go:22) JNE $0, 108 0x0060 00096 (append.go:22) MOVQ DX, "".x(SB) 0x0067 00103 (append.go:23) ADDQ $72, SP 0x006b 00107 (append.go:23) RET 0x006c 00108 (append.go:22) LEAQ "".x(SB), CX 0x0073 00115 (append.go:22) MOVQ CX, (SP) 0x0077 00119 (append.go:22) MOVQ DX, 8(SP) 0x007c 00124 (append.go:22) PCDATA $0, $0 0x007c 00124 (append.go:22) CALL runtime.writebarrierptr(SB) 0x0081 00129 (append.go:23) JMP 103 0x0083 00131 (append.go:22) LEAQ type.[]uint8(SB), AX 0x008a 00138 (append.go:22) MOVQ AX, (SP) 0x008e 00142 (append.go:22) MOVQ DX, 8(SP) 0x0093 00147 (append.go:22) MOVQ BX, 16(SP) 0x0098 
00152 (append.go:22) MOVQ CX, 24(SP) 0x009d 00157 (append.go:22) MOVQ BP, 32(SP) 0x00a2 00162 (append.go:22) PCDATA $0, $0 0x00a2 00162 (append.go:22) CALL runtime.growslice(SB) 0x00a7 00167 (append.go:22) MOVQ 40(SP), DX 0x00ac 00172 (append.go:22) MOVQ 48(SP), AX 0x00b1 00177 (append.go:22) MOVQ 56(SP), CX 0x00b6 00182 (append.go:22) ADDQ $3, AX 0x00ba 00186 (append.go:19) MOVQ "".autotmp_0+64(SP), BX 0x00bf 00191 (append.go:22) MOVQ AX, BP 0x00c2 00194 (append.go:22) JMP 58 0x00c7 00199 (append.go:22) NOP 0x00c7 00199 (append.go:21) CALL runtime.morestack_noctxt(SB) 0x00cc 00204 (append.go:21) JMP 0 After: "".p t=1 size=208 args=0x0 locals=0x48 0x0000 00000 (append.go:21) TEXT "".p(SB), $72-0 0x0000 00000 (append.go:21) MOVQ (TLS), CX 0x0009 00009 (append.go:21) CMPQ SP, 16(CX) 0x000d 00013 (append.go:21) JLS 191 0x0013 00019 (append.go:21) SUBQ $72, SP 0x0017 00023 (append.go:21) FUNCDATA $0, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0017 00023 (append.go:21) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0017 00023 (append.go:19) MOVQ "".x+16(SB), CX 0x001e 00030 (append.go:19) MOVQ "".x+8(SB), DX 0x0025 00037 (append.go:19) MOVQ DX, "".autotmp_0+64(SP) 0x002a 00042 (append.go:19) MOVQ "".x(SB), BX 0x0031 00049 (append.go:22) LEAQ 3(DX), BP 0x0035 00053 (append.go:22) MOVQ BP, "".x+8(SB) 0x003c 00060 (append.go:22) CMPQ BP, CX 0x003f 00063 (append.go:22) JGT $0, 84 0x0041 00065 (append.go:22) MOVB $1, (BX)(DX*1) 0x0045 00069 (append.go:22) MOVB $2, 1(BX)(DX*1) 0x004a 00074 (append.go:22) MOVB $3, 2(BX)(DX*1) 0x004f 00079 (append.go:23) ADDQ $72, SP 0x0053 00083 (append.go:23) RET 0x0054 00084 (append.go:22) LEAQ type.[]uint8(SB), AX 0x005b 00091 (append.go:22) MOVQ AX, (SP) 0x005f 00095 (append.go:22) MOVQ BX, 8(SP) 0x0064 00100 (append.go:22) MOVQ DX, 16(SP) 0x0069 00105 (append.go:22) MOVQ CX, 24(SP) 0x006e 00110 (append.go:22) MOVQ BP, 32(SP) 0x0073 00115 (append.go:22) PCDATA $0, $0 0x0073 00115 (append.go:22) CALL runtime.growslice(SB) 
0x0078 00120 (append.go:22) MOVQ 40(SP), CX 0x007d 00125 (append.go:22) MOVQ 56(SP), AX 0x0082 00130 (append.go:22) MOVQ AX, "".x+16(SB) 0x0089 00137 (append.go:22) MOVL runtime.writeBarrier(SB), AX 0x008f 00143 (append.go:22) TESTB AL, AL 0x0091 00145 (append.go:22) JNE $0, 168 0x0093 00147 (append.go:22) MOVQ CX, "".x(SB) 0x009a 00154 (append.go:22) MOVQ "".x(SB), BX 0x00a1 00161 (append.go:19) MOVQ "".autotmp_0+64(SP), DX 0x00a6 00166 (append.go:22) JMP 65 0x00a8 00168 (append.go:22) LEAQ "".x(SB), DX 0x00af 00175 (append.go:22) MOVQ DX, (SP) 0x00b3 00179 (append.go:22) MOVQ CX, 8(SP) 0x00b8 00184 (append.go:22) PCDATA $0, $0 0x00b8 00184 (append.go:22) CALL runtime.writebarrierptr(SB) 0x00bd 00189 (append.go:22) JMP 154 0x00bf 00191 (append.go:22) NOP 0x00bf 00191 (append.go:21) CALL runtime.morestack_noctxt(SB) 0x00c4 00196 (append.go:21) JMP 0 Change-Id: I77a41ad3a22557a4bb4654de7d6d24a029efe34a Reviewed-on: https://go-review.googlesource.com/21813 Run-TryBot: Josh Bleecher Snyder <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Keith Randall <[email protected]>
1 parent 32efa16 commit a4650a2

File tree

1 file changed

+96
-27
lines changed
  • src/cmd/compile/internal/gc

1 file changed

+96
-27
lines changed

src/cmd/compile/internal/gc/ssa.go

Lines changed: 96 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -683,14 +683,27 @@ func (s *state) stmt(n *Node) {
683683

684684
// Evaluate RHS.
685685
rhs := n.Right
686-
if rhs != nil && (rhs.Op == OSTRUCTLIT || rhs.Op == OARRAYLIT) {
687-
// All literals with nonzero fields have already been
688-
// rewritten during walk. Any that remain are just T{}
689-
// or equivalents. Use the zero value.
690-
if !iszero(rhs) {
691-
Fatalf("literal with nonzero value in SSA: %v", rhs)
686+
if rhs != nil {
687+
switch rhs.Op {
688+
case OSTRUCTLIT, OARRAYLIT:
689+
// All literals with nonzero fields have already been
690+
// rewritten during walk. Any that remain are just T{}
691+
// or equivalents. Use the zero value.
692+
if !iszero(rhs) {
693+
Fatalf("literal with nonzero value in SSA: %v", rhs)
694+
}
695+
rhs = nil
696+
case OAPPEND:
697+
// If we're writing the result of an append back to the same slice,
698+
// handle it specially to avoid write barriers on the fast (non-growth) path.
699+
// If the slice can be SSA'd, it'll be on the stack,
700+
// so there will be no write barriers,
701+
// so there's no need to attempt to prevent them.
702+
if samesafeexpr(n.Left, rhs.List.First()) && !s.canSSA(n.Left) {
703+
s.append(rhs, true)
704+
return
705+
}
692706
}
693-
rhs = nil
694707
}
695708
var r *ssa.Value
696709
needwb := n.Op == OASWB && rhs != nil
@@ -709,11 +722,11 @@ func (s *state) stmt(n *Node) {
709722
}
710723
}
711724
if rhs != nil && rhs.Op == OAPPEND {
712-
// Yuck! The frontend gets rid of the write barrier, but we need it!
713-
// At least, we need it in the case where growslice is called.
714-
// TODO: Do the write barrier on just the growslice branch.
725+
// The frontend gets rid of the write barrier to enable the special OAPPEND
726+
// handling above, but since this is not a special case, we need it.
715727
// TODO: just add a ptr graying to the end of growslice?
716-
// TODO: check whether we need to do this for ODOTTYPE and ORECV also.
728+
// TODO: check whether we need to provide special handling and a write barrier
729+
// for ODOTTYPE and ORECV also.
717730
// They get similar wb-removal treatment in walk.go:OAS.
718731
needwb = true
719732
}
@@ -2079,33 +2092,65 @@ func (s *state) expr(n *Node) *ssa.Value {
20792092
return s.newValue1(ssa.OpGetG, n.Type, s.mem())
20802093

20812094
case OAPPEND:
2082-
return s.exprAppend(n)
2095+
return s.append(n, false)
20832096

20842097
default:
20852098
s.Unimplementedf("unhandled expr %s", opnames[n.Op])
20862099
return nil
20872100
}
20882101
}
20892102

2090-
// exprAppend converts an OAPPEND node n to an ssa.Value, adds it to s, and returns the Value.
2091-
func (s *state) exprAppend(n *Node) *ssa.Value {
2092-
// append(s, e1, e2, e3). Compile like:
2103+
// append converts an OAPPEND node to SSA.
2104+
// If inplace is false, it converts the OAPPEND expression n to an ssa.Value,
2105+
// adds it to s, and returns the Value.
2106+
// If inplace is true, it writes the result of the OAPPEND expression n
2107+
// back to the slice being appended to, and returns nil.
2108+
// inplace MUST be set to false if the slice can be SSA'd.
2109+
func (s *state) append(n *Node, inplace bool) *ssa.Value {
2110+
// If inplace is false, process as expression "append(s, e1, e2, e3)":
2111+
//
20932112
// ptr, len, cap := s
20942113
// newlen := len + 3
2095-
// if newlen > s.cap {
2114+
// if newlen > cap {
20962115
// ptr, len, cap = growslice(s, newlen)
20972116
// newlen = len + 3 // recalculate to avoid a spill
20982117
// }
2118+
// // with write barriers, if needed:
2119+
// *(ptr+len) = e1
2120+
// *(ptr+len+1) = e2
2121+
// *(ptr+len+2) = e3
2122+
// return makeslice(ptr, newlen, cap)
2123+
//
2124+
//
2125+
// If inplace is true, process as statement "s = append(s, e1, e2, e3)":
2126+
//
2127+
// a := &s
2128+
// ptr, len, cap := s
2129+
// newlen := len + 3
2130+
// *a.len = newlen // store newlen immediately to avoid a spill
2131+
// if newlen > cap {
2132+
// newptr, _, newcap = growslice(ptr, len, cap, newlen)
2133+
// *a.cap = newcap // write before ptr to avoid a spill
2134+
// *a.ptr = newptr // with write barrier
2135+
// }
2136+
// // with write barriers, if needed:
20992137
// *(ptr+len) = e1
21002138
// *(ptr+len+1) = e2
21012139
// *(ptr+len+2) = e3
2102-
// makeslice(ptr, newlen, cap)
21032140

21042141
et := n.Type.Elem()
21052142
pt := Ptrto(et)
21062143

21072144
// Evaluate slice
2108-
slice := s.expr(n.List.First())
2145+
sn := n.List.First() // the slice node is the first in the list
2146+
2147+
var slice, addr *ssa.Value
2148+
if inplace {
2149+
addr = s.addr(sn, false)
2150+
slice = s.newValue2(ssa.OpLoad, n.Type, addr, s.mem())
2151+
} else {
2152+
slice = s.expr(sn)
2153+
}
21092154

21102155
// Allocate new blocks
21112156
grow := s.f.NewBlock(ssa.BlockPlain)
@@ -2117,10 +2162,20 @@ func (s *state) exprAppend(n *Node) *ssa.Value {
21172162
l := s.newValue1(ssa.OpSliceLen, Types[TINT], slice)
21182163
c := s.newValue1(ssa.OpSliceCap, Types[TINT], slice)
21192164
nl := s.newValue2(s.ssaOp(OADD, Types[TINT]), Types[TINT], l, s.constInt(Types[TINT], nargs))
2165+
2166+
if inplace {
2167+
lenaddr := s.newValue1I(ssa.OpOffPtr, pt, int64(Array_nel), addr)
2168+
s.vars[&memVar] = s.newValue3I(ssa.OpStore, ssa.TypeMem, s.config.IntSize, lenaddr, nl, s.mem())
2169+
}
2170+
21202171
cmp := s.newValue2(s.ssaOp(OGT, Types[TINT]), Types[TBOOL], nl, c)
21212172
s.vars[&ptrVar] = p
2122-
s.vars[&newlenVar] = nl
2123-
s.vars[&capVar] = c
2173+
2174+
if !inplace {
2175+
s.vars[&newlenVar] = nl
2176+
s.vars[&capVar] = c
2177+
}
2178+
21242179
b := s.endBlock()
21252180
b.Kind = ssa.BlockIf
21262181
b.Likely = ssa.BranchUnlikely
@@ -2134,9 +2189,18 @@ func (s *state) exprAppend(n *Node) *ssa.Value {
21342189

21352190
r := s.rtcall(growslice, true, []*Type{pt, Types[TINT], Types[TINT]}, taddr, p, l, c, nl)
21362191

2137-
s.vars[&ptrVar] = r[0]
2138-
s.vars[&newlenVar] = s.newValue2(s.ssaOp(OADD, Types[TINT]), Types[TINT], r[1], s.constInt(Types[TINT], nargs))
2139-
s.vars[&capVar] = r[2]
2192+
if inplace {
2193+
capaddr := s.newValue1I(ssa.OpOffPtr, pt, int64(Array_cap), addr)
2194+
s.vars[&memVar] = s.newValue3I(ssa.OpStore, ssa.TypeMem, s.config.IntSize, capaddr, r[2], s.mem())
2195+
s.insertWBstore(pt, addr, r[0], n.Lineno, 0)
2196+
// load the value we just stored to avoid having to spill it
2197+
s.vars[&ptrVar] = s.newValue2(ssa.OpLoad, pt, addr, s.mem())
2198+
} else {
2199+
s.vars[&ptrVar] = r[0]
2200+
s.vars[&newlenVar] = s.newValue2(s.ssaOp(OADD, Types[TINT]), Types[TINT], r[1], s.constInt(Types[TINT], nargs))
2201+
s.vars[&capVar] = r[2]
2202+
}
2203+
21402204
b = s.endBlock()
21412205
b.AddEdgeTo(assign)
21422206

@@ -2156,9 +2220,11 @@ func (s *state) exprAppend(n *Node) *ssa.Value {
21562220
}
21572221
}
21582222

2159-
p = s.variable(&ptrVar, pt) // generates phi for ptr
2160-
nl = s.variable(&newlenVar, Types[TINT]) // generates phi for nl
2161-
c = s.variable(&capVar, Types[TINT]) // generates phi for cap
2223+
p = s.variable(&ptrVar, pt) // generates phi for ptr
2224+
if !inplace {
2225+
nl = s.variable(&newlenVar, Types[TINT]) // generates phi for nl
2226+
c = s.variable(&capVar, Types[TINT]) // generates phi for cap
2227+
}
21622228
p2 := s.newValue2(ssa.OpPtrIndex, pt, p, l)
21632229
// TODO: just one write barrier call for all of these writes?
21642230
// TODO: maybe just one writeBarrier.enabled check?
@@ -2179,10 +2245,13 @@ func (s *state) exprAppend(n *Node) *ssa.Value {
21792245
}
21802246
}
21812247

2182-
// make result
21832248
delete(s.vars, &ptrVar)
2249+
if inplace {
2250+
return nil
2251+
}
21842252
delete(s.vars, &newlenVar)
21852253
delete(s.vars, &capVar)
2254+
// make result
21862255
return s.newValue3(ssa.OpSliceMake, n.Type, p, nl, c)
21872256
}
21882257

0 commit comments

Comments
 (0)