Skip to content

Commit 76a8409

Browse files
committed
runtime: update and restore g0 stack bounds at cgocallback
Currently, at a cgo callback where there is already a Go frame on the stack (i.e. C->Go->C->Go), we require that at the inner Go callback the SP is within the g0's stack bounds set by a previous callback. This is to prevent that the C code switches stack while having a Go frame on the stack, which we don't really support. But this could also happen when we cannot get accurate stack bounds, e.g. when pthread_getattr_np is not available. Since the stack bounds are just estimates based on the current SP, if there are multiple C->Go callbacks with various stack depth, it is possible that the SP of a later callback falls out of a previous call's estimate. This leads to runtime throw in a seemingly reasonable program. This CL changes it to save the old g0 stack bounds at cgocallback, update the bounds, and restore the old bounds at return. So each callback will get its own stack bounds based on the current SP, and when it returns, the outer callback has the its old stack bounds restored. Also, at a cgo callback when there is no Go frame on the stack, we currently always get new stack bounds. We do this because if we can only get estimated bounds based on the SP, and the stack depth varies a lot between two C->Go calls, the previous estimates may be off and we fall out or nearly fall out of the previous bounds. But this causes a performance problem: the pthread API to get accurate stack bounds (pthread_getattr_np) is very slow when called on the main thread. Getting the stack bounds every time significantly slows down repeated C->Go calls on the main thread. This CL fixes it by "caching" the stack bounds if they are accurate. I.e. at the second time Go calls into C, if the previous stack bounds are accurate, and the current SP is in bounds, we can be sure it is the same stack and we don't need to update the bounds. This avoids the repeated calls to pthread_getattr_np. If we cannot get the accurate bounds, we continue to update the stack bounds based on the SP, and that operation is very cheap. On a Linux/AMD64 machine with glibc: name old time/op new time/op delta CgoCallbackMainThread-8 96.4µs ± 3% 0.1µs ± 2% -99.92% (p=0.000 n=10+9) Fixes #68285. Fixes #68587. Change-Id: I3422badd5ad8ff63e1a733152d05fb7a44d5d435 Reviewed-on: https://go-review.googlesource.com/c/go/+/600296 LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Michael Pratt <[email protected]>
1 parent 555ef55 commit 76a8409

File tree

9 files changed

+201
-94
lines changed

9 files changed

+201
-94
lines changed

src/cmd/cgo/internal/testcarchive/carchive_test.go

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ import (
3333
"unicode"
3434
)
3535

36-
var globalSkip = func(t *testing.T) {}
36+
var globalSkip = func(t testing.TB) {}
3737

3838
// Program to run.
3939
var bin []string
@@ -59,12 +59,12 @@ func TestMain(m *testing.M) {
5959

6060
func testMain(m *testing.M) int {
6161
if testing.Short() && os.Getenv("GO_BUILDER_NAME") == "" {
62-
globalSkip = func(t *testing.T) { t.Skip("short mode and $GO_BUILDER_NAME not set") }
62+
globalSkip = func(t testing.TB) { t.Skip("short mode and $GO_BUILDER_NAME not set") }
6363
return m.Run()
6464
}
6565
if runtime.GOOS == "linux" {
6666
if _, err := os.Stat("/etc/alpine-release"); err == nil {
67-
globalSkip = func(t *testing.T) { t.Skip("skipping failing test on alpine - go.dev/issue/19938") }
67+
globalSkip = func(t testing.TB) { t.Skip("skipping failing test on alpine - go.dev/issue/19938") }
6868
return m.Run()
6969
}
7070
}
@@ -1291,8 +1291,8 @@ func TestPreemption(t *testing.T) {
12911291
}
12921292
}
12931293

1294-
// Issue 59294. Test calling Go function from C after using some
1295-
// stack space.
1294+
// Issue 59294 and 68285. Test calling Go function from C after with
1295+
// various stack space.
12961296
func TestDeepStack(t *testing.T) {
12971297
globalSkip(t)
12981298
testenv.MustHaveGoBuild(t)
@@ -1350,6 +1350,53 @@ func TestDeepStack(t *testing.T) {
13501350
}
13511351
}
13521352

1353+
func BenchmarkCgoCallbackMainThread(b *testing.B) {
1354+
// Benchmark for calling into Go fron C main thread.
1355+
// See issue #68587.
1356+
//
1357+
// It uses a subprocess, which is a C binary that calls
1358+
// Go on the main thread b.N times. There is some overhead
1359+
// for launching the subprocess. It is probably fine when
1360+
// b.N is large.
1361+
1362+
globalSkip(b)
1363+
testenv.MustHaveGoBuild(b)
1364+
testenv.MustHaveCGO(b)
1365+
testenv.MustHaveBuildMode(b, "c-archive")
1366+
1367+
if !testWork {
1368+
defer func() {
1369+
os.Remove("testp10" + exeSuffix)
1370+
os.Remove("libgo10.a")
1371+
os.Remove("libgo10.h")
1372+
}()
1373+
}
1374+
1375+
cmd := exec.Command("go", "build", "-buildmode=c-archive", "-o", "libgo10.a", "./libgo10")
1376+
out, err := cmd.CombinedOutput()
1377+
b.Logf("%v\n%s", cmd.Args, out)
1378+
if err != nil {
1379+
b.Fatal(err)
1380+
}
1381+
1382+
ccArgs := append(cc, "-o", "testp10"+exeSuffix, "main10.c", "libgo10.a")
1383+
out, err = exec.Command(ccArgs[0], ccArgs[1:]...).CombinedOutput()
1384+
b.Logf("%v\n%s", ccArgs, out)
1385+
if err != nil {
1386+
b.Fatal(err)
1387+
}
1388+
1389+
argv := cmdToRun("./testp10")
1390+
argv = append(argv, fmt.Sprint(b.N))
1391+
cmd = exec.Command(argv[0], argv[1:]...)
1392+
1393+
b.ResetTimer()
1394+
err = cmd.Run()
1395+
if err != nil {
1396+
b.Fatal(err)
1397+
}
1398+
}
1399+
13531400
func TestSharedObject(t *testing.T) {
13541401
// Test that we can put a Go c-archive into a C shared object.
13551402
globalSkip(t)
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// Copyright 2024 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package main
6+
7+
import "C"
8+
9+
//export GoF
10+
func GoF() {}
11+
12+
func main() {}

src/cmd/cgo/internal/testcarchive/testdata/libgo9/a.go

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,29 @@ package main
66

77
import "runtime"
88

9+
// extern void callGoWithVariousStack(int);
910
import "C"
1011

1112
func main() {}
1213

1314
//export GoF
14-
func GoF() { runtime.GC() }
15+
func GoF(p int32) {
16+
runtime.GC()
17+
if p != 0 {
18+
panic("panic")
19+
}
20+
}
21+
22+
//export callGoWithVariousStackAndGoFrame
23+
func callGoWithVariousStackAndGoFrame(p int32) {
24+
if p != 0 {
25+
defer func() {
26+
e := recover()
27+
if e == nil {
28+
panic("did not panic")
29+
}
30+
runtime.GC()
31+
}()
32+
}
33+
C.callGoWithVariousStack(C.int(p));
34+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Copyright 2024 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
#include <stdio.h>
6+
#include <stdlib.h>
7+
8+
#include "libgo10.h"
9+
10+
int main(int argc, char **argv) {
11+
int n, i;
12+
13+
if (argc != 2) {
14+
perror("wrong arg");
15+
return 2;
16+
}
17+
n = atoi(argv[1]);
18+
for (i = 0; i < n; i++)
19+
GoF();
20+
21+
return 0;
22+
}

src/cmd/cgo/internal/testcarchive/testdata/main9.c

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,27 @@
66

77
void use(int *x) { (*x)++; }
88

9-
void callGoFWithDeepStack() {
9+
void callGoFWithDeepStack(int p) {
1010
int x[10000];
1111

1212
use(&x[0]);
1313
use(&x[9999]);
1414

15-
GoF();
15+
GoF(p);
1616

1717
use(&x[0]);
1818
use(&x[9999]);
1919
}
2020

21+
void callGoWithVariousStack(int p) {
22+
GoF(0); // call GoF without using much stack
23+
callGoFWithDeepStack(p); // call GoF with a deep stack
24+
GoF(0); // again on a shallow stack
25+
}
26+
2127
int main() {
22-
GoF(); // call GoF without using much stack
23-
callGoFWithDeepStack(); // call GoF with a deep stack
28+
callGoWithVariousStack(0);
29+
30+
callGoWithVariousStackAndGoFrame(0); // normal execution
31+
callGoWithVariousStackAndGoFrame(1); // panic and recover
2432
}

src/runtime/cgo/gcc_stack_unix.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,11 @@ x_cgo_getstackbound(uintptr bounds[2])
3131
pthread_attr_get_np(pthread_self(), &attr);
3232
pthread_attr_getstack(&attr, &addr, &size); // low address
3333
#else
34-
// We don't know how to get the current stacks, so assume they are the
35-
// same as the default stack bounds.
36-
pthread_attr_getstacksize(&attr, &size);
37-
addr = __builtin_frame_address(0) + 4096 - size;
34+
// We don't know how to get the current stacks, leave it as
35+
// 0 and the caller will use an estimate based on the current
36+
// SP.
37+
addr = 0;
38+
size = 0;
3839
#endif
3940
pthread_attr_destroy(&attr);
4041

src/runtime/cgocall.go

Lines changed: 34 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -231,61 +231,44 @@ func cgocall(fn, arg unsafe.Pointer) int32 {
231231
func callbackUpdateSystemStack(mp *m, sp uintptr, signal bool) {
232232
g0 := mp.g0
233233

234-
inBound := sp > g0.stack.lo && sp <= g0.stack.hi
235-
if mp.ncgo > 0 && !inBound {
236-
// ncgo > 0 indicates that this M was in Go further up the stack
237-
// (it called C and is now receiving a callback).
238-
//
239-
// !inBound indicates that we were called with SP outside the
240-
// expected system stack bounds (C changed the stack out from
241-
// under us between the cgocall and cgocallback?).
242-
//
243-
// It is not safe for the C call to change the stack out from
244-
// under us, so throw.
245-
246-
// Note that this case isn't possible for signal == true, as
247-
// that is always passing a new M from needm.
248-
249-
// Stack is bogus, but reset the bounds anyway so we can print.
250-
hi := g0.stack.hi
251-
lo := g0.stack.lo
252-
g0.stack.hi = sp + 1024
253-
g0.stack.lo = sp - 32*1024
254-
g0.stackguard0 = g0.stack.lo + stackGuard
255-
g0.stackguard1 = g0.stackguard0
256-
257-
print("M ", mp.id, " procid ", mp.procid, " runtime: cgocallback with sp=", hex(sp), " out of bounds [", hex(lo), ", ", hex(hi), "]")
258-
print("\n")
259-
exit(2)
260-
}
261-
262234
if !mp.isextra {
263235
// We allocated the stack for standard Ms. Don't replace the
264236
// stack bounds with estimated ones when we already initialized
265237
// with the exact ones.
266238
return
267239
}
268240

269-
// This M does not have Go further up the stack. However, it may have
270-
// previously called into Go, initializing the stack bounds. Between
271-
// that call returning and now the stack may have changed (perhaps the
272-
// C thread is running a coroutine library). We need to update the
273-
// stack bounds for this case.
241+
inBound := sp > g0.stack.lo && sp <= g0.stack.hi
242+
if inBound && mp.g0StackAccurate {
243+
// This M has called into Go before and has the stack bounds
244+
// initialized. We have the accurate stack bounds, and the SP
245+
// is in bounds. We expect it continues to run within the same
246+
// bounds.
247+
return
248+
}
249+
250+
// We don't have an accurate stack bounds (either it never calls
251+
// into Go before, or we couldn't get the accurate bounds), or the
252+
// current SP is not within the previous bounds (the stack may have
253+
// changed between calls). We need to update the stack bounds.
274254
//
275255
// N.B. we need to update the stack bounds even if SP appears to
276-
// already be in bounds. Our "bounds" may actually be estimated dummy
277-
// bounds (below). The actual stack bounds could have shifted but still
278-
// have partial overlap with our dummy bounds. If we failed to update
279-
// in that case, we could find ourselves seemingly called near the
280-
// bottom of the stack bounds, where we quickly run out of space.
256+
// already be in bounds, if our bounds are estimated dummy bounds
257+
// (below). We may be in a different region within the same actual
258+
// stack bounds, but our estimates were not accurate. Or the actual
259+
// stack bounds could have shifted but still have partial overlap with
260+
// our dummy bounds. If we failed to update in that case, we could find
261+
// ourselves seemingly called near the bottom of the stack bounds, where
262+
// we quickly run out of space.
281263

282264
// Set the stack bounds to match the current stack. If we don't
283265
// actually know how big the stack is, like we don't know how big any
284266
// scheduling stack is, but we assume there's at least 32 kB. If we
285267
// can get a more accurate stack bound from pthread, use that, provided
286-
// it actually contains SP..
268+
// it actually contains SP.
287269
g0.stack.hi = sp + 1024
288270
g0.stack.lo = sp - 32*1024
271+
mp.g0StackAccurate = false
289272
if !signal && _cgo_getstackbound != nil {
290273
// Don't adjust if called from the signal handler.
291274
// We are on the signal stack, not the pthread stack.
@@ -296,12 +279,16 @@ func callbackUpdateSystemStack(mp *m, sp uintptr, signal bool) {
296279
asmcgocall(_cgo_getstackbound, unsafe.Pointer(&bounds))
297280
// getstackbound is an unsupported no-op on Windows.
298281
//
282+
// On Unix systems, if the API to get accurate stack bounds is
283+
// not available, it returns zeros.
284+
//
299285
// Don't use these bounds if they don't contain SP. Perhaps we
300286
// were called by something not using the standard thread
301287
// stack.
302288
if bounds[0] != 0 && sp > bounds[0] && sp <= bounds[1] {
303289
g0.stack.lo = bounds[0]
304290
g0.stack.hi = bounds[1]
291+
mp.g0StackAccurate = true
305292
}
306293
}
307294
g0.stackguard0 = g0.stack.lo + stackGuard
@@ -319,6 +306,8 @@ func cgocallbackg(fn, frame unsafe.Pointer, ctxt uintptr) {
319306
}
320307

321308
sp := gp.m.g0.sched.sp // system sp saved by cgocallback.
309+
oldStack := gp.m.g0.stack
310+
oldAccurate := gp.m.g0StackAccurate
322311
callbackUpdateSystemStack(gp.m, sp, false)
323312

324313
// The call from C is on gp.m's g0 stack, so we must ensure
@@ -380,6 +369,12 @@ func cgocallbackg(fn, frame unsafe.Pointer, ctxt uintptr) {
380369
reentersyscall(savedpc, uintptr(savedsp), uintptr(savedbp))
381370

382371
gp.m.winsyscall = winsyscall
372+
373+
// Restore the old g0 stack bounds
374+
gp.m.g0.stack = oldStack
375+
gp.m.g0.stackguard0 = oldStack.lo + stackGuard
376+
gp.m.g0.stackguard1 = gp.m.g0.stackguard0
377+
gp.m.g0StackAccurate = oldAccurate
383378
}
384379

385380
func cgocallbackg1(fn, frame unsafe.Pointer, ctxt uintptr) {

src/runtime/proc.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2550,6 +2550,7 @@ func dropm() {
25502550
g0.stack.lo = 0
25512551
g0.stackguard0 = 0
25522552
g0.stackguard1 = 0
2553+
mp.g0StackAccurate = false
25532554

25542555
putExtraM(mp)
25552556

0 commit comments

Comments
 (0)