Skip to content

Commit 28ee179

Browse files
committed
net: prevent cancelation goroutine from adjusting fd timeout after connect
This was previously fixed in https://golang.org/cl/21497 but not enough. Fixes #16523 Change-Id: I678543a656304c82d654e25e12fb094cd6cc87e8 Reviewed-on: https://go-review.googlesource.com/25330 Run-TryBot: Brad Fitzpatrick <[email protected]> Reviewed-by: Joe Tsai <[email protected]> TryBot-Result: Gobot Gobot <[email protected]>
1 parent 2629446 commit 28ee179

File tree

3 files changed

+149
-17
lines changed

3 files changed

+149
-17
lines changed

src/net/dial_unix_test.go

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
// Copyright 2016 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
// +build darwin dragonfly freebsd linux netbsd openbsd solaris
6+
7+
package net
8+
9+
import (
10+
"context"
11+
"syscall"
12+
"testing"
13+
"time"
14+
)
15+
16+
// Issue 16523
17+
func TestDialContextCancelRace(t *testing.T) {
18+
oldConnectFunc := connectFunc
19+
oldGetsockoptIntFunc := getsockoptIntFunc
20+
oldTestHookCanceledDial := testHookCanceledDial
21+
defer func() {
22+
connectFunc = oldConnectFunc
23+
getsockoptIntFunc = oldGetsockoptIntFunc
24+
testHookCanceledDial = oldTestHookCanceledDial
25+
}()
26+
27+
ln, err := newLocalListener("tcp")
28+
if err != nil {
29+
t.Fatal(err)
30+
}
31+
listenerDone := make(chan struct{})
32+
go func() {
33+
defer close(listenerDone)
34+
c, err := ln.Accept()
35+
if err == nil {
36+
c.Close()
37+
}
38+
}()
39+
defer func() { <-listenerDone }()
40+
defer ln.Close()
41+
42+
sawCancel := make(chan bool, 1)
43+
testHookCanceledDial = func() {
44+
sawCancel <- true
45+
}
46+
47+
ctx, cancelCtx := context.WithCancel(context.Background())
48+
49+
connectFunc = func(fd int, addr syscall.Sockaddr) error {
50+
err := oldConnectFunc(fd, addr)
51+
t.Logf("connect(%d, addr) = %v", fd, err)
52+
if err == nil {
53+
// On some operating systems, localhost
54+
// connects _sometimes_ succeed immediately.
55+
// Prevent that, so we exercise the code path
56+
// we're interested in testing. This seems
57+
// harmless. It makes FreeBSD 10.10 work when
58+
// run with many iterations. It failed about
59+
// half the time previously.
60+
return syscall.EINPROGRESS
61+
}
62+
return err
63+
}
64+
65+
getsockoptIntFunc = func(fd, level, opt int) (val int, err error) {
66+
val, err = oldGetsockoptIntFunc(fd, level, opt)
67+
t.Logf("getsockoptIntFunc(%d, %d, %d) = (%v, %v)", fd, level, opt, val, err)
68+
if level == syscall.SOL_SOCKET && opt == syscall.SO_ERROR && err == nil && val == 0 {
69+
t.Logf("canceling context")
70+
71+
// Cancel the context at just the moment which
72+
// caused the race in issue 16523.
73+
cancelCtx()
74+
75+
// And wait for the "interrupter" goroutine to
76+
// cancel the dial by messing with its write
77+
// timeout before returning.
78+
select {
79+
case <-sawCancel:
80+
t.Logf("saw cancel")
81+
case <-time.After(5 * time.Second):
82+
t.Errorf("didn't see cancel after 5 seconds")
83+
}
84+
}
85+
return
86+
}
87+
88+
var d Dialer
89+
c, err := d.DialContext(ctx, "tcp", ln.Addr().String())
90+
if err == nil {
91+
c.Close()
92+
t.Fatal("unexpected successful dial; want context canceled error")
93+
}
94+
95+
select {
96+
case <-ctx.Done():
97+
case <-time.After(5 * time.Second):
98+
t.Fatal("expected context to be canceled")
99+
}
100+
101+
oe, ok := err.(*OpError)
102+
if !ok || oe.Op != "dial" {
103+
t.Fatalf("Dial error = %#v; want dial *OpError", err)
104+
}
105+
if oe.Err != ctx.Err() {
106+
t.Errorf("DialContext = (%v, %v); want OpError with error %v", c, err, ctx.Err())
107+
}
108+
}

src/net/fd_unix.go

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ func (fd *netFD) name() string {
6464
return fd.net + ":" + ls + "->" + rs
6565
}
6666

67-
func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) error {
67+
func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) (ret error) {
6868
// Do not need to call fd.writeLock here,
6969
// because fd is not yet accessible to user,
7070
// so no concurrent operations are possible.
@@ -101,21 +101,44 @@ func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) error {
101101
defer fd.setWriteDeadline(noDeadline)
102102
}
103103

104-
// Wait for the goroutine converting context.Done into a write timeout
105-
// to exist, otherwise our caller might cancel the context and
106-
// cause fd.setWriteDeadline(aLongTimeAgo) to cancel a successful dial.
107-
done := make(chan bool) // must be unbuffered
108-
defer func() { done <- true }()
109-
go func() {
110-
select {
111-
case <-ctx.Done():
112-
// Force the runtime's poller to immediately give
113-
// up waiting for writability.
114-
fd.setWriteDeadline(aLongTimeAgo)
115-
<-done
116-
case <-done:
117-
}
118-
}()
104+
// Start the "interrupter" goroutine, if this context might be canceled.
105+
// (The background context cannot)
106+
//
107+
// The interrupter goroutine waits for the context to be done and
108+
// interrupts the dial (by altering the fd's write deadline, which
109+
// wakes up waitWrite).
110+
if ctx != context.Background() {
111+
// Wait for the interrupter goroutine to exit before returning
112+
// from connect.
113+
done := make(chan struct{})
114+
interruptRes := make(chan error)
115+
defer func() {
116+
close(done)
117+
if ctxErr := <-interruptRes; ctxErr != nil && ret == nil {
118+
// The interrupter goroutine called setWriteDeadline,
119+
// but the connect code below had returned from
120+
// waitWrite already and did a successful connect (ret
121+
// == nil). Because we've now poisoned the connection
122+
// by making it unwritable, don't return a successful
123+
// dial. This was issue 16523.
124+
ret = ctxErr
125+
fd.Close() // prevent a leak
126+
}
127+
}()
128+
go func() {
129+
select {
130+
case <-ctx.Done():
131+
// Force the runtime's poller to immediately give up
132+
// waiting for writability, unblocking waitWrite
133+
// below.
134+
fd.setWriteDeadline(aLongTimeAgo)
135+
testHookCanceledDial()
136+
interruptRes <- ctx.Err()
137+
case <-done:
138+
interruptRes <- nil
139+
}
140+
}()
141+
}
119142

120143
for {
121144
// Performing multiple connect system calls on a

src/net/hook_unix.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ package net
99
import "syscall"
1010

1111
var (
12-
testHookDialChannel = func() {} // see golang.org/issue/5349
12+
testHookDialChannel = func() {} // for golang.org/issue/5349
13+
testHookCanceledDial = func() {} // for golang.org/issue/16523
1314

1415
// Placeholders for socket system calls.
1516
socketFunc func(int, int, int) (int, error) = syscall.Socket

0 commit comments

Comments
 (0)