Skip to content

Commit 254169d

Browse files
committed
crypto/tls: fix deadlock when racing to complete handshake.
After renegotiation support was added (af125a5) it's possible for a Write to block on a Read when racing to complete the handshake: 1. The Write determines that a handshake is needed and tries to take the neccesary locks in the correct order. 2. The Read also determines that a handshake is needed and wins the race to take the locks. 3. The Read goroutine completes the handshake and wins a race to unlock and relock c.in, which it'll hold when waiting for more network data. If the application-level protocol requires the Write to complete before data can be read then the system as a whole will deadlock. Unfortunately it doesn't appear possible to reverse the locking order of c.in and handshakeMutex because we might read a renegotiation request at any point and need to be able to do a handshake without unlocking. So this change adds a sync.Cond that indicates that a goroutine has committed to doing a handshake. Other interested goroutines can wait on that Cond when needed. The test for this isn't great. I was able to reproduce the deadlock with it only when building with -race. (Because -race happened to alter the timing just enough.) Fixes #17101. Change-Id: I4e8757f7b82a84e46c9963a977d089f0fb675495 Reviewed-on: https://go-review.googlesource.com/29164 Reviewed-by: Brad Fitzpatrick <[email protected]> Reviewed-by: Ian Lance Taylor <[email protected]>
1 parent ad5d91c commit 254169d

File tree

2 files changed

+107
-15
lines changed

2 files changed

+107
-15
lines changed

src/crypto/tls/conn.go

Lines changed: 53 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,14 @@ type Conn struct {
2929

3030
// constant after handshake; protected by handshakeMutex
3131
handshakeMutex sync.Mutex // handshakeMutex < in.Mutex, out.Mutex, errMutex
32-
handshakeErr error // error resulting from handshake
33-
vers uint16 // TLS version
34-
haveVers bool // version has been negotiated
35-
config *Config // configuration passed to constructor
32+
// handshakeCond, if not nil, indicates that a goroutine is committed
33+
// to running the handshake for this Conn. Other goroutines that need
34+
// to wait for the handshake can wait on this, under handshakeMutex.
35+
handshakeCond *sync.Cond
36+
handshakeErr error // error resulting from handshake
37+
vers uint16 // TLS version
38+
haveVers bool // version has been negotiated
39+
config *Config // configuration passed to constructor
3640
// handshakeComplete is true if the connection is currently transfering
3741
// application data (i.e. is not currently processing a handshake).
3842
handshakeComplete bool
@@ -1206,26 +1210,50 @@ func (c *Conn) Handshake() error {
12061210
// need to check whether a handshake is pending (such as Write) to
12071211
// block.
12081212
//
1209-
// Thus we take c.handshakeMutex first and, if we find that a handshake
1210-
// is needed, then we unlock, acquire c.in and c.handshakeMutex in the
1211-
// correct order, and check again.
1213+
// Thus we first take c.handshakeMutex to check whether a handshake is
1214+
// needed.
1215+
//
1216+
// If so then, previously, this code would unlock handshakeMutex and
1217+
// then lock c.in and handshakeMutex in the correct order to run the
1218+
// handshake. The problem was that it was possible for a Read to
1219+
// complete the handshake once handshakeMutex was unlocked and then
1220+
// keep c.in while waiting for network data. Thus a concurrent
1221+
// operation could be blocked on c.in.
1222+
//
1223+
// Thus handshakeCond is used to signal that a goroutine is committed
1224+
// to running the handshake and other goroutines can wait on it if they
1225+
// need. handshakeCond is protected by handshakeMutex.
12121226
c.handshakeMutex.Lock()
12131227
defer c.handshakeMutex.Unlock()
12141228

1215-
for i := 0; i < 2; i++ {
1216-
if i == 1 {
1217-
c.handshakeMutex.Unlock()
1218-
c.in.Lock()
1219-
defer c.in.Unlock()
1220-
c.handshakeMutex.Lock()
1221-
}
1222-
1229+
for {
12231230
if err := c.handshakeErr; err != nil {
12241231
return err
12251232
}
12261233
if c.handshakeComplete {
12271234
return nil
12281235
}
1236+
if c.handshakeCond == nil {
1237+
break
1238+
}
1239+
1240+
c.handshakeCond.Wait()
1241+
}
1242+
1243+
// Set handshakeCond to indicate that this goroutine is committing to
1244+
// running the handshake.
1245+
c.handshakeCond = sync.NewCond(&c.handshakeMutex)
1246+
c.handshakeMutex.Unlock()
1247+
1248+
c.in.Lock()
1249+
defer c.in.Unlock()
1250+
1251+
c.handshakeMutex.Lock()
1252+
1253+
// The handshake cannot have completed when handshakeMutex was unlocked
1254+
// because this goroutine set handshakeCond.
1255+
if c.handshakeErr != nil || c.handshakeComplete {
1256+
panic("handshake should not have been able to complete after handshakeCond was set")
12291257
}
12301258

12311259
if c.isClient {
@@ -1240,6 +1268,16 @@ func (c *Conn) Handshake() error {
12401268
// alert that might be left in the buffer.
12411269
c.flush()
12421270
}
1271+
1272+
if c.handshakeErr == nil && !c.handshakeComplete {
1273+
panic("handshake should have had a result.")
1274+
}
1275+
1276+
// Wake any other goroutines that are waiting for this handshake to
1277+
// complete.
1278+
c.handshakeCond.Broadcast()
1279+
c.handshakeCond = nil
1280+
12431281
return c.handshakeErr
12441282
}
12451283

src/crypto/tls/handshake_client_test.go

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1168,3 +1168,57 @@ func TestAlertFlushing(t *testing.T) {
11681168
t.Errorf("expected server handshake to complete with one write, but saw %d", n)
11691169
}
11701170
}
1171+
1172+
func TestHandshakeRace(t *testing.T) {
1173+
// This test races a Read and Write to try and complete a handshake in
1174+
// order to provide some evidence that there are no races or deadlocks
1175+
// in the handshake locking.
1176+
for i := 0; i < 32; i++ {
1177+
c, s := net.Pipe()
1178+
1179+
go func() {
1180+
server := Server(s, testConfig)
1181+
if err := server.Handshake(); err != nil {
1182+
panic(err)
1183+
}
1184+
1185+
var request [1]byte
1186+
if n, err := server.Read(request[:]); err != nil || n != 1 {
1187+
panic(err)
1188+
}
1189+
1190+
server.Write(request[:])
1191+
server.Close()
1192+
}()
1193+
1194+
startWrite := make(chan struct{})
1195+
startRead := make(chan struct{})
1196+
readDone := make(chan struct{})
1197+
1198+
client := Client(c, testConfig)
1199+
go func() {
1200+
<-startWrite
1201+
var request [1]byte
1202+
client.Write(request[:])
1203+
}()
1204+
1205+
go func() {
1206+
<-startRead
1207+
var reply [1]byte
1208+
if n, err := client.Read(reply[:]); err != nil || n != 1 {
1209+
panic(err)
1210+
}
1211+
c.Close()
1212+
readDone <- struct{}{}
1213+
}()
1214+
1215+
if i&1 == 1 {
1216+
startWrite <- struct{}{}
1217+
startRead <- struct{}{}
1218+
} else {
1219+
startRead <- struct{}{}
1220+
startWrite <- struct{}{}
1221+
}
1222+
<-readDone
1223+
}
1224+
}

0 commit comments

Comments
 (0)