Skip to content

Commit 4d2ac54

Browse files
committed
net/http: fix spurious logging in Transport when server closes idle conn
In https://golang.org/3210, Transport errors occurring before receiving response headers were wrapped in another error type to indicate to the retry logic elsewhere that the request might be re-tryable. But a check for err == io.EOF was missed, which then became false once io.EOF was wrapped in the beforeRespHeaderError type. The beforeRespHeaderError was too fragile. Remove it. I tried to fix it in an earlier version of this CL and just broke different things instead. Also remove the "markBroken" method. It's redundant and confusing. Also, rename the checkTransportResend method to shouldRetryRequest and make it return a bool instead of an error. This also helps readability. Now the code recognizes the two main reasons we'd want to retry a request: because we never wrote the request in the first place (so: count the number of bytes we've written), or because the server hung up on us before we received response headers for an idempotent request. As an added bonus, this could make POST requests safely re-tryable since we know we haven't written anything yet. But it's too late in Go 1.7 to enable that, so we'll do that later (filed #15723). This also adds a new internal (package http) test, since testing this blackbox at higher levels in transport_test wasn't possible. Fixes #15446 Change-Id: I2c1dc03b1f1ebdf3f04eba81792bd5c4fb6b6b66 Reviewed-on: https://go-review.googlesource.com/23160 Reviewed-by: Andrew Gerrand <[email protected]> Run-TryBot: Brad Fitzpatrick <[email protected]> TryBot-Result: Gobot Gobot <[email protected]>
1 parent 9d73e14 commit 4d2ac54

File tree

2 files changed

+205
-77
lines changed

2 files changed

+205
-77
lines changed

src/net/http/transport.go

Lines changed: 136 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -387,47 +387,47 @@ func (t *Transport) RoundTrip(req *Request) (*Response, error) {
387387
if err == nil {
388388
return resp, nil
389389
}
390-
if err := checkTransportResend(err, req, pconn); err != nil {
390+
if !pconn.shouldRetryRequest(req, err) {
391391
return nil, err
392392
}
393393
testHookRoundTripRetried()
394394
}
395395
}
396396

397-
// checkTransportResend checks whether a failed HTTP request can be
398-
// resent on a new connection. The non-nil input error is the error from
399-
// roundTrip, which might be wrapped in a beforeRespHeaderError error.
400-
//
401-
// The return value is either nil to retry the request, the provided
402-
// err unmodified, or the unwrapped error inside a
403-
// beforeRespHeaderError.
404-
func checkTransportResend(err error, req *Request, pconn *persistConn) error {
405-
brhErr, ok := err.(beforeRespHeaderError)
406-
if !ok {
407-
return err
397+
// shouldRetryRequest reports whether we should retry sending a failed
398+
// HTTP request on a new connection. The non-nil input error is the
399+
// error from roundTrip.
400+
func (pc *persistConn) shouldRetryRequest(req *Request, err error) bool {
401+
if err == errMissingHost {
402+
// User error.
403+
return false
408404
}
409-
err = brhErr.error // unwrap the custom error in case we return it
410-
if err != errMissingHost && pconn.isReused() && req.isReplayable() {
411-
// If we try to reuse a connection that the server is in the process of
412-
// closing, we may end up successfully writing out our request (or a
413-
// portion of our request) only to find a connection error when we try to
414-
// read from (or finish writing to) the socket.
415-
416-
// There can be a race between the socket pool checking whether a socket
417-
// is still connected, receiving the FIN, and sending/reading data on a
418-
// reused socket. If we receive the FIN between the connectedness check
419-
// and writing/reading from the socket, we may first learn the socket is
420-
// disconnected when we get a ERR_SOCKET_NOT_CONNECTED. This will most
421-
// likely happen when trying to retrieve its IP address. See
422-
// http://crbug.com/105824 for more details.
423-
424-
// We resend a request only if we reused a keep-alive connection and did
425-
// not yet receive any header data. This automatically prevents an
426-
// infinite resend loop because we'll run out of the cached keep-alive
427-
// connections eventually.
428-
return nil
405+
if !pc.isReused() {
406+
// This was a fresh connection. There's no reason the server
407+
// should've hung up on us.
408+
//
409+
// Also, if we retried now, we could loop forever
410+
// creating new connections and retrying if the server
411+
// is just hanging up on us because it doesn't like
412+
// our request (as opposed to sending an error).
413+
return false
429414
}
430-
return err
415+
if !req.isReplayable() {
416+
// Don't retry non-idempotent requests.
417+
418+
// TODO: swap the nothingWrittenError and isReplayable checks,
419+
// putting the "if nothingWrittenError => return true" case
420+
// first, per golang.org/issue/15723
421+
return false
422+
}
423+
if _, ok := err.(nothingWrittenError); ok {
424+
// We never wrote anything, so it's safe to retry.
425+
return true
426+
}
427+
if err == errServerClosedIdle || err == errServerClosedConn {
428+
return true
429+
}
430+
return false // conservatively
431431
}
432432

433433
// ErrSkipAltProtocol is a sentinel error value defined by Transport.RegisterProtocol.
@@ -570,7 +570,8 @@ var (
570570
errTooManyIdleHost = errors.New("http: putIdleConn: too many idle connections for host")
571571
errCloseIdleConns = errors.New("http: CloseIdleConnections called")
572572
errReadLoopExiting = errors.New("http: persistConn.readLoop exiting")
573-
errServerClosedIdle = errors.New("http: server closed idle conn")
573+
errServerClosedIdle = errors.New("http: server closed idle connection")
574+
errServerClosedConn = errors.New("http: server closed connection")
574575
errIdleConnTimeout = errors.New("http: idle connection timeout")
575576
)
576577

@@ -881,12 +882,13 @@ func (t *Transport) getConn(treq *transportRequest, cm connectMethod) (*persistC
881882

882883
func (t *Transport) dialConn(ctx context.Context, cm connectMethod) (*persistConn, error) {
883884
pconn := &persistConn{
884-
t: t,
885-
cacheKey: cm.key(),
886-
reqch: make(chan requestAndChan, 1),
887-
writech: make(chan writeRequest, 1),
888-
closech: make(chan struct{}),
889-
writeErrCh: make(chan error, 1),
885+
t: t,
886+
cacheKey: cm.key(),
887+
reqch: make(chan requestAndChan, 1),
888+
writech: make(chan writeRequest, 1),
889+
closech: make(chan struct{}),
890+
writeErrCh: make(chan error, 1),
891+
writeLoopDone: make(chan struct{}),
890892
}
891893
tlsDial := t.DialTLS != nil && cm.targetScheme == "https" && cm.proxyURL == nil
892894
if tlsDial {
@@ -1003,12 +1005,28 @@ func (t *Transport) dialConn(ctx context.Context, cm connectMethod) (*persistCon
10031005
}
10041006

10051007
pconn.br = bufio.NewReader(pconn)
1006-
pconn.bw = bufio.NewWriter(pconn.conn)
1008+
pconn.bw = bufio.NewWriter(persistConnWriter{pconn})
10071009
go pconn.readLoop()
10081010
go pconn.writeLoop()
10091011
return pconn, nil
10101012
}
10111013

1014+
// persistConnWriter is the io.Writer written to by pc.bw.
1015+
// It accumulates the number of bytes written to the underlying conn,
1016+
// so the retry logic can determine whether any bytes made it across
1017+
// the wire.
1018+
// This is exactly 1 pointer field wide so it can go into an interface
1019+
// without allocation.
1020+
type persistConnWriter struct {
1021+
pc *persistConn
1022+
}
1023+
1024+
func (w persistConnWriter) Write(p []byte) (n int, err error) {
1025+
n, err = w.pc.conn.Write(p)
1026+
w.pc.nwrite += int64(n)
1027+
return
1028+
}
1029+
10121030
// useProxy reports whether requests to addr should use a proxy,
10131031
// according to the NO_PROXY or no_proxy environment variable.
10141032
// addr is always a canonicalAddr with a host and port.
@@ -1142,6 +1160,7 @@ type persistConn struct {
11421160
tlsState *tls.ConnectionState
11431161
br *bufio.Reader // from conn
11441162
bw *bufio.Writer // to conn
1163+
nwrite int64 // bytes written
11451164
reqch chan requestAndChan // written by roundTrip; read by readLoop
11461165
writech chan writeRequest // written by roundTrip; read by writeLoop
11471166
closech chan struct{} // closed when conn closed
@@ -1154,6 +1173,8 @@ type persistConn struct {
11541173
// whether or not a connection can be reused. Issue 7569.
11551174
writeErrCh chan error
11561175

1176+
writeLoopDone chan struct{} // closed when write loop ends
1177+
11571178
// Both guarded by Transport.idleMu:
11581179
idleAt time.Time // time it last become idle
11591180
idleTimer *time.Timer // holding an AfterFunc to close it
@@ -1195,7 +1216,7 @@ func (pc *persistConn) Read(p []byte) (n int, err error) {
11951216
// isBroken reports whether this connection is in a known broken state.
11961217
func (pc *persistConn) isBroken() bool {
11971218
pc.mu.Lock()
1198-
b := pc.broken
1219+
b := pc.closed != nil
11991220
pc.mu.Unlock()
12001221
return b
12011222
}
@@ -1247,6 +1268,56 @@ func (pc *persistConn) closeConnIfStillIdle() {
12471268
pc.close(errIdleConnTimeout)
12481269
}
12491270

1271+
// mapRoundTripErrorFromReadLoop maps the provided readLoop error into
1272+
// the error value that should be returned from persistConn.roundTrip.
1273+
//
1274+
// The startBytesWritten value should be the value of pc.nwrite before the roundTrip
1275+
// started writing the request.
1276+
func (pc *persistConn) mapRoundTripErrorFromReadLoop(startBytesWritten int64, err error) (out error) {
1277+
if err == nil {
1278+
return nil
1279+
}
1280+
if pc.isCanceled() {
1281+
return errRequestCanceled
1282+
}
1283+
if err == errServerClosedIdle || err == errServerClosedConn {
1284+
return err
1285+
}
1286+
if pc.isBroken() {
1287+
<-pc.writeLoopDone
1288+
if pc.nwrite == startBytesWritten {
1289+
return nothingWrittenError{err}
1290+
}
1291+
}
1292+
return err
1293+
}
1294+
1295+
// mapRoundTripErrorAfterClosed returns the error value to be propagated
1296+
// up to Transport.RoundTrip method when persistConn.roundTrip sees
1297+
// its pc.closech channel close, indicating the persistConn is dead.
1298+
// (after closech is closed, pc.closed is valid).
1299+
func (pc *persistConn) mapRoundTripErrorAfterClosed(startBytesWritten int64) error {
1300+
if pc.isCanceled() {
1301+
return errRequestCanceled
1302+
}
1303+
err := pc.closed
1304+
if err == errServerClosedIdle || err == errServerClosedConn {
1305+
// Don't decorate
1306+
return err
1307+
}
1308+
1309+
// Wait for the writeLoop goroutine to terminated, and then
1310+
// see if we actually managed to write anything. If not, we
1311+
// can retry the request.
1312+
<-pc.writeLoopDone
1313+
if pc.nwrite == startBytesWritten {
1314+
return nothingWrittenError{err}
1315+
}
1316+
1317+
return fmt.Errorf("net/http: HTTP/1.x transport connection broken: %v", err)
1318+
1319+
}
1320+
12501321
func (pc *persistConn) readLoop() {
12511322
closeErr := errReadLoopExiting // default value, if not changed below
12521323
defer func() {
@@ -1283,9 +1354,6 @@ func (pc *persistConn) readLoop() {
12831354
for alive {
12841355
pc.readLimit = pc.maxHeaderResponseSize()
12851356
_, err := pc.br.Peek(1)
1286-
if err != nil {
1287-
err = beforeRespHeaderError{err}
1288-
}
12891357

12901358
pc.mu.Lock()
12911359
if pc.numExpectedResponses == 0 {
@@ -1301,20 +1369,24 @@ func (pc *persistConn) readLoop() {
13011369
var resp *Response
13021370
if err == nil {
13031371
resp, err = pc.readResponse(rc, trace)
1372+
} else {
1373+
err = errServerClosedConn
1374+
closeErr = err
13041375
}
13051376

13061377
if err != nil {
13071378
if pc.readLimit <= 0 {
13081379
err = fmt.Errorf("net/http: server response headers exceeded %d bytes; aborted", pc.maxHeaderResponseSize())
13091380
}
1381+
13101382
// If we won't be able to retry this request later (from the
13111383
// roundTrip goroutine), mark it as done now.
13121384
// BEFORE the send on rc.ch, as the client might re-use the
13131385
// same *Request pointer, and we don't want to set call
13141386
// t.setReqCanceler from this persistConn while the Transport
13151387
// potentially spins up a different persistConn for the
13161388
// caller's subsequent request.
1317-
if checkTransportResend(err, rc.req, pc) != nil {
1389+
if !pc.shouldRetryRequest(rc.req, err) {
13181390
pc.t.setReqCanceler(rc.req, nil)
13191391
}
13201392
select {
@@ -1501,24 +1573,33 @@ func (pc *persistConn) waitForContinue(continueCh <-chan struct{}) func() bool {
15011573
}
15021574
}
15031575

1576+
// nothingWrittenError wraps a write errors which ended up writing zero bytes.
1577+
type nothingWrittenError struct {
1578+
error
1579+
}
1580+
15041581
func (pc *persistConn) writeLoop() {
1582+
defer close(pc.writeLoopDone)
15051583
for {
15061584
select {
15071585
case wr := <-pc.writech:
1508-
if pc.isBroken() {
1509-
wr.ch <- errors.New("http: can't write HTTP request on broken connection")
1510-
continue
1511-
}
1586+
startBytesWritten := pc.nwrite
15121587
err := wr.req.Request.write(pc.bw, pc.isProxy, wr.req.extra, pc.waitForContinue(wr.continueCh))
15131588
if err == nil {
15141589
err = pc.bw.Flush()
15151590
}
15161591
if err != nil {
1517-
pc.markBroken()
15181592
wr.req.Request.closeBody()
1593+
if pc.nwrite == startBytesWritten {
1594+
err = nothingWrittenError{err}
1595+
}
15191596
}
15201597
pc.writeErrCh <- err // to the body reader, which might recycle us
15211598
wr.ch <- err // to the roundTrip function
1599+
if err != nil {
1600+
pc.close(err)
1601+
return
1602+
}
15221603
case <-pc.closech:
15231604
return
15241605
}
@@ -1619,12 +1700,6 @@ var (
16191700
testHookReadLoopBeforeNextRead = nop
16201701
)
16211702

1622-
// beforeRespHeaderError is used to indicate when an IO error has occurred before
1623-
// any header data was received.
1624-
type beforeRespHeaderError struct {
1625-
error
1626-
}
1627-
16281703
func (pc *persistConn) roundTrip(req *transportRequest) (resp *Response, err error) {
16291704
testHookEnterRoundTrip()
16301705
if !pc.t.replaceReqCanceler(req.Request, pc.cancelRequest) {
@@ -1680,6 +1755,7 @@ func (pc *persistConn) roundTrip(req *transportRequest) (resp *Response, err err
16801755
// Write the request concurrently with waiting for a response,
16811756
// in case the server decides to reply before reading our full
16821757
// request body.
1758+
startBytesWritten := pc.nwrite
16831759
writeErrCh := make(chan error, 1)
16841760
pc.writech <- writeRequest{req, writeErrCh, continueCh}
16851761

@@ -1704,7 +1780,7 @@ WaitResponse:
17041780
if pc.isCanceled() {
17051781
err = errRequestCanceled
17061782
}
1707-
re = responseAndError{err: beforeRespHeaderError{err}}
1783+
re = responseAndError{err: err}
17081784
pc.close(fmt.Errorf("write error: %v", err))
17091785
break WaitResponse
17101786
}
@@ -1714,22 +1790,14 @@ WaitResponse:
17141790
respHeaderTimer = timer.C
17151791
}
17161792
case <-pc.closech:
1717-
var err error
1718-
if pc.isCanceled() {
1719-
err = errRequestCanceled
1720-
} else {
1721-
err = beforeRespHeaderError{fmt.Errorf("net/http: HTTP/1 transport connection broken: %v", pc.closed)}
1722-
}
1723-
re = responseAndError{err: err}
1793+
re = responseAndError{err: pc.mapRoundTripErrorAfterClosed(startBytesWritten)}
17241794
break WaitResponse
17251795
case <-respHeaderTimer:
17261796
pc.close(errTimeout)
17271797
re = responseAndError{err: errTimeout}
17281798
break WaitResponse
17291799
case re = <-resc:
1730-
if re.err != nil && pc.isCanceled() {
1731-
re.err = errRequestCanceled
1732-
}
1800+
re.err = pc.mapRoundTripErrorFromReadLoop(startBytesWritten, re.err)
17331801
break WaitResponse
17341802
case <-cancelChan:
17351803
pc.t.CancelRequest(req.Request)
@@ -1749,15 +1817,6 @@ WaitResponse:
17491817
return re.res, re.err
17501818
}
17511819

1752-
// markBroken marks a connection as broken (so it's not reused).
1753-
// It differs from close in that it doesn't close the underlying
1754-
// connection for use when it's still being read.
1755-
func (pc *persistConn) markBroken() {
1756-
pc.mu.Lock()
1757-
defer pc.mu.Unlock()
1758-
pc.broken = true
1759-
}
1760-
17611820
// markReused marks this connection as having been successfully used for a
17621821
// request and response.
17631822
func (pc *persistConn) markReused() {

0 commit comments

Comments
 (0)