Skip to content

Commit e6c12c3

Browse files
committed
net/http: support gzip, x-gzip Transfer-Encodings
Support "gzip" aka "x-gzip" as a transfer-encoding for requests and responses as per RFC 7230 Section 3.3.1. "gzip" and "x-gzip" are equivalent, as recommended by RFC 7230 Section 4.2.3. Transfer-Encoding is an on-the-fly property of the body that can be applied by proxies, other servers and basically any intermediary to transport the content, e.g. across data centers or between backends (machine to machine) that need compression. For this change, "gzip" is both explicitly and implicitly combined with transfer-encoding "chunked" in an ordering such as "Transfer-Encoding: gzip, chunked" and NOT "Transfer-Encoding: chunked, gzip". Obviously the latter form is counter-intuitive for streaming. Thus "chunked" is the last value to appear in that transfer-encoding header, if explicitly included. When parsing the response, the chunked body is first concatenated, as "chunked" requires, before finally being decompressed as "gzip". A chunked and compressed body would typically look like this: <LENGTH_1>\r\n<CHUNK_1_GZIPPED_BODY>\r\n<LENGTH_2>\r\n<CHUNK_2_GZIPPED_BODY>\r\n0\r\n\r\n which, when being processed, we would concatenate as <FULL_BODY> := <CHUNK_1_GZIPPED_BODY> + <CHUNK_2_GZIPPED_BODY> + ... and then finally gunzip as <FINAL_BODY> := gunzip(<FULL_BODY>). If a "chunked" transfer-encoding is NOT applied but "gzip" is applied, we implicitly assume that the sender intended "chunked" at the end. This is as per the recommendation of RFC 7230 Section 3.3.1, which explicitly says that for: * Request: "If any transfer coding other than chunked is applied to a request payload body, the sender MUST apply chunked as the final transfer coding to ensure that the message is properly framed." * Response: "If any transfer coding other than chunked is applied to a response payload body, the sender MUST either apply chunked as the final transfer coding or terminate the message by closing the connection."
RELNOTE=yes Fixes #29162 Change-Id: Icb8b8b838cf4119705605b29725cabb1fe258491 Reviewed-on: https://go-review.googlesource.com/c/go/+/166517 Run-TryBot: Emmanuel Odeke <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Brad Fitzpatrick <[email protected]>
1 parent b2b0992 commit e6c12c3

File tree

2 files changed

+394
-15
lines changed

2 files changed

+394
-15
lines changed

src/net/http/transfer.go

Lines changed: 112 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ package http
77
import (
88
"bufio"
99
"bytes"
10+
"compress/gzip"
1011
"errors"
1112
"fmt"
1213
"io"
@@ -466,6 +467,34 @@ func suppressedHeaders(status int) []string {
466467
return nil
467468
}
468469

470+
// proxyingReadCloser is a composite type that accepts and proxies
471+
// io.Read and io.Close calls to its respective Reader and Closer.
472+
//
473+
// It is composed of:
474+
// a) a top-level reader e.g. the result of decompression
475+
// b) a symbolic Closer e.g. the result of decompression, the
476+
// original body and the connection itself.
477+
type proxyingReadCloser struct {
478+
io.Reader
479+
io.Closer
480+
}
481+
482+
// multiCloser implements io.Closer and allows a bunch of io.Closer values
483+
// to all be closed at once.
484+
// Example usage is with proxyingReadCloser if we are decompressing a response
485+
// body on the fly and would like to close both *gzip.Reader and underlying body.
486+
type multiCloser []io.Closer
487+
488+
func (mc multiCloser) Close() error {
489+
var err error
490+
for _, c := range mc {
491+
if err1 := c.Close(); err1 != nil && err == nil {
492+
err = err1
493+
}
494+
}
495+
return err
496+
}
497+
469498
// msg is *Request or *Response.
470499
func readTransfer(msg interface{}, r *bufio.Reader) (err error) {
471500
t := &transferReader{RequestMethod: "GET"}
@@ -543,7 +572,7 @@ func readTransfer(msg interface{}, r *bufio.Reader) (err error) {
543572
// Prepare body reader. ContentLength < 0 means chunked encoding
544573
// or close connection when finished, since multipart is not supported yet
545574
switch {
546-
case chunked(t.TransferEncoding):
575+
case chunked(t.TransferEncoding) || implicitlyChunked(t.TransferEncoding):
547576
if noResponseBodyExpected(t.RequestMethod) || !bodyAllowedForStatus(t.StatusCode) {
548577
t.Body = NoBody
549578
} else {
@@ -564,6 +593,21 @@ func readTransfer(msg interface{}, r *bufio.Reader) (err error) {
564593
}
565594
}
566595

596+
// Finally, if "gzip" was one of the applied transfer-encodings,
597+
// we'll gunzip the concatenated body/payload of the message.
598+
// TODO: As we support more transfer-encodings, extract
599+
// this code and apply the un-codings in reverse.
600+
if t.Body != NoBody && gzipped(t.TransferEncoding) {
601+
zr, err := gzip.NewReader(t.Body)
602+
if err != nil {
603+
return fmt.Errorf("http: failed to gunzip body: %v", err)
604+
}
605+
t.Body = &proxyingReadCloser{
606+
Reader: zr,
607+
Closer: multiCloser{zr, t.Body},
608+
}
609+
}
610+
567611
// Unify output
568612
switch rr := msg.(type) {
569613
case *Request:
@@ -583,8 +627,41 @@ func readTransfer(msg interface{}, r *bufio.Reader) (err error) {
583627
return nil
584628
}
585629

586-
// Checks whether chunked is part of the encodings stack
587-
func chunked(te []string) bool { return len(te) > 0 && te[0] == "chunked" }
630+
// Checks whether chunked is the last part of the encodings stack
631+
func chunked(te []string) bool { return len(te) > 0 && te[len(te)-1] == "chunked" }
632+
633+
// implicitlyChunked is a helper to check whether chunked is implied, because
634+
// RFC 7230 Section 3.3.1 says that the sender MUST apply chunked as the final
635+
// transfer coding to ensure that the message is properly framed, for both
636+
// requests and responses. Since "identity" is incompatible with any other
637+
// transformational encoding and cannot co-exist with one, the presence of
638+
// "identity" will cause implicitlyChunked to return false.
639+
func implicitlyChunked(te []string) bool {
640+
if len(te) == 0 { // No transfer-encodings passed in, so not implicity chunked.
641+
return false
642+
}
643+
for _, tei := range te {
644+
if tei == "identity" {
645+
return false
646+
}
647+
}
648+
return true
649+
}
650+
651+
func isGzipTransferEncoding(tei string) bool {
652+
// RFC 7230 4.2.3 requests that "x-gzip" SHOULD be considered the same as "gzip".
653+
return tei == "gzip" || tei == "x-gzip"
654+
}
655+
656+
// Checks whether either of "gzip" or "x-gzip" is contained in the transfer encodings.
657+
func gzipped(te []string) bool {
658+
for _, tei := range te {
659+
if isGzipTransferEncoding(tei) {
660+
return true
661+
}
662+
}
663+
return false
664+
}
588665

589666
// Checks whether the encoding is explicitly "identity".
590667
func isIdentity(te []string) bool { return len(te) == 1 && te[0] == "identity" }
@@ -620,25 +697,47 @@ func (t *transferReader) fixTransferEncoding() error {
620697

621698
encodings := strings.Split(raw[0], ",")
622699
te := make([]string, 0, len(encodings))
623-
// TODO: Even though we only support "identity" and "chunked"
624-
// encodings, the loop below is designed with foresight. One
625-
// invariant that must be maintained is that, if present,
626-
// chunked encoding must always come first.
627-
for _, encoding := range encodings {
700+
701+
// When adding new encodings, please maintain the invariant:
702+
// if chunked encoding is present, it must always
703+
// come last and it must be applied only once.
704+
// See RFC 7230 Section 3.3.1 Transfer-Encoding.
705+
for i, encoding := range encodings {
628706
encoding = strings.ToLower(strings.TrimSpace(encoding))
629-
// "identity" encoding is not recorded
707+
630708
if encoding == "identity" {
709+
// "identity" should not be mixed with other transfer-encodings/compressions
710+
// because it means "no compression, no transformation".
711+
if len(encodings) != 1 {
712+
return &badStringError{`"identity" when present must be the only transfer encoding`, strings.Join(encodings, ",")}
713+
}
714+
// "identity" is not recorded.
631715
break
632716
}
633-
if encoding != "chunked" {
717+
718+
switch {
719+
case encoding == "chunked":
720+
// "chunked" MUST ALWAYS be the last
721+
// encoding as per the loop invariant.
722+
// That is:
723+
// Invalid: [chunked, gzip]
724+
// Valid: [gzip, chunked]
725+
if i+1 != len(encodings) {
726+
return &badStringError{"chunked must be applied only once, as the last encoding", strings.Join(encodings, ",")}
727+
}
728+
// Supported otherwise.
729+
730+
case isGzipTransferEncoding(encoding):
731+
// Supported
732+
733+
default:
634734
return &unsupportedTEError{fmt.Sprintf("unsupported transfer encoding: %q", encoding)}
635735
}
736+
636737
te = te[0 : len(te)+1]
637738
te[len(te)-1] = encoding
638739
}
639-
if len(te) > 1 {
640-
return &badStringError{"too many transfer encodings", strings.Join(te, ",")}
641-
}
740+
642741
if len(te) > 0 {
643742
// RFC 7230 3.3.2 says "A sender MUST NOT send a
644743
// Content-Length header field in any message that

0 commit comments

Comments
 (0)