diff --git a/CHANGELOG.md b/CHANGELOG.md
index 27542a757aa..3ea37fb8a8e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@
 * [ENHANCEMENT] Compactor: Exposing Thanos accept-malformed-index to Cortex compactor. #5334
 * [ENHANCEMENT] Update Go version to 1.20.4. #5299
 * [ENHANCEMENT] Log: Avoid expensive log.Valuer evaluation for disallowed levels. #5297
+* [ENHANCEMENT] Improving Performance on the API Gzip Handler. #5347
 * [BUGFIX] Ruler: Validate if rule group can be safely converted back to rule group yaml from protobuf message #5265
 * [BUGFIX] Querier: Convert gRPC `ResourceExhausted` status code from store gateway to 422 limit error. #5286
 * [BUGFIX] Alertmanager: Route web-ui requests to the alertmanager distributor when sharding is enabled. #5293
diff --git a/go.mod b/go.mod
index 5a137d8166f..6708c5077fa 100644
--- a/go.mod
+++ b/go.mod
@@ -4,7 +4,6 @@ go 1.18
 
 require (
 	github.com/Masterminds/squirrel v1.5.4
-	github.com/NYTimes/gziphandler v1.1.1
 	github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137
 	github.com/alicebob/miniredis/v2 v2.30.1
 	github.com/armon/go-metrics v0.4.1
diff --git a/go.sum b/go.sum
index 92ddfa79164..4e25b8c5786 100644
--- a/go.sum
+++ b/go.sum
@@ -476,7 +476,6 @@ github.com/Microsoft/hcsshim v0.9.2/go.mod h1:7pLA8lDk46WKDWlVsENo92gC0XFa8rbKfy
 github.com/Microsoft/hcsshim/test v0.0.0-20201218223536-d3e5debf77da/go.mod h1:5hlzMzRKMLyo42nCZ9oml8AdTlq/0cvIaBv6tK1RehU=
 github.com/Microsoft/hcsshim/test v0.0.0-20210227013316-43a75bb4edd3/go.mod h1:mw7qgWloBUl75W/gVH3cQszUg1+gUITj7D6NY7ywVnY=
 github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
-github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I=
 github.com/NYTimes/gziphandler v1.1.1/go.mod h1:n/CVRwUEOgIxrgPvAQhUUr9oeUtvrhMomdKFjzJNB0c=
 github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
 github.com/OneOfOne/xxhash v1.2.6 h1:U68crOE3y3MPttCMQGywZOLrTeF5HHJ3/vDBCJn9/bA=
diff --git a/pkg/api/api.go b/pkg/api/api.go
index 4a81c313497..8dcdd356b63 100644
--- a/pkg/api/api.go
+++ b/pkg/api/api.go
@@ -7,10 +7,10 @@ import (
 	"path"
 	"strings"
 
-	"github.com/NYTimes/gziphandler"
 	"github.com/felixge/fgprof"
 	"github.com/go-kit/log"
 	"github.com/go-kit/log/level"
+	"github.com/klauspost/compress/gzhttp"
 	"github.com/prometheus/prometheus/storage"
 	"github.com/weaveworks/common/middleware"
 	"github.com/weaveworks/common/server"
@@ -145,7 +145,7 @@ func (a *API) RegisterRoute(path string, handler http.Handler, auth bool, method
 	}
 
 	if a.cfg.ResponseCompression {
-		handler = gziphandler.GzipHandler(handler)
+		handler = gzhttp.GzipHandler(handler)
 	}
 	if a.HTTPHeaderMiddleware != nil {
 		handler = a.HTTPHeaderMiddleware.Wrap(handler)
@@ -165,7 +165,7 @@ func (a *API) RegisterRoutesWithPrefix(prefix string, handler http.Handler, auth
 	}
 
 	if a.cfg.ResponseCompression {
-		handler = gziphandler.GzipHandler(handler)
+		handler = gzhttp.GzipHandler(handler)
 	}
 	if a.HTTPHeaderMiddleware != nil {
 		handler = a.HTTPHeaderMiddleware.Wrap(handler)
diff --git a/pkg/api/api_test.go b/pkg/api/api_test.go
index 147820a1203..c25ca27234b 100644
--- a/pkg/api/api_test.go
+++ b/pkg/api/api_test.go
@@ -1,13 +1,24 @@
 package api
 
 import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
 	"testing"
 
 	"github.com/gorilla/mux"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/prometheus/model/labels"
 	"github.com/stretchr/testify/require"
 	"github.com/weaveworks/common/server"
 )
 
+const (
+	acceptEncodingHeader = "Accept-Encoding"
+	gzipEncoding         = "gzip"
+)
+
 type FakeLogger struct{}
 
 func (fl *FakeLogger) Log(keyvals ...interface{}) error {
@@ -94,3 +105,109 @@ func TestNewApiWithoutHeaderLogging(t *testing.T) {
 	require.Nil(t, api.HTTPHeaderMiddleware)
 
 }
+
+func Benchmark_Compression(b *testing.B) {
+	client := &http.Client{
+		Transport: &http.Transport{
+			DisableCompression: true,
+		},
+	}
+
+	cfg := Config{
+		ResponseCompression: true,
+	}
+
+	cases := map[string]struct {
+		enc            string
+		numberOfLabels int
+	}{
+		"gzip-10-labels": {
+			enc:            gzipEncoding,
+			numberOfLabels: 10,
+		},
+		"gzip-100-labels": {
+			enc:            gzipEncoding,
+			numberOfLabels: 100,
+		},
+		"gzip-1K-labels": {
+			enc:            gzipEncoding,
+			numberOfLabels: 1000,
+		},
+		"gzip-10K-labels": {
+			enc:            gzipEncoding,
+			numberOfLabels: 10000,
+		},
+		"gzip-100K-labels": {
+			enc:            gzipEncoding,
+			numberOfLabels: 100000,
+		},
+		"gzip-1M-labels": {
+			enc:            gzipEncoding,
+			numberOfLabels: 1000000,
+		},
+	}
+
+	for name, tc := range cases {
+		b.Run(name, func(b *testing.B) {
+			serverCfg := server.Config{
+				HTTPListenNetwork: server.DefaultNetwork,
+				HTTPListenPort:    8080,
+				Registerer:        prometheus.NewRegistry(),
+			}
+
+			server, err := server.New(serverCfg)
+			require.NoError(b, err)
+			api, err := New(cfg, serverCfg, server, &FakeLogger{})
+			require.NoError(b, err)
+
+			labels := labels.ScratchBuilder{}
+
+			for i := 0; i < tc.numberOfLabels; i++ {
+				labels.Add(fmt.Sprintf("Name%v", i), fmt.Sprintf("Value%v", i))
+			}
+
+			respBody, err := json.Marshal(labels.Labels())
+			require.NoError(b, err)
+
+			api.RegisterRoute("/foo_endpoint", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+				w.WriteHeader(http.StatusOK)
+				_, err := w.Write(respBody)
+				require.NoError(b, err)
+			}), false, "GET")
+
+			go func() {
+				err := server.Run()
+				require.NoError(b, err)
+			}()
+
+			defer server.Shutdown()
+			req, _ := http.NewRequest("GET", "http://"+server.HTTPListenAddr().String()+"/foo_endpoint", nil)
+			req.Header.Set(acceptEncodingHeader, "gzip")
+
+			b.ReportAllocs()
+			b.ResetTimer()
+
+			// Reusing the array to read the body and avoid allocation on the test
+			encRespBody := make([]byte, len(respBody))
+
+			for i := 0; i < b.N; i++ {
+				resp, err := client.Do(req)
+
+				require.NoError(b, err)
+
+				require.NoError(b, err, "client get failed with unexpected error")
+
+				responseBodySize := 0
+				for {
+					n, err := resp.Body.Read(encRespBody)
+					responseBodySize += n
+					if err == io.EOF {
+						break
+					}
+				}
+
+				b.ReportMetric(float64(responseBodySize), "ContentLength")
+			}
+		})
+	}
+}
diff --git a/vendor/github.com/NYTimes/gziphandler/.gitignore b/vendor/github.com/NYTimes/gziphandler/.gitignore
deleted file mode 100644
index 1377554ebea..00000000000
--- a/vendor/github.com/NYTimes/gziphandler/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-*.swp
diff --git a/vendor/github.com/NYTimes/gziphandler/.travis.yml b/vendor/github.com/NYTimes/gziphandler/.travis.yml
deleted file mode 100644
index 94dfae362d3..00000000000
--- a/vendor/github.com/NYTimes/gziphandler/.travis.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-language: go
-go:
-  - 1.x
-  - tip
-env:
-  - GO111MODULE=on
-install:
-  - go mod download
-script:
-  - go test -race -v
diff --git a/vendor/github.com/NYTimes/gziphandler/CODE_OF_CONDUCT.md b/vendor/github.com/NYTimes/gziphandler/CODE_OF_CONDUCT.md
deleted file mode 100644
index cdbca194c34..00000000000
--- a/vendor/github.com/NYTimes/gziphandler/CODE_OF_CONDUCT.md
+++ /dev/null
@@ -1,75 +0,0 @@
----
-layout: code-of-conduct
-version: v1.0
----
-
-This code of conduct outlines our expectations for participants within the **NYTimes/gziphandler** community, as well as steps to reporting unacceptable behavior. We are committed to providing a welcoming and inspiring community for all and expect our code of conduct to be honored. Anyone who violates this code of conduct may be banned from the community.
-
-Our open source community strives to:
-
-* **Be friendly and patient.**
-* **Be welcoming**: We strive to be a community that welcomes and supports people of all backgrounds and identities. This includes, but is not limited to members of any race, ethnicity, culture, national origin, colour, immigration status, social and economic class, educational level, sex, sexual orientation, gender identity and expression, age, size, family status, political belief, religion, and mental and physical ability.
-* **Be considerate**: Your work will be used by other people, and you in turn will depend on the work of others. Any decision you take will affect users and colleagues, and you should take those consequences into account when making decisions. Remember that we're a world-wide community, so you might not be communicating in someone else's primary language.
-* **Be respectful**:  Not all of us will agree all the time, but disagreement is no excuse for poor behavior and poor manners. We might all experience some frustration now and then, but we cannot allow that frustration to turn into a personal attack. It’s important to remember that a community where people feel uncomfortable or threatened is not a productive one.
-* **Be careful in the words that we choose**: we are a community of professionals, and we conduct ourselves professionally. Be kind to others. Do not insult or put down other participants. Harassment and other exclusionary behavior aren't acceptable.
-* **Try to understand why we disagree**: Disagreements, both social and technical, happen all the time. It is important that we resolve disagreements and differing views constructively. Remember that we’re different. The strength of our community comes from its diversity, people from a wide range of backgrounds. Different people have different perspectives on issues. Being unable to understand why someone holds a viewpoint doesn’t mean that they’re wrong. Don’t forget that it is human to err and blaming each other doesn’t get us anywhere. Instead, focus on helping to resolve issues and learning from mistakes.
-
-## Definitions
-
-Harassment includes, but is not limited to:
-
-- Offensive comments related to gender, gender identity and expression, sexual orientation, disability, mental illness, neuro(a)typicality, physical appearance, body size, race, age, regional discrimination, political or religious affiliation
-- Unwelcome comments regarding a person’s lifestyle choices and practices, including those related to food, health, parenting, drugs, and employment
-- Deliberate misgendering. This includes deadnaming or persistently using a pronoun that does not correctly reflect a person's gender identity. You must address people by the name they give you when not addressing them by their username or handle
-- Physical contact and simulated physical contact (eg, textual descriptions like “*hug*” or “*backrub*”) without consent or after a request to stop
-- Threats of violence, both physical and psychological
-- Incitement of violence towards any individual, including encouraging a person to commit suicide or to engage in self-harm
-- Deliberate intimidation
-- Stalking or following
-- Harassing photography or recording, including logging online activity for harassment purposes
-- Sustained disruption of discussion
-- Unwelcome sexual attention, including gratuitous or off-topic sexual images or behaviour
-- Pattern of inappropriate social contact, such as requesting/assuming inappropriate levels of intimacy with others
-- Continued one-on-one communication after requests to cease
-- Deliberate “outing” of any aspect of a person’s identity without their consent except as necessary to protect others from intentional abuse
-- Publication of non-harassing private communication
-
-Our open source community prioritizes marginalized people’s safety over privileged people’s comfort. We will not act on complaints regarding:
-
-- ‘Reverse’ -isms, including ‘reverse racism,’ ‘reverse sexism,’ and ‘cisphobia’
-- Reasonable communication of boundaries, such as “leave me alone,” “go away,” or “I’m not discussing this with you”
-- Refusal to explain or debate social justice concepts
-- Communicating in a ‘tone’ you don’t find congenial
-- Criticizing racist, sexist, cissexist, or otherwise oppressive behavior or assumptions
-
-
-### Diversity Statement
-
-We encourage everyone to participate and are committed to building a community for all. Although we will fail at times, we seek to treat everyone both as fairly and equally as possible. Whenever a participant has made a mistake, we expect them to take responsibility for it. If someone has been harmed or offended, it is our responsibility to listen carefully and respectfully, and do our best to right the wrong.
-
-Although this list cannot be exhaustive, we explicitly honor diversity in age, gender, gender identity or expression, culture, ethnicity, language, national origin, political beliefs, profession, race, religion, sexual orientation, socioeconomic status, and technical ability. We will not tolerate discrimination based on any of the protected
-characteristics above, including participants with disabilities.
-
-### Reporting Issues
-
-If you experience or witness unacceptable behavior—or have any other concerns—please report it by contacting us via **code@nytimes.com**. All reports will be handled with discretion. In your report please include:
-
-- Your contact information.
-- Names (real, nicknames, or pseudonyms) of any individuals involved. If there are additional witnesses, please
-include them as well. Your account of what occurred, and if you believe the incident is ongoing. If there is a publicly available record (e.g. a mailing list archive or a public IRC logger), please include a link.
-- Any additional information that may be helpful.
-
-After filing a report, a representative will contact you personally, review the incident, follow up with any additional questions, and make a decision as to how to respond. If the person who is harassing you is part of the response team, they will recuse themselves from handling your incident. If the complaint originates from a member of the response team, it will be handled by a different member of the response team. We will respect confidentiality requests for the purpose of protecting victims of abuse.
-
-### Attribution & Acknowledgements
-
-We all stand on the shoulders of giants across many open source communities.  We'd like to thank the communities and projects that established code of conducts and diversity statements as our inspiration:
-
-* [Django](https://www.djangoproject.com/conduct/reporting/)
-* [Python](https://www.python.org/community/diversity/)
-* [Ubuntu](http://www.ubuntu.com/about/about-ubuntu/conduct)
-* [Contributor Covenant](http://contributor-covenant.org/)
-* [Geek Feminism](http://geekfeminism.org/about/code-of-conduct/)
-* [Citizen Code of Conduct](http://citizencodeofconduct.org/)
-
-This Code of Conduct was based on https://github.com/todogroup/opencodeofconduct
diff --git a/vendor/github.com/NYTimes/gziphandler/CONTRIBUTING.md b/vendor/github.com/NYTimes/gziphandler/CONTRIBUTING.md
deleted file mode 100644
index b89a9eb4fb2..00000000000
--- a/vendor/github.com/NYTimes/gziphandler/CONTRIBUTING.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# Contributing to NYTimes/gziphandler
-
-This is an open source project started by handful of developers at The New York Times and open to the entire Go community.
-
-We really appreciate your help!
-
-## Filing issues
-
-When filing an issue, make sure to answer these five questions:
-
-1. What version of Go are you using (`go version`)?
-2. What operating system and processor architecture are you using?
-3. What did you do?
-4. What did you expect to see?
-5. What did you see instead?
-
-## Contributing code
-
-Before submitting changes, please follow these guidelines:
-
-1. Check the open issues and pull requests for existing discussions.
-2. Open an issue to discuss a new feature.
-3. Write tests.
-4. Make sure code follows the ['Go Code Review Comments'](https://github.com/golang/go/wiki/CodeReviewComments).
-5. Make sure your changes pass `go test`.
-6. Make sure the entire test suite passes locally and on Travis CI.
-7. Open a Pull Request.
-8. [Squash your commits](http://gitready.com/advanced/2009/02/10/squashing-commits-with-rebase.html) after receiving feedback and add a [great commit message](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html).
-
-Unless otherwise noted, the gziphandler source files are distributed under the Apache 2.0-style license found in the LICENSE.md file.
diff --git a/vendor/github.com/NYTimes/gziphandler/README.md b/vendor/github.com/NYTimes/gziphandler/README.md
deleted file mode 100644
index 6259acaca79..00000000000
--- a/vendor/github.com/NYTimes/gziphandler/README.md
+++ /dev/null
@@ -1,56 +0,0 @@
-Gzip Handler
-============
-
-This is a tiny Go package which wraps HTTP handlers to transparently gzip the
-response body, for clients which support it. Although it's usually simpler to
-leave that to a reverse proxy (like nginx or Varnish), this package is useful
-when that's undesirable.
-
-## Install
-```bash
-go get -u github.com/NYTimes/gziphandler
-```
-
-## Usage
-
-Call `GzipHandler` with any handler (an object which implements the
-`http.Handler` interface), and it'll return a new handler which gzips the
-response. For example:
-
-```go
-package main
-
-import (
-	"io"
-	"net/http"
-	"github.com/NYTimes/gziphandler"
-)
-
-func main() {
-	withoutGz := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		w.Header().Set("Content-Type", "text/plain")
-		io.WriteString(w, "Hello, World")
-	})
-
-	withGz := gziphandler.GzipHandler(withoutGz)
-
-	http.Handle("/", withGz)
-	http.ListenAndServe("0.0.0.0:8000", nil)
-}
-```
-
-
-## Documentation
-
-The docs can be found at [godoc.org][docs], as usual.
-
-
-## License
-
-[Apache 2.0][license].
-
-
-
-
-[docs]:     https://godoc.org/github.com/NYTimes/gziphandler
-[license]:  https://github.com/NYTimes/gziphandler/blob/master/LICENSE
diff --git a/vendor/github.com/NYTimes/gziphandler/gzip.go b/vendor/github.com/NYTimes/gziphandler/gzip.go
deleted file mode 100644
index c112bbdf81c..00000000000
--- a/vendor/github.com/NYTimes/gziphandler/gzip.go
+++ /dev/null
@@ -1,532 +0,0 @@
-package gziphandler // import "github.com/NYTimes/gziphandler"
-
-import (
-	"bufio"
-	"compress/gzip"
-	"fmt"
-	"io"
-	"mime"
-	"net"
-	"net/http"
-	"strconv"
-	"strings"
-	"sync"
-)
-
-const (
-	vary            = "Vary"
-	acceptEncoding  = "Accept-Encoding"
-	contentEncoding = "Content-Encoding"
-	contentType     = "Content-Type"
-	contentLength   = "Content-Length"
-)
-
-type codings map[string]float64
-
-const (
-	// DefaultQValue is the default qvalue to assign to an encoding if no explicit qvalue is set.
-	// This is actually kind of ambiguous in RFC 2616, so hopefully it's correct.
-	// The examples seem to indicate that it is.
-	DefaultQValue = 1.0
-
-	// DefaultMinSize is the default minimum size until we enable gzip compression.
-	// 1500 bytes is the MTU size for the internet since that is the largest size allowed at the network layer.
-	// If you take a file that is 1300 bytes and compress it to 800 bytes, it’s still transmitted in that same 1500 byte packet regardless, so you’ve gained nothing.
-	// That being the case, you should restrict the gzip compression to files with a size greater than a single packet, 1400 bytes (1.4KB) is a safe value.
-	DefaultMinSize = 1400
-)
-
-// gzipWriterPools stores a sync.Pool for each compression level for reuse of
-// gzip.Writers. Use poolIndex to covert a compression level to an index into
-// gzipWriterPools.
-var gzipWriterPools [gzip.BestCompression - gzip.BestSpeed + 2]*sync.Pool
-
-func init() {
-	for i := gzip.BestSpeed; i <= gzip.BestCompression; i++ {
-		addLevelPool(i)
-	}
-	addLevelPool(gzip.DefaultCompression)
-}
-
-// poolIndex maps a compression level to its index into gzipWriterPools. It
-// assumes that level is a valid gzip compression level.
-func poolIndex(level int) int {
-	// gzip.DefaultCompression == -1, so we need to treat it special.
-	if level == gzip.DefaultCompression {
-		return gzip.BestCompression - gzip.BestSpeed + 1
-	}
-	return level - gzip.BestSpeed
-}
-
-func addLevelPool(level int) {
-	gzipWriterPools[poolIndex(level)] = &sync.Pool{
-		New: func() interface{} {
-			// NewWriterLevel only returns error on a bad level, we are guaranteeing
-			// that this will be a valid level so it is okay to ignore the returned
-			// error.
-			w, _ := gzip.NewWriterLevel(nil, level)
-			return w
-		},
-	}
-}
-
-// GzipResponseWriter provides an http.ResponseWriter interface, which gzips
-// bytes before writing them to the underlying response. This doesn't close the
-// writers, so don't forget to do that.
-// It can be configured to skip response smaller than minSize.
-type GzipResponseWriter struct {
-	http.ResponseWriter
-	index int // Index for gzipWriterPools.
-	gw    *gzip.Writer
-
-	code int // Saves the WriteHeader value.
-
-	minSize int    // Specifed the minimum response size to gzip. If the response length is bigger than this value, it is compressed.
-	buf     []byte // Holds the first part of the write before reaching the minSize or the end of the write.
-	ignore  bool   // If true, then we immediately passthru writes to the underlying ResponseWriter.
-
-	contentTypes []parsedContentType // Only compress if the response is one of these content-types. All are accepted if empty.
-}
-
-type GzipResponseWriterWithCloseNotify struct {
-	*GzipResponseWriter
-}
-
-func (w GzipResponseWriterWithCloseNotify) CloseNotify() <-chan bool {
-	return w.ResponseWriter.(http.CloseNotifier).CloseNotify()
-}
-
-// Write appends data to the gzip writer.
-func (w *GzipResponseWriter) Write(b []byte) (int, error) {
-	// GZIP responseWriter is initialized. Use the GZIP responseWriter.
-	if w.gw != nil {
-		return w.gw.Write(b)
-	}
-
-	// If we have already decided not to use GZIP, immediately passthrough.
-	if w.ignore {
-		return w.ResponseWriter.Write(b)
-	}
-
-	// Save the write into a buffer for later use in GZIP responseWriter (if content is long enough) or at close with regular responseWriter.
-	// On the first write, w.buf changes from nil to a valid slice
-	w.buf = append(w.buf, b...)
-
-	var (
-		cl, _ = strconv.Atoi(w.Header().Get(contentLength))
-		ct    = w.Header().Get(contentType)
-		ce    = w.Header().Get(contentEncoding)
-	)
-	// Only continue if they didn't already choose an encoding or a known unhandled content length or type.
-	if ce == "" && (cl == 0 || cl >= w.minSize) && (ct == "" || handleContentType(w.contentTypes, ct)) {
-		// If the current buffer is less than minSize and a Content-Length isn't set, then wait until we have more data.
-		if len(w.buf) < w.minSize && cl == 0 {
-			return len(b), nil
-		}
-		// If the Content-Length is larger than minSize or the current buffer is larger than minSize, then continue.
-		if cl >= w.minSize || len(w.buf) >= w.minSize {
-			// If a Content-Type wasn't specified, infer it from the current buffer.
-			if ct == "" {
-				ct = http.DetectContentType(w.buf)
-				w.Header().Set(contentType, ct)
-			}
-			// If the Content-Type is acceptable to GZIP, initialize the GZIP writer.
-			if handleContentType(w.contentTypes, ct) {
-				if err := w.startGzip(); err != nil {
-					return 0, err
-				}
-				return len(b), nil
-			}
-		}
-	}
-	// If we got here, we should not GZIP this response.
-	if err := w.startPlain(); err != nil {
-		return 0, err
-	}
-	return len(b), nil
-}
-
-// startGzip initializes a GZIP writer and writes the buffer.
-func (w *GzipResponseWriter) startGzip() error {
-	// Set the GZIP header.
-	w.Header().Set(contentEncoding, "gzip")
-
-	// if the Content-Length is already set, then calls to Write on gzip
-	// will fail to set the Content-Length header since its already set
-	// See: https://github.com/golang/go/issues/14975.
-	w.Header().Del(contentLength)
-
-	// Write the header to gzip response.
-	if w.code != 0 {
-		w.ResponseWriter.WriteHeader(w.code)
-		// Ensure that no other WriteHeader's happen
-		w.code = 0
-	}
-
-	// Initialize and flush the buffer into the gzip response if there are any bytes.
-	// If there aren't any, we shouldn't initialize it yet because on Close it will
-	// write the gzip header even if nothing was ever written.
-	if len(w.buf) > 0 {
-		// Initialize the GZIP response.
-		w.init()
-		n, err := w.gw.Write(w.buf)
-
-		// This should never happen (per io.Writer docs), but if the write didn't
-		// accept the entire buffer but returned no specific error, we have no clue
-		// what's going on, so abort just to be safe.
-		if err == nil && n < len(w.buf) {
-			err = io.ErrShortWrite
-		}
-		return err
-	}
-	return nil
-}
-
-// startPlain writes to sent bytes and buffer the underlying ResponseWriter without gzip.
-func (w *GzipResponseWriter) startPlain() error {
-	if w.code != 0 {
-		w.ResponseWriter.WriteHeader(w.code)
-		// Ensure that no other WriteHeader's happen
-		w.code = 0
-	}
-	w.ignore = true
-	// If Write was never called then don't call Write on the underlying ResponseWriter.
-	if w.buf == nil {
-		return nil
-	}
-	n, err := w.ResponseWriter.Write(w.buf)
-	w.buf = nil
-	// This should never happen (per io.Writer docs), but if the write didn't
-	// accept the entire buffer but returned no specific error, we have no clue
-	// what's going on, so abort just to be safe.
-	if err == nil && n < len(w.buf) {
-		err = io.ErrShortWrite
-	}
-	return err
-}
-
-// WriteHeader just saves the response code until close or GZIP effective writes.
-func (w *GzipResponseWriter) WriteHeader(code int) {
-	if w.code == 0 {
-		w.code = code
-	}
-}
-
-// init graps a new gzip writer from the gzipWriterPool and writes the correct
-// content encoding header.
-func (w *GzipResponseWriter) init() {
-	// Bytes written during ServeHTTP are redirected to this gzip writer
-	// before being written to the underlying response.
-	gzw := gzipWriterPools[w.index].Get().(*gzip.Writer)
-	gzw.Reset(w.ResponseWriter)
-	w.gw = gzw
-}
-
-// Close will close the gzip.Writer and will put it back in the gzipWriterPool.
-func (w *GzipResponseWriter) Close() error {
-	if w.ignore {
-		return nil
-	}
-
-	if w.gw == nil {
-		// GZIP not triggered yet, write out regular response.
-		err := w.startPlain()
-		// Returns the error if any at write.
-		if err != nil {
-			err = fmt.Errorf("gziphandler: write to regular responseWriter at close gets error: %q", err.Error())
-		}
-		return err
-	}
-
-	err := w.gw.Close()
-	gzipWriterPools[w.index].Put(w.gw)
-	w.gw = nil
-	return err
-}
-
-// Flush flushes the underlying *gzip.Writer and then the underlying
-// http.ResponseWriter if it is an http.Flusher. This makes GzipResponseWriter
-// an http.Flusher.
-func (w *GzipResponseWriter) Flush() {
-	if w.gw == nil && !w.ignore {
-		// Only flush once startGzip or startPlain has been called.
-		//
-		// Flush is thus a no-op until we're certain whether a plain
-		// or gzipped response will be served.
-		return
-	}
-
-	if w.gw != nil {
-		w.gw.Flush()
-	}
-
-	if fw, ok := w.ResponseWriter.(http.Flusher); ok {
-		fw.Flush()
-	}
-}
-
-// Hijack implements http.Hijacker. If the underlying ResponseWriter is a
-// Hijacker, its Hijack method is returned. Otherwise an error is returned.
-func (w *GzipResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) {
-	if hj, ok := w.ResponseWriter.(http.Hijacker); ok {
-		return hj.Hijack()
-	}
-	return nil, nil, fmt.Errorf("http.Hijacker interface is not supported")
-}
-
-// verify Hijacker interface implementation
-var _ http.Hijacker = &GzipResponseWriter{}
-
-// MustNewGzipLevelHandler behaves just like NewGzipLevelHandler except that in
-// an error case it panics rather than returning an error.
-func MustNewGzipLevelHandler(level int) func(http.Handler) http.Handler {
-	wrap, err := NewGzipLevelHandler(level)
-	if err != nil {
-		panic(err)
-	}
-	return wrap
-}
-
-// NewGzipLevelHandler returns a wrapper function (often known as middleware)
-// which can be used to wrap an HTTP handler to transparently gzip the response
-// body if the client supports it (via the Accept-Encoding header). Responses will
-// be encoded at the given gzip compression level. An error will be returned only
-// if an invalid gzip compression level is given, so if one can ensure the level
-// is valid, the returned error can be safely ignored.
-func NewGzipLevelHandler(level int) (func(http.Handler) http.Handler, error) {
-	return NewGzipLevelAndMinSize(level, DefaultMinSize)
-}
-
-// NewGzipLevelAndMinSize behave as NewGzipLevelHandler except it let the caller
-// specify the minimum size before compression.
-func NewGzipLevelAndMinSize(level, minSize int) (func(http.Handler) http.Handler, error) {
-	return GzipHandlerWithOpts(CompressionLevel(level), MinSize(minSize))
-}
-
-func GzipHandlerWithOpts(opts ...option) (func(http.Handler) http.Handler, error) {
-	c := &config{
-		level:   gzip.DefaultCompression,
-		minSize: DefaultMinSize,
-	}
-
-	for _, o := range opts {
-		o(c)
-	}
-
-	if err := c.validate(); err != nil {
-		return nil, err
-	}
-
-	return func(h http.Handler) http.Handler {
-		index := poolIndex(c.level)
-
-		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-			w.Header().Add(vary, acceptEncoding)
-			if acceptsGzip(r) {
-				gw := &GzipResponseWriter{
-					ResponseWriter: w,
-					index:          index,
-					minSize:        c.minSize,
-					contentTypes:   c.contentTypes,
-				}
-				defer gw.Close()
-
-				if _, ok := w.(http.CloseNotifier); ok {
-					gwcn := GzipResponseWriterWithCloseNotify{gw}
-					h.ServeHTTP(gwcn, r)
-				} else {
-					h.ServeHTTP(gw, r)
-				}
-
-			} else {
-				h.ServeHTTP(w, r)
-			}
-		})
-	}, nil
-}
-
-// Parsed representation of one of the inputs to ContentTypes.
-// See https://golang.org/pkg/mime/#ParseMediaType
-type parsedContentType struct {
-	mediaType string
-	params    map[string]string
-}
-
-// equals returns whether this content type matches another content type.
-func (pct parsedContentType) equals(mediaType string, params map[string]string) bool {
-	if pct.mediaType != mediaType {
-		return false
-	}
-	// if pct has no params, don't care about other's params
-	if len(pct.params) == 0 {
-		return true
-	}
-
-	// if pct has any params, they must be identical to other's.
-	if len(pct.params) != len(params) {
-		return false
-	}
-	for k, v := range pct.params {
-		if w, ok := params[k]; !ok || v != w {
-			return false
-		}
-	}
-	return true
-}
-
-// Used for functional configuration.
-type config struct {
-	minSize      int
-	level        int
-	contentTypes []parsedContentType
-}
-
-func (c *config) validate() error {
-	if c.level != gzip.DefaultCompression && (c.level < gzip.BestSpeed || c.level > gzip.BestCompression) {
-		return fmt.Errorf("invalid compression level requested: %d", c.level)
-	}
-
-	if c.minSize < 0 {
-		return fmt.Errorf("minimum size must be more than zero")
-	}
-
-	return nil
-}
-
-type option func(c *config)
-
-func MinSize(size int) option {
-	return func(c *config) {
-		c.minSize = size
-	}
-}
-
-func CompressionLevel(level int) option {
-	return func(c *config) {
-		c.level = level
-	}
-}
-
-// ContentTypes specifies a list of content types to compare
-// the Content-Type header to before compressing. If none
-// match, the response will be returned as-is.
-//
-// Content types are compared in a case-insensitive, whitespace-ignored
-// manner.
-//
-// A MIME type without any other directive will match a content type
-// that has the same MIME type, regardless of that content type's other
-// directives. I.e., "text/html" will match both "text/html" and
-// "text/html; charset=utf-8".
-//
-// A MIME type with any other directive will only match a content type
-// that has the same MIME type and other directives. I.e.,
-// "text/html; charset=utf-8" will only match "text/html; charset=utf-8".
-//
-// By default, responses are gzipped regardless of
-// Content-Type.
-func ContentTypes(types []string) option {
-	return func(c *config) {
-		c.contentTypes = []parsedContentType{}
-		for _, v := range types {
-			mediaType, params, err := mime.ParseMediaType(v)
-			if err == nil {
-				c.contentTypes = append(c.contentTypes, parsedContentType{mediaType, params})
-			}
-		}
-	}
-}
-
-// GzipHandler wraps an HTTP handler, to transparently gzip the response body if
-// the client supports it (via the Accept-Encoding header). This will compress at
-// the default compression level.
-func GzipHandler(h http.Handler) http.Handler {
-	wrapper, _ := NewGzipLevelHandler(gzip.DefaultCompression)
-	return wrapper(h)
-}
-
-// acceptsGzip returns true if the given HTTP request indicates that it will
-// accept a gzipped response.
-func acceptsGzip(r *http.Request) bool {
-	acceptedEncodings, _ := parseEncodings(r.Header.Get(acceptEncoding))
-	return acceptedEncodings["gzip"] > 0.0
-}
-
-// returns true if we've been configured to compress the specific content type.
-func handleContentType(contentTypes []parsedContentType, ct string) bool {
-	// If contentTypes is empty we handle all content types.
-	if len(contentTypes) == 0 {
-		return true
-	}
-
-	mediaType, params, err := mime.ParseMediaType(ct)
-	if err != nil {
-		return false
-	}
-
-	for _, c := range contentTypes {
-		if c.equals(mediaType, params) {
-			return true
-		}
-	}
-
-	return false
-}
-
-// parseEncodings attempts to parse a list of codings, per RFC 2616, as might
-// appear in an Accept-Encoding header. It returns a map of content-codings to
-// quality values, and an error containing the errors encountered. It's probably
-// safe to ignore those, because silently ignoring errors is how the internet
-// works.
-//
-// See: http://tools.ietf.org/html/rfc2616#section-14.3.
-func parseEncodings(s string) (codings, error) {
-	c := make(codings)
-	var e []string
-
-	for _, ss := range strings.Split(s, ",") {
-		coding, qvalue, err := parseCoding(ss)
-
-		if err != nil {
-			e = append(e, err.Error())
-		} else {
-			c[coding] = qvalue
-		}
-	}
-
-	// TODO (adammck): Use a proper multi-error struct, so the individual errors
-	//                 can be extracted if anyone cares.
-	if len(e) > 0 {
-		return c, fmt.Errorf("errors while parsing encodings: %s", strings.Join(e, ", "))
-	}
-
-	return c, nil
-}
-
-// parseCoding parses a single conding (content-coding with an optional qvalue),
-// as might appear in an Accept-Encoding header. It attempts to forgive minor
-// formatting errors.
-func parseCoding(s string) (coding string, qvalue float64, err error) {
-	for n, part := range strings.Split(s, ";") {
-		part = strings.TrimSpace(part)
-		qvalue = DefaultQValue
-
-		if n == 0 {
-			coding = strings.ToLower(part)
-		} else if strings.HasPrefix(part, "q=") {
-			qvalue, err = strconv.ParseFloat(strings.TrimPrefix(part, "q="), 64)
-
-			if qvalue < 0.0 {
-				qvalue = 0.0
-			} else if qvalue > 1.0 {
-				qvalue = 1.0
-			}
-		}
-	}
-
-	if coding == "" {
-		err = fmt.Errorf("empty content-coding")
-	}
-
-	return
-}
diff --git a/vendor/github.com/NYTimes/gziphandler/gzip_go18.go b/vendor/github.com/NYTimes/gziphandler/gzip_go18.go
deleted file mode 100644
index fa9665b7e80..00000000000
--- a/vendor/github.com/NYTimes/gziphandler/gzip_go18.go
+++ /dev/null
@@ -1,43 +0,0 @@
-// +build go1.8
-
-package gziphandler
-
-import "net/http"
-
-// Push initiates an HTTP/2 server push.
-// Push returns ErrNotSupported if the client has disabled push or if push
-// is not supported on the underlying connection.
-func (w *GzipResponseWriter) Push(target string, opts *http.PushOptions) error {
-	pusher, ok := w.ResponseWriter.(http.Pusher)
-	if ok && pusher != nil {
-		return pusher.Push(target, setAcceptEncodingForPushOptions(opts))
-	}
-	return http.ErrNotSupported
-}
-
-// setAcceptEncodingForPushOptions sets "Accept-Encoding" : "gzip" for PushOptions without overriding existing headers.
-func setAcceptEncodingForPushOptions(opts *http.PushOptions) *http.PushOptions {
-
-	if opts == nil {
-		opts = &http.PushOptions{
-			Header: http.Header{
-				acceptEncoding: []string{"gzip"},
-			},
-		}
-		return opts
-	}
-
-	if opts.Header == nil {
-		opts.Header = http.Header{
-			acceptEncoding: []string{"gzip"},
-		}
-		return opts
-	}
-
-	if encoding := opts.Header.Get(acceptEncoding); encoding == "" {
-		opts.Header.Add(acceptEncoding, "gzip")
-		return opts
-	}
-
-	return opts
-}
diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go
new file mode 100644
index 00000000000..82882961a01
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/deflate.go
@@ -0,0 +1,989 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Copyright (c) 2015 Klaus Post
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+import (
+	"encoding/binary"
+	"fmt"
+	"io"
+	"math"
+)
+
+const (
+	NoCompression      = 0
+	BestSpeed          = 1
+	BestCompression    = 9
+	DefaultCompression = -1
+
+	// HuffmanOnly disables Lempel-Ziv match searching and only performs Huffman
+	// entropy encoding. This mode is useful in compressing data that has
+	// already been compressed with an LZ style algorithm (e.g. Snappy or LZ4)
+	// that lacks an entropy encoder. Compression gains are achieved when
+	// certain bytes in the input stream occur more frequently than others.
+	//
+	// Note that HuffmanOnly produces a compressed output that is
+	// RFC 1951 compliant. That is, any valid DEFLATE decompressor will
+	// continue to be able to decompress this output.
+	HuffmanOnly         = -2
+	ConstantCompression = HuffmanOnly // compatibility alias.
+
+	logWindowSize    = 15
+	windowSize       = 1 << logWindowSize
+	windowMask       = windowSize - 1
+	logMaxOffsetSize = 15  // Standard DEFLATE
+	minMatchLength   = 4   // The smallest match that the compressor looks for
+	maxMatchLength   = 258 // The longest match for the compressor
+	minOffsetSize    = 1   // The shortest offset that makes any sense
+
+	// The maximum number of tokens we will encode at the time.
+	// Smaller sizes usually creates less optimal blocks.
+	// Bigger can make context switching slow.
+	// We use this for levels 7-9, so we make it big.
+	maxFlateBlockTokens = 1 << 15
+	maxStoreBlockSize   = 65535
+	hashBits            = 17 // After 17 performance degrades
+	hashSize            = 1 << hashBits
+	hashMask            = (1 << hashBits) - 1
+	hashShift           = (hashBits + minMatchLength - 1) / minMatchLength
+	maxHashOffset       = 1 << 28
+
+	skipNever = math.MaxInt32
+
+	debugDeflate = false
+)
+
+type compressionLevel struct {
+	good, lazy, nice, chain, fastSkipHashing, level int
+}
+
+// Compression levels have been rebalanced from zlib deflate defaults
+// to give a bigger spread in speed and compression.
+// See https://blog.klauspost.com/rebalancing-deflate-compression-levels/
+var levels = []compressionLevel{
+	{}, // 0
+	// Level 1-6 uses specialized algorithm - values not used
+	{0, 0, 0, 0, 0, 1},
+	{0, 0, 0, 0, 0, 2},
+	{0, 0, 0, 0, 0, 3},
+	{0, 0, 0, 0, 0, 4},
+	{0, 0, 0, 0, 0, 5},
+	{0, 0, 0, 0, 0, 6},
+	// Levels 7-9 use increasingly more lazy matching
+	// and increasingly stringent conditions for "good enough".
+	{8, 12, 16, 24, skipNever, 7},
+	{16, 30, 40, 64, skipNever, 8},
+	{32, 258, 258, 1024, skipNever, 9},
+}
+
+// advancedState contains state for the advanced levels, with bigger hash tables, etc.
+type advancedState struct {
+	// deflate state
+	length         int
+	offset         int
+	maxInsertIndex int
+	chainHead      int
+	hashOffset     int
+
+	ii uint16 // position of last match, intended to overflow to reset.
+
+	// input window: unprocessed data is window[index:windowEnd]
+	index          int
+	estBitsPerByte int
+	hashMatch      [maxMatchLength + minMatchLength]uint32
+
+	// Input hash chains
+	// hashHead[hashValue] contains the largest inputIndex with the specified hash value
+	// If hashHead[hashValue] is within the current window, then
+	// hashPrev[hashHead[hashValue] & windowMask] contains the previous index
+	// with the same hash value.
+	hashHead [hashSize]uint32
+	hashPrev [windowSize]uint32
+}
+
+type compressor struct {
+	compressionLevel
+
+	h *huffmanEncoder
+	w *huffmanBitWriter
+
+	// compression algorithm
+	fill func(*compressor, []byte) int // copy data to window
+	step func(*compressor)             // process window
+
+	window     []byte
+	windowEnd  int
+	blockStart int // window index where current tokens start
+	err        error
+
+	// queued output tokens
+	tokens tokens
+	fast   fastEnc
+	state  *advancedState
+
+	sync          bool // requesting flush
+	byteAvailable bool // if true, still need to process window[index-1].
+}
+
+func (d *compressor) fillDeflate(b []byte) int {
+	s := d.state
+	if s.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
+		// shift the window by windowSize
+		//copy(d.window[:], d.window[windowSize:2*windowSize])
+		*(*[windowSize]byte)(d.window) = *(*[windowSize]byte)(d.window[windowSize:])
+		s.index -= windowSize
+		d.windowEnd -= windowSize
+		if d.blockStart >= windowSize {
+			d.blockStart -= windowSize
+		} else {
+			d.blockStart = math.MaxInt32
+		}
+		s.hashOffset += windowSize
+		if s.hashOffset > maxHashOffset {
+			delta := s.hashOffset - 1
+			s.hashOffset -= delta
+			s.chainHead -= delta
+			// Iterate over slices instead of arrays to avoid copying
+			// the entire table onto the stack (Issue #18625).
+			for i, v := range s.hashPrev[:] {
+				if int(v) > delta {
+					s.hashPrev[i] = uint32(int(v) - delta)
+				} else {
+					s.hashPrev[i] = 0
+				}
+			}
+			for i, v := range s.hashHead[:] {
+				if int(v) > delta {
+					s.hashHead[i] = uint32(int(v) - delta)
+				} else {
+					s.hashHead[i] = 0
+				}
+			}
+		}
+	}
+	n := copy(d.window[d.windowEnd:], b)
+	d.windowEnd += n
+	return n
+}
+
+func (d *compressor) writeBlock(tok *tokens, index int, eof bool) error {
+	if index > 0 || eof {
+		var window []byte
+		if d.blockStart <= index {
+			window = d.window[d.blockStart:index]
+		}
+		d.blockStart = index
+		//d.w.writeBlock(tok, eof, window)
+		d.w.writeBlockDynamic(tok, eof, window, d.sync)
+		return d.w.err
+	}
+	return nil
+}
+
+// writeBlockSkip writes the current block and uses the number of tokens
+// to determine if the block should be stored on no matches, or
+// only huffman encoded.
+func (d *compressor) writeBlockSkip(tok *tokens, index int, eof bool) error {
+	if index > 0 || eof {
+		if d.blockStart <= index {
+			window := d.window[d.blockStart:index]
+			// If we removed less than a 64th of all literals
+			// we huffman compress the block.
+			if int(tok.n) > len(window)-int(tok.n>>6) {
+				d.w.writeBlockHuff(eof, window, d.sync)
+			} else {
+				// Write a dynamic huffman block.
+				d.w.writeBlockDynamic(tok, eof, window, d.sync)
+			}
+		} else {
+			d.w.writeBlock(tok, eof, nil)
+		}
+		d.blockStart = index
+		return d.w.err
+	}
+	return nil
+}
+
+// fillWindow will fill the current window with the supplied
+// dictionary and calculate all hashes.
+// This is much faster than doing a full encode.
+// Should only be used after a start/reset.
+func (d *compressor) fillWindow(b []byte) {
+	// Do not fill window if we are in store-only or huffman mode.
+	if d.level <= 0 {
+		return
+	}
+	if d.fast != nil {
+		// encode the last data, but discard the result
+		if len(b) > maxMatchOffset {
+			b = b[len(b)-maxMatchOffset:]
+		}
+		d.fast.Encode(&d.tokens, b)
+		d.tokens.Reset()
+		return
+	}
+	s := d.state
+	// If we are given too much, cut it.
+	if len(b) > windowSize {
+		b = b[len(b)-windowSize:]
+	}
+	// Add all to window.
+	n := copy(d.window[d.windowEnd:], b)
+
+	// Calculate 256 hashes at the time (more L1 cache hits)
+	loops := (n + 256 - minMatchLength) / 256
+	for j := 0; j < loops; j++ {
+		startindex := j * 256
+		end := startindex + 256 + minMatchLength - 1
+		if end > n {
+			end = n
+		}
+		tocheck := d.window[startindex:end]
+		dstSize := len(tocheck) - minMatchLength + 1
+
+		if dstSize <= 0 {
+			continue
+		}
+
+		dst := s.hashMatch[:dstSize]
+		bulkHash4(tocheck, dst)
+		var newH uint32
+		for i, val := range dst {
+			di := i + startindex
+			newH = val & hashMask
+			// Get previous value with the same hash.
+			// Our chain should point to the previous value.
+			s.hashPrev[di&windowMask] = s.hashHead[newH]
+			// Set the head of the hash chain to us.
+			s.hashHead[newH] = uint32(di + s.hashOffset)
+		}
+	}
+	// Update window information.
+	d.windowEnd += n
+	s.index = n
+}
+
+// Try to find a match starting at index whose length is greater than prevSize.
+// We only look at chainCount possibilities before giving up.
+// pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead
+func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, offset int, ok bool) {
+	minMatchLook := maxMatchLength
+	if lookahead < minMatchLook {
+		minMatchLook = lookahead
+	}
+
+	win := d.window[0 : pos+minMatchLook]
+
+	// We quit when we get a match that's at least nice long
+	nice := len(win) - pos
+	if d.nice < nice {
+		nice = d.nice
+	}
+
+	// If we've got a match that's good enough, only look in 1/4 the chain.
+	tries := d.chain
+	length = minMatchLength - 1
+
+	wEnd := win[pos+length]
+	wPos := win[pos:]
+	minIndex := pos - windowSize
+	if minIndex < 0 {
+		minIndex = 0
+	}
+	offset = 0
+
+	if d.chain < 100 {
+		for i := prevHead; tries > 0; tries-- {
+			if wEnd == win[i+length] {
+				n := matchLen(win[i:i+minMatchLook], wPos)
+				if n > length {
+					length = n
+					offset = pos - i
+					ok = true
+					if n >= nice {
+						// The match is good enough that we don't try to find a better one.
+						break
+					}
+					wEnd = win[pos+n]
+				}
+			}
+			if i <= minIndex {
+				// hashPrev[i & windowMask] has already been overwritten, so stop now.
+				break
+			}
+			i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset
+			if i < minIndex {
+				break
+			}
+		}
+		return
+	}
+
+	// Minimum gain to accept a match.
+	cGain := 4
+
+	// Some like it higher (CSV), some like it lower (JSON)
+	const baseCost = 3
+	// Base is 4 bytes at with an additional cost.
+	// Matches must be better than this.
+
+	for i := prevHead; tries > 0; tries-- {
+		if wEnd == win[i+length] {
+			n := matchLen(win[i:i+minMatchLook], wPos)
+			if n > length {
+				// Calculate gain. Estimate
+				newGain := d.h.bitLengthRaw(wPos[:n]) - int(offsetExtraBits[offsetCode(uint32(pos-i))]) - baseCost - int(lengthExtraBits[lengthCodes[(n-3)&255]])
+
+				//fmt.Println("gain:", newGain, "prev:", cGain, "raw:", d.h.bitLengthRaw(wPos[:n]), "this-len:", n, "prev-len:", length)
+				if newGain > cGain {
+					length = n
+					offset = pos - i
+					cGain = newGain
+					ok = true
+					if n >= nice {
+						// The match is good enough that we don't try to find a better one.
+						break
+					}
+					wEnd = win[pos+n]
+				}
+			}
+		}
+		if i <= minIndex {
+			// hashPrev[i & windowMask] has already been overwritten, so stop now.
+			break
+		}
+		i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset
+		if i < minIndex {
+			break
+		}
+	}
+	return
+}
+
+func (d *compressor) writeStoredBlock(buf []byte) error {
+	if d.w.writeStoredHeader(len(buf), false); d.w.err != nil {
+		return d.w.err
+	}
+	d.w.writeBytes(buf)
+	return d.w.err
+}
+
+// hash4 returns a hash representation of the first 4 bytes
+// of the supplied slice.
+// The caller must ensure that len(b) >= 4.
+func hash4(b []byte) uint32 {
+	return hash4u(binary.LittleEndian.Uint32(b), hashBits)
+}
+
+// hash4 returns the hash of u to fit in a hash table with h bits.
+// Preferably h should be a constant and should always be <32.
+func hash4u(u uint32, h uint8) uint32 {
+	return (u * prime4bytes) >> (32 - h)
+}
+
+// bulkHash4 will compute hashes using the same
+// algorithm as hash4
+func bulkHash4(b []byte, dst []uint32) {
+	if len(b) < 4 {
+		return
+	}
+	hb := binary.LittleEndian.Uint32(b)
+
+	dst[0] = hash4u(hb, hashBits)
+	end := len(b) - 4 + 1
+	for i := 1; i < end; i++ {
+		hb = (hb >> 8) | uint32(b[i+3])<<24
+		dst[i] = hash4u(hb, hashBits)
+	}
+}
+
+func (d *compressor) initDeflate() {
+	d.window = make([]byte, 2*windowSize)
+	d.byteAvailable = false
+	d.err = nil
+	if d.state == nil {
+		return
+	}
+	s := d.state
+	s.index = 0
+	s.hashOffset = 1
+	s.length = minMatchLength - 1
+	s.offset = 0
+	s.chainHead = -1
+}
+
+// deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever,
+// meaning it always has lazy matching on.
+func (d *compressor) deflateLazy() {
+	s := d.state
+	// Sanity enables additional runtime tests.
+	// It's intended to be used during development
+	// to supplement the currently ad-hoc unit tests.
+	const sanity = debugDeflate
+
+	if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync {
+		return
+	}
+	if d.windowEnd != s.index && d.chain > 100 {
+		// Get literal huffman coder.
+		if d.h == nil {
+			d.h = newHuffmanEncoder(maxFlateBlockTokens)
+		}
+		var tmp [256]uint16
+		for _, v := range d.window[s.index:d.windowEnd] {
+			tmp[v]++
+		}
+		d.h.generate(tmp[:], 15)
+	}
+
+	s.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
+
+	for {
+		if sanity && s.index > d.windowEnd {
+			panic("index > windowEnd")
+		}
+		lookahead := d.windowEnd - s.index
+		if lookahead < minMatchLength+maxMatchLength {
+			if !d.sync {
+				return
+			}
+			if sanity && s.index > d.windowEnd {
+				panic("index > windowEnd")
+			}
+			if lookahead == 0 {
+				// Flush current output block if any.
+				if d.byteAvailable {
+					// There is still one pending token that needs to be flushed
+					d.tokens.AddLiteral(d.window[s.index-1])
+					d.byteAvailable = false
+				}
+				if d.tokens.n > 0 {
+					if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
+						return
+					}
+					d.tokens.Reset()
+				}
+				return
+			}
+		}
+		if s.index < s.maxInsertIndex {
+			// Update the hash
+			hash := hash4(d.window[s.index:])
+			ch := s.hashHead[hash]
+			s.chainHead = int(ch)
+			s.hashPrev[s.index&windowMask] = ch
+			s.hashHead[hash] = uint32(s.index + s.hashOffset)
+		}
+		prevLength := s.length
+		prevOffset := s.offset
+		s.length = minMatchLength - 1
+		s.offset = 0
+		minIndex := s.index - windowSize
+		if minIndex < 0 {
+			minIndex = 0
+		}
+
+		if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy {
+			if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, lookahead); ok {
+				s.length = newLength
+				s.offset = newOffset
+			}
+		}
+
+		if prevLength >= minMatchLength && s.length <= prevLength {
+			// No better match, but check for better match at end...
+			//
+			// Skip forward a number of bytes.
+			// Offset of 2 seems to yield best results. 3 is sometimes better.
+			const checkOff = 2
+
+			// Check all, except full length
+			if prevLength < maxMatchLength-checkOff {
+				prevIndex := s.index - 1
+				if prevIndex+prevLength < s.maxInsertIndex {
+					end := lookahead
+					if lookahead > maxMatchLength+checkOff {
+						end = maxMatchLength + checkOff
+					}
+					end += prevIndex
+
+					// Hash at match end.
+					h := hash4(d.window[prevIndex+prevLength:])
+					ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength
+					if prevIndex-ch2 != prevOffset && ch2 > minIndex+checkOff {
+						length := matchLen(d.window[prevIndex+checkOff:end], d.window[ch2+checkOff:])
+						// It seems like a pure length metric is best.
+						if length > prevLength {
+							prevLength = length
+							prevOffset = prevIndex - ch2
+
+							// Extend back...
+							for i := checkOff - 1; i >= 0; i-- {
+								if prevLength >= maxMatchLength || d.window[prevIndex+i] != d.window[ch2+i] {
+									// Emit tokens we "owe"
+									for j := 0; j <= i; j++ {
+										d.tokens.AddLiteral(d.window[prevIndex+j])
+										if d.tokens.n == maxFlateBlockTokens {
+											// The block includes the current character
+											if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
+												return
+											}
+											d.tokens.Reset()
+										}
+										s.index++
+										if s.index < s.maxInsertIndex {
+											h := hash4(d.window[s.index:])
+											ch := s.hashHead[h]
+											s.chainHead = int(ch)
+											s.hashPrev[s.index&windowMask] = ch
+											s.hashHead[h] = uint32(s.index + s.hashOffset)
+										}
+									}
+									break
+								} else {
+									prevLength++
+								}
+							}
+						} else if false {
+							// Check one further ahead.
+							// Only rarely better, disabled for now.
+							prevIndex++
+							h := hash4(d.window[prevIndex+prevLength:])
+							ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength
+							if prevIndex-ch2 != prevOffset && ch2 > minIndex+checkOff {
+								length := matchLen(d.window[prevIndex+checkOff:end], d.window[ch2+checkOff:])
+								// It seems like a pure length metric is best.
+								if length > prevLength+checkOff {
+									prevLength = length
+									prevOffset = prevIndex - ch2
+									prevIndex--
+
+									// Extend back...
+									for i := checkOff; i >= 0; i-- {
+										if prevLength >= maxMatchLength || d.window[prevIndex+i] != d.window[ch2+i-1] {
+											// Emit tokens we "owe"
+											for j := 0; j <= i; j++ {
+												d.tokens.AddLiteral(d.window[prevIndex+j])
+												if d.tokens.n == maxFlateBlockTokens {
+													// The block includes the current character
+													if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
+														return
+													}
+													d.tokens.Reset()
+												}
+												s.index++
+												if s.index < s.maxInsertIndex {
+													h := hash4(d.window[s.index:])
+													ch := s.hashHead[h]
+													s.chainHead = int(ch)
+													s.hashPrev[s.index&windowMask] = ch
+													s.hashHead[h] = uint32(s.index + s.hashOffset)
+												}
+											}
+											break
+										} else {
+											prevLength++
+										}
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+			// There was a match at the previous step, and the current match is
+			// not better. Output the previous match.
+			d.tokens.AddMatch(uint32(prevLength-3), uint32(prevOffset-minOffsetSize))
+
+			// Insert in the hash table all strings up to the end of the match.
+			// index and index-1 are already inserted. If there is not enough
+			// lookahead, the last two strings are not inserted into the hash
+			// table.
+			newIndex := s.index + prevLength - 1
+			// Calculate missing hashes
+			end := newIndex
+			if end > s.maxInsertIndex {
+				end = s.maxInsertIndex
+			}
+			end += minMatchLength - 1
+			startindex := s.index + 1
+			if startindex > s.maxInsertIndex {
+				startindex = s.maxInsertIndex
+			}
+			tocheck := d.window[startindex:end]
+			dstSize := len(tocheck) - minMatchLength + 1
+			if dstSize > 0 {
+				dst := s.hashMatch[:dstSize]
+				bulkHash4(tocheck, dst)
+				var newH uint32
+				for i, val := range dst {
+					di := i + startindex
+					newH = val & hashMask
+					// Get previous value with the same hash.
+					// Our chain should point to the previous value.
+					s.hashPrev[di&windowMask] = s.hashHead[newH]
+					// Set the head of the hash chain to us.
+					s.hashHead[newH] = uint32(di + s.hashOffset)
+				}
+			}
+
+			s.index = newIndex
+			d.byteAvailable = false
+			s.length = minMatchLength - 1
+			if d.tokens.n == maxFlateBlockTokens {
+				// The block includes the current character
+				if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
+					return
+				}
+				d.tokens.Reset()
+			}
+			s.ii = 0
+		} else {
+			// Reset, if we got a match this run.
+			if s.length >= minMatchLength {
+				s.ii = 0
+			}
+			// We have a byte waiting. Emit it.
+			if d.byteAvailable {
+				s.ii++
+				d.tokens.AddLiteral(d.window[s.index-1])
+				if d.tokens.n == maxFlateBlockTokens {
+					if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
+						return
+					}
+					d.tokens.Reset()
+				}
+				s.index++
+
+				// If we have a long run of no matches, skip additional bytes
+				// Resets when s.ii overflows after 64KB.
+				if n := int(s.ii) - d.chain; n > 0 {
+					n = 1 + int(n>>6)
+					for j := 0; j < n; j++ {
+						if s.index >= d.windowEnd-1 {
+							break
+						}
+						d.tokens.AddLiteral(d.window[s.index-1])
+						if d.tokens.n == maxFlateBlockTokens {
+							if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
+								return
+							}
+							d.tokens.Reset()
+						}
+						// Index...
+						if s.index < s.maxInsertIndex {
+							h := hash4(d.window[s.index:])
+							ch := s.hashHead[h]
+							s.chainHead = int(ch)
+							s.hashPrev[s.index&windowMask] = ch
+							s.hashHead[h] = uint32(s.index + s.hashOffset)
+						}
+						s.index++
+					}
+					// Flush last byte
+					d.tokens.AddLiteral(d.window[s.index-1])
+					d.byteAvailable = false
+					// s.length = minMatchLength - 1 // not needed, since s.ii is reset above, so it should never be > minMatchLength
+					if d.tokens.n == maxFlateBlockTokens {
+						if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
+							return
+						}
+						d.tokens.Reset()
+					}
+				}
+			} else {
+				s.index++
+				d.byteAvailable = true
+			}
+		}
+	}
+}
+
+func (d *compressor) store() {
+	if d.windowEnd > 0 && (d.windowEnd == maxStoreBlockSize || d.sync) {
+		d.err = d.writeStoredBlock(d.window[:d.windowEnd])
+		d.windowEnd = 0
+	}
+}
+
+// fillWindow will fill the buffer with data for huffman-only compression.
+// The number of bytes copied is returned.
+func (d *compressor) fillBlock(b []byte) int {
+	n := copy(d.window[d.windowEnd:], b)
+	d.windowEnd += n
+	return n
+}
+
+// storeHuff will compress and store the currently added data,
+// if enough has been accumulated or we at the end of the stream.
+// Any error that occurred will be in d.err
+func (d *compressor) storeHuff() {
+	if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 {
+		return
+	}
+	d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync)
+	d.err = d.w.err
+	d.windowEnd = 0
+}
+
+// storeFast will compress and store the currently added data,
+// if enough has been accumulated or we at the end of the stream.
+// Any error that occurred will be in d.err
+func (d *compressor) storeFast() {
+	// We only compress if we have maxStoreBlockSize.
+	if d.windowEnd < len(d.window) {
+		if !d.sync {
+			return
+		}
+		// Handle extremely small sizes.
+		if d.windowEnd < 128 {
+			if d.windowEnd == 0 {
+				return
+			}
+			if d.windowEnd <= 32 {
+				d.err = d.writeStoredBlock(d.window[:d.windowEnd])
+			} else {
+				d.w.writeBlockHuff(false, d.window[:d.windowEnd], true)
+				d.err = d.w.err
+			}
+			d.tokens.Reset()
+			d.windowEnd = 0
+			d.fast.Reset()
+			return
+		}
+	}
+
+	d.fast.Encode(&d.tokens, d.window[:d.windowEnd])
+	// If we made zero matches, store the block as is.
+	if d.tokens.n == 0 {
+		d.err = d.writeStoredBlock(d.window[:d.windowEnd])
+		// If we removed less than 1/16th, huffman compress the block.
+	} else if int(d.tokens.n) > d.windowEnd-(d.windowEnd>>4) {
+		d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync)
+		d.err = d.w.err
+	} else {
+		d.w.writeBlockDynamic(&d.tokens, false, d.window[:d.windowEnd], d.sync)
+		d.err = d.w.err
+	}
+	d.tokens.Reset()
+	d.windowEnd = 0
+}
+
+// write will add input byte to the stream.
+// Unless an error occurs all bytes will be consumed.
+func (d *compressor) write(b []byte) (n int, err error) {
+	if d.err != nil {
+		return 0, d.err
+	}
+	n = len(b)
+	for len(b) > 0 {
+		if d.windowEnd == len(d.window) || d.sync {
+			d.step(d)
+		}
+		b = b[d.fill(d, b):]
+		if d.err != nil {
+			return 0, d.err
+		}
+	}
+	return n, d.err
+}
+
+func (d *compressor) syncFlush() error {
+	d.sync = true
+	if d.err != nil {
+		return d.err
+	}
+	d.step(d)
+	if d.err == nil {
+		d.w.writeStoredHeader(0, false)
+		d.w.flush()
+		d.err = d.w.err
+	}
+	d.sync = false
+	return d.err
+}
+
+func (d *compressor) init(w io.Writer, level int) (err error) {
+	d.w = newHuffmanBitWriter(w)
+
+	switch {
+	case level == NoCompression:
+		d.window = make([]byte, maxStoreBlockSize)
+		d.fill = (*compressor).fillBlock
+		d.step = (*compressor).store
+	case level == ConstantCompression:
+		d.w.logNewTablePenalty = 10
+		d.window = make([]byte, 32<<10)
+		d.fill = (*compressor).fillBlock
+		d.step = (*compressor).storeHuff
+	case level == DefaultCompression:
+		level = 5
+		fallthrough
+	case level >= 1 && level <= 6:
+		d.w.logNewTablePenalty = 7
+		d.fast = newFastEnc(level)
+		d.window = make([]byte, maxStoreBlockSize)
+		d.fill = (*compressor).fillBlock
+		d.step = (*compressor).storeFast
+	case 7 <= level && level <= 9:
+		d.w.logNewTablePenalty = 8
+		d.state = &advancedState{}
+		d.compressionLevel = levels[level]
+		d.initDeflate()
+		d.fill = (*compressor).fillDeflate
+		d.step = (*compressor).deflateLazy
+	default:
+		return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level)
+	}
+	d.level = level
+	return nil
+}
+
+// reset the state of the compressor.
+func (d *compressor) reset(w io.Writer) {
+	d.w.reset(w)
+	d.sync = false
+	d.err = nil
+	// We only need to reset a few things for Snappy.
+	if d.fast != nil {
+		d.fast.Reset()
+		d.windowEnd = 0
+		d.tokens.Reset()
+		return
+	}
+	switch d.compressionLevel.chain {
+	case 0:
+		// level was NoCompression or ConstantCompresssion.
+		d.windowEnd = 0
+	default:
+		s := d.state
+		s.chainHead = -1
+		for i := range s.hashHead {
+			s.hashHead[i] = 0
+		}
+		for i := range s.hashPrev {
+			s.hashPrev[i] = 0
+		}
+		s.hashOffset = 1
+		s.index, d.windowEnd = 0, 0
+		d.blockStart, d.byteAvailable = 0, false
+		d.tokens.Reset()
+		s.length = minMatchLength - 1
+		s.offset = 0
+		s.ii = 0
+		s.maxInsertIndex = 0
+	}
+}
+
+func (d *compressor) close() error {
+	if d.err != nil {
+		return d.err
+	}
+	d.sync = true
+	d.step(d)
+	if d.err != nil {
+		return d.err
+	}
+	if d.w.writeStoredHeader(0, true); d.w.err != nil {
+		return d.w.err
+	}
+	d.w.flush()
+	d.w.reset(nil)
+	return d.w.err
+}
+
+// NewWriter returns a new Writer compressing data at the given level.
+// Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression);
+// higher levels typically run slower but compress more.
+// Level 0 (NoCompression) does not attempt any compression; it only adds the
+// necessary DEFLATE framing.
+// Level -1 (DefaultCompression) uses the default compression level.
+// Level -2 (ConstantCompression) will use Huffman compression only, giving
+// a very fast compression for all types of input, but sacrificing considerable
+// compression efficiency.
+//
+// If level is in the range [-2, 9] then the error returned will be nil.
+// Otherwise the error returned will be non-nil.
+func NewWriter(w io.Writer, level int) (*Writer, error) {
+	var dw Writer
+	if err := dw.d.init(w, level); err != nil {
+		return nil, err
+	}
+	return &dw, nil
+}
+
+// NewWriterDict is like NewWriter but initializes the new
+// Writer with a preset dictionary.  The returned Writer behaves
+// as if the dictionary had been written to it without producing
+// any compressed output.  The compressed data written to w
+// can only be decompressed by a Reader initialized with the
+// same dictionary.
+func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
+	zw, err := NewWriter(w, level)
+	if err != nil {
+		return nil, err
+	}
+	zw.d.fillWindow(dict)
+	zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method.
+	return zw, err
+}
+
+// A Writer takes data written to it and writes the compressed
+// form of that data to an underlying writer (see NewWriter).
+type Writer struct {
+	d    compressor
+	dict []byte
+}
+
+// Write writes data to w, which will eventually write the
+// compressed form of data to its underlying writer.
+func (w *Writer) Write(data []byte) (n int, err error) {
+	return w.d.write(data)
+}
+
+// Flush flushes any pending data to the underlying writer.
+// It is useful mainly in compressed network protocols, to ensure that
+// a remote reader has enough data to reconstruct a packet.
+// Flush does not return until the data has been written.
+// Calling Flush when there is no pending data still causes the Writer
+// to emit a sync marker of at least 4 bytes.
+// If the underlying writer returns an error, Flush returns that error.
+//
+// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
+func (w *Writer) Flush() error {
+	// For more about flushing:
+	// http://www.bolet.org/~pornin/deflate-flush.html
+	return w.d.syncFlush()
+}
+
+// Close flushes and closes the writer.
+func (w *Writer) Close() error {
+	return w.d.close()
+}
+
+// Reset discards the writer's state and makes it equivalent to
+// the result of NewWriter or NewWriterDict called with dst
+// and w's level and dictionary.
+func (w *Writer) Reset(dst io.Writer) {
+	if len(w.dict) > 0 {
+		// w was created with NewWriterDict
+		w.d.reset(dst)
+		if dst != nil {
+			w.d.fillWindow(w.dict)
+		}
+	} else {
+		// w was created with NewWriter
+		w.d.reset(dst)
+	}
+}
+
+// ResetDict discards the writer's state and makes it equivalent to
+// the result of NewWriter or NewWriterDict called with dst
+// and w's level, but sets a specific dictionary.
+func (w *Writer) ResetDict(dst io.Writer, dict []byte) {
+	w.dict = dict
+	w.d.reset(dst)
+	w.d.fillWindow(w.dict)
+}
diff --git a/vendor/github.com/klauspost/compress/flate/dict_decoder.go b/vendor/github.com/klauspost/compress/flate/dict_decoder.go
new file mode 100644
index 00000000000..bb36351a5af
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/dict_decoder.go
@@ -0,0 +1,184 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+// dictDecoder implements the LZ77 sliding dictionary as used in decompression.
+// LZ77 decompresses data through sequences of two forms of commands:
+//
+//   - Literal insertions: Runs of one or more symbols are inserted into the data
+//     stream as is. This is accomplished through the writeByte method for a
+//     single symbol, or combinations of writeSlice/writeMark for multiple symbols.
+//     Any valid stream must start with a literal insertion if no preset dictionary
+//     is used.
+//
+//   - Backward copies: Runs of one or more symbols are copied from previously
+//     emitted data. Backward copies come as the tuple (dist, length) where dist
+//     determines how far back in the stream to copy from and length determines how
+//     many bytes to copy. Note that it is valid for the length to be greater than
+//     the distance. Since LZ77 uses forward copies, that situation is used to
+//     perform a form of run-length encoding on repeated runs of symbols.
+//     The writeCopy and tryWriteCopy are used to implement this command.
+//
+// For performance reasons, this implementation performs little to no sanity
+// checks about the arguments. As such, the invariants documented for each
+// method call must be respected.
+type dictDecoder struct {
+	hist []byte // Sliding window history
+
+	// Invariant: 0 <= rdPos <= wrPos <= len(hist)
+	wrPos int  // Current output position in buffer
+	rdPos int  // Have emitted hist[:rdPos] already
+	full  bool // Has a full window length been written yet?
+}
+
+// init initializes dictDecoder to have a sliding window dictionary of the given
+// size. If a preset dict is provided, it will initialize the dictionary with
+// the contents of dict.
+func (dd *dictDecoder) init(size int, dict []byte) {
+	*dd = dictDecoder{hist: dd.hist}
+
+	if cap(dd.hist) < size {
+		dd.hist = make([]byte, size)
+	}
+	dd.hist = dd.hist[:size]
+
+	if len(dict) > len(dd.hist) {
+		dict = dict[len(dict)-len(dd.hist):]
+	}
+	dd.wrPos = copy(dd.hist, dict)
+	if dd.wrPos == len(dd.hist) {
+		dd.wrPos = 0
+		dd.full = true
+	}
+	dd.rdPos = dd.wrPos
+}
+
+// histSize reports the total amount of historical data in the dictionary.
+func (dd *dictDecoder) histSize() int {
+	if dd.full {
+		return len(dd.hist)
+	}
+	return dd.wrPos
+}
+
+// availRead reports the number of bytes that can be flushed by readFlush.
+func (dd *dictDecoder) availRead() int {
+	return dd.wrPos - dd.rdPos
+}
+
+// availWrite reports the available amount of output buffer space.
+func (dd *dictDecoder) availWrite() int {
+	return len(dd.hist) - dd.wrPos
+}
+
+// writeSlice returns a slice of the available buffer to write data to.
+//
+// This invariant will be kept: len(s) <= availWrite()
+func (dd *dictDecoder) writeSlice() []byte {
+	return dd.hist[dd.wrPos:]
+}
+
+// writeMark advances the writer pointer by cnt.
+//
+// This invariant must be kept: 0 <= cnt <= availWrite()
+func (dd *dictDecoder) writeMark(cnt int) {
+	dd.wrPos += cnt
+}
+
+// writeByte writes a single byte to the dictionary.
+//
+// This invariant must be kept: 0 < availWrite()
+func (dd *dictDecoder) writeByte(c byte) {
+	dd.hist[dd.wrPos] = c
+	dd.wrPos++
+}
+
+// writeCopy copies a string at a given (dist, length) to the output.
+// This returns the number of bytes copied and may be less than the requested
+// length if the available space in the output buffer is too small.
+//
+// This invariant must be kept: 0 < dist <= histSize()
+func (dd *dictDecoder) writeCopy(dist, length int) int {
+	dstBase := dd.wrPos
+	dstPos := dstBase
+	srcPos := dstPos - dist
+	endPos := dstPos + length
+	if endPos > len(dd.hist) {
+		endPos = len(dd.hist)
+	}
+
+	// Copy non-overlapping section after destination position.
+	//
+	// This section is non-overlapping in that the copy length for this section
+	// is always less than or equal to the backwards distance. This can occur
+	// if a distance refers to data that wraps-around in the buffer.
+	// Thus, a backwards copy is performed here; that is, the exact bytes in
+	// the source prior to the copy is placed in the destination.
+	if srcPos < 0 {
+		srcPos += len(dd.hist)
+		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
+		srcPos = 0
+	}
+
+	// Copy possibly overlapping section before destination position.
+	//
+	// This section can overlap if the copy length for this section is larger
+	// than the backwards distance. This is allowed by LZ77 so that repeated
+	// strings can be succinctly represented using (dist, length) pairs.
+	// Thus, a forwards copy is performed here; that is, the bytes copied is
+	// possibly dependent on the resulting bytes in the destination as the copy
+	// progresses along. This is functionally equivalent to the following:
+	//
+	//	for i := 0; i < endPos-dstPos; i++ {
+	//		dd.hist[dstPos+i] = dd.hist[srcPos+i]
+	//	}
+	//	dstPos = endPos
+	//
+	for dstPos < endPos {
+		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
+	}
+
+	dd.wrPos = dstPos
+	return dstPos - dstBase
+}
+
+// tryWriteCopy tries to copy a string at a given (distance, length) to the
+// output. This specialized version is optimized for short distances.
+//
+// This method is designed to be inlined for performance reasons.
+//
+// This invariant must be kept: 0 < dist <= histSize()
+func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
+	dstPos := dd.wrPos
+	endPos := dstPos + length
+	if dstPos < dist || endPos > len(dd.hist) {
+		return 0
+	}
+	dstBase := dstPos
+	srcPos := dstPos - dist
+
+	// Copy possibly overlapping section before destination position.
+loop:
+	dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
+	if dstPos < endPos {
+		goto loop // Avoid for-loop so that this function can be inlined
+	}
+
+	dd.wrPos = dstPos
+	return dstPos - dstBase
+}
+
+// readFlush returns a slice of the historical buffer that is ready to be
+// emitted to the user. The data returned by readFlush must be fully consumed
+// before calling any other dictDecoder methods.
+func (dd *dictDecoder) readFlush() []byte {
+	toRead := dd.hist[dd.rdPos:dd.wrPos]
+	dd.rdPos = dd.wrPos
+	if dd.wrPos == len(dd.hist) {
+		dd.wrPos, dd.rdPos = 0, 0
+		dd.full = true
+	}
+	return toRead
+}
diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go
new file mode 100644
index 00000000000..24caf5f70b0
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go
@@ -0,0 +1,216 @@
+// Copyright 2011 The Snappy-Go Authors. All rights reserved.
+// Modified for deflate by Klaus Post (c) 2015.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+import (
+	"encoding/binary"
+	"fmt"
+	"math/bits"
+)
+
+type fastEnc interface {
+	Encode(dst *tokens, src []byte)
+	Reset()
+}
+
+func newFastEnc(level int) fastEnc {
+	switch level {
+	case 1:
+		return &fastEncL1{fastGen: fastGen{cur: maxStoreBlockSize}}
+	case 2:
+		return &fastEncL2{fastGen: fastGen{cur: maxStoreBlockSize}}
+	case 3:
+		return &fastEncL3{fastGen: fastGen{cur: maxStoreBlockSize}}
+	case 4:
+		return &fastEncL4{fastGen: fastGen{cur: maxStoreBlockSize}}
+	case 5:
+		return &fastEncL5{fastGen: fastGen{cur: maxStoreBlockSize}}
+	case 6:
+		return &fastEncL6{fastGen: fastGen{cur: maxStoreBlockSize}}
+	default:
+		panic("invalid level specified")
+	}
+}
+
+const (
+	tableBits       = 15             // Bits used in the table
+	tableSize       = 1 << tableBits // Size of the table
+	tableShift      = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.
+	baseMatchOffset = 1              // The smallest match offset
+	baseMatchLength = 3              // The smallest match length per the RFC section 3.2.5
+	maxMatchOffset  = 1 << 15        // The largest match offset
+
+	bTableBits   = 17                                               // Bits used in the big tables
+	bTableSize   = 1 << bTableBits                                  // Size of the table
+	allocHistory = maxStoreBlockSize * 5                            // Size to preallocate for history.
+	bufferReset  = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this.
+)
+
+const (
+	prime3bytes = 506832829
+	prime4bytes = 2654435761
+	prime5bytes = 889523592379
+	prime6bytes = 227718039650203
+	prime7bytes = 58295818150454627
+	prime8bytes = 0xcf1bbcdcb7a56463
+)
+
+func load3232(b []byte, i int32) uint32 {
+	return binary.LittleEndian.Uint32(b[i:])
+}
+
+func load6432(b []byte, i int32) uint64 {
+	return binary.LittleEndian.Uint64(b[i:])
+}
+
+type tableEntry struct {
+	offset int32
+}
+
+// fastGen maintains the table for matches,
+// and the previous byte block for level 2.
+// This is the generic implementation.
+type fastGen struct {
+	hist []byte
+	cur  int32
+}
+
+func (e *fastGen) addBlock(src []byte) int32 {
+	// check if we have space already
+	if len(e.hist)+len(src) > cap(e.hist) {
+		if cap(e.hist) == 0 {
+			e.hist = make([]byte, 0, allocHistory)
+		} else {
+			if cap(e.hist) < maxMatchOffset*2 {
+				panic("unexpected buffer size")
+			}
+			// Move down
+			offset := int32(len(e.hist)) - maxMatchOffset
+			// copy(e.hist[0:maxMatchOffset], e.hist[offset:])
+			*(*[maxMatchOffset]byte)(e.hist) = *(*[maxMatchOffset]byte)(e.hist[offset:])
+			e.cur += offset
+			e.hist = e.hist[:maxMatchOffset]
+		}
+	}
+	s := int32(len(e.hist))
+	e.hist = append(e.hist, src...)
+	return s
+}
+
+type tableEntryPrev struct {
+	Cur  tableEntry
+	Prev tableEntry
+}
+
+// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
+// Preferably h should be a constant and should always be <64.
+func hash7(u uint64, h uint8) uint32 {
+	return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & reg8SizeMask64))
+}
+
+// hashLen returns a hash of the lowest mls bytes of with length output bits.
+// mls must be >=3 and <=8. Any other value will return hash for 4 bytes.
+// length should always be < 32.
+// Preferably length and mls should be a constant for inlining.
+func hashLen(u uint64, length, mls uint8) uint32 {
+	switch mls {
+	case 3:
+		return (uint32(u<<8) * prime3bytes) >> (32 - length)
+	case 5:
+		return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - length))
+	case 6:
+		return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length))
+	case 7:
+		return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - length))
+	case 8:
+		return uint32((u * prime8bytes) >> (64 - length))
+	default:
+		return (uint32(u) * prime4bytes) >> (32 - length)
+	}
+}
+
+// matchlen will return the match length between offsets and t in src.
+// The maximum length returned is maxMatchLength - 4.
+// It is assumed that s > t, that t >=0 and s < len(src).
+func (e *fastGen) matchlen(s, t int32, src []byte) int32 {
+	if debugDecode {
+		if t >= s {
+			panic(fmt.Sprint("t >=s:", t, s))
+		}
+		if int(s) >= len(src) {
+			panic(fmt.Sprint("s >= len(src):", s, len(src)))
+		}
+		if t < 0 {
+			panic(fmt.Sprint("t < 0:", t))
+		}
+		if s-t > maxMatchOffset {
+			panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
+		}
+	}
+	s1 := int(s) + maxMatchLength - 4
+	if s1 > len(src) {
+		s1 = len(src)
+	}
+
+	// Extend the match to be as long as possible.
+	return int32(matchLen(src[s:s1], src[t:]))
+}
+
+// matchlenLong will return the match length between offsets and t in src.
+// It is assumed that s > t, that t >=0 and s < len(src).
+func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 {
+	if debugDeflate {
+		if t >= s {
+			panic(fmt.Sprint("t >=s:", t, s))
+		}
+		if int(s) >= len(src) {
+			panic(fmt.Sprint("s >= len(src):", s, len(src)))
+		}
+		if t < 0 {
+			panic(fmt.Sprint("t < 0:", t))
+		}
+		if s-t > maxMatchOffset {
+			panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
+		}
+	}
+	// Extend the match to be as long as possible.
+	return int32(matchLen(src[s:], src[t:]))
+}
+
+// Reset the encoding table.
+func (e *fastGen) Reset() {
+	if cap(e.hist) < allocHistory {
+		e.hist = make([]byte, 0, allocHistory)
+	}
+	// We offset current position so everything will be out of reach.
+	// If we are above the buffer reset it will be cleared anyway since len(hist) == 0.
+	if e.cur <= bufferReset {
+		e.cur += maxMatchOffset + int32(len(e.hist))
+	}
+	e.hist = e.hist[:0]
+}
+
+// matchLen returns the maximum length.
+// 'a' must be the shortest of the two.
+func matchLen(a, b []byte) int {
+	var checked int
+
+	for len(a) >= 8 {
+		if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
+			return checked + (bits.TrailingZeros64(diff) >> 3)
+		}
+		checked += 8
+		a = a[8:]
+		b = b[8:]
+	}
+	b = b[:len(a)]
+	for i := range a {
+		if a[i] != b[i] {
+			return i + checked
+		}
+	}
+	return len(a) + checked
+}
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
new file mode 100644
index 00000000000..89a5dd89f98
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
@@ -0,0 +1,1187 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+import (
+	"encoding/binary"
+	"fmt"
+	"io"
+	"math"
+)
+
+const (
+	// The largest offset code.
+	offsetCodeCount = 30
+
+	// The special code used to mark the end of a block.
+	endBlockMarker = 256
+
+	// The first length code.
+	lengthCodesStart = 257
+
+	// The number of codegen codes.
+	codegenCodeCount = 19
+	badCode          = 255
+
+	// maxPredefinedTokens is the maximum number of tokens
+	// where we check if fixed size is smaller.
+	maxPredefinedTokens = 250
+
+	// bufferFlushSize indicates the buffer size
+	// after which bytes are flushed to the writer.
+	// Should preferably be a multiple of 6, since
+	// we accumulate 6 bytes between writes to the buffer.
+	bufferFlushSize = 246
+
+	// bufferSize is the actual output byte buffer size.
+	// It must have additional headroom for a flush
+	// which can contain up to 8 bytes.
+	bufferSize = bufferFlushSize + 8
+)
+
+// Minimum length code that emits bits.
+const lengthExtraBitsMinCode = 8
+
+// The number of extra bits needed by length code X - LENGTH_CODES_START.
+var lengthExtraBits = [32]uint8{
+	/* 257 */ 0, 0, 0,
+	/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
+	/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
+	/* 280 */ 4, 5, 5, 5, 5, 0,
+}
+
+// The length indicated by length code X - LENGTH_CODES_START.
+var lengthBase = [32]uint8{
+	0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
+	12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
+	64, 80, 96, 112, 128, 160, 192, 224, 255,
+}
+
+// Minimum offset code that emits bits.
+const offsetExtraBitsMinCode = 4
+
+// offset code word extra bits.
+var offsetExtraBits = [32]int8{
+	0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
+	4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
+	9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
+	/* extended window */
+	14, 14,
+}
+
+var offsetCombined = [32]uint32{}
+
+func init() {
+	var offsetBase = [32]uint32{
+		/* normal deflate */
+		0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
+		0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
+		0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
+		0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
+		0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
+		0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
+
+		/* extended window */
+		0x008000, 0x00c000,
+	}
+
+	for i := range offsetCombined[:] {
+		// Don't use extended window values...
+		if offsetExtraBits[i] == 0 || offsetBase[i] > 0x006000 {
+			continue
+		}
+		offsetCombined[i] = uint32(offsetExtraBits[i]) | (offsetBase[i] << 8)
+	}
+}
+
+// The odd order in which the codegen code sizes are written.
+var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
+
+type huffmanBitWriter struct {
+	// writer is the underlying writer.
+	// Do not use it directly; use the write method, which ensures
+	// that Write errors are sticky.
+	writer io.Writer
+
+	// Data waiting to be written is bytes[0:nbytes]
+	// and then the low nbits of bits.
+	bits            uint64
+	nbits           uint8
+	nbytes          uint8
+	lastHuffMan     bool
+	literalEncoding *huffmanEncoder
+	tmpLitEncoding  *huffmanEncoder
+	offsetEncoding  *huffmanEncoder
+	codegenEncoding *huffmanEncoder
+	err             error
+	lastHeader      int
+	// Set between 0 (reused block can be up to 2x the size)
+	logNewTablePenalty uint
+	bytes              [256 + 8]byte
+	literalFreq        [lengthCodesStart + 32]uint16
+	offsetFreq         [32]uint16
+	codegenFreq        [codegenCodeCount]uint16
+
+	// codegen must have an extra space for the final symbol.
+	codegen [literalCount + offsetCodeCount + 1]uint8
+}
+
+// Huffman reuse.
+//
+// The huffmanBitWriter supports reusing huffman tables and thereby combining block sections.
+//
+// This is controlled by several variables:
+//
+// If lastHeader is non-zero the Huffman table can be reused.
+// This also indicates that a Huffman table has been generated that can output all
+// possible symbols.
+// It also indicates that an EOB has not yet been emitted, so if a new tabel is generated
+// an EOB with the previous table must be written.
+//
+// If lastHuffMan is set, a table for outputting literals has been generated and offsets are invalid.
+//
+// An incoming block estimates the output size of a new table using a 'fresh' by calculating the
+// optimal size and adding a penalty in 'logNewTablePenalty'.
+// A Huffman table is not optimal, which is why we add a penalty, and generating a new table
+// is slower both for compression and decompression.
+
+func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
+	return &huffmanBitWriter{
+		writer:          w,
+		literalEncoding: newHuffmanEncoder(literalCount),
+		tmpLitEncoding:  newHuffmanEncoder(literalCount),
+		codegenEncoding: newHuffmanEncoder(codegenCodeCount),
+		offsetEncoding:  newHuffmanEncoder(offsetCodeCount),
+	}
+}
+
+func (w *huffmanBitWriter) reset(writer io.Writer) {
+	w.writer = writer
+	w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil
+	w.lastHeader = 0
+	w.lastHuffMan = false
+}
+
+func (w *huffmanBitWriter) canReuse(t *tokens) (ok bool) {
+	a := t.offHist[:offsetCodeCount]
+	b := w.offsetEncoding.codes
+	b = b[:len(a)]
+	for i, v := range a {
+		if v != 0 && b[i].zero() {
+			return false
+		}
+	}
+
+	a = t.extraHist[:literalCount-256]
+	b = w.literalEncoding.codes[256:literalCount]
+	b = b[:len(a)]
+	for i, v := range a {
+		if v != 0 && b[i].zero() {
+			return false
+		}
+	}
+
+	a = t.litHist[:256]
+	b = w.literalEncoding.codes[:len(a)]
+	for i, v := range a {
+		if v != 0 && b[i].zero() {
+			return false
+		}
+	}
+	return true
+}
+
+func (w *huffmanBitWriter) flush() {
+	if w.err != nil {
+		w.nbits = 0
+		return
+	}
+	if w.lastHeader > 0 {
+		// We owe an EOB
+		w.writeCode(w.literalEncoding.codes[endBlockMarker])
+		w.lastHeader = 0
+	}
+	n := w.nbytes
+	for w.nbits != 0 {
+		w.bytes[n] = byte(w.bits)
+		w.bits >>= 8
+		if w.nbits > 8 { // Avoid underflow
+			w.nbits -= 8
+		} else {
+			w.nbits = 0
+		}
+		n++
+	}
+	w.bits = 0
+	w.write(w.bytes[:n])
+	w.nbytes = 0
+}
+
+func (w *huffmanBitWriter) write(b []byte) {
+	if w.err != nil {
+		return
+	}
+	_, w.err = w.writer.Write(b)
+}
+
+func (w *huffmanBitWriter) writeBits(b int32, nb uint8) {
+	w.bits |= uint64(b) << (w.nbits & 63)
+	w.nbits += nb
+	if w.nbits >= 48 {
+		w.writeOutBits()
+	}
+}
+
+func (w *huffmanBitWriter) writeBytes(bytes []byte) {
+	if w.err != nil {
+		return
+	}
+	n := w.nbytes
+	if w.nbits&7 != 0 {
+		w.err = InternalError("writeBytes with unfinished bits")
+		return
+	}
+	for w.nbits != 0 {
+		w.bytes[n] = byte(w.bits)
+		w.bits >>= 8
+		w.nbits -= 8
+		n++
+	}
+	if n != 0 {
+		w.write(w.bytes[:n])
+	}
+	w.nbytes = 0
+	w.write(bytes)
+}
+
+// RFC 1951 3.2.7 specifies a special run-length encoding for specifying
+// the literal and offset lengths arrays (which are concatenated into a single
+// array).  This method generates that run-length encoding.
+//
+// The result is written into the codegen array, and the frequencies
+// of each code is written into the codegenFreq array.
+// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
+// information. Code badCode is an end marker
+//
+//	numLiterals      The number of literals in literalEncoding
+//	numOffsets       The number of offsets in offsetEncoding
+//	litenc, offenc   The literal and offset encoder to use
+func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) {
+	for i := range w.codegenFreq {
+		w.codegenFreq[i] = 0
+	}
+	// Note that we are using codegen both as a temporary variable for holding
+	// a copy of the frequencies, and as the place where we put the result.
+	// This is fine because the output is always shorter than the input used
+	// so far.
+	codegen := w.codegen[:] // cache
+	// Copy the concatenated code sizes to codegen. Put a marker at the end.
+	cgnl := codegen[:numLiterals]
+	for i := range cgnl {
+		cgnl[i] = litEnc.codes[i].len()
+	}
+
+	cgnl = codegen[numLiterals : numLiterals+numOffsets]
+	for i := range cgnl {
+		cgnl[i] = offEnc.codes[i].len()
+	}
+	codegen[numLiterals+numOffsets] = badCode
+
+	size := codegen[0]
+	count := 1
+	outIndex := 0
+	for inIndex := 1; size != badCode; inIndex++ {
+		// INVARIANT: We have seen "count" copies of size that have not yet
+		// had output generated for them.
+		nextSize := codegen[inIndex]
+		if nextSize == size {
+			count++
+			continue
+		}
+		// We need to generate codegen indicating "count" of size.
+		if size != 0 {
+			codegen[outIndex] = size
+			outIndex++
+			w.codegenFreq[size]++
+			count--
+			for count >= 3 {
+				n := 6
+				if n > count {
+					n = count
+				}
+				codegen[outIndex] = 16
+				outIndex++
+				codegen[outIndex] = uint8(n - 3)
+				outIndex++
+				w.codegenFreq[16]++
+				count -= n
+			}
+		} else {
+			for count >= 11 {
+				n := 138
+				if n > count {
+					n = count
+				}
+				codegen[outIndex] = 18
+				outIndex++
+				codegen[outIndex] = uint8(n - 11)
+				outIndex++
+				w.codegenFreq[18]++
+				count -= n
+			}
+			if count >= 3 {
+				// count >= 3 && count <= 10
+				codegen[outIndex] = 17
+				outIndex++
+				codegen[outIndex] = uint8(count - 3)
+				outIndex++
+				w.codegenFreq[17]++
+				count = 0
+			}
+		}
+		count--
+		for ; count >= 0; count-- {
+			codegen[outIndex] = size
+			outIndex++
+			w.codegenFreq[size]++
+		}
+		// Set up invariant for next time through the loop.
+		size = nextSize
+		count = 1
+	}
+	// Marker indicating the end of the codegen.
+	codegen[outIndex] = badCode
+}
+
+func (w *huffmanBitWriter) codegens() int {
+	numCodegens := len(w.codegenFreq)
+	for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
+		numCodegens--
+	}
+	return numCodegens
+}
+
+func (w *huffmanBitWriter) headerSize() (size, numCodegens int) {
+	numCodegens = len(w.codegenFreq)
+	for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
+		numCodegens--
+	}
+	return 3 + 5 + 5 + 4 + (3 * numCodegens) +
+		w.codegenEncoding.bitLength(w.codegenFreq[:]) +
+		int(w.codegenFreq[16])*2 +
+		int(w.codegenFreq[17])*3 +
+		int(w.codegenFreq[18])*7, numCodegens
+}
+
+// dynamicSize returns the size of dynamically encoded data in bits.
+func (w *huffmanBitWriter) dynamicReuseSize(litEnc, offEnc *huffmanEncoder) (size int) {
+	size = litEnc.bitLength(w.literalFreq[:]) +
+		offEnc.bitLength(w.offsetFreq[:])
+	return size
+}
+
+// dynamicSize returns the size of dynamically encoded data in bits.
+func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) {
+	header, numCodegens := w.headerSize()
+	size = header +
+		litEnc.bitLength(w.literalFreq[:]) +
+		offEnc.bitLength(w.offsetFreq[:]) +
+		extraBits
+	return size, numCodegens
+}
+
+// extraBitSize will return the number of bits that will be written
+// as "extra" bits on matches.
+func (w *huffmanBitWriter) extraBitSize() int {
+	total := 0
+	for i, n := range w.literalFreq[257:literalCount] {
+		total += int(n) * int(lengthExtraBits[i&31])
+	}
+	for i, n := range w.offsetFreq[:offsetCodeCount] {
+		total += int(n) * int(offsetExtraBits[i&31])
+	}
+	return total
+}
+
+// fixedSize returns the size of dynamically encoded data in bits.
+func (w *huffmanBitWriter) fixedSize(extraBits int) int {
+	return 3 +
+		fixedLiteralEncoding.bitLength(w.literalFreq[:]) +
+		fixedOffsetEncoding.bitLength(w.offsetFreq[:]) +
+		extraBits
+}
+
+// storedSize calculates the stored size, including header.
+// The function returns the size in bits and whether the block
+// fits inside a single block.
+func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {
+	if in == nil {
+		return 0, false
+	}
+	if len(in) <= maxStoreBlockSize {
+		return (len(in) + 5) * 8, true
+	}
+	return 0, false
+}
+
+func (w *huffmanBitWriter) writeCode(c hcode) {
+	// The function does not get inlined if we "& 63" the shift.
+	w.bits |= c.code64() << (w.nbits & 63)
+	w.nbits += c.len()
+	if w.nbits >= 48 {
+		w.writeOutBits()
+	}
+}
+
+// writeOutBits will write bits to the buffer.
+func (w *huffmanBitWriter) writeOutBits() {
+	bits := w.bits
+	w.bits >>= 48
+	w.nbits -= 48
+	n := w.nbytes
+
+	// We over-write, but faster...
+	binary.LittleEndian.PutUint64(w.bytes[n:], bits)
+	n += 6
+
+	if n >= bufferFlushSize {
+		if w.err != nil {
+			n = 0
+			return
+		}
+		w.write(w.bytes[:n])
+		n = 0
+	}
+
+	w.nbytes = n
+}
+
+// Write the header of a dynamic Huffman block to the output stream.
+//
+//	numLiterals  The number of literals specified in codegen
+//	numOffsets   The number of offsets specified in codegen
+//	numCodegens  The number of codegens used in codegen
+func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) {
+	if w.err != nil {
+		return
+	}
+	var firstBits int32 = 4
+	if isEof {
+		firstBits = 5
+	}
+	w.writeBits(firstBits, 3)
+	w.writeBits(int32(numLiterals-257), 5)
+	w.writeBits(int32(numOffsets-1), 5)
+	w.writeBits(int32(numCodegens-4), 4)
+
+	for i := 0; i < numCodegens; i++ {
+		value := uint(w.codegenEncoding.codes[codegenOrder[i]].len())
+		w.writeBits(int32(value), 3)
+	}
+
+	i := 0
+	for {
+		var codeWord = uint32(w.codegen[i])
+		i++
+		if codeWord == badCode {
+			break
+		}
+		w.writeCode(w.codegenEncoding.codes[codeWord])
+
+		switch codeWord {
+		case 16:
+			w.writeBits(int32(w.codegen[i]), 2)
+			i++
+		case 17:
+			w.writeBits(int32(w.codegen[i]), 3)
+			i++
+		case 18:
+			w.writeBits(int32(w.codegen[i]), 7)
+			i++
+		}
+	}
+}
+
+// writeStoredHeader will write a stored header.
+// If the stored block is only used for EOF,
+// it is replaced with a fixed huffman block.
+func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
+	if w.err != nil {
+		return
+	}
+	if w.lastHeader > 0 {
+		// We owe an EOB
+		w.writeCode(w.literalEncoding.codes[endBlockMarker])
+		w.lastHeader = 0
+	}
+
+	// To write EOF, use a fixed encoding block. 10 bits instead of 5 bytes.
+	if length == 0 && isEof {
+		w.writeFixedHeader(isEof)
+		// EOB: 7 bits, value: 0
+		w.writeBits(0, 7)
+		w.flush()
+		return
+	}
+
+	var flag int32
+	if isEof {
+		flag = 1
+	}
+	w.writeBits(flag, 3)
+	w.flush()
+	w.writeBits(int32(length), 16)
+	w.writeBits(int32(^uint16(length)), 16)
+}
+
+func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
+	if w.err != nil {
+		return
+	}
+	if w.lastHeader > 0 {
+		// We owe an EOB
+		w.writeCode(w.literalEncoding.codes[endBlockMarker])
+		w.lastHeader = 0
+	}
+
+	// Indicate that we are a fixed Huffman block
+	var value int32 = 2
+	if isEof {
+		value = 3
+	}
+	w.writeBits(value, 3)
+}
+
+// writeBlock will write a block of tokens with the smallest encoding.
+// The original input can be supplied, and if the huffman encoded data
+// is larger than the original bytes, the data will be written as a
+// stored block.
+// If the input is nil, the tokens will always be Huffman encoded.
+func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) {
+	if w.err != nil {
+		return
+	}
+
+	tokens.AddEOB()
+	if w.lastHeader > 0 {
+		// We owe an EOB
+		w.writeCode(w.literalEncoding.codes[endBlockMarker])
+		w.lastHeader = 0
+	}
+	numLiterals, numOffsets := w.indexTokens(tokens, false)
+	w.generate()
+	var extraBits int
+	storedSize, storable := w.storedSize(input)
+	if storable {
+		extraBits = w.extraBitSize()
+	}
+
+	// Figure out smallest code.
+	// Fixed Huffman baseline.
+	var literalEncoding = fixedLiteralEncoding
+	var offsetEncoding = fixedOffsetEncoding
+	var size = math.MaxInt32
+	if tokens.n < maxPredefinedTokens {
+		size = w.fixedSize(extraBits)
+	}
+
+	// Dynamic Huffman?
+	var numCodegens int
+
+	// Generate codegen and codegenFrequencies, which indicates how to encode
+	// the literalEncoding and the offsetEncoding.
+	w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
+	w.codegenEncoding.generate(w.codegenFreq[:], 7)
+	dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits)
+
+	if dynamicSize < size {
+		size = dynamicSize
+		literalEncoding = w.literalEncoding
+		offsetEncoding = w.offsetEncoding
+	}
+
+	// Stored bytes?
+	if storable && storedSize <= size {
+		w.writeStoredHeader(len(input), eof)
+		w.writeBytes(input)
+		return
+	}
+
+	// Huffman.
+	if literalEncoding == fixedLiteralEncoding {
+		w.writeFixedHeader(eof)
+	} else {
+		w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
+	}
+
+	// Write the tokens.
+	w.writeTokens(tokens.Slice(), literalEncoding.codes, offsetEncoding.codes)
+}
+
+// writeBlockDynamic encodes a block using a dynamic Huffman table.
+// This should be used if the symbols used have a disproportionate
+// histogram distribution.
+// If input is supplied and the compression savings are below 1/16th of the
+// input size the block is stored.
+func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []byte, sync bool) {
+	if w.err != nil {
+		return
+	}
+
+	sync = sync || eof
+	if sync {
+		tokens.AddEOB()
+	}
+
+	// We cannot reuse pure huffman table, and must mark as EOF.
+	if (w.lastHuffMan || eof) && w.lastHeader > 0 {
+		// We will not try to reuse.
+		w.writeCode(w.literalEncoding.codes[endBlockMarker])
+		w.lastHeader = 0
+		w.lastHuffMan = false
+	}
+
+	// fillReuse enables filling of empty values.
+	// This will make encodings always reusable without testing.
+	// However, this does not appear to benefit on most cases.
+	const fillReuse = false
+
+	// Check if we can reuse...
+	if !fillReuse && w.lastHeader > 0 && !w.canReuse(tokens) {
+		w.writeCode(w.literalEncoding.codes[endBlockMarker])
+		w.lastHeader = 0
+	}
+
+	numLiterals, numOffsets := w.indexTokens(tokens, !sync)
+	extraBits := 0
+	ssize, storable := w.storedSize(input)
+
+	const usePrefs = true
+	if storable || w.lastHeader > 0 {
+		extraBits = w.extraBitSize()
+	}
+
+	var size int
+
+	// Check if we should reuse.
+	if w.lastHeader > 0 {
+		// Estimate size for using a new table.
+		// Use the previous header size as the best estimate.
+		newSize := w.lastHeader + tokens.EstimatedBits()
+		newSize += int(w.literalEncoding.codes[endBlockMarker].len()) + newSize>>w.logNewTablePenalty
+
+		// The estimated size is calculated as an optimal table.
+		// We add a penalty to make it more realistic and re-use a bit more.
+		reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + extraBits
+
+		// Check if a new table is better.
+		if newSize < reuseSize {
+			// Write the EOB we owe.
+			w.writeCode(w.literalEncoding.codes[endBlockMarker])
+			size = newSize
+			w.lastHeader = 0
+		} else {
+			size = reuseSize
+		}
+
+		if tokens.n < maxPredefinedTokens {
+			if preSize := w.fixedSize(extraBits) + 7; usePrefs && preSize < size {
+				// Check if we get a reasonable size decrease.
+				if storable && ssize <= size {
+					w.writeStoredHeader(len(input), eof)
+					w.writeBytes(input)
+					return
+				}
+				w.writeFixedHeader(eof)
+				if !sync {
+					tokens.AddEOB()
+				}
+				w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes)
+				return
+			}
+		}
+		// Check if we get a reasonable size decrease.
+		if storable && ssize <= size {
+			w.writeStoredHeader(len(input), eof)
+			w.writeBytes(input)
+			return
+		}
+	}
+
+	// We want a new block/table
+	if w.lastHeader == 0 {
+		if fillReuse && !sync {
+			w.fillTokens()
+			numLiterals, numOffsets = maxNumLit, maxNumDist
+		} else {
+			w.literalFreq[endBlockMarker] = 1
+		}
+
+		w.generate()
+		// Generate codegen and codegenFrequencies, which indicates how to encode
+		// the literalEncoding and the offsetEncoding.
+		w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
+		w.codegenEncoding.generate(w.codegenFreq[:], 7)
+
+		var numCodegens int
+		if fillReuse && !sync {
+			// Reindex for accurate size...
+			w.indexTokens(tokens, true)
+		}
+		size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits)
+
+		// Store predefined, if we don't get a reasonable improvement.
+		if tokens.n < maxPredefinedTokens {
+			if preSize := w.fixedSize(extraBits); usePrefs && preSize <= size {
+				// Store bytes, if we don't get an improvement.
+				if storable && ssize <= preSize {
+					w.writeStoredHeader(len(input), eof)
+					w.writeBytes(input)
+					return
+				}
+				w.writeFixedHeader(eof)
+				if !sync {
+					tokens.AddEOB()
+				}
+				w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes)
+				return
+			}
+		}
+
+		if storable && ssize <= size {
+			// Store bytes, if we don't get an improvement.
+			w.writeStoredHeader(len(input), eof)
+			w.writeBytes(input)
+			return
+		}
+
+		// Write Huffman table.
+		w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
+		if !sync {
+			w.lastHeader, _ = w.headerSize()
+		}
+		w.lastHuffMan = false
+	}
+
+	if sync {
+		w.lastHeader = 0
+	}
+	// Write the tokens.
+	w.writeTokens(tokens.Slice(), w.literalEncoding.codes, w.offsetEncoding.codes)
+}
+
+func (w *huffmanBitWriter) fillTokens() {
+	for i, v := range w.literalFreq[:literalCount] {
+		if v == 0 {
+			w.literalFreq[i] = 1
+		}
+	}
+	for i, v := range w.offsetFreq[:offsetCodeCount] {
+		if v == 0 {
+			w.offsetFreq[i] = 1
+		}
+	}
+}
+
+// indexTokens indexes a slice of tokens, and updates
+// literalFreq and offsetFreq, and generates literalEncoding
+// and offsetEncoding.
+// The number of literal and offset tokens is returned.
+func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, numOffsets int) {
+	//copy(w.literalFreq[:], t.litHist[:])
+	*(*[256]uint16)(w.literalFreq[:]) = t.litHist
+	//copy(w.literalFreq[256:], t.extraHist[:])
+	*(*[32]uint16)(w.literalFreq[256:]) = t.extraHist
+	w.offsetFreq = t.offHist
+
+	if t.n == 0 {
+		return
+	}
+	if filled {
+		return maxNumLit, maxNumDist
+	}
+	// get the number of literals
+	numLiterals = len(w.literalFreq)
+	for w.literalFreq[numLiterals-1] == 0 {
+		numLiterals--
+	}
+	// get the number of offsets
+	numOffsets = len(w.offsetFreq)
+	for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 {
+		numOffsets--
+	}
+	if numOffsets == 0 {
+		// We haven't found a single match. If we want to go with the dynamic encoding,
+		// we should count at least one offset to be sure that the offset huffman tree could be encoded.
+		w.offsetFreq[0] = 1
+		numOffsets = 1
+	}
+	return
+}
+
+func (w *huffmanBitWriter) generate() {
+	w.literalEncoding.generate(w.literalFreq[:literalCount], 15)
+	w.offsetEncoding.generate(w.offsetFreq[:offsetCodeCount], 15)
+}
+
+// writeTokens writes a slice of tokens to the output.
+// codes for literal and offset encoding must be supplied.
+func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) {
+	if w.err != nil {
+		return
+	}
+	if len(tokens) == 0 {
+		return
+	}
+
+	// Only last token should be endBlockMarker.
+	var deferEOB bool
+	if tokens[len(tokens)-1] == endBlockMarker {
+		tokens = tokens[:len(tokens)-1]
+		deferEOB = true
+	}
+
+	// Create slices up to the next power of two to avoid bounds checks.
+	lits := leCodes[:256]
+	offs := oeCodes[:32]
+	lengths := leCodes[lengthCodesStart:]
+	lengths = lengths[:32]
+
+	// Go 1.16 LOVES having these on stack.
+	bits, nbits, nbytes := w.bits, w.nbits, w.nbytes
+
+	for _, t := range tokens {
+		if t < 256 {
+			//w.writeCode(lits[t.literal()])
+			c := lits[t]
+			bits |= c.code64() << (nbits & 63)
+			nbits += c.len()
+			if nbits >= 48 {
+				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
+				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
+				bits >>= 48
+				nbits -= 48
+				nbytes += 6
+				if nbytes >= bufferFlushSize {
+					if w.err != nil {
+						nbytes = 0
+						return
+					}
+					_, w.err = w.writer.Write(w.bytes[:nbytes])
+					nbytes = 0
+				}
+			}
+			continue
+		}
+
+		// Write the length
+		length := t.length()
+		lengthCode := lengthCode(length) & 31
+		if false {
+			w.writeCode(lengths[lengthCode])
+		} else {
+			// inlined
+			c := lengths[lengthCode]
+			bits |= c.code64() << (nbits & 63)
+			nbits += c.len()
+			if nbits >= 48 {
+				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
+				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
+				bits >>= 48
+				nbits -= 48
+				nbytes += 6
+				if nbytes >= bufferFlushSize {
+					if w.err != nil {
+						nbytes = 0
+						return
+					}
+					_, w.err = w.writer.Write(w.bytes[:nbytes])
+					nbytes = 0
+				}
+			}
+		}
+
+		if lengthCode >= lengthExtraBitsMinCode {
+			extraLengthBits := lengthExtraBits[lengthCode]
+			//w.writeBits(extraLength, extraLengthBits)
+			extraLength := int32(length - lengthBase[lengthCode])
+			bits |= uint64(extraLength) << (nbits & 63)
+			nbits += extraLengthBits
+			if nbits >= 48 {
+				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
+				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
+				bits >>= 48
+				nbits -= 48
+				nbytes += 6
+				if nbytes >= bufferFlushSize {
+					if w.err != nil {
+						nbytes = 0
+						return
+					}
+					_, w.err = w.writer.Write(w.bytes[:nbytes])
+					nbytes = 0
+				}
+			}
+		}
+		// Write the offset
+		offset := t.offset()
+		offsetCode := (offset >> 16) & 31
+		if false {
+			w.writeCode(offs[offsetCode])
+		} else {
+			// inlined
+			c := offs[offsetCode]
+			bits |= c.code64() << (nbits & 63)
+			nbits += c.len()
+			if nbits >= 48 {
+				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
+				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
+				bits >>= 48
+				nbits -= 48
+				nbytes += 6
+				if nbytes >= bufferFlushSize {
+					if w.err != nil {
+						nbytes = 0
+						return
+					}
+					_, w.err = w.writer.Write(w.bytes[:nbytes])
+					nbytes = 0
+				}
+			}
+		}
+
+		if offsetCode >= offsetExtraBitsMinCode {
+			offsetComb := offsetCombined[offsetCode]
+			//w.writeBits(extraOffset, extraOffsetBits)
+			bits |= uint64((offset-(offsetComb>>8))&matchOffsetOnlyMask) << (nbits & 63)
+			nbits += uint8(offsetComb)
+			if nbits >= 48 {
+				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
+				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
+				bits >>= 48
+				nbits -= 48
+				nbytes += 6
+				if nbytes >= bufferFlushSize {
+					if w.err != nil {
+						nbytes = 0
+						return
+					}
+					_, w.err = w.writer.Write(w.bytes[:nbytes])
+					nbytes = 0
+				}
+			}
+		}
+	}
+	// Restore...
+	w.bits, w.nbits, w.nbytes = bits, nbits, nbytes
+
+	if deferEOB {
+		w.writeCode(leCodes[endBlockMarker])
+	}
+}
+
+// huffOffset is a static offset encoder used for huffman only encoding.
+// It can be reused since we will not be encoding offset values.
+var huffOffset *huffmanEncoder
+
+func init() {
+	w := newHuffmanBitWriter(nil)
+	w.offsetFreq[0] = 1
+	huffOffset = newHuffmanEncoder(offsetCodeCount)
+	huffOffset.generate(w.offsetFreq[:offsetCodeCount], 15)
+}
+
+// writeBlockHuff encodes a block of bytes as either
+// Huffman encoded literals or uncompressed bytes if the
+// results only gains very little from compression.
+func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
+	if w.err != nil {
+		return
+	}
+
+	// Clear histogram
+	for i := range w.literalFreq[:] {
+		w.literalFreq[i] = 0
+	}
+	if !w.lastHuffMan {
+		for i := range w.offsetFreq[:] {
+			w.offsetFreq[i] = 0
+		}
+	}
+
+	const numLiterals = endBlockMarker + 1
+	const numOffsets = 1
+
+	// Add everything as literals
+	// We have to estimate the header size.
+	// Assume header is around 70 bytes:
+	// https://stackoverflow.com/a/25454430
+	const guessHeaderSizeBits = 70 * 8
+	histogram(input, w.literalFreq[:numLiterals])
+	ssize, storable := w.storedSize(input)
+	if storable && len(input) > 1024 {
+		// Quick check for incompressible content.
+		abs := float64(0)
+		avg := float64(len(input)) / 256
+		max := float64(len(input) * 2)
+		for _, v := range w.literalFreq[:256] {
+			diff := float64(v) - avg
+			abs += diff * diff
+			if abs > max {
+				break
+			}
+		}
+		if abs < max {
+			if debugDeflate {
+				fmt.Println("stored", abs, "<", max)
+			}
+			// No chance we can compress this...
+			w.writeStoredHeader(len(input), eof)
+			w.writeBytes(input)
+			return
+		}
+	}
+	w.literalFreq[endBlockMarker] = 1
+	w.tmpLitEncoding.generate(w.literalFreq[:numLiterals], 15)
+	estBits := w.tmpLitEncoding.canReuseBits(w.literalFreq[:numLiterals])
+	if estBits < math.MaxInt32 {
+		estBits += w.lastHeader
+		if w.lastHeader == 0 {
+			estBits += guessHeaderSizeBits
+		}
+		estBits += estBits >> w.logNewTablePenalty
+	}
+
+	// Store bytes, if we don't get a reasonable improvement.
+	if storable && ssize <= estBits {
+		if debugDeflate {
+			fmt.Println("stored,", ssize, "<=", estBits)
+		}
+		w.writeStoredHeader(len(input), eof)
+		w.writeBytes(input)
+		return
+	}
+
+	if w.lastHeader > 0 {
+		reuseSize := w.literalEncoding.canReuseBits(w.literalFreq[:256])
+
+		if estBits < reuseSize {
+			if debugDeflate {
+				fmt.Println("NOT reusing, reuse:", reuseSize/8, "> new:", estBits/8, "header est:", w.lastHeader/8, "bytes")
+			}
+			// We owe an EOB
+			w.writeCode(w.literalEncoding.codes[endBlockMarker])
+			w.lastHeader = 0
+		} else if debugDeflate {
+			fmt.Println("reusing, reuse:", reuseSize/8, "> new:", estBits/8, "- header est:", w.lastHeader/8)
+		}
+	}
+
+	count := 0
+	if w.lastHeader == 0 {
+		// Use the temp encoding, so swap.
+		w.literalEncoding, w.tmpLitEncoding = w.tmpLitEncoding, w.literalEncoding
+		// Generate codegen and codegenFrequencies, which indicates how to encode
+		// the literalEncoding and the offsetEncoding.
+		w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
+		w.codegenEncoding.generate(w.codegenFreq[:], 7)
+		numCodegens := w.codegens()
+
+		// Huffman.
+		w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
+		w.lastHuffMan = true
+		w.lastHeader, _ = w.headerSize()
+		if debugDeflate {
+			count += w.lastHeader
+			fmt.Println("header:", count/8)
+		}
+	}
+
+	encoding := w.literalEncoding.codes[:256]
+	// Go 1.16 LOVES having these on stack. At least 1.5x the speed.
+	bits, nbits, nbytes := w.bits, w.nbits, w.nbytes
+
+	if debugDeflate {
+		count -= int(nbytes)*8 + int(nbits)
+	}
+	// Unroll, write 3 codes/loop.
+	// Fastest number of unrolls.
+	for len(input) > 3 {
+		// We must have at least 48 bits free.
+		if nbits >= 8 {
+			n := nbits >> 3
+			binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
+			bits >>= (n * 8) & 63
+			nbits -= n * 8
+			nbytes += n
+		}
+		if nbytes >= bufferFlushSize {
+			if w.err != nil {
+				nbytes = 0
+				return
+			}
+			if debugDeflate {
+				count += int(nbytes) * 8
+			}
+			_, w.err = w.writer.Write(w.bytes[:nbytes])
+			nbytes = 0
+		}
+		a, b := encoding[input[0]], encoding[input[1]]
+		bits |= a.code64() << (nbits & 63)
+		bits |= b.code64() << ((nbits + a.len()) & 63)
+		c := encoding[input[2]]
+		nbits += b.len() + a.len()
+		bits |= c.code64() << (nbits & 63)
+		nbits += c.len()
+		input = input[3:]
+	}
+
+	// Remaining...
+	for _, t := range input {
+		if nbits >= 48 {
+			binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
+			//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
+			bits >>= 48
+			nbits -= 48
+			nbytes += 6
+			if nbytes >= bufferFlushSize {
+				if w.err != nil {
+					nbytes = 0
+					return
+				}
+				if debugDeflate {
+					count += int(nbytes) * 8
+				}
+				_, w.err = w.writer.Write(w.bytes[:nbytes])
+				nbytes = 0
+			}
+		}
+		// Bitwriting inlined, ~30% speedup
+		c := encoding[t]
+		bits |= c.code64() << (nbits & 63)
+
+		nbits += c.len()
+		if debugDeflate {
+			count += int(c.len())
+		}
+	}
+	// Restore...
+	w.bits, w.nbits, w.nbytes = bits, nbits, nbytes
+
+	if debugDeflate {
+		nb := count + int(nbytes)*8 + int(nbits)
+		fmt.Println("wrote", nb, "bits,", nb/8, "bytes.")
+	}
+	// Flush if needed to have space.
+	if w.nbits >= 48 {
+		w.writeOutBits()
+	}
+
+	if eof || sync {
+		w.writeCode(w.literalEncoding.codes[endBlockMarker])
+		w.lastHeader = 0
+		w.lastHuffMan = false
+	}
+}
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go
new file mode 100644
index 00000000000..be7b58b473f
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go
@@ -0,0 +1,417 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+import (
+	"math"
+	"math/bits"
+)
+
+const (
+	maxBitsLimit = 16
+	// number of valid literals
+	literalCount = 286
+)
+
+// hcode is a huffman code with a bit code and bit length.
+type hcode uint32
+
+func (h hcode) len() uint8 {
+	return uint8(h)
+}
+
+func (h hcode) code64() uint64 {
+	return uint64(h >> 8)
+}
+
+func (h hcode) zero() bool {
+	return h == 0
+}
+
+type huffmanEncoder struct {
+	codes    []hcode
+	bitCount [17]int32
+
+	// Allocate a reusable buffer with the longest possible frequency table.
+	// Possible lengths are codegenCodeCount, offsetCodeCount and literalCount.
+	// The largest of these is literalCount, so we allocate for that case.
+	freqcache [literalCount + 1]literalNode
+}
+
+type literalNode struct {
+	literal uint16
+	freq    uint16
+}
+
+// A levelInfo describes the state of the constructed tree for a given depth.
+type levelInfo struct {
+	// Our level.  for better printing
+	level int32
+
+	// The frequency of the last node at this level
+	lastFreq int32
+
+	// The frequency of the next character to add to this level
+	nextCharFreq int32
+
+	// The frequency of the next pair (from level below) to add to this level.
+	// Only valid if the "needed" value of the next lower level is 0.
+	nextPairFreq int32
+
+	// The number of chains remaining to generate for this level before moving
+	// up to the next level
+	needed int32
+}
+
+// set sets the code and length of an hcode.
+func (h *hcode) set(code uint16, length uint8) {
+	*h = hcode(length) | (hcode(code) << 8)
+}
+
+func newhcode(code uint16, length uint8) hcode {
+	return hcode(length) | (hcode(code) << 8)
+}
+
+func reverseBits(number uint16, bitLength byte) uint16 {
+	return bits.Reverse16(number << ((16 - bitLength) & 15))
+}
+
+func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxUint16} }
+
+func newHuffmanEncoder(size int) *huffmanEncoder {
+	// Make capacity to next power of two.
+	c := uint(bits.Len32(uint32(size - 1)))
+	return &huffmanEncoder{codes: make([]hcode, size, 1<<c)}
+}
+
+// Generates a HuffmanCode corresponding to the fixed literal table
+func generateFixedLiteralEncoding() *huffmanEncoder {
+	h := newHuffmanEncoder(literalCount)
+	codes := h.codes
+	var ch uint16
+	for ch = 0; ch < literalCount; ch++ {
+		var bits uint16
+		var size uint8
+		switch {
+		case ch < 144:
+			// size 8, 000110000  .. 10111111
+			bits = ch + 48
+			size = 8
+		case ch < 256:
+			// size 9, 110010000 .. 111111111
+			bits = ch + 400 - 144
+			size = 9
+		case ch < 280:
+			// size 7, 0000000 .. 0010111
+			bits = ch - 256
+			size = 7
+		default:
+			// size 8, 11000000 .. 11000111
+			bits = ch + 192 - 280
+			size = 8
+		}
+		codes[ch] = newhcode(reverseBits(bits, size), size)
+	}
+	return h
+}
+
+func generateFixedOffsetEncoding() *huffmanEncoder {
+	h := newHuffmanEncoder(30)
+	codes := h.codes
+	for ch := range codes {
+		codes[ch] = newhcode(reverseBits(uint16(ch), 5), 5)
+	}
+	return h
+}
+
+var fixedLiteralEncoding = generateFixedLiteralEncoding()
+var fixedOffsetEncoding = generateFixedOffsetEncoding()
+
+func (h *huffmanEncoder) bitLength(freq []uint16) int {
+	var total int
+	for i, f := range freq {
+		if f != 0 {
+			total += int(f) * int(h.codes[i].len())
+		}
+	}
+	return total
+}
+
+func (h *huffmanEncoder) bitLengthRaw(b []byte) int {
+	var total int
+	for _, f := range b {
+		total += int(h.codes[f].len())
+	}
+	return total
+}
+
+// canReuseBits returns the number of bits or math.MaxInt32 if the encoder cannot be reused.
+func (h *huffmanEncoder) canReuseBits(freq []uint16) int {
+	var total int
+	for i, f := range freq {
+		if f != 0 {
+			code := h.codes[i]
+			if code.zero() {
+				return math.MaxInt32
+			}
+			total += int(f) * int(code.len())
+		}
+	}
+	return total
+}
+
+// Return the number of literals assigned to each bit size in the Huffman encoding
+//
+// This method is only called when list.length >= 3
+// The cases of 0, 1, and 2 literals are handled by special case code.
+//
+// list  An array of the literals with non-zero frequencies
+//
+//	and their associated frequencies. The array is in order of increasing
+//	frequency, and has as its last element a special element with frequency
+//	MaxInt32
+//
+// maxBits     The maximum number of bits that should be used to encode any literal.
+//
+//	Must be less than 16.
+//
+// return      An integer array in which array[i] indicates the number of literals
+//
+//	that should be encoded in i bits.
+func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
+	if maxBits >= maxBitsLimit {
+		panic("flate: maxBits too large")
+	}
+	n := int32(len(list))
+	list = list[0 : n+1]
+	list[n] = maxNode()
+
+	// The tree can't have greater depth than n - 1, no matter what. This
+	// saves a little bit of work in some small cases
+	if maxBits > n-1 {
+		maxBits = n - 1
+	}
+
+	// Create information about each of the levels.
+	// A bogus "Level 0" whose sole purpose is so that
+	// level1.prev.needed==0.  This makes level1.nextPairFreq
+	// be a legitimate value that never gets chosen.
+	var levels [maxBitsLimit]levelInfo
+	// leafCounts[i] counts the number of literals at the left
+	// of ancestors of the rightmost node at level i.
+	// leafCounts[i][j] is the number of literals at the left
+	// of the level j ancestor.
+	var leafCounts [maxBitsLimit][maxBitsLimit]int32
+
+	// Descending to only have 1 bounds check.
+	l2f := int32(list[2].freq)
+	l1f := int32(list[1].freq)
+	l0f := int32(list[0].freq) + int32(list[1].freq)
+
+	for level := int32(1); level <= maxBits; level++ {
+		// For every level, the first two items are the first two characters.
+		// We initialize the levels as if we had already figured this out.
+		levels[level] = levelInfo{
+			level:        level,
+			lastFreq:     l1f,
+			nextCharFreq: l2f,
+			nextPairFreq: l0f,
+		}
+		leafCounts[level][level] = 2
+		if level == 1 {
+			levels[level].nextPairFreq = math.MaxInt32
+		}
+	}
+
+	// We need a total of 2*n - 2 items at top level and have already generated 2.
+	levels[maxBits].needed = 2*n - 4
+
+	level := uint32(maxBits)
+	for level < 16 {
+		l := &levels[level]
+		if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
+			// We've run out of both leafs and pairs.
+			// End all calculations for this level.
+			// To make sure we never come back to this level or any lower level,
+			// set nextPairFreq impossibly large.
+			l.needed = 0
+			levels[level+1].nextPairFreq = math.MaxInt32
+			level++
+			continue
+		}
+
+		prevFreq := l.lastFreq
+		if l.nextCharFreq < l.nextPairFreq {
+			// The next item on this row is a leaf node.
+			n := leafCounts[level][level] + 1
+			l.lastFreq = l.nextCharFreq
+			// Lower leafCounts are the same of the previous node.
+			leafCounts[level][level] = n
+			e := list[n]
+			if e.literal < math.MaxUint16 {
+				l.nextCharFreq = int32(e.freq)
+			} else {
+				l.nextCharFreq = math.MaxInt32
+			}
+		} else {
+			// The next item on this row is a pair from the previous row.
+			// nextPairFreq isn't valid until we generate two
+			// more values in the level below
+			l.lastFreq = l.nextPairFreq
+			// Take leaf counts from the lower level, except counts[level] remains the same.
+			if true {
+				save := leafCounts[level][level]
+				leafCounts[level] = leafCounts[level-1]
+				leafCounts[level][level] = save
+			} else {
+				copy(leafCounts[level][:level], leafCounts[level-1][:level])
+			}
+			levels[l.level-1].needed = 2
+		}
+
+		if l.needed--; l.needed == 0 {
+			// We've done everything we need to do for this level.
+			// Continue calculating one level up. Fill in nextPairFreq
+			// of that level with the sum of the two nodes we've just calculated on
+			// this level.
+			if l.level == maxBits {
+				// All done!
+				break
+			}
+			levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq
+			level++
+		} else {
+			// If we stole from below, move down temporarily to replenish it.
+			for levels[level-1].needed > 0 {
+				level--
+			}
+		}
+	}
+
+	// Somethings is wrong if at the end, the top level is null or hasn't used
+	// all of the leaves.
+	if leafCounts[maxBits][maxBits] != n {
+		panic("leafCounts[maxBits][maxBits] != n")
+	}
+
+	bitCount := h.bitCount[:maxBits+1]
+	bits := 1
+	counts := &leafCounts[maxBits]
+	for level := maxBits; level > 0; level-- {
+		// chain.leafCount gives the number of literals requiring at least "bits"
+		// bits to encode.
+		bitCount[bits] = counts[level] - counts[level-1]
+		bits++
+	}
+	return bitCount
+}
+
+// Look at the leaves and assign them a bit count and an encoding as specified
+// in RFC 1951 3.2.2
+func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
+	code := uint16(0)
+	for n, bits := range bitCount {
+		code <<= 1
+		if n == 0 || bits == 0 {
+			continue
+		}
+		// The literals list[len(list)-bits] .. list[len(list)-bits]
+		// are encoded using "bits" bits, and get the values
+		// code, code + 1, ....  The code values are
+		// assigned in literal order (not frequency order).
+		chunk := list[len(list)-int(bits):]
+
+		sortByLiteral(chunk)
+		for _, node := range chunk {
+			h.codes[node.literal] = newhcode(reverseBits(code, uint8(n)), uint8(n))
+			code++
+		}
+		list = list[0 : len(list)-int(bits)]
+	}
+}
+
+// Update this Huffman Code object to be the minimum code for the specified frequency count.
+//
+// freq  An array of frequencies, in which frequency[i] gives the frequency of literal i.
+// maxBits  The maximum number of bits to use for any literal.
+func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
+	list := h.freqcache[:len(freq)+1]
+	codes := h.codes[:len(freq)]
+	// Number of non-zero literals
+	count := 0
+	// Set list to be the set of all non-zero literals and their frequencies
+	for i, f := range freq {
+		if f != 0 {
+			list[count] = literalNode{uint16(i), f}
+			count++
+		} else {
+			codes[i] = 0
+		}
+	}
+	list[count] = literalNode{}
+
+	list = list[:count]
+	if count <= 2 {
+		// Handle the small cases here, because they are awkward for the general case code. With
+		// two or fewer literals, everything has bit length 1.
+		for i, node := range list {
+			// "list" is in order of increasing literal value.
+			h.codes[node.literal].set(uint16(i), 1)
+		}
+		return
+	}
+	sortByFreq(list)
+
+	// Get the number of literals for each bit count
+	bitCount := h.bitCounts(list, maxBits)
+	// And do the assignment
+	h.assignEncodingAndSize(bitCount, list)
+}
+
+// atLeastOne clamps the result between 1 and 15.
+func atLeastOne(v float32) float32 {
+	if v < 1 {
+		return 1
+	}
+	if v > 15 {
+		return 15
+	}
+	return v
+}
+
+func histogram(b []byte, h []uint16) {
+	if true && len(b) >= 8<<10 {
+		// Split for bigger inputs
+		histogramSplit(b, h)
+	} else {
+		h = h[:256]
+		for _, t := range b {
+			h[t]++
+		}
+	}
+}
+
+func histogramSplit(b []byte, h []uint16) {
+	// Tested, and slightly faster than 2-way.
+	// Writing to separate arrays and combining is also slightly slower.
+	h = h[:256]
+	for len(b)&3 != 0 {
+		h[b[0]]++
+		b = b[1:]
+	}
+	n := len(b) / 4
+	x, y, z, w := b[:n], b[n:], b[n+n:], b[n+n+n:]
+	y, z, w = y[:len(x)], z[:len(x)], w[:len(x)]
+	for i, t := range x {
+		v0 := &h[t]
+		v1 := &h[y[i]]
+		v3 := &h[w[i]]
+		v2 := &h[z[i]]
+		*v0++
+		*v1++
+		*v2++
+		*v3++
+	}
+}
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go b/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go
new file mode 100644
index 00000000000..20778029900
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go
@@ -0,0 +1,178 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+// Sort sorts data.
+// It makes one call to data.Len to determine n, and O(n*log(n)) calls to
+// data.Less and data.Swap. The sort is not guaranteed to be stable.
+func sortByFreq(data []literalNode) {
+	n := len(data)
+	quickSortByFreq(data, 0, n, maxDepth(n))
+}
+
+func quickSortByFreq(data []literalNode, a, b, maxDepth int) {
+	for b-a > 12 { // Use ShellSort for slices <= 12 elements
+		if maxDepth == 0 {
+			heapSort(data, a, b)
+			return
+		}
+		maxDepth--
+		mlo, mhi := doPivotByFreq(data, a, b)
+		// Avoiding recursion on the larger subproblem guarantees
+		// a stack depth of at most lg(b-a).
+		if mlo-a < b-mhi {
+			quickSortByFreq(data, a, mlo, maxDepth)
+			a = mhi // i.e., quickSortByFreq(data, mhi, b)
+		} else {
+			quickSortByFreq(data, mhi, b, maxDepth)
+			b = mlo // i.e., quickSortByFreq(data, a, mlo)
+		}
+	}
+	if b-a > 1 {
+		// Do ShellSort pass with gap 6
+		// It could be written in this simplified form cause b-a <= 12
+		for i := a + 6; i < b; i++ {
+			if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq {
+				data[i], data[i-6] = data[i-6], data[i]
+			}
+		}
+		insertionSortByFreq(data, a, b)
+	}
+}
+
+// siftDownByFreq implements the heap property on data[lo, hi).
+// first is an offset into the array where the root of the heap lies.
+func siftDownByFreq(data []literalNode, lo, hi, first int) {
+	root := lo
+	for {
+		child := 2*root + 1
+		if child >= hi {
+			break
+		}
+		if child+1 < hi && (data[first+child].freq == data[first+child+1].freq && data[first+child].literal < data[first+child+1].literal || data[first+child].freq < data[first+child+1].freq) {
+			child++
+		}
+		if data[first+root].freq == data[first+child].freq && data[first+root].literal > data[first+child].literal || data[first+root].freq > data[first+child].freq {
+			return
+		}
+		data[first+root], data[first+child] = data[first+child], data[first+root]
+		root = child
+	}
+}
+func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) {
+	m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
+	if hi-lo > 40 {
+		// Tukey's ``Ninther,'' median of three medians of three.
+		s := (hi - lo) / 8
+		medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s)
+		medianOfThreeSortByFreq(data, m, m-s, m+s)
+		medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s)
+	}
+	medianOfThreeSortByFreq(data, lo, m, hi-1)
+
+	// Invariants are:
+	//	data[lo] = pivot (set up by ChoosePivot)
+	//	data[lo < i < a] < pivot
+	//	data[a <= i < b] <= pivot
+	//	data[b <= i < c] unexamined
+	//	data[c <= i < hi-1] > pivot
+	//	data[hi-1] >= pivot
+	pivot := lo
+	a, c := lo+1, hi-1
+
+	for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ {
+	}
+	b := a
+	for {
+		for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] <= pivot
+		}
+		for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot
+		}
+		if b >= c {
+			break
+		}
+		// data[b] > pivot; data[c-1] <= pivot
+		data[b], data[c-1] = data[c-1], data[b]
+		b++
+		c--
+	}
+	// If hi-c<3 then there are duplicates (by property of median of nine).
+	// Let's be a bit more conservative, and set border to 5.
+	protect := hi-c < 5
+	if !protect && hi-c < (hi-lo)/4 {
+		// Lets test some points for equality to pivot
+		dups := 0
+		if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot
+			data[c], data[hi-1] = data[hi-1], data[c]
+			c++
+			dups++
+		}
+		if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot
+			b--
+			dups++
+		}
+		// m-lo = (hi-lo)/2 > 6
+		// b-lo > (hi-lo)*3/4-1 > 8
+		// ==> m < b ==> data[m] <= pivot
+		if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot
+			data[m], data[b-1] = data[b-1], data[m]
+			b--
+			dups++
+		}
+		// if at least 2 points are equal to pivot, assume skewed distribution
+		protect = dups > 1
+	}
+	if protect {
+		// Protect against a lot of duplicates
+		// Add invariant:
+		//	data[a <= i < b] unexamined
+		//	data[b <= i < c] = pivot
+		for {
+			for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot
+			}
+			for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot
+			}
+			if a >= b {
+				break
+			}
+			// data[a] == pivot; data[b-1] < pivot
+			data[a], data[b-1] = data[b-1], data[a]
+			a++
+			b--
+		}
+	}
+	// Swap pivot into middle
+	data[pivot], data[b-1] = data[b-1], data[pivot]
+	return b - 1, c
+}
+
+// Insertion sort
+func insertionSortByFreq(data []literalNode, a, b int) {
+	for i := a + 1; i < b; i++ {
+		for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- {
+			data[j], data[j-1] = data[j-1], data[j]
+		}
+	}
+}
+
+// quickSortByFreq, loosely following Bentley and McIlroy,
+// ``Engineering a Sort Function,'' SP&E November 1993.
+
+// medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
+func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) {
+	// sort 3 elements
+	if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq {
+		data[m1], data[m0] = data[m0], data[m1]
+	}
+	// data[m0] <= data[m1]
+	if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq {
+		data[m2], data[m1] = data[m1], data[m2]
+		// data[m0] <= data[m2] && data[m1] < data[m2]
+		if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq {
+			data[m1], data[m0] = data[m0], data[m1]
+		}
+	}
+	// now data[m0] <= data[m1] <= data[m2]
+}
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go b/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go
new file mode 100644
index 00000000000..93f1aea109e
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go
@@ -0,0 +1,201 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+// Sort sorts data.
+// It makes one call to data.Len to determine n, and O(n*log(n)) calls to
+// data.Less and data.Swap. The sort is not guaranteed to be stable.
+func sortByLiteral(data []literalNode) {
+	n := len(data)
+	quickSort(data, 0, n, maxDepth(n))
+}
+
+func quickSort(data []literalNode, a, b, maxDepth int) {
+	for b-a > 12 { // Use ShellSort for slices <= 12 elements
+		if maxDepth == 0 {
+			heapSort(data, a, b)
+			return
+		}
+		maxDepth--
+		mlo, mhi := doPivot(data, a, b)
+		// Avoiding recursion on the larger subproblem guarantees
+		// a stack depth of at most lg(b-a).
+		if mlo-a < b-mhi {
+			quickSort(data, a, mlo, maxDepth)
+			a = mhi // i.e., quickSort(data, mhi, b)
+		} else {
+			quickSort(data, mhi, b, maxDepth)
+			b = mlo // i.e., quickSort(data, a, mlo)
+		}
+	}
+	if b-a > 1 {
+		// Do ShellSort pass with gap 6
+		// It could be written in this simplified form cause b-a <= 12
+		for i := a + 6; i < b; i++ {
+			if data[i].literal < data[i-6].literal {
+				data[i], data[i-6] = data[i-6], data[i]
+			}
+		}
+		insertionSort(data, a, b)
+	}
+}
+func heapSort(data []literalNode, a, b int) {
+	first := a
+	lo := 0
+	hi := b - a
+
+	// Build heap with greatest element at top.
+	for i := (hi - 1) / 2; i >= 0; i-- {
+		siftDown(data, i, hi, first)
+	}
+
+	// Pop elements, largest first, into end of data.
+	for i := hi - 1; i >= 0; i-- {
+		data[first], data[first+i] = data[first+i], data[first]
+		siftDown(data, lo, i, first)
+	}
+}
+
+// siftDown implements the heap property on data[lo, hi).
+// first is an offset into the array where the root of the heap lies.
+func siftDown(data []literalNode, lo, hi, first int) {
+	root := lo
+	for {
+		child := 2*root + 1
+		if child >= hi {
+			break
+		}
+		if child+1 < hi && data[first+child].literal < data[first+child+1].literal {
+			child++
+		}
+		if data[first+root].literal > data[first+child].literal {
+			return
+		}
+		data[first+root], data[first+child] = data[first+child], data[first+root]
+		root = child
+	}
+}
+func doPivot(data []literalNode, lo, hi int) (midlo, midhi int) {
+	m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
+	if hi-lo > 40 {
+		// Tukey's ``Ninther,'' median of three medians of three.
+		s := (hi - lo) / 8
+		medianOfThree(data, lo, lo+s, lo+2*s)
+		medianOfThree(data, m, m-s, m+s)
+		medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
+	}
+	medianOfThree(data, lo, m, hi-1)
+
+	// Invariants are:
+	//	data[lo] = pivot (set up by ChoosePivot)
+	//	data[lo < i < a] < pivot
+	//	data[a <= i < b] <= pivot
+	//	data[b <= i < c] unexamined
+	//	data[c <= i < hi-1] > pivot
+	//	data[hi-1] >= pivot
+	pivot := lo
+	a, c := lo+1, hi-1
+
+	for ; a < c && data[a].literal < data[pivot].literal; a++ {
+	}
+	b := a
+	for {
+		for ; b < c && data[pivot].literal > data[b].literal; b++ { // data[b] <= pivot
+		}
+		for ; b < c && data[pivot].literal < data[c-1].literal; c-- { // data[c-1] > pivot
+		}
+		if b >= c {
+			break
+		}
+		// data[b] > pivot; data[c-1] <= pivot
+		data[b], data[c-1] = data[c-1], data[b]
+		b++
+		c--
+	}
+	// If hi-c<3 then there are duplicates (by property of median of nine).
+	// Let's be a bit more conservative, and set border to 5.
+	protect := hi-c < 5
+	if !protect && hi-c < (hi-lo)/4 {
+		// Lets test some points for equality to pivot
+		dups := 0
+		if data[pivot].literal > data[hi-1].literal { // data[hi-1] = pivot
+			data[c], data[hi-1] = data[hi-1], data[c]
+			c++
+			dups++
+		}
+		if data[b-1].literal > data[pivot].literal { // data[b-1] = pivot
+			b--
+			dups++
+		}
+		// m-lo = (hi-lo)/2 > 6
+		// b-lo > (hi-lo)*3/4-1 > 8
+		// ==> m < b ==> data[m] <= pivot
+		if data[m].literal > data[pivot].literal { // data[m] = pivot
+			data[m], data[b-1] = data[b-1], data[m]
+			b--
+			dups++
+		}
+		// if at least 2 points are equal to pivot, assume skewed distribution
+		protect = dups > 1
+	}
+	if protect {
+		// Protect against a lot of duplicates
+		// Add invariant:
+		//	data[a <= i < b] unexamined
+		//	data[b <= i < c] = pivot
+		for {
+			for ; a < b && data[b-1].literal > data[pivot].literal; b-- { // data[b] == pivot
+			}
+			for ; a < b && data[a].literal < data[pivot].literal; a++ { // data[a] < pivot
+			}
+			if a >= b {
+				break
+			}
+			// data[a] == pivot; data[b-1] < pivot
+			data[a], data[b-1] = data[b-1], data[a]
+			a++
+			b--
+		}
+	}
+	// Swap pivot into middle
+	data[pivot], data[b-1] = data[b-1], data[pivot]
+	return b - 1, c
+}
+
+// Insertion sort
+func insertionSort(data []literalNode, a, b int) {
+	for i := a + 1; i < b; i++ {
+		for j := i; j > a && data[j].literal < data[j-1].literal; j-- {
+			data[j], data[j-1] = data[j-1], data[j]
+		}
+	}
+}
+
+// maxDepth returns a threshold at which quicksort should switch
+// to heapsort. It returns 2*ceil(lg(n+1)).
+func maxDepth(n int) int {
+	var depth int
+	for i := n; i > 0; i >>= 1 {
+		depth++
+	}
+	return depth * 2
+}
+
+// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
+func medianOfThree(data []literalNode, m1, m0, m2 int) {
+	// sort 3 elements
+	if data[m1].literal < data[m0].literal {
+		data[m1], data[m0] = data[m0], data[m1]
+	}
+	// data[m0] <= data[m1]
+	if data[m2].literal < data[m1].literal {
+		data[m2], data[m1] = data[m1], data[m2]
+		// data[m0] <= data[m2] && data[m1] < data[m2]
+		if data[m1].literal < data[m0].literal {
+			data[m1], data[m0] = data[m0], data[m1]
+		}
+	}
+	// now data[m0] <= data[m1] <= data[m2]
+}
diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go
new file mode 100644
index 00000000000..414c0bea9fa
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/inflate.go
@@ -0,0 +1,793 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package flate implements the DEFLATE compressed data format, described in
+// RFC 1951.  The gzip and zlib packages implement access to DEFLATE-based file
+// formats.
+package flate
+
+import (
+	"bufio"
+	"compress/flate"
+	"fmt"
+	"io"
+	"math/bits"
+	"sync"
+)
+
+const (
+	maxCodeLen     = 16 // max length of Huffman code
+	maxCodeLenMask = 15 // mask for max length of Huffman code
+	// The next three numbers come from the RFC section 3.2.7, with the
+	// additional proviso in section 3.2.5 which implies that distance codes
+	// 30 and 31 should never occur in compressed data.
+	maxNumLit  = 286
+	maxNumDist = 30
+	numCodes   = 19 // number of codes in Huffman meta-code
+
+	debugDecode = false
+)
+
+// Value of length - 3 and extra bits.
+type lengthExtra struct {
+	length, extra uint8
+}
+
+var decCodeToLen = [32]lengthExtra{{length: 0x0, extra: 0x0}, {length: 0x1, extra: 0x0}, {length: 0x2, extra: 0x0}, {length: 0x3, extra: 0x0}, {length: 0x4, extra: 0x0}, {length: 0x5, extra: 0x0}, {length: 0x6, extra: 0x0}, {length: 0x7, extra: 0x0}, {length: 0x8, extra: 0x1}, {length: 0xa, extra: 0x1}, {length: 0xc, extra: 0x1}, {length: 0xe, extra: 0x1}, {length: 0x10, extra: 0x2}, {length: 0x14, extra: 0x2}, {length: 0x18, extra: 0x2}, {length: 0x1c, extra: 0x2}, {length: 0x20, extra: 0x3}, {length: 0x28, extra: 0x3}, {length: 0x30, extra: 0x3}, {length: 0x38, extra: 0x3}, {length: 0x40, extra: 0x4}, {length: 0x50, extra: 0x4}, {length: 0x60, extra: 0x4}, {length: 0x70, extra: 0x4}, {length: 0x80, extra: 0x5}, {length: 0xa0, extra: 0x5}, {length: 0xc0, extra: 0x5}, {length: 0xe0, extra: 0x5}, {length: 0xff, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}}
+
+var bitMask32 = [32]uint32{
+	0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF,
+	0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF,
+	0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF,
+	0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF,
+} // up to 32 bits
+
+// Initialize the fixedHuffmanDecoder only once upon first use.
+var fixedOnce sync.Once
+var fixedHuffmanDecoder huffmanDecoder
+
+// A CorruptInputError reports the presence of corrupt input at a given offset.
+type CorruptInputError = flate.CorruptInputError
+
+// An InternalError reports an error in the flate code itself.
+type InternalError string
+
+func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
+
+// A ReadError reports an error encountered while reading input.
+//
+// Deprecated: No longer returned.
+type ReadError = flate.ReadError
+
+// A WriteError reports an error encountered while writing output.
+//
+// Deprecated: No longer returned.
+type WriteError = flate.WriteError
+
+// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
+// to switch to a new underlying Reader. This permits reusing a ReadCloser
+// instead of allocating a new one.
+type Resetter interface {
+	// Reset discards any buffered data and resets the Resetter as if it was
+	// newly initialized with the given reader.
+	Reset(r io.Reader, dict []byte) error
+}
+
+// The data structure for decoding Huffman tables is based on that of
+// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
+// For codes smaller than the table width, there are multiple entries
+// (each combination of trailing bits has the same value). For codes
+// larger than the table width, the table contains a link to an overflow
+// table. The width of each entry in the link table is the maximum code
+// size minus the chunk width.
+//
+// Note that you can do a lookup in the table even without all bits
+// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
+// have the property that shorter codes come before longer ones, the
+// bit length estimate in the result is a lower bound on the actual
+// number of bits.
+//
+// See the following:
+//	http://www.gzip.org/algorithm.txt
+
+// chunk & 15 is number of bits
+// chunk >> 4 is value, including table link
+
+const (
+	huffmanChunkBits  = 9
+	huffmanNumChunks  = 1 << huffmanChunkBits
+	huffmanCountMask  = 15
+	huffmanValueShift = 4
+)
+
+type huffmanDecoder struct {
+	maxRead  int                       // the maximum number of bits we can read and not overread
+	chunks   *[huffmanNumChunks]uint16 // chunks as described above
+	links    [][]uint16                // overflow links
+	linkMask uint32                    // mask the width of the link table
+}
+
+// Initialize Huffman decoding tables from array of code lengths.
+// Following this function, h is guaranteed to be initialized into a complete
+// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
+// degenerate case where the tree has only a single symbol with length 1. Empty
+// trees are permitted.
+func (h *huffmanDecoder) init(lengths []int) bool {
+	// Sanity enables additional runtime tests during Huffman
+	// table construction. It's intended to be used during
+	// development to supplement the currently ad-hoc unit tests.
+	const sanity = false
+
+	if h.chunks == nil {
+		h.chunks = &[huffmanNumChunks]uint16{}
+	}
+	if h.maxRead != 0 {
+		*h = huffmanDecoder{chunks: h.chunks, links: h.links}
+	}
+
+	// Count number of codes of each length,
+	// compute maxRead and max length.
+	var count [maxCodeLen]int
+	var min, max int
+	for _, n := range lengths {
+		if n == 0 {
+			continue
+		}
+		if min == 0 || n < min {
+			min = n
+		}
+		if n > max {
+			max = n
+		}
+		count[n&maxCodeLenMask]++
+	}
+
+	// Empty tree. The decompressor.huffSym function will fail later if the tree
+	// is used. Technically, an empty tree is only valid for the HDIST tree and
+	// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
+	// is guaranteed to fail since it will attempt to use the tree to decode the
+	// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
+	// guaranteed to fail later since the compressed data section must be
+	// composed of at least one symbol (the end-of-block marker).
+	if max == 0 {
+		return true
+	}
+
+	code := 0
+	var nextcode [maxCodeLen]int
+	for i := min; i <= max; i++ {
+		code <<= 1
+		nextcode[i&maxCodeLenMask] = code
+		code += count[i&maxCodeLenMask]
+	}
+
+	// Check that the coding is complete (i.e., that we've
+	// assigned all 2-to-the-max possible bit sequences).
+	// Exception: To be compatible with zlib, we also need to
+	// accept degenerate single-code codings. See also
+	// TestDegenerateHuffmanCoding.
+	if code != 1<<uint(max) && !(code == 1 && max == 1) {
+		if debugDecode {
+			fmt.Println("coding failed, code, max:", code, max, code == 1<<uint(max), code == 1 && max == 1, "(one should be true)")
+		}
+		return false
+	}
+
+	h.maxRead = min
+	chunks := h.chunks[:]
+	for i := range chunks {
+		chunks[i] = 0
+	}
+
+	if max > huffmanChunkBits {
+		numLinks := 1 << (uint(max) - huffmanChunkBits)
+		h.linkMask = uint32(numLinks - 1)
+
+		// create link tables
+		link := nextcode[huffmanChunkBits+1] >> 1
+		if cap(h.links) < huffmanNumChunks-link {
+			h.links = make([][]uint16, huffmanNumChunks-link)
+		} else {
+			h.links = h.links[:huffmanNumChunks-link]
+		}
+		for j := uint(link); j < huffmanNumChunks; j++ {
+			reverse := int(bits.Reverse16(uint16(j)))
+			reverse >>= uint(16 - huffmanChunkBits)
+			off := j - uint(link)
+			if sanity && h.chunks[reverse] != 0 {
+				panic("impossible: overwriting existing chunk")
+			}
+			h.chunks[reverse] = uint16(off<<huffmanValueShift | (huffmanChunkBits + 1))
+			if cap(h.links[off]) < numLinks {
+				h.links[off] = make([]uint16, numLinks)
+			} else {
+				links := h.links[off][:0]
+				h.links[off] = links[:numLinks]
+			}
+		}
+	} else {
+		h.links = h.links[:0]
+	}
+
+	for i, n := range lengths {
+		if n == 0 {
+			continue
+		}
+		code := nextcode[n]
+		nextcode[n]++
+		chunk := uint16(i<<huffmanValueShift | n)
+		reverse := int(bits.Reverse16(uint16(code)))
+		reverse >>= uint(16 - n)
+		if n <= huffmanChunkBits {
+			for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
+				// We should never need to overwrite
+				// an existing chunk. Also, 0 is
+				// never a valid chunk, because the
+				// lower 4 "count" bits should be
+				// between 1 and 15.
+				if sanity && h.chunks[off] != 0 {
+					panic("impossible: overwriting existing chunk")
+				}
+				h.chunks[off] = chunk
+			}
+		} else {
+			j := reverse & (huffmanNumChunks - 1)
+			if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
+				// Longer codes should have been
+				// associated with a link table above.
+				panic("impossible: not an indirect chunk")
+			}
+			value := h.chunks[j] >> huffmanValueShift
+			linktab := h.links[value]
+			reverse >>= huffmanChunkBits
+			for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
+				if sanity && linktab[off] != 0 {
+					panic("impossible: overwriting existing chunk")
+				}
+				linktab[off] = chunk
+			}
+		}
+	}
+
+	if sanity {
+		// Above we've sanity checked that we never overwrote
+		// an existing entry. Here we additionally check that
+		// we filled the tables completely.
+		for i, chunk := range h.chunks {
+			if chunk == 0 {
+				// As an exception, in the degenerate
+				// single-code case, we allow odd
+				// chunks to be missing.
+				if code == 1 && i%2 == 1 {
+					continue
+				}
+				panic("impossible: missing chunk")
+			}
+		}
+		for _, linktab := range h.links {
+			for _, chunk := range linktab {
+				if chunk == 0 {
+					panic("impossible: missing chunk")
+				}
+			}
+		}
+	}
+
+	return true
+}
+
+// The actual read interface needed by NewReader.
+// If the passed in io.Reader does not also have ReadByte,
+// the NewReader will introduce its own buffering.
+type Reader interface {
+	io.Reader
+	io.ByteReader
+}
+
+// Decompress state.
+type decompressor struct {
+	// Input source.
+	r       Reader
+	roffset int64
+
+	// Huffman decoders for literal/length, distance.
+	h1, h2 huffmanDecoder
+
+	// Length arrays used to define Huffman codes.
+	bits     *[maxNumLit + maxNumDist]int
+	codebits *[numCodes]int
+
+	// Output history, buffer.
+	dict dictDecoder
+
+	// Next step in the decompression,
+	// and decompression state.
+	step      func(*decompressor)
+	stepState int
+	err       error
+	toRead    []byte
+	hl, hd    *huffmanDecoder
+	copyLen   int
+	copyDist  int
+
+	// Temporary buffer (avoids repeated allocation).
+	buf [4]byte
+
+	// Input bits, in top of b.
+	b uint32
+
+	nb    uint
+	final bool
+}
+
+func (f *decompressor) nextBlock() {
+	for f.nb < 1+2 {
+		if f.err = f.moreBits(); f.err != nil {
+			return
+		}
+	}
+	f.final = f.b&1 == 1
+	f.b >>= 1
+	typ := f.b & 3
+	f.b >>= 2
+	f.nb -= 1 + 2
+	switch typ {
+	case 0:
+		f.dataBlock()
+		if debugDecode {
+			fmt.Println("stored block")
+		}
+	case 1:
+		// compressed, fixed Huffman tables
+		f.hl = &fixedHuffmanDecoder
+		f.hd = nil
+		f.huffmanBlockDecoder()()
+		if debugDecode {
+			fmt.Println("predefinied huffman block")
+		}
+	case 2:
+		// compressed, dynamic Huffman tables
+		if f.err = f.readHuffman(); f.err != nil {
+			break
+		}
+		f.hl = &f.h1
+		f.hd = &f.h2
+		f.huffmanBlockDecoder()()
+		if debugDecode {
+			fmt.Println("dynamic huffman block")
+		}
+	default:
+		// 3 is reserved.
+		if debugDecode {
+			fmt.Println("reserved data block encountered")
+		}
+		f.err = CorruptInputError(f.roffset)
+	}
+}
+
+func (f *decompressor) Read(b []byte) (int, error) {
+	for {
+		if len(f.toRead) > 0 {
+			n := copy(b, f.toRead)
+			f.toRead = f.toRead[n:]
+			if len(f.toRead) == 0 {
+				return n, f.err
+			}
+			return n, nil
+		}
+		if f.err != nil {
+			return 0, f.err
+		}
+		f.step(f)
+		if f.err != nil && len(f.toRead) == 0 {
+			f.toRead = f.dict.readFlush() // Flush what's left in case of error
+		}
+	}
+}
+
+// Support the io.WriteTo interface for io.Copy and friends.
+func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
+	total := int64(0)
+	flushed := false
+	for {
+		if len(f.toRead) > 0 {
+			n, err := w.Write(f.toRead)
+			total += int64(n)
+			if err != nil {
+				f.err = err
+				return total, err
+			}
+			if n != len(f.toRead) {
+				return total, io.ErrShortWrite
+			}
+			f.toRead = f.toRead[:0]
+		}
+		if f.err != nil && flushed {
+			if f.err == io.EOF {
+				return total, nil
+			}
+			return total, f.err
+		}
+		if f.err == nil {
+			f.step(f)
+		}
+		if len(f.toRead) == 0 && f.err != nil && !flushed {
+			f.toRead = f.dict.readFlush() // Flush what's left in case of error
+			flushed = true
+		}
+	}
+}
+
+func (f *decompressor) Close() error {
+	if f.err == io.EOF {
+		return nil
+	}
+	return f.err
+}
+
+// RFC 1951 section 3.2.7.
+// Compression with dynamic Huffman codes
+
+var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
+
+func (f *decompressor) readHuffman() error {
+	// HLIT[5], HDIST[5], HCLEN[4].
+	for f.nb < 5+5+4 {
+		if err := f.moreBits(); err != nil {
+			return err
+		}
+	}
+	nlit := int(f.b&0x1F) + 257
+	if nlit > maxNumLit {
+		if debugDecode {
+			fmt.Println("nlit > maxNumLit", nlit)
+		}
+		return CorruptInputError(f.roffset)
+	}
+	f.b >>= 5
+	ndist := int(f.b&0x1F) + 1
+	if ndist > maxNumDist {
+		if debugDecode {
+			fmt.Println("ndist > maxNumDist", ndist)
+		}
+		return CorruptInputError(f.roffset)
+	}
+	f.b >>= 5
+	nclen := int(f.b&0xF) + 4
+	// numCodes is 19, so nclen is always valid.
+	f.b >>= 4
+	f.nb -= 5 + 5 + 4
+
+	// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
+	for i := 0; i < nclen; i++ {
+		for f.nb < 3 {
+			if err := f.moreBits(); err != nil {
+				return err
+			}
+		}
+		f.codebits[codeOrder[i]] = int(f.b & 0x7)
+		f.b >>= 3
+		f.nb -= 3
+	}
+	for i := nclen; i < len(codeOrder); i++ {
+		f.codebits[codeOrder[i]] = 0
+	}
+	if !f.h1.init(f.codebits[0:]) {
+		if debugDecode {
+			fmt.Println("init codebits failed")
+		}
+		return CorruptInputError(f.roffset)
+	}
+
+	// HLIT + 257 code lengths, HDIST + 1 code lengths,
+	// using the code length Huffman code.
+	for i, n := 0, nlit+ndist; i < n; {
+		x, err := f.huffSym(&f.h1)
+		if err != nil {
+			return err
+		}
+		if x < 16 {
+			// Actual length.
+			f.bits[i] = x
+			i++
+			continue
+		}
+		// Repeat previous length or zero.
+		var rep int
+		var nb uint
+		var b int
+		switch x {
+		default:
+			return InternalError("unexpected length code")
+		case 16:
+			rep = 3
+			nb = 2
+			if i == 0 {
+				if debugDecode {
+					fmt.Println("i==0")
+				}
+				return CorruptInputError(f.roffset)
+			}
+			b = f.bits[i-1]
+		case 17:
+			rep = 3
+			nb = 3
+			b = 0
+		case 18:
+			rep = 11
+			nb = 7
+			b = 0
+		}
+		for f.nb < nb {
+			if err := f.moreBits(); err != nil {
+				if debugDecode {
+					fmt.Println("morebits:", err)
+				}
+				return err
+			}
+		}
+		rep += int(f.b & uint32(1<<(nb&regSizeMaskUint32)-1))
+		f.b >>= nb & regSizeMaskUint32
+		f.nb -= nb
+		if i+rep > n {
+			if debugDecode {
+				fmt.Println("i+rep > n", i, rep, n)
+			}
+			return CorruptInputError(f.roffset)
+		}
+		for j := 0; j < rep; j++ {
+			f.bits[i] = b
+			i++
+		}
+	}
+
+	if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
+		if debugDecode {
+			fmt.Println("init2 failed")
+		}
+		return CorruptInputError(f.roffset)
+	}
+
+	// As an optimization, we can initialize the maxRead bits to read at a time
+	// for the HLIT tree to the length of the EOB marker since we know that
+	// every block must terminate with one. This preserves the property that
+	// we never read any extra bytes after the end of the DEFLATE stream.
+	if f.h1.maxRead < f.bits[endBlockMarker] {
+		f.h1.maxRead = f.bits[endBlockMarker]
+	}
+	if !f.final {
+		// If not the final block, the smallest block possible is
+		// a predefined table, BTYPE=01, with a single EOB marker.
+		// This will take up 3 + 7 bits.
+		f.h1.maxRead += 10
+	}
+
+	return nil
+}
+
+// Copy a single uncompressed data block from input to output.
+func (f *decompressor) dataBlock() {
+	// Uncompressed.
+	// Discard current half-byte.
+	left := (f.nb) & 7
+	f.nb -= left
+	f.b >>= left
+
+	offBytes := f.nb >> 3
+	// Unfilled values will be overwritten.
+	f.buf[0] = uint8(f.b)
+	f.buf[1] = uint8(f.b >> 8)
+	f.buf[2] = uint8(f.b >> 16)
+	f.buf[3] = uint8(f.b >> 24)
+
+	f.roffset += int64(offBytes)
+	f.nb, f.b = 0, 0
+
+	// Length then ones-complement of length.
+	nr, err := io.ReadFull(f.r, f.buf[offBytes:4])
+	f.roffset += int64(nr)
+	if err != nil {
+		f.err = noEOF(err)
+		return
+	}
+	n := uint16(f.buf[0]) | uint16(f.buf[1])<<8
+	nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8
+	if nn != ^n {
+		if debugDecode {
+			ncomp := ^n
+			fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp)
+		}
+		f.err = CorruptInputError(f.roffset)
+		return
+	}
+
+	if n == 0 {
+		f.toRead = f.dict.readFlush()
+		f.finishBlock()
+		return
+	}
+
+	f.copyLen = int(n)
+	f.copyData()
+}
+
+// copyData copies f.copyLen bytes from the underlying reader into f.hist.
+// It pauses for reads when f.hist is full.
+func (f *decompressor) copyData() {
+	buf := f.dict.writeSlice()
+	if len(buf) > f.copyLen {
+		buf = buf[:f.copyLen]
+	}
+
+	cnt, err := io.ReadFull(f.r, buf)
+	f.roffset += int64(cnt)
+	f.copyLen -= cnt
+	f.dict.writeMark(cnt)
+	if err != nil {
+		f.err = noEOF(err)
+		return
+	}
+
+	if f.dict.availWrite() == 0 || f.copyLen > 0 {
+		f.toRead = f.dict.readFlush()
+		f.step = (*decompressor).copyData
+		return
+	}
+	f.finishBlock()
+}
+
+func (f *decompressor) finishBlock() {
+	if f.final {
+		if f.dict.availRead() > 0 {
+			f.toRead = f.dict.readFlush()
+		}
+		f.err = io.EOF
+	}
+	f.step = (*decompressor).nextBlock
+}
+
+// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
+func noEOF(e error) error {
+	if e == io.EOF {
+		return io.ErrUnexpectedEOF
+	}
+	return e
+}
+
+func (f *decompressor) moreBits() error {
+	c, err := f.r.ReadByte()
+	if err != nil {
+		return noEOF(err)
+	}
+	f.roffset++
+	f.b |= uint32(c) << (f.nb & regSizeMaskUint32)
+	f.nb += 8
+	return nil
+}
+
+// Read the next Huffman-encoded symbol from f according to h.
+func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
+	// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+	// with single element, huffSym must error on these two edge cases. In both
+	// cases, the chunks slice will be 0 for the invalid sequence, leading it
+	// satisfy the n == 0 check below.
+	n := uint(h.maxRead)
+	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+	// but is smart enough to keep local variables in registers, so use nb and b,
+	// inline call to moreBits and reassign b,nb back to f on return.
+	nb, b := f.nb, f.b
+	for {
+		for nb < n {
+			c, err := f.r.ReadByte()
+			if err != nil {
+				f.b = b
+				f.nb = nb
+				return 0, noEOF(err)
+			}
+			f.roffset++
+			b |= uint32(c) << (nb & regSizeMaskUint32)
+			nb += 8
+		}
+		chunk := h.chunks[b&(huffmanNumChunks-1)]
+		n = uint(chunk & huffmanCountMask)
+		if n > huffmanChunkBits {
+			chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
+			n = uint(chunk & huffmanCountMask)
+		}
+		if n <= nb {
+			if n == 0 {
+				f.b = b
+				f.nb = nb
+				if debugDecode {
+					fmt.Println("huffsym: n==0")
+				}
+				f.err = CorruptInputError(f.roffset)
+				return 0, f.err
+			}
+			f.b = b >> (n & regSizeMaskUint32)
+			f.nb = nb - n
+			return int(chunk >> huffmanValueShift), nil
+		}
+	}
+}
+
+func makeReader(r io.Reader) Reader {
+	if rr, ok := r.(Reader); ok {
+		return rr
+	}
+	return bufio.NewReader(r)
+}
+
+func fixedHuffmanDecoderInit() {
+	fixedOnce.Do(func() {
+		// These come from the RFC section 3.2.6.
+		var bits [288]int
+		for i := 0; i < 144; i++ {
+			bits[i] = 8
+		}
+		for i := 144; i < 256; i++ {
+			bits[i] = 9
+		}
+		for i := 256; i < 280; i++ {
+			bits[i] = 7
+		}
+		for i := 280; i < 288; i++ {
+			bits[i] = 8
+		}
+		fixedHuffmanDecoder.init(bits[:])
+	})
+}
+
+func (f *decompressor) Reset(r io.Reader, dict []byte) error {
+	*f = decompressor{
+		r:        makeReader(r),
+		bits:     f.bits,
+		codebits: f.codebits,
+		h1:       f.h1,
+		h2:       f.h2,
+		dict:     f.dict,
+		step:     (*decompressor).nextBlock,
+	}
+	f.dict.init(maxMatchOffset, dict)
+	return nil
+}
+
+// NewReader returns a new ReadCloser that can be used
+// to read the uncompressed version of r.
+// If r does not also implement io.ByteReader,
+// the decompressor may read more data than necessary from r.
+// It is the caller's responsibility to call Close on the ReadCloser
+// when finished reading.
+//
+// The ReadCloser returned by NewReader also implements Resetter.
+func NewReader(r io.Reader) io.ReadCloser {
+	fixedHuffmanDecoderInit()
+
+	var f decompressor
+	f.r = makeReader(r)
+	f.bits = new([maxNumLit + maxNumDist]int)
+	f.codebits = new([numCodes]int)
+	f.step = (*decompressor).nextBlock
+	f.dict.init(maxMatchOffset, nil)
+	return &f
+}
+
+// NewReaderDict is like NewReader but initializes the reader
+// with a preset dictionary. The returned Reader behaves as if
+// the uncompressed data stream started with the given dictionary,
+// which has already been read. NewReaderDict is typically used
+// to read data compressed by NewWriterDict.
+//
+// The ReadCloser returned by NewReader also implements Resetter.
+func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
+	fixedHuffmanDecoderInit()
+
+	var f decompressor
+	f.r = makeReader(r)
+	f.bits = new([maxNumLit + maxNumDist]int)
+	f.codebits = new([numCodes]int)
+	f.step = (*decompressor).nextBlock
+	f.dict.init(maxMatchOffset, dict)
+	return &f
+}
diff --git a/vendor/github.com/klauspost/compress/flate/inflate_gen.go b/vendor/github.com/klauspost/compress/flate/inflate_gen.go
new file mode 100644
index 00000000000..61342b6b88f
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/inflate_gen.go
@@ -0,0 +1,1283 @@
+// Code generated by go generate gen_inflate.go. DO NOT EDIT.
+
+package flate
+
+import (
+	"bufio"
+	"bytes"
+	"fmt"
+	"math/bits"
+	"strings"
+)
+
+// Decode a single Huffman block from f.
+// hl and hd are the Huffman states for the lit/length values
+// and the distance values, respectively. If hd == nil, using the
+// fixed distance encoding associated with fixed Huffman blocks.
+func (f *decompressor) huffmanBytesBuffer() {
+	const (
+		stateInit = iota // Zero value must be stateInit
+		stateDict
+	)
+	fr := f.r.(*bytes.Buffer)
+
+	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+	// but is smart enough to keep local variables in registers, so use nb and b,
+	// inline call to moreBits and reassign b,nb back to f on return.
+	fnb, fb, dict := f.nb, f.b, &f.dict
+
+	switch f.stepState {
+	case stateInit:
+		goto readLiteral
+	case stateDict:
+		goto copyHistory
+	}
+
+readLiteral:
+	// Read literal and/or (length, distance) according to RFC section 3.2.3.
+	{
+		var v int
+		{
+			// Inlined v, err := f.huffSym(f.hl)
+			// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+			// with single element, huffSym must error on these two edge cases. In both
+			// cases, the chunks slice will be 0 for the invalid sequence, leading it
+			// satisfy the n == 0 check below.
+			n := uint(f.hl.maxRead)
+			for {
+				for fnb < n {
+					c, err := fr.ReadByte()
+					if err != nil {
+						f.b, f.nb = fb, fnb
+						f.err = noEOF(err)
+						return
+					}
+					f.roffset++
+					fb |= uint32(c) << (fnb & regSizeMaskUint32)
+					fnb += 8
+				}
+				chunk := f.hl.chunks[fb&(huffmanNumChunks-1)]
+				n = uint(chunk & huffmanCountMask)
+				if n > huffmanChunkBits {
+					chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask]
+					n = uint(chunk & huffmanCountMask)
+				}
+				if n <= fnb {
+					if n == 0 {
+						f.b, f.nb = fb, fnb
+						if debugDecode {
+							fmt.Println("huffsym: n==0")
+						}
+						f.err = CorruptInputError(f.roffset)
+						return
+					}
+					fb = fb >> (n & regSizeMaskUint32)
+					fnb = fnb - n
+					v = int(chunk >> huffmanValueShift)
+					break
+				}
+			}
+		}
+
+		var length int
+		switch {
+		case v < 256:
+			dict.writeByte(byte(v))
+			if dict.availWrite() == 0 {
+				f.toRead = dict.readFlush()
+				f.step = (*decompressor).huffmanBytesBuffer
+				f.stepState = stateInit
+				f.b, f.nb = fb, fnb
+				return
+			}
+			goto readLiteral
+		case v == 256:
+			f.b, f.nb = fb, fnb
+			f.finishBlock()
+			return
+		// otherwise, reference to older data
+		case v < 265:
+			length = v - (257 - 3)
+		case v < maxNumLit:
+			val := decCodeToLen[(v - 257)]
+			length = int(val.length) + 3
+			n := uint(val.extra)
+			for fnb < n {
+				c, err := fr.ReadByte()
+				if err != nil {
+					f.b, f.nb = fb, fnb
+					if debugDecode {
+						fmt.Println("morebits n>0:", err)
+					}
+					f.err = err
+					return
+				}
+				f.roffset++
+				fb |= uint32(c) << (fnb & regSizeMaskUint32)
+				fnb += 8
+			}
+			length += int(fb & bitMask32[n])
+			fb >>= n & regSizeMaskUint32
+			fnb -= n
+		default:
+			if debugDecode {
+				fmt.Println(v, ">= maxNumLit")
+			}
+			f.err = CorruptInputError(f.roffset)
+			f.b, f.nb = fb, fnb
+			return
+		}
+
+		var dist uint32
+		if f.hd == nil {
+			for fnb < 5 {
+				c, err := fr.ReadByte()
+				if err != nil {
+					f.b, f.nb = fb, fnb
+					if debugDecode {
+						fmt.Println("morebits f.nb<5:", err)
+					}
+					f.err = err
+					return
+				}
+				f.roffset++
+				fb |= uint32(c) << (fnb & regSizeMaskUint32)
+				fnb += 8
+			}
+			dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3)))
+			fb >>= 5
+			fnb -= 5
+		} else {
+			// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+			// with single element, huffSym must error on these two edge cases. In both
+			// cases, the chunks slice will be 0 for the invalid sequence, leading it
+			// satisfy the n == 0 check below.
+			n := uint(f.hd.maxRead)
+			// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+			// but is smart enough to keep local variables in registers, so use nb and b,
+			// inline call to moreBits and reassign b,nb back to f on return.
+			for {
+				for fnb < n {
+					c, err := fr.ReadByte()
+					if err != nil {
+						f.b, f.nb = fb, fnb
+						f.err = noEOF(err)
+						return
+					}
+					f.roffset++
+					fb |= uint32(c) << (fnb & regSizeMaskUint32)
+					fnb += 8
+				}
+				chunk := f.hd.chunks[fb&(huffmanNumChunks-1)]
+				n = uint(chunk & huffmanCountMask)
+				if n > huffmanChunkBits {
+					chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask]
+					n = uint(chunk & huffmanCountMask)
+				}
+				if n <= fnb {
+					if n == 0 {
+						f.b, f.nb = fb, fnb
+						if debugDecode {
+							fmt.Println("huffsym: n==0")
+						}
+						f.err = CorruptInputError(f.roffset)
+						return
+					}
+					fb = fb >> (n & regSizeMaskUint32)
+					fnb = fnb - n
+					dist = uint32(chunk >> huffmanValueShift)
+					break
+				}
+			}
+		}
+
+		switch {
+		case dist < 4:
+			dist++
+		case dist < maxNumDist:
+			nb := uint(dist-2) >> 1
+			// have 1 bit in bottom of dist, need nb more.
+			extra := (dist & 1) << (nb & regSizeMaskUint32)
+			for fnb < nb {
+				c, err := fr.ReadByte()
+				if err != nil {
+					f.b, f.nb = fb, fnb
+					if debugDecode {
+						fmt.Println("morebits f.nb<nb:", err)
+					}
+					f.err = err
+					return
+				}
+				f.roffset++
+				fb |= uint32(c) << (fnb & regSizeMaskUint32)
+				fnb += 8
+			}
+			extra |= fb & bitMask32[nb]
+			fb >>= nb & regSizeMaskUint32
+			fnb -= nb
+			dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
+			// slower: dist = bitMask32[nb+1] + 2 + extra
+		default:
+			f.b, f.nb = fb, fnb
+			if debugDecode {
+				fmt.Println("dist too big:", dist, maxNumDist)
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		// No check on length; encoding can be prescient.
+		if dist > uint32(dict.histSize()) {
+			f.b, f.nb = fb, fnb
+			if debugDecode {
+				fmt.Println("dist > dict.histSize():", dist, dict.histSize())
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		f.copyLen, f.copyDist = length, int(dist)
+		goto copyHistory
+	}
+
+copyHistory:
+	// Perform a backwards copy according to RFC section 3.2.3.
+	{
+		cnt := dict.tryWriteCopy(f.copyDist, f.copyLen)
+		if cnt == 0 {
+			cnt = dict.writeCopy(f.copyDist, f.copyLen)
+		}
+		f.copyLen -= cnt
+
+		if dict.availWrite() == 0 || f.copyLen > 0 {
+			f.toRead = dict.readFlush()
+			f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work
+			f.stepState = stateDict
+			f.b, f.nb = fb, fnb
+			return
+		}
+		goto readLiteral
+	}
+	// Not reached
+}
+
+// Decode a single Huffman block from f.
+// hl and hd are the Huffman states for the lit/length values
+// and the distance values, respectively. If hd == nil, using the
+// fixed distance encoding associated with fixed Huffman blocks.
+func (f *decompressor) huffmanBytesReader() {
+	const (
+		stateInit = iota // Zero value must be stateInit
+		stateDict
+	)
+	fr := f.r.(*bytes.Reader)
+
+	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+	// but is smart enough to keep local variables in registers, so use nb and b,
+	// inline call to moreBits and reassign b,nb back to f on return.
+	fnb, fb, dict := f.nb, f.b, &f.dict
+
+	switch f.stepState {
+	case stateInit:
+		goto readLiteral
+	case stateDict:
+		goto copyHistory
+	}
+
+readLiteral:
+	// Read literal and/or (length, distance) according to RFC section 3.2.3.
+	{
+		var v int
+		{
+			// Inlined v, err := f.huffSym(f.hl)
+			// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+			// with single element, huffSym must error on these two edge cases. In both
+			// cases, the chunks slice will be 0 for the invalid sequence, leading it
+			// satisfy the n == 0 check below.
+			n := uint(f.hl.maxRead)
+			for {
+				for fnb < n {
+					c, err := fr.ReadByte()
+					if err != nil {
+						f.b, f.nb = fb, fnb
+						f.err = noEOF(err)
+						return
+					}
+					f.roffset++
+					fb |= uint32(c) << (fnb & regSizeMaskUint32)
+					fnb += 8
+				}
+				chunk := f.hl.chunks[fb&(huffmanNumChunks-1)]
+				n = uint(chunk & huffmanCountMask)
+				if n > huffmanChunkBits {
+					chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask]
+					n = uint(chunk & huffmanCountMask)
+				}
+				if n <= fnb {
+					if n == 0 {
+						f.b, f.nb = fb, fnb
+						if debugDecode {
+							fmt.Println("huffsym: n==0")
+						}
+						f.err = CorruptInputError(f.roffset)
+						return
+					}
+					fb = fb >> (n & regSizeMaskUint32)
+					fnb = fnb - n
+					v = int(chunk >> huffmanValueShift)
+					break
+				}
+			}
+		}
+
+		var length int
+		switch {
+		case v < 256:
+			dict.writeByte(byte(v))
+			if dict.availWrite() == 0 {
+				f.toRead = dict.readFlush()
+				f.step = (*decompressor).huffmanBytesReader
+				f.stepState = stateInit
+				f.b, f.nb = fb, fnb
+				return
+			}
+			goto readLiteral
+		case v == 256:
+			f.b, f.nb = fb, fnb
+			f.finishBlock()
+			return
+		// otherwise, reference to older data
+		case v < 265:
+			length = v - (257 - 3)
+		case v < maxNumLit:
+			val := decCodeToLen[(v - 257)]
+			length = int(val.length) + 3
+			n := uint(val.extra)
+			for fnb < n {
+				c, err := fr.ReadByte()
+				if err != nil {
+					f.b, f.nb = fb, fnb
+					if debugDecode {
+						fmt.Println("morebits n>0:", err)
+					}
+					f.err = err
+					return
+				}
+				f.roffset++
+				fb |= uint32(c) << (fnb & regSizeMaskUint32)
+				fnb += 8
+			}
+			length += int(fb & bitMask32[n])
+			fb >>= n & regSizeMaskUint32
+			fnb -= n
+		default:
+			if debugDecode {
+				fmt.Println(v, ">= maxNumLit")
+			}
+			f.err = CorruptInputError(f.roffset)
+			f.b, f.nb = fb, fnb
+			return
+		}
+
+		var dist uint32
+		if f.hd == nil {
+			for fnb < 5 {
+				c, err := fr.ReadByte()
+				if err != nil {
+					f.b, f.nb = fb, fnb
+					if debugDecode {
+						fmt.Println("morebits f.nb<5:", err)
+					}
+					f.err = err
+					return
+				}
+				f.roffset++
+				fb |= uint32(c) << (fnb & regSizeMaskUint32)
+				fnb += 8
+			}
+			dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3)))
+			fb >>= 5
+			fnb -= 5
+		} else {
+			// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+			// with single element, huffSym must error on these two edge cases. In both
+			// cases, the chunks slice will be 0 for the invalid sequence, leading it
+			// satisfy the n == 0 check below.
+			n := uint(f.hd.maxRead)
+			// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+			// but is smart enough to keep local variables in registers, so use nb and b,
+			// inline call to moreBits and reassign b,nb back to f on return.
+			for {
+				for fnb < n {
+					c, err := fr.ReadByte()
+					if err != nil {
+						f.b, f.nb = fb, fnb
+						f.err = noEOF(err)
+						return
+					}
+					f.roffset++
+					fb |= uint32(c) << (fnb & regSizeMaskUint32)
+					fnb += 8
+				}
+				chunk := f.hd.chunks[fb&(huffmanNumChunks-1)]
+				n = uint(chunk & huffmanCountMask)
+				if n > huffmanChunkBits {
+					chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask]
+					n = uint(chunk & huffmanCountMask)
+				}
+				if n <= fnb {
+					if n == 0 {
+						f.b, f.nb = fb, fnb
+						if debugDecode {
+							fmt.Println("huffsym: n==0")
+						}
+						f.err = CorruptInputError(f.roffset)
+						return
+					}
+					fb = fb >> (n & regSizeMaskUint32)
+					fnb = fnb - n
+					dist = uint32(chunk >> huffmanValueShift)
+					break
+				}
+			}
+		}
+
+		switch {
+		case dist < 4:
+			dist++
+		case dist < maxNumDist:
+			nb := uint(dist-2) >> 1
+			// have 1 bit in bottom of dist, need nb more.
+			extra := (dist & 1) << (nb & regSizeMaskUint32)
+			for fnb < nb {
+				c, err := fr.ReadByte()
+				if err != nil {
+					f.b, f.nb = fb, fnb
+					if debugDecode {
+						fmt.Println("morebits f.nb<nb:", err)
+					}
+					f.err = err
+					return
+				}
+				f.roffset++
+				fb |= uint32(c) << (fnb & regSizeMaskUint32)
+				fnb += 8
+			}
+			extra |= fb & bitMask32[nb]
+			fb >>= nb & regSizeMaskUint32
+			fnb -= nb
+			dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
+			// slower: dist = bitMask32[nb+1] + 2 + extra
+		default:
+			f.b, f.nb = fb, fnb
+			if debugDecode {
+				fmt.Println("dist too big:", dist, maxNumDist)
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		// No check on length; encoding can be prescient.
+		if dist > uint32(dict.histSize()) {
+			f.b, f.nb = fb, fnb
+			if debugDecode {
+				fmt.Println("dist > dict.histSize():", dist, dict.histSize())
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		f.copyLen, f.copyDist = length, int(dist)
+		goto copyHistory
+	}
+
+copyHistory:
+	// Perform a backwards copy according to RFC section 3.2.3.
+	{
+		cnt := dict.tryWriteCopy(f.copyDist, f.copyLen)
+		if cnt == 0 {
+			cnt = dict.writeCopy(f.copyDist, f.copyLen)
+		}
+		f.copyLen -= cnt
+
+		if dict.availWrite() == 0 || f.copyLen > 0 {
+			f.toRead = dict.readFlush()
+			f.step = (*decompressor).huffmanBytesReader // We need to continue this work
+			f.stepState = stateDict
+			f.b, f.nb = fb, fnb
+			return
+		}
+		goto readLiteral
+	}
+	// Not reached
+}
+
+// Decode a single Huffman block from f.
+// hl and hd are the Huffman states for the lit/length values
+// and the distance values, respectively. If hd == nil, using the
+// fixed distance encoding associated with fixed Huffman blocks.
+func (f *decompressor) huffmanBufioReader() {
+	const (
+		stateInit = iota // Zero value must be stateInit
+		stateDict
+	)
+	fr := f.r.(*bufio.Reader)
+
+	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+	// but is smart enough to keep local variables in registers, so use nb and b,
+	// inline call to moreBits and reassign b,nb back to f on return.
+	fnb, fb, dict := f.nb, f.b, &f.dict
+
+	switch f.stepState {
+	case stateInit:
+		goto readLiteral
+	case stateDict:
+		goto copyHistory
+	}
+
+readLiteral:
+	// Read literal and/or (length, distance) according to RFC section 3.2.3.
+	{
+		var v int
+		{
+			// Inlined v, err := f.huffSym(f.hl)
+			// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+			// with single element, huffSym must error on these two edge cases. In both
+			// cases, the chunks slice will be 0 for the invalid sequence, leading it
+			// satisfy the n == 0 check below.
+			n := uint(f.hl.maxRead)
+			for {
+				for fnb < n {
+					c, err := fr.ReadByte()
+					if err != nil {
+						f.b, f.nb = fb, fnb
+						f.err = noEOF(err)
+						return
+					}
+					f.roffset++
+					fb |= uint32(c) << (fnb & regSizeMaskUint32)
+					fnb += 8
+				}
+				chunk := f.hl.chunks[fb&(huffmanNumChunks-1)]
+				n = uint(chunk & huffmanCountMask)
+				if n > huffmanChunkBits {
+					chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask]
+					n = uint(chunk & huffmanCountMask)
+				}
+				if n <= fnb {
+					if n == 0 {
+						f.b, f.nb = fb, fnb
+						if debugDecode {
+							fmt.Println("huffsym: n==0")
+						}
+						f.err = CorruptInputError(f.roffset)
+						return
+					}
+					fb = fb >> (n & regSizeMaskUint32)
+					fnb = fnb - n
+					v = int(chunk >> huffmanValueShift)
+					break
+				}
+			}
+		}
+
+		var length int
+		switch {
+		case v < 256:
+			dict.writeByte(byte(v))
+			if dict.availWrite() == 0 {
+				f.toRead = dict.readFlush()
+				f.step = (*decompressor).huffmanBufioReader
+				f.stepState = stateInit
+				f.b, f.nb = fb, fnb
+				return
+			}
+			goto readLiteral
+		case v == 256:
+			f.b, f.nb = fb, fnb
+			f.finishBlock()
+			return
+		// otherwise, reference to older data
+		case v < 265:
+			length = v - (257 - 3)
+		case v < maxNumLit:
+			val := decCodeToLen[(v - 257)]
+			length = int(val.length) + 3
+			n := uint(val.extra)
+			for fnb < n {
+				c, err := fr.ReadByte()
+				if err != nil {
+					f.b, f.nb = fb, fnb
+					if debugDecode {
+						fmt.Println("morebits n>0:", err)
+					}
+					f.err = err
+					return
+				}
+				f.roffset++
+				fb |= uint32(c) << (fnb & regSizeMaskUint32)
+				fnb += 8
+			}
+			length += int(fb & bitMask32[n])
+			fb >>= n & regSizeMaskUint32
+			fnb -= n
+		default:
+			if debugDecode {
+				fmt.Println(v, ">= maxNumLit")
+			}
+			f.err = CorruptInputError(f.roffset)
+			f.b, f.nb = fb, fnb
+			return
+		}
+
+		var dist uint32
+		if f.hd == nil {
+			for fnb < 5 {
+				c, err := fr.ReadByte()
+				if err != nil {
+					f.b, f.nb = fb, fnb
+					if debugDecode {
+						fmt.Println("morebits f.nb<5:", err)
+					}
+					f.err = err
+					return
+				}
+				f.roffset++
+				fb |= uint32(c) << (fnb & regSizeMaskUint32)
+				fnb += 8
+			}
+			dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3)))
+			fb >>= 5
+			fnb -= 5
+		} else {
+			// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+			// with single element, huffSym must error on these two edge cases. In both
+			// cases, the chunks slice will be 0 for the invalid sequence, leading it
+			// satisfy the n == 0 check below.
+			n := uint(f.hd.maxRead)
+			// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+			// but is smart enough to keep local variables in registers, so use nb and b,
+			// inline call to moreBits and reassign b,nb back to f on return.
+			for {
+				for fnb < n {
+					c, err := fr.ReadByte()
+					if err != nil {
+						f.b, f.nb = fb, fnb
+						f.err = noEOF(err)
+						return
+					}
+					f.roffset++
+					fb |= uint32(c) << (fnb & regSizeMaskUint32)
+					fnb += 8
+				}
+				chunk := f.hd.chunks[fb&(huffmanNumChunks-1)]
+				n = uint(chunk & huffmanCountMask)
+				if n > huffmanChunkBits {
+					chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask]
+					n = uint(chunk & huffmanCountMask)
+				}
+				if n <= fnb {
+					if n == 0 {
+						f.b, f.nb = fb, fnb
+						if debugDecode {
+							fmt.Println("huffsym: n==0")
+						}
+						f.err = CorruptInputError(f.roffset)
+						return
+					}
+					fb = fb >> (n & regSizeMaskUint32)
+					fnb = fnb - n
+					dist = uint32(chunk >> huffmanValueShift)
+					break
+				}
+			}
+		}
+
+		switch {
+		case dist < 4:
+			dist++
+		case dist < maxNumDist:
+			nb := uint(dist-2) >> 1
+			// have 1 bit in bottom of dist, need nb more.
+			extra := (dist & 1) << (nb & regSizeMaskUint32)
+			for fnb < nb {
+				c, err := fr.ReadByte()
+				if err != nil {
+					f.b, f.nb = fb, fnb
+					if debugDecode {
+						fmt.Println("morebits f.nb<nb:", err)
+					}
+					f.err = err
+					return
+				}
+				f.roffset++
+				fb |= uint32(c) << (fnb & regSizeMaskUint32)
+				fnb += 8
+			}
+			extra |= fb & bitMask32[nb]
+			fb >>= nb & regSizeMaskUint32
+			fnb -= nb
+			dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
+			// slower: dist = bitMask32[nb+1] + 2 + extra
+		default:
+			f.b, f.nb = fb, fnb
+			if debugDecode {
+				fmt.Println("dist too big:", dist, maxNumDist)
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		// No check on length; encoding can be prescient.
+		if dist > uint32(dict.histSize()) {
+			f.b, f.nb = fb, fnb
+			if debugDecode {
+				fmt.Println("dist > dict.histSize():", dist, dict.histSize())
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		f.copyLen, f.copyDist = length, int(dist)
+		goto copyHistory
+	}
+
+copyHistory:
+	// Perform a backwards copy according to RFC section 3.2.3.
+	{
+		cnt := dict.tryWriteCopy(f.copyDist, f.copyLen)
+		if cnt == 0 {
+			cnt = dict.writeCopy(f.copyDist, f.copyLen)
+		}
+		f.copyLen -= cnt
+
+		if dict.availWrite() == 0 || f.copyLen > 0 {
+			f.toRead = dict.readFlush()
+			f.step = (*decompressor).huffmanBufioReader // We need to continue this work
+			f.stepState = stateDict
+			f.b, f.nb = fb, fnb
+			return
+		}
+		goto readLiteral
+	}
+	// Not reached
+}
+
+// Decode a single Huffman block from f.
+// hl and hd are the Huffman states for the lit/length values
+// and the distance values, respectively. If hd == nil, using the
+// fixed distance encoding associated with fixed Huffman blocks.
+func (f *decompressor) huffmanStringsReader() {
+	const (
+		stateInit = iota // Zero value must be stateInit
+		stateDict
+	)
+	fr := f.r.(*strings.Reader)
+
+	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+	// but is smart enough to keep local variables in registers, so use nb and b,
+	// inline call to moreBits and reassign b,nb back to f on return.
+	fnb, fb, dict := f.nb, f.b, &f.dict
+
+	switch f.stepState {
+	case stateInit:
+		goto readLiteral
+	case stateDict:
+		goto copyHistory
+	}
+
+readLiteral:
+	// Read literal and/or (length, distance) according to RFC section 3.2.3.
+	{
+		var v int
+		{
+			// Inlined v, err := f.huffSym(f.hl)
+			// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+			// with single element, huffSym must error on these two edge cases. In both
+			// cases, the chunks slice will be 0 for the invalid sequence, leading it
+			// satisfy the n == 0 check below.
+			n := uint(f.hl.maxRead)
+			for {
+				for fnb < n {
+					c, err := fr.ReadByte()
+					if err != nil {
+						f.b, f.nb = fb, fnb
+						f.err = noEOF(err)
+						return
+					}
+					f.roffset++
+					fb |= uint32(c) << (fnb & regSizeMaskUint32)
+					fnb += 8
+				}
+				chunk := f.hl.chunks[fb&(huffmanNumChunks-1)]
+				n = uint(chunk & huffmanCountMask)
+				if n > huffmanChunkBits {
+					chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask]
+					n = uint(chunk & huffmanCountMask)
+				}
+				if n <= fnb {
+					if n == 0 {
+						f.b, f.nb = fb, fnb
+						if debugDecode {
+							fmt.Println("huffsym: n==0")
+						}
+						f.err = CorruptInputError(f.roffset)
+						return
+					}
+					fb = fb >> (n & regSizeMaskUint32)
+					fnb = fnb - n
+					v = int(chunk >> huffmanValueShift)
+					break
+				}
+			}
+		}
+
+		var length int
+		switch {
+		case v < 256:
+			dict.writeByte(byte(v))
+			if dict.availWrite() == 0 {
+				f.toRead = dict.readFlush()
+				f.step = (*decompressor).huffmanStringsReader
+				f.stepState = stateInit
+				f.b, f.nb = fb, fnb
+				return
+			}
+			goto readLiteral
+		case v == 256:
+			f.b, f.nb = fb, fnb
+			f.finishBlock()
+			return
+		// otherwise, reference to older data
+		case v < 265:
+			length = v - (257 - 3)
+		case v < maxNumLit:
+			val := decCodeToLen[(v - 257)]
+			length = int(val.length) + 3
+			n := uint(val.extra)
+			for fnb < n {
+				c, err := fr.ReadByte()
+				if err != nil {
+					f.b, f.nb = fb, fnb
+					if debugDecode {
+						fmt.Println("morebits n>0:", err)
+					}
+					f.err = err
+					return
+				}
+				f.roffset++
+				fb |= uint32(c) << (fnb & regSizeMaskUint32)
+				fnb += 8
+			}
+			length += int(fb & bitMask32[n])
+			fb >>= n & regSizeMaskUint32
+			fnb -= n
+		default:
+			if debugDecode {
+				fmt.Println(v, ">= maxNumLit")
+			}
+			f.err = CorruptInputError(f.roffset)
+			f.b, f.nb = fb, fnb
+			return
+		}
+
+		var dist uint32
+		if f.hd == nil {
+			for fnb < 5 {
+				c, err := fr.ReadByte()
+				if err != nil {
+					f.b, f.nb = fb, fnb
+					if debugDecode {
+						fmt.Println("morebits f.nb<5:", err)
+					}
+					f.err = err
+					return
+				}
+				f.roffset++
+				fb |= uint32(c) << (fnb & regSizeMaskUint32)
+				fnb += 8
+			}
+			dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3)))
+			fb >>= 5
+			fnb -= 5
+		} else {
+			// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+			// with single element, huffSym must error on these two edge cases. In both
+			// cases, the chunks slice will be 0 for the invalid sequence, leading it
+			// satisfy the n == 0 check below.
+			n := uint(f.hd.maxRead)
+			// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+			// but is smart enough to keep local variables in registers, so use nb and b,
+			// inline call to moreBits and reassign b,nb back to f on return.
+			for {
+				for fnb < n {
+					c, err := fr.ReadByte()
+					if err != nil {
+						f.b, f.nb = fb, fnb
+						f.err = noEOF(err)
+						return
+					}
+					f.roffset++
+					fb |= uint32(c) << (fnb & regSizeMaskUint32)
+					fnb += 8
+				}
+				chunk := f.hd.chunks[fb&(huffmanNumChunks-1)]
+				n = uint(chunk & huffmanCountMask)
+				if n > huffmanChunkBits {
+					chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask]
+					n = uint(chunk & huffmanCountMask)
+				}
+				if n <= fnb {
+					if n == 0 {
+						f.b, f.nb = fb, fnb
+						if debugDecode {
+							fmt.Println("huffsym: n==0")
+						}
+						f.err = CorruptInputError(f.roffset)
+						return
+					}
+					fb = fb >> (n & regSizeMaskUint32)
+					fnb = fnb - n
+					dist = uint32(chunk >> huffmanValueShift)
+					break
+				}
+			}
+		}
+
+		switch {
+		case dist < 4:
+			dist++
+		case dist < maxNumDist:
+			nb := uint(dist-2) >> 1
+			// have 1 bit in bottom of dist, need nb more.
+			extra := (dist & 1) << (nb & regSizeMaskUint32)
+			for fnb < nb {
+				c, err := fr.ReadByte()
+				if err != nil {
+					f.b, f.nb = fb, fnb
+					if debugDecode {
+						fmt.Println("morebits f.nb<nb:", err)
+					}
+					f.err = err
+					return
+				}
+				f.roffset++
+				fb |= uint32(c) << (fnb & regSizeMaskUint32)
+				fnb += 8
+			}
+			extra |= fb & bitMask32[nb]
+			fb >>= nb & regSizeMaskUint32
+			fnb -= nb
+			dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
+			// slower: dist = bitMask32[nb+1] + 2 + extra
+		default:
+			f.b, f.nb = fb, fnb
+			if debugDecode {
+				fmt.Println("dist too big:", dist, maxNumDist)
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		// No check on length; encoding can be prescient.
+		if dist > uint32(dict.histSize()) {
+			f.b, f.nb = fb, fnb
+			if debugDecode {
+				fmt.Println("dist > dict.histSize():", dist, dict.histSize())
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		f.copyLen, f.copyDist = length, int(dist)
+		goto copyHistory
+	}
+
+copyHistory:
+	// Perform a backwards copy according to RFC section 3.2.3.
+	{
+		cnt := dict.tryWriteCopy(f.copyDist, f.copyLen)
+		if cnt == 0 {
+			cnt = dict.writeCopy(f.copyDist, f.copyLen)
+		}
+		f.copyLen -= cnt
+
+		if dict.availWrite() == 0 || f.copyLen > 0 {
+			f.toRead = dict.readFlush()
+			f.step = (*decompressor).huffmanStringsReader // We need to continue this work
+			f.stepState = stateDict
+			f.b, f.nb = fb, fnb
+			return
+		}
+		goto readLiteral
+	}
+	// Not reached
+}
+
+// Decode a single Huffman block from f.
+// hl and hd are the Huffman states for the lit/length values
+// and the distance values, respectively. If hd == nil, using the
+// fixed distance encoding associated with fixed Huffman blocks.
+func (f *decompressor) huffmanGenericReader() {
+	const (
+		stateInit = iota // Zero value must be stateInit
+		stateDict
+	)
+	fr := f.r.(Reader)
+
+	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+	// but is smart enough to keep local variables in registers, so use nb and b,
+	// inline call to moreBits and reassign b,nb back to f on return.
+	fnb, fb, dict := f.nb, f.b, &f.dict
+
+	switch f.stepState {
+	case stateInit:
+		goto readLiteral
+	case stateDict:
+		goto copyHistory
+	}
+
+readLiteral:
+	// Read literal and/or (length, distance) according to RFC section 3.2.3.
+	{
+		var v int
+		{
+			// Inlined v, err := f.huffSym(f.hl)
+			// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+			// with single element, huffSym must error on these two edge cases. In both
+			// cases, the chunks slice will be 0 for the invalid sequence, leading it
+			// satisfy the n == 0 check below.
+			n := uint(f.hl.maxRead)
+			for {
+				for fnb < n {
+					c, err := fr.ReadByte()
+					if err != nil {
+						f.b, f.nb = fb, fnb
+						f.err = noEOF(err)
+						return
+					}
+					f.roffset++
+					fb |= uint32(c) << (fnb & regSizeMaskUint32)
+					fnb += 8
+				}
+				chunk := f.hl.chunks[fb&(huffmanNumChunks-1)]
+				n = uint(chunk & huffmanCountMask)
+				if n > huffmanChunkBits {
+					chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask]
+					n = uint(chunk & huffmanCountMask)
+				}
+				if n <= fnb {
+					if n == 0 {
+						f.b, f.nb = fb, fnb
+						if debugDecode {
+							fmt.Println("huffsym: n==0")
+						}
+						f.err = CorruptInputError(f.roffset)
+						return
+					}
+					fb = fb >> (n & regSizeMaskUint32)
+					fnb = fnb - n
+					v = int(chunk >> huffmanValueShift)
+					break
+				}
+			}
+		}
+
+		var length int
+		switch {
+		case v < 256:
+			dict.writeByte(byte(v))
+			if dict.availWrite() == 0 {
+				f.toRead = dict.readFlush()
+				f.step = (*decompressor).huffmanGenericReader
+				f.stepState = stateInit
+				f.b, f.nb = fb, fnb
+				return
+			}
+			goto readLiteral
+		case v == 256:
+			f.b, f.nb = fb, fnb
+			f.finishBlock()
+			return
+		// otherwise, reference to older data
+		case v < 265:
+			length = v - (257 - 3)
+		case v < maxNumLit:
+			val := decCodeToLen[(v - 257)]
+			length = int(val.length) + 3
+			n := uint(val.extra)
+			for fnb < n {
+				c, err := fr.ReadByte()
+				if err != nil {
+					f.b, f.nb = fb, fnb
+					if debugDecode {
+						fmt.Println("morebits n>0:", err)
+					}
+					f.err = err
+					return
+				}
+				f.roffset++
+				fb |= uint32(c) << (fnb & regSizeMaskUint32)
+				fnb += 8
+			}
+			length += int(fb & bitMask32[n])
+			fb >>= n & regSizeMaskUint32
+			fnb -= n
+		default:
+			if debugDecode {
+				fmt.Println(v, ">= maxNumLit")
+			}
+			f.err = CorruptInputError(f.roffset)
+			f.b, f.nb = fb, fnb
+			return
+		}
+
+		var dist uint32
+		if f.hd == nil {
+			for fnb < 5 {
+				c, err := fr.ReadByte()
+				if err != nil {
+					f.b, f.nb = fb, fnb
+					if debugDecode {
+						fmt.Println("morebits f.nb<5:", err)
+					}
+					f.err = err
+					return
+				}
+				f.roffset++
+				fb |= uint32(c) << (fnb & regSizeMaskUint32)
+				fnb += 8
+			}
+			dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3)))
+			fb >>= 5
+			fnb -= 5
+		} else {
+			// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+			// with single element, huffSym must error on these two edge cases. In both
+			// cases, the chunks slice will be 0 for the invalid sequence, leading it
+			// satisfy the n == 0 check below.
+			n := uint(f.hd.maxRead)
+			// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+			// but is smart enough to keep local variables in registers, so use nb and b,
+			// inline call to moreBits and reassign b,nb back to f on return.
+			for {
+				for fnb < n {
+					c, err := fr.ReadByte()
+					if err != nil {
+						f.b, f.nb = fb, fnb
+						f.err = noEOF(err)
+						return
+					}
+					f.roffset++
+					fb |= uint32(c) << (fnb & regSizeMaskUint32)
+					fnb += 8
+				}
+				chunk := f.hd.chunks[fb&(huffmanNumChunks-1)]
+				n = uint(chunk & huffmanCountMask)
+				if n > huffmanChunkBits {
+					chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask]
+					n = uint(chunk & huffmanCountMask)
+				}
+				if n <= fnb {
+					if n == 0 {
+						f.b, f.nb = fb, fnb
+						if debugDecode {
+							fmt.Println("huffsym: n==0")
+						}
+						f.err = CorruptInputError(f.roffset)
+						return
+					}
+					fb = fb >> (n & regSizeMaskUint32)
+					fnb = fnb - n
+					dist = uint32(chunk >> huffmanValueShift)
+					break
+				}
+			}
+		}
+
+		switch {
+		case dist < 4:
+			dist++
+		case dist < maxNumDist:
+			nb := uint(dist-2) >> 1
+			// have 1 bit in bottom of dist, need nb more.
+			extra := (dist & 1) << (nb & regSizeMaskUint32)
+			for fnb < nb {
+				c, err := fr.ReadByte()
+				if err != nil {
+					f.b, f.nb = fb, fnb
+					if debugDecode {
+						fmt.Println("morebits f.nb<nb:", err)
+					}
+					f.err = err
+					return
+				}
+				f.roffset++
+				fb |= uint32(c) << (fnb & regSizeMaskUint32)
+				fnb += 8
+			}
+			extra |= fb & bitMask32[nb]
+			fb >>= nb & regSizeMaskUint32
+			fnb -= nb
+			dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
+			// slower: dist = bitMask32[nb+1] + 2 + extra
+		default:
+			f.b, f.nb = fb, fnb
+			if debugDecode {
+				fmt.Println("dist too big:", dist, maxNumDist)
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		// No check on length; encoding can be prescient.
+		if dist > uint32(dict.histSize()) {
+			f.b, f.nb = fb, fnb
+			if debugDecode {
+				fmt.Println("dist > dict.histSize():", dist, dict.histSize())
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		f.copyLen, f.copyDist = length, int(dist)
+		goto copyHistory
+	}
+
+copyHistory:
+	// Perform a backwards copy according to RFC section 3.2.3.
+	{
+		cnt := dict.tryWriteCopy(f.copyDist, f.copyLen)
+		if cnt == 0 {
+			cnt = dict.writeCopy(f.copyDist, f.copyLen)
+		}
+		f.copyLen -= cnt
+
+		if dict.availWrite() == 0 || f.copyLen > 0 {
+			f.toRead = dict.readFlush()
+			f.step = (*decompressor).huffmanGenericReader // We need to continue this work
+			f.stepState = stateDict
+			f.b, f.nb = fb, fnb
+			return
+		}
+		goto readLiteral
+	}
+	// Not reached
+}
+
+func (f *decompressor) huffmanBlockDecoder() func() {
+	switch f.r.(type) {
+	case *bytes.Buffer:
+		return f.huffmanBytesBuffer
+	case *bytes.Reader:
+		return f.huffmanBytesReader
+	case *bufio.Reader:
+		return f.huffmanBufioReader
+	case *strings.Reader:
+		return f.huffmanStringsReader
+	case Reader:
+		return f.huffmanGenericReader
+	default:
+		return f.huffmanGenericReader
+	}
+}
diff --git a/vendor/github.com/klauspost/compress/flate/level1.go b/vendor/github.com/klauspost/compress/flate/level1.go
new file mode 100644
index 00000000000..703b9a89aa3
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/level1.go
@@ -0,0 +1,241 @@
+package flate
+
+import (
+	"encoding/binary"
+	"fmt"
+	"math/bits"
+)
+
+// fastGen maintains the table for matches,
+// and the previous byte block for level 2.
+// This is the generic implementation.
+type fastEncL1 struct {
+	fastGen
+	table [tableSize]tableEntry
+}
+
+// EncodeL1 uses a similar algorithm to level 1
+func (e *fastEncL1) Encode(dst *tokens, src []byte) {
+	const (
+		inputMargin            = 12 - 1
+		minNonLiteralBlockSize = 1 + 1 + inputMargin
+		hashBytes              = 5
+	)
+	if debugDeflate && e.cur < 0 {
+		panic(fmt.Sprint("e.cur < 0: ", e.cur))
+	}
+
+	// Protect against e.cur wraparound.
+	for e.cur >= bufferReset {
+		if len(e.hist) == 0 {
+			for i := range e.table[:] {
+				e.table[i] = tableEntry{}
+			}
+			e.cur = maxMatchOffset
+			break
+		}
+		// Shift down everything in the table that isn't already too far away.
+		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
+		for i := range e.table[:] {
+			v := e.table[i].offset
+			if v <= minOff {
+				v = 0
+			} else {
+				v = v - e.cur + maxMatchOffset
+			}
+			e.table[i].offset = v
+		}
+		e.cur = maxMatchOffset
+	}
+
+	s := e.addBlock(src)
+
+	// This check isn't in the Snappy implementation, but there, the caller
+	// instead of the callee handles this case.
+	if len(src) < minNonLiteralBlockSize {
+		// We do not fill the token table.
+		// This will be picked up by caller.
+		dst.n = uint16(len(src))
+		return
+	}
+
+	// Override src
+	src = e.hist
+	nextEmit := s
+
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := int32(len(src) - inputMargin)
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	cv := load6432(src, s)
+
+	for {
+		const skipLog = 5
+		const doEvery = 2
+
+		nextS := s
+		var candidate tableEntry
+		for {
+			nextHash := hashLen(cv, tableBits, hashBytes)
+			candidate = e.table[nextHash]
+			nextS = s + doEvery + (s-nextEmit)>>skipLog
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+
+			now := load6432(src, nextS)
+			e.table[nextHash] = tableEntry{offset: s + e.cur}
+			nextHash = hashLen(now, tableBits, hashBytes)
+
+			offset := s - (candidate.offset - e.cur)
+			if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
+				e.table[nextHash] = tableEntry{offset: nextS + e.cur}
+				break
+			}
+
+			// Do one right away...
+			cv = now
+			s = nextS
+			nextS++
+			candidate = e.table[nextHash]
+			now >>= 8
+			e.table[nextHash] = tableEntry{offset: s + e.cur}
+
+			offset = s - (candidate.offset - e.cur)
+			if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
+				e.table[nextHash] = tableEntry{offset: nextS + e.cur}
+				break
+			}
+			cv = now
+			s = nextS
+		}
+
+		// A 4-byte match has been found. We'll later see if more than 4 bytes
+		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+		// them as literal bytes.
+		for {
+			// Invariant: we have a 4-byte match at s, and no need to emit any
+			// literal bytes prior to s.
+
+			// Extend the 4-byte match as long as possible.
+			t := candidate.offset - e.cur
+			var l = int32(4)
+			if false {
+				l = e.matchlenLong(s+4, t+4, src) + 4
+			} else {
+				// inlined:
+				a := src[s+4:]
+				b := src[t+4:]
+				for len(a) >= 8 {
+					if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
+						l += int32(bits.TrailingZeros64(diff) >> 3)
+						break
+					}
+					l += 8
+					a = a[8:]
+					b = b[8:]
+				}
+				if len(a) < 8 {
+					b = b[:len(a)]
+					for i := range a {
+						if a[i] != b[i] {
+							break
+						}
+						l++
+					}
+				}
+			}
+
+			// Extend backwards
+			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+				s--
+				t--
+				l++
+			}
+			if nextEmit < s {
+				if false {
+					emitLiteral(dst, src[nextEmit:s])
+				} else {
+					for _, v := range src[nextEmit:s] {
+						dst.tokens[dst.n] = token(v)
+						dst.litHist[v]++
+						dst.n++
+					}
+				}
+			}
+
+			// Save the match found
+			if false {
+				dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+			} else {
+				// Inlined...
+				xoffset := uint32(s - t - baseMatchOffset)
+				xlength := l
+				oc := offsetCode(xoffset)
+				xoffset |= oc << 16
+				for xlength > 0 {
+					xl := xlength
+					if xl > 258 {
+						if xl > 258+baseMatchLength {
+							xl = 258
+						} else {
+							xl = 258 - baseMatchLength
+						}
+					}
+					xlength -= xl
+					xl -= baseMatchLength
+					dst.extraHist[lengthCodes1[uint8(xl)]]++
+					dst.offHist[oc]++
+					dst.tokens[dst.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
+					dst.n++
+				}
+			}
+			s += l
+			nextEmit = s
+			if nextS >= s {
+				s = nextS + 1
+			}
+			if s >= sLimit {
+				// Index first pair after match end.
+				if int(s+l+8) < len(src) {
+					cv := load6432(src, s)
+					e.table[hashLen(cv, tableBits, hashBytes)] = tableEntry{offset: s + e.cur}
+				}
+				goto emitRemainder
+			}
+
+			// We could immediately start working at s now, but to improve
+			// compression we first update the hash table at s-2 and at s. If
+			// another emitCopy is not our next move, also calculate nextHash
+			// at s+1. At least on GOARCH=amd64, these three hash calculations
+			// are faster as one load64 call (with some shifts) instead of
+			// three load32 calls.
+			x := load6432(src, s-2)
+			o := e.cur + s - 2
+			prevHash := hashLen(x, tableBits, hashBytes)
+			e.table[prevHash] = tableEntry{offset: o}
+			x >>= 16
+			currHash := hashLen(x, tableBits, hashBytes)
+			candidate = e.table[currHash]
+			e.table[currHash] = tableEntry{offset: o + 2}
+
+			offset := s - (candidate.offset - e.cur)
+			if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) {
+				cv = x >> 8
+				s++
+				break
+			}
+		}
+	}
+
+emitRemainder:
+	if int(nextEmit) < len(src) {
+		// If nothing was added, don't encode literals.
+		if dst.n == 0 {
+			return
+		}
+		emitLiteral(dst, src[nextEmit:])
+	}
+}
diff --git a/vendor/github.com/klauspost/compress/flate/level2.go b/vendor/github.com/klauspost/compress/flate/level2.go
new file mode 100644
index 00000000000..876dfbe3054
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/level2.go
@@ -0,0 +1,214 @@
+package flate
+
+import "fmt"
+
+// fastGen maintains the table for matches,
+// and the previous byte block for level 2.
+// This is the generic implementation.
+type fastEncL2 struct {
+	fastGen
+	table [bTableSize]tableEntry
+}
+
+// EncodeL2 uses a similar algorithm to level 1, but is capable
+// of matching across blocks giving better compression at a small slowdown.
+func (e *fastEncL2) Encode(dst *tokens, src []byte) {
+	const (
+		inputMargin            = 12 - 1
+		minNonLiteralBlockSize = 1 + 1 + inputMargin
+		hashBytes              = 5
+	)
+
+	if debugDeflate && e.cur < 0 {
+		panic(fmt.Sprint("e.cur < 0: ", e.cur))
+	}
+
+	// Protect against e.cur wraparound.
+	for e.cur >= bufferReset {
+		if len(e.hist) == 0 {
+			for i := range e.table[:] {
+				e.table[i] = tableEntry{}
+			}
+			e.cur = maxMatchOffset
+			break
+		}
+		// Shift down everything in the table that isn't already too far away.
+		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
+		for i := range e.table[:] {
+			v := e.table[i].offset
+			if v <= minOff {
+				v = 0
+			} else {
+				v = v - e.cur + maxMatchOffset
+			}
+			e.table[i].offset = v
+		}
+		e.cur = maxMatchOffset
+	}
+
+	s := e.addBlock(src)
+
+	// This check isn't in the Snappy implementation, but there, the caller
+	// instead of the callee handles this case.
+	if len(src) < minNonLiteralBlockSize {
+		// We do not fill the token table.
+		// This will be picked up by caller.
+		dst.n = uint16(len(src))
+		return
+	}
+
+	// Override src
+	src = e.hist
+	nextEmit := s
+
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := int32(len(src) - inputMargin)
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	cv := load6432(src, s)
+	for {
+		// When should we start skipping if we haven't found matches in a long while.
+		const skipLog = 5
+		const doEvery = 2
+
+		nextS := s
+		var candidate tableEntry
+		for {
+			nextHash := hashLen(cv, bTableBits, hashBytes)
+			s = nextS
+			nextS = s + doEvery + (s-nextEmit)>>skipLog
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+			candidate = e.table[nextHash]
+			now := load6432(src, nextS)
+			e.table[nextHash] = tableEntry{offset: s + e.cur}
+			nextHash = hashLen(now, bTableBits, hashBytes)
+
+			offset := s - (candidate.offset - e.cur)
+			if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
+				e.table[nextHash] = tableEntry{offset: nextS + e.cur}
+				break
+			}
+
+			// Do one right away...
+			cv = now
+			s = nextS
+			nextS++
+			candidate = e.table[nextHash]
+			now >>= 8
+			e.table[nextHash] = tableEntry{offset: s + e.cur}
+
+			offset = s - (candidate.offset - e.cur)
+			if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
+				break
+			}
+			cv = now
+		}
+
+		// A 4-byte match has been found. We'll later see if more than 4 bytes
+		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+		// them as literal bytes.
+
+		// Call emitCopy, and then see if another emitCopy could be our next
+		// move. Repeat until we find no match for the input immediately after
+		// what was consumed by the last emitCopy call.
+		//
+		// If we exit this loop normally then we need to call emitLiteral next,
+		// though we don't yet know how big the literal will be. We handle that
+		// by proceeding to the next iteration of the main loop. We also can
+		// exit this loop via goto if we get close to exhausting the input.
+		for {
+			// Invariant: we have a 4-byte match at s, and no need to emit any
+			// literal bytes prior to s.
+
+			// Extend the 4-byte match as long as possible.
+			t := candidate.offset - e.cur
+			l := e.matchlenLong(s+4, t+4, src) + 4
+
+			// Extend backwards
+			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+				s--
+				t--
+				l++
+			}
+			if nextEmit < s {
+				if false {
+					emitLiteral(dst, src[nextEmit:s])
+				} else {
+					for _, v := range src[nextEmit:s] {
+						dst.tokens[dst.n] = token(v)
+						dst.litHist[v]++
+						dst.n++
+					}
+				}
+			}
+
+			dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+			s += l
+			nextEmit = s
+			if nextS >= s {
+				s = nextS + 1
+			}
+
+			if s >= sLimit {
+				// Index first pair after match end.
+				if int(s+l+8) < len(src) {
+					cv := load6432(src, s)
+					e.table[hashLen(cv, bTableBits, hashBytes)] = tableEntry{offset: s + e.cur}
+				}
+				goto emitRemainder
+			}
+
+			// Store every second hash in-between, but offset by 1.
+			for i := s - l + 2; i < s-5; i += 7 {
+				x := load6432(src, i)
+				nextHash := hashLen(x, bTableBits, hashBytes)
+				e.table[nextHash] = tableEntry{offset: e.cur + i}
+				// Skip one
+				x >>= 16
+				nextHash = hashLen(x, bTableBits, hashBytes)
+				e.table[nextHash] = tableEntry{offset: e.cur + i + 2}
+				// Skip one
+				x >>= 16
+				nextHash = hashLen(x, bTableBits, hashBytes)
+				e.table[nextHash] = tableEntry{offset: e.cur + i + 4}
+			}
+
+			// We could immediately start working at s now, but to improve
+			// compression we first update the hash table at s-2 to s. If
+			// another emitCopy is not our next move, also calculate nextHash
+			// at s+1. At least on GOARCH=amd64, these three hash calculations
+			// are faster as one load64 call (with some shifts) instead of
+			// three load32 calls.
+			x := load6432(src, s-2)
+			o := e.cur + s - 2
+			prevHash := hashLen(x, bTableBits, hashBytes)
+			prevHash2 := hashLen(x>>8, bTableBits, hashBytes)
+			e.table[prevHash] = tableEntry{offset: o}
+			e.table[prevHash2] = tableEntry{offset: o + 1}
+			currHash := hashLen(x>>16, bTableBits, hashBytes)
+			candidate = e.table[currHash]
+			e.table[currHash] = tableEntry{offset: o + 2}
+
+			offset := s - (candidate.offset - e.cur)
+			if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) {
+				cv = x >> 24
+				s++
+				break
+			}
+		}
+	}
+
+emitRemainder:
+	if int(nextEmit) < len(src) {
+		// If nothing was added, don't encode literals.
+		if dst.n == 0 {
+			return
+		}
+
+		emitLiteral(dst, src[nextEmit:])
+	}
+}
diff --git a/vendor/github.com/klauspost/compress/flate/level3.go b/vendor/github.com/klauspost/compress/flate/level3.go
new file mode 100644
index 00000000000..7aa2b72a129
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/level3.go
@@ -0,0 +1,241 @@
+package flate
+
+import "fmt"
+
+// fastEncL3
+type fastEncL3 struct {
+	fastGen
+	table [1 << 16]tableEntryPrev
+}
+
+// Encode uses a similar algorithm to level 2, will check up to two candidates.
+func (e *fastEncL3) Encode(dst *tokens, src []byte) {
+	const (
+		inputMargin            = 12 - 1
+		minNonLiteralBlockSize = 1 + 1 + inputMargin
+		tableBits              = 16
+		tableSize              = 1 << tableBits
+		hashBytes              = 5
+	)
+
+	if debugDeflate && e.cur < 0 {
+		panic(fmt.Sprint("e.cur < 0: ", e.cur))
+	}
+
+	// Protect against e.cur wraparound.
+	for e.cur >= bufferReset {
+		if len(e.hist) == 0 {
+			for i := range e.table[:] {
+				e.table[i] = tableEntryPrev{}
+			}
+			e.cur = maxMatchOffset
+			break
+		}
+		// Shift down everything in the table that isn't already too far away.
+		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
+		for i := range e.table[:] {
+			v := e.table[i]
+			if v.Cur.offset <= minOff {
+				v.Cur.offset = 0
+			} else {
+				v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
+			}
+			if v.Prev.offset <= minOff {
+				v.Prev.offset = 0
+			} else {
+				v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
+			}
+			e.table[i] = v
+		}
+		e.cur = maxMatchOffset
+	}
+
+	s := e.addBlock(src)
+
+	// Skip if too small.
+	if len(src) < minNonLiteralBlockSize {
+		// We do not fill the token table.
+		// This will be picked up by caller.
+		dst.n = uint16(len(src))
+		return
+	}
+
+	// Override src
+	src = e.hist
+	nextEmit := s
+
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := int32(len(src) - inputMargin)
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	cv := load6432(src, s)
+	for {
+		const skipLog = 7
+		nextS := s
+		var candidate tableEntry
+		for {
+			nextHash := hashLen(cv, tableBits, hashBytes)
+			s = nextS
+			nextS = s + 1 + (s-nextEmit)>>skipLog
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+			candidates := e.table[nextHash]
+			now := load6432(src, nextS)
+
+			// Safe offset distance until s + 4...
+			minOffset := e.cur + s - (maxMatchOffset - 4)
+			e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur}}
+
+			// Check both candidates
+			candidate = candidates.Cur
+			if candidate.offset < minOffset {
+				cv = now
+				// Previous will also be invalid, we have nothing.
+				continue
+			}
+
+			if uint32(cv) == load3232(src, candidate.offset-e.cur) {
+				if candidates.Prev.offset < minOffset || uint32(cv) != load3232(src, candidates.Prev.offset-e.cur) {
+					break
+				}
+				// Both match and are valid, pick longest.
+				offset := s - (candidate.offset - e.cur)
+				o2 := s - (candidates.Prev.offset - e.cur)
+				l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:])
+				if l2 > l1 {
+					candidate = candidates.Prev
+				}
+				break
+			} else {
+				// We only check if value mismatches.
+				// Offset will always be invalid in other cases.
+				candidate = candidates.Prev
+				if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
+					break
+				}
+			}
+			cv = now
+		}
+
+		// Call emitCopy, and then see if another emitCopy could be our next
+		// move. Repeat until we find no match for the input immediately after
+		// what was consumed by the last emitCopy call.
+		//
+		// If we exit this loop normally then we need to call emitLiteral next,
+		// though we don't yet know how big the literal will be. We handle that
+		// by proceeding to the next iteration of the main loop. We also can
+		// exit this loop via goto if we get close to exhausting the input.
+		for {
+			// Invariant: we have a 4-byte match at s, and no need to emit any
+			// literal bytes prior to s.
+
+			// Extend the 4-byte match as long as possible.
+			//
+			t := candidate.offset - e.cur
+			l := e.matchlenLong(s+4, t+4, src) + 4
+
+			// Extend backwards
+			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+				s--
+				t--
+				l++
+			}
+			if nextEmit < s {
+				if false {
+					emitLiteral(dst, src[nextEmit:s])
+				} else {
+					for _, v := range src[nextEmit:s] {
+						dst.tokens[dst.n] = token(v)
+						dst.litHist[v]++
+						dst.n++
+					}
+				}
+			}
+
+			dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+			s += l
+			nextEmit = s
+			if nextS >= s {
+				s = nextS + 1
+			}
+
+			if s >= sLimit {
+				t += l
+				// Index first pair after match end.
+				if int(t+8) < len(src) && t > 0 {
+					cv = load6432(src, t)
+					nextHash := hashLen(cv, tableBits, hashBytes)
+					e.table[nextHash] = tableEntryPrev{
+						Prev: e.table[nextHash].Cur,
+						Cur:  tableEntry{offset: e.cur + t},
+					}
+				}
+				goto emitRemainder
+			}
+
+			// Store every 5th hash in-between.
+			for i := s - l + 2; i < s-5; i += 6 {
+				nextHash := hashLen(load6432(src, i), tableBits, hashBytes)
+				e.table[nextHash] = tableEntryPrev{
+					Prev: e.table[nextHash].Cur,
+					Cur:  tableEntry{offset: e.cur + i}}
+			}
+			// We could immediately start working at s now, but to improve
+			// compression we first update the hash table at s-2 to s.
+			x := load6432(src, s-2)
+			prevHash := hashLen(x, tableBits, hashBytes)
+
+			e.table[prevHash] = tableEntryPrev{
+				Prev: e.table[prevHash].Cur,
+				Cur:  tableEntry{offset: e.cur + s - 2},
+			}
+			x >>= 8
+			prevHash = hashLen(x, tableBits, hashBytes)
+
+			e.table[prevHash] = tableEntryPrev{
+				Prev: e.table[prevHash].Cur,
+				Cur:  tableEntry{offset: e.cur + s - 1},
+			}
+			x >>= 8
+			currHash := hashLen(x, tableBits, hashBytes)
+			candidates := e.table[currHash]
+			cv = x
+			e.table[currHash] = tableEntryPrev{
+				Prev: candidates.Cur,
+				Cur:  tableEntry{offset: s + e.cur},
+			}
+
+			// Check both candidates
+			candidate = candidates.Cur
+			minOffset := e.cur + s - (maxMatchOffset - 4)
+
+			if candidate.offset > minOffset {
+				if uint32(cv) == load3232(src, candidate.offset-e.cur) {
+					// Found a match...
+					continue
+				}
+				candidate = candidates.Prev
+				if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
+					// Match at prev...
+					continue
+				}
+			}
+			cv = x >> 8
+			s++
+			break
+		}
+	}
+
+emitRemainder:
+	if int(nextEmit) < len(src) {
+		// If nothing was added, don't encode literals.
+		if dst.n == 0 {
+			return
+		}
+
+		emitLiteral(dst, src[nextEmit:])
+	}
+}
diff --git a/vendor/github.com/klauspost/compress/flate/level4.go b/vendor/github.com/klauspost/compress/flate/level4.go
new file mode 100644
index 00000000000..23c08b325cf
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/level4.go
@@ -0,0 +1,221 @@
+package flate
+
+import "fmt"
+
+type fastEncL4 struct {
+	fastGen
+	table  [tableSize]tableEntry
+	bTable [tableSize]tableEntry
+}
+
+func (e *fastEncL4) Encode(dst *tokens, src []byte) {
+	const (
+		inputMargin            = 12 - 1
+		minNonLiteralBlockSize = 1 + 1 + inputMargin
+		hashShortBytes         = 4
+	)
+	if debugDeflate && e.cur < 0 {
+		panic(fmt.Sprint("e.cur < 0: ", e.cur))
+	}
+	// Protect against e.cur wraparound.
+	for e.cur >= bufferReset {
+		if len(e.hist) == 0 {
+			for i := range e.table[:] {
+				e.table[i] = tableEntry{}
+			}
+			for i := range e.bTable[:] {
+				e.bTable[i] = tableEntry{}
+			}
+			e.cur = maxMatchOffset
+			break
+		}
+		// Shift down everything in the table that isn't already too far away.
+		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
+		for i := range e.table[:] {
+			v := e.table[i].offset
+			if v <= minOff {
+				v = 0
+			} else {
+				v = v - e.cur + maxMatchOffset
+			}
+			e.table[i].offset = v
+		}
+		for i := range e.bTable[:] {
+			v := e.bTable[i].offset
+			if v <= minOff {
+				v = 0
+			} else {
+				v = v - e.cur + maxMatchOffset
+			}
+			e.bTable[i].offset = v
+		}
+		e.cur = maxMatchOffset
+	}
+
+	s := e.addBlock(src)
+
+	// This check isn't in the Snappy implementation, but there, the caller
+	// instead of the callee handles this case.
+	if len(src) < minNonLiteralBlockSize {
+		// We do not fill the token table.
+		// This will be picked up by caller.
+		dst.n = uint16(len(src))
+		return
+	}
+
+	// Override src
+	src = e.hist
+	nextEmit := s
+
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := int32(len(src) - inputMargin)
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	cv := load6432(src, s)
+	for {
+		const skipLog = 6
+		const doEvery = 1
+
+		nextS := s
+		var t int32
+		for {
+			nextHashS := hashLen(cv, tableBits, hashShortBytes)
+			nextHashL := hash7(cv, tableBits)
+
+			s = nextS
+			nextS = s + doEvery + (s-nextEmit)>>skipLog
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+			// Fetch a short+long candidate
+			sCandidate := e.table[nextHashS]
+			lCandidate := e.bTable[nextHashL]
+			next := load6432(src, nextS)
+			entry := tableEntry{offset: s + e.cur}
+			e.table[nextHashS] = entry
+			e.bTable[nextHashL] = entry
+
+			t = lCandidate.offset - e.cur
+			if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.offset-e.cur) {
+				// We got a long match. Use that.
+				break
+			}
+
+			t = sCandidate.offset - e.cur
+			if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
+				// Found a 4 match...
+				lCandidate = e.bTable[hash7(next, tableBits)]
+
+				// If the next long is a candidate, check if we should use that instead...
+				lOff := nextS - (lCandidate.offset - e.cur)
+				if lOff < maxMatchOffset && load3232(src, lCandidate.offset-e.cur) == uint32(next) {
+					l1, l2 := matchLen(src[s+4:], src[t+4:]), matchLen(src[nextS+4:], src[nextS-lOff+4:])
+					if l2 > l1 {
+						s = nextS
+						t = lCandidate.offset - e.cur
+					}
+				}
+				break
+			}
+			cv = next
+		}
+
+		// A 4-byte match has been found. We'll later see if more than 4 bytes
+		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+		// them as literal bytes.
+
+		// Extend the 4-byte match as long as possible.
+		l := e.matchlenLong(s+4, t+4, src) + 4
+
+		// Extend backwards
+		for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+			s--
+			t--
+			l++
+		}
+		if nextEmit < s {
+			if false {
+				emitLiteral(dst, src[nextEmit:s])
+			} else {
+				for _, v := range src[nextEmit:s] {
+					dst.tokens[dst.n] = token(v)
+					dst.litHist[v]++
+					dst.n++
+				}
+			}
+		}
+		if debugDeflate {
+			if t >= s {
+				panic("s-t")
+			}
+			if (s - t) > maxMatchOffset {
+				panic(fmt.Sprintln("mmo", t))
+			}
+			if l < baseMatchLength {
+				panic("bml")
+			}
+		}
+
+		dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+		s += l
+		nextEmit = s
+		if nextS >= s {
+			s = nextS + 1
+		}
+
+		if s >= sLimit {
+			// Index first pair after match end.
+			if int(s+8) < len(src) {
+				cv := load6432(src, s)
+				e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: s + e.cur}
+				e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur}
+			}
+			goto emitRemainder
+		}
+
+		// Store every 3rd hash in-between
+		if true {
+			i := nextS
+			if i < s-1 {
+				cv := load6432(src, i)
+				t := tableEntry{offset: i + e.cur}
+				t2 := tableEntry{offset: t.offset + 1}
+				e.bTable[hash7(cv, tableBits)] = t
+				e.bTable[hash7(cv>>8, tableBits)] = t2
+				e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
+
+				i += 3
+				for ; i < s-1; i += 3 {
+					cv := load6432(src, i)
+					t := tableEntry{offset: i + e.cur}
+					t2 := tableEntry{offset: t.offset + 1}
+					e.bTable[hash7(cv, tableBits)] = t
+					e.bTable[hash7(cv>>8, tableBits)] = t2
+					e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
+				}
+			}
+		}
+
+		// We could immediately start working at s now, but to improve
+		// compression we first update the hash table at s-1 and at s.
+		x := load6432(src, s-1)
+		o := e.cur + s - 1
+		prevHashS := hashLen(x, tableBits, hashShortBytes)
+		prevHashL := hash7(x, tableBits)
+		e.table[prevHashS] = tableEntry{offset: o}
+		e.bTable[prevHashL] = tableEntry{offset: o}
+		cv = x >> 8
+	}
+
+emitRemainder:
+	if int(nextEmit) < len(src) {
+		// If nothing was added, don't encode literals.
+		if dst.n == 0 {
+			return
+		}
+
+		emitLiteral(dst, src[nextEmit:])
+	}
+}
diff --git a/vendor/github.com/klauspost/compress/flate/level5.go b/vendor/github.com/klauspost/compress/flate/level5.go
new file mode 100644
index 00000000000..83ef50ba45f
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/level5.go
@@ -0,0 +1,310 @@
+package flate
+
+import "fmt"
+
+type fastEncL5 struct {
+	fastGen
+	table  [tableSize]tableEntry
+	bTable [tableSize]tableEntryPrev
+}
+
+func (e *fastEncL5) Encode(dst *tokens, src []byte) {
+	const (
+		inputMargin            = 12 - 1
+		minNonLiteralBlockSize = 1 + 1 + inputMargin
+		hashShortBytes         = 4
+	)
+	if debugDeflate && e.cur < 0 {
+		panic(fmt.Sprint("e.cur < 0: ", e.cur))
+	}
+
+	// Protect against e.cur wraparound.
+	for e.cur >= bufferReset {
+		if len(e.hist) == 0 {
+			for i := range e.table[:] {
+				e.table[i] = tableEntry{}
+			}
+			for i := range e.bTable[:] {
+				e.bTable[i] = tableEntryPrev{}
+			}
+			e.cur = maxMatchOffset
+			break
+		}
+		// Shift down everything in the table that isn't already too far away.
+		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
+		for i := range e.table[:] {
+			v := e.table[i].offset
+			if v <= minOff {
+				v = 0
+			} else {
+				v = v - e.cur + maxMatchOffset
+			}
+			e.table[i].offset = v
+		}
+		for i := range e.bTable[:] {
+			v := e.bTable[i]
+			if v.Cur.offset <= minOff {
+				v.Cur.offset = 0
+				v.Prev.offset = 0
+			} else {
+				v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
+				if v.Prev.offset <= minOff {
+					v.Prev.offset = 0
+				} else {
+					v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
+				}
+			}
+			e.bTable[i] = v
+		}
+		e.cur = maxMatchOffset
+	}
+
+	s := e.addBlock(src)
+
+	// This check isn't in the Snappy implementation, but there, the caller
+	// instead of the callee handles this case.
+	if len(src) < minNonLiteralBlockSize {
+		// We do not fill the token table.
+		// This will be picked up by caller.
+		dst.n = uint16(len(src))
+		return
+	}
+
+	// Override src
+	src = e.hist
+	nextEmit := s
+
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := int32(len(src) - inputMargin)
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	cv := load6432(src, s)
+	for {
+		const skipLog = 6
+		const doEvery = 1
+
+		nextS := s
+		var l int32
+		var t int32
+		for {
+			nextHashS := hashLen(cv, tableBits, hashShortBytes)
+			nextHashL := hash7(cv, tableBits)
+
+			s = nextS
+			nextS = s + doEvery + (s-nextEmit)>>skipLog
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+			// Fetch a short+long candidate
+			sCandidate := e.table[nextHashS]
+			lCandidate := e.bTable[nextHashL]
+			next := load6432(src, nextS)
+			entry := tableEntry{offset: s + e.cur}
+			e.table[nextHashS] = entry
+			eLong := &e.bTable[nextHashL]
+			eLong.Cur, eLong.Prev = entry, eLong.Cur
+
+			nextHashS = hashLen(next, tableBits, hashShortBytes)
+			nextHashL = hash7(next, tableBits)
+
+			t = lCandidate.Cur.offset - e.cur
+			if s-t < maxMatchOffset {
+				if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
+					// Store the next match
+					e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
+					eLong := &e.bTable[nextHashL]
+					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
+
+					t2 := lCandidate.Prev.offset - e.cur
+					if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
+						l = e.matchlen(s+4, t+4, src) + 4
+						ml1 := e.matchlen(s+4, t2+4, src) + 4
+						if ml1 > l {
+							t = t2
+							l = ml1
+							break
+						}
+					}
+					break
+				}
+				t = lCandidate.Prev.offset - e.cur
+				if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
+					// Store the next match
+					e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
+					eLong := &e.bTable[nextHashL]
+					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
+					break
+				}
+			}
+
+			t = sCandidate.offset - e.cur
+			if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
+				// Found a 4 match...
+				l = e.matchlen(s+4, t+4, src) + 4
+				lCandidate = e.bTable[nextHashL]
+				// Store the next match
+
+				e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
+				eLong := &e.bTable[nextHashL]
+				eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
+
+				// If the next long is a candidate, use that...
+				t2 := lCandidate.Cur.offset - e.cur
+				if nextS-t2 < maxMatchOffset {
+					if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
+						ml := e.matchlen(nextS+4, t2+4, src) + 4
+						if ml > l {
+							t = t2
+							s = nextS
+							l = ml
+							break
+						}
+					}
+					// If the previous long is a candidate, use that...
+					t2 = lCandidate.Prev.offset - e.cur
+					if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
+						ml := e.matchlen(nextS+4, t2+4, src) + 4
+						if ml > l {
+							t = t2
+							s = nextS
+							l = ml
+							break
+						}
+					}
+				}
+				break
+			}
+			cv = next
+		}
+
+		// A 4-byte match has been found. We'll later see if more than 4 bytes
+		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+		// them as literal bytes.
+
+		if l == 0 {
+			// Extend the 4-byte match as long as possible.
+			l = e.matchlenLong(s+4, t+4, src) + 4
+		} else if l == maxMatchLength {
+			l += e.matchlenLong(s+l, t+l, src)
+		}
+
+		// Try to locate a better match by checking the end of best match...
+		if sAt := s + l; l < 30 && sAt < sLimit {
+			// Allow some bytes at the beginning to mismatch.
+			// Sweet spot is 2/3 bytes depending on input.
+			// 3 is only a little better when it is but sometimes a lot worse.
+			// The skipped bytes are tested in Extend backwards,
+			// and still picked up as part of the match if they do.
+			const skipBeginning = 2
+			eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset
+			t2 := eLong - e.cur - l + skipBeginning
+			s2 := s + skipBeginning
+			off := s2 - t2
+			if t2 >= 0 && off < maxMatchOffset && off > 0 {
+				if l2 := e.matchlenLong(s2, t2, src); l2 > l {
+					t = t2
+					l = l2
+					s = s2
+				}
+			}
+		}
+
+		// Extend backwards
+		for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+			s--
+			t--
+			l++
+		}
+		if nextEmit < s {
+			if false {
+				emitLiteral(dst, src[nextEmit:s])
+			} else {
+				for _, v := range src[nextEmit:s] {
+					dst.tokens[dst.n] = token(v)
+					dst.litHist[v]++
+					dst.n++
+				}
+			}
+		}
+		if debugDeflate {
+			if t >= s {
+				panic(fmt.Sprintln("s-t", s, t))
+			}
+			if (s - t) > maxMatchOffset {
+				panic(fmt.Sprintln("mmo", s-t))
+			}
+			if l < baseMatchLength {
+				panic("bml")
+			}
+		}
+
+		dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+		s += l
+		nextEmit = s
+		if nextS >= s {
+			s = nextS + 1
+		}
+
+		if s >= sLimit {
+			goto emitRemainder
+		}
+
+		// Store every 3rd hash in-between.
+		if true {
+			const hashEvery = 3
+			i := s - l + 1
+			if i < s-1 {
+				cv := load6432(src, i)
+				t := tableEntry{offset: i + e.cur}
+				e.table[hashLen(cv, tableBits, hashShortBytes)] = t
+				eLong := &e.bTable[hash7(cv, tableBits)]
+				eLong.Cur, eLong.Prev = t, eLong.Cur
+
+				// Do an long at i+1
+				cv >>= 8
+				t = tableEntry{offset: t.offset + 1}
+				eLong = &e.bTable[hash7(cv, tableBits)]
+				eLong.Cur, eLong.Prev = t, eLong.Cur
+
+				// We only have enough bits for a short entry at i+2
+				cv >>= 8
+				t = tableEntry{offset: t.offset + 1}
+				e.table[hashLen(cv, tableBits, hashShortBytes)] = t
+
+				// Skip one - otherwise we risk hitting 's'
+				i += 4
+				for ; i < s-1; i += hashEvery {
+					cv := load6432(src, i)
+					t := tableEntry{offset: i + e.cur}
+					t2 := tableEntry{offset: t.offset + 1}
+					eLong := &e.bTable[hash7(cv, tableBits)]
+					eLong.Cur, eLong.Prev = t, eLong.Cur
+					e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
+				}
+			}
+		}
+
+		// We could immediately start working at s now, but to improve
+		// compression we first update the hash table at s-1 and at s.
+		x := load6432(src, s-1)
+		o := e.cur + s - 1
+		prevHashS := hashLen(x, tableBits, hashShortBytes)
+		prevHashL := hash7(x, tableBits)
+		e.table[prevHashS] = tableEntry{offset: o}
+		eLong := &e.bTable[prevHashL]
+		eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur
+		cv = x >> 8
+	}
+
+emitRemainder:
+	if int(nextEmit) < len(src) {
+		// If nothing was added, don't encode literals.
+		if dst.n == 0 {
+			return
+		}
+
+		emitLiteral(dst, src[nextEmit:])
+	}
+}
diff --git a/vendor/github.com/klauspost/compress/flate/level6.go b/vendor/github.com/klauspost/compress/flate/level6.go
new file mode 100644
index 00000000000..f1e9d98fa50
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/level6.go
@@ -0,0 +1,325 @@
+package flate
+
+import "fmt"
+
+type fastEncL6 struct {
+	fastGen
+	table  [tableSize]tableEntry
+	bTable [tableSize]tableEntryPrev
+}
+
+func (e *fastEncL6) Encode(dst *tokens, src []byte) {
+	const (
+		inputMargin            = 12 - 1
+		minNonLiteralBlockSize = 1 + 1 + inputMargin
+		hashShortBytes         = 4
+	)
+	if debugDeflate && e.cur < 0 {
+		panic(fmt.Sprint("e.cur < 0: ", e.cur))
+	}
+
+	// Protect against e.cur wraparound.
+	for e.cur >= bufferReset {
+		if len(e.hist) == 0 {
+			for i := range e.table[:] {
+				e.table[i] = tableEntry{}
+			}
+			for i := range e.bTable[:] {
+				e.bTable[i] = tableEntryPrev{}
+			}
+			e.cur = maxMatchOffset
+			break
+		}
+		// Shift down everything in the table that isn't already too far away.
+		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
+		for i := range e.table[:] {
+			v := e.table[i].offset
+			if v <= minOff {
+				v = 0
+			} else {
+				v = v - e.cur + maxMatchOffset
+			}
+			e.table[i].offset = v
+		}
+		for i := range e.bTable[:] {
+			v := e.bTable[i]
+			if v.Cur.offset <= minOff {
+				v.Cur.offset = 0
+				v.Prev.offset = 0
+			} else {
+				v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
+				if v.Prev.offset <= minOff {
+					v.Prev.offset = 0
+				} else {
+					v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
+				}
+			}
+			e.bTable[i] = v
+		}
+		e.cur = maxMatchOffset
+	}
+
+	s := e.addBlock(src)
+
+	// This check isn't in the Snappy implementation, but there, the caller
+	// instead of the callee handles this case.
+	if len(src) < minNonLiteralBlockSize {
+		// We do not fill the token table.
+		// This will be picked up by caller.
+		dst.n = uint16(len(src))
+		return
+	}
+
+	// Override src
+	src = e.hist
+	nextEmit := s
+
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := int32(len(src) - inputMargin)
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	cv := load6432(src, s)
+	// Repeat MUST be > 1 and within range
+	repeat := int32(1)
+	for {
+		const skipLog = 7
+		const doEvery = 1
+
+		nextS := s
+		var l int32
+		var t int32
+		for {
+			nextHashS := hashLen(cv, tableBits, hashShortBytes)
+			nextHashL := hash7(cv, tableBits)
+			s = nextS
+			nextS = s + doEvery + (s-nextEmit)>>skipLog
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+			// Fetch a short+long candidate
+			sCandidate := e.table[nextHashS]
+			lCandidate := e.bTable[nextHashL]
+			next := load6432(src, nextS)
+			entry := tableEntry{offset: s + e.cur}
+			e.table[nextHashS] = entry
+			eLong := &e.bTable[nextHashL]
+			eLong.Cur, eLong.Prev = entry, eLong.Cur
+
+			// Calculate hashes of 'next'
+			nextHashS = hashLen(next, tableBits, hashShortBytes)
+			nextHashL = hash7(next, tableBits)
+
+			t = lCandidate.Cur.offset - e.cur
+			if s-t < maxMatchOffset {
+				if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
+					// Long candidate matches at least 4 bytes.
+
+					// Store the next match
+					e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
+					eLong := &e.bTable[nextHashL]
+					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
+
+					// Check the previous long candidate as well.
+					t2 := lCandidate.Prev.offset - e.cur
+					if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
+						l = e.matchlen(s+4, t+4, src) + 4
+						ml1 := e.matchlen(s+4, t2+4, src) + 4
+						if ml1 > l {
+							t = t2
+							l = ml1
+							break
+						}
+					}
+					break
+				}
+				// Current value did not match, but check if previous long value does.
+				t = lCandidate.Prev.offset - e.cur
+				if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
+					// Store the next match
+					e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
+					eLong := &e.bTable[nextHashL]
+					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
+					break
+				}
+			}
+
+			t = sCandidate.offset - e.cur
+			if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
+				// Found a 4 match...
+				l = e.matchlen(s+4, t+4, src) + 4
+
+				// Look up next long candidate (at nextS)
+				lCandidate = e.bTable[nextHashL]
+
+				// Store the next match
+				e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
+				eLong := &e.bTable[nextHashL]
+				eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
+
+				// Check repeat at s + repOff
+				const repOff = 1
+				t2 := s - repeat + repOff
+				if load3232(src, t2) == uint32(cv>>(8*repOff)) {
+					ml := e.matchlen(s+4+repOff, t2+4, src) + 4
+					if ml > l {
+						t = t2
+						l = ml
+						s += repOff
+						// Not worth checking more.
+						break
+					}
+				}
+
+				// If the next long is a candidate, use that...
+				t2 = lCandidate.Cur.offset - e.cur
+				if nextS-t2 < maxMatchOffset {
+					if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
+						ml := e.matchlen(nextS+4, t2+4, src) + 4
+						if ml > l {
+							t = t2
+							s = nextS
+							l = ml
+							// This is ok, but check previous as well.
+						}
+					}
+					// If the previous long is a candidate, use that...
+					t2 = lCandidate.Prev.offset - e.cur
+					if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
+						ml := e.matchlen(nextS+4, t2+4, src) + 4
+						if ml > l {
+							t = t2
+							s = nextS
+							l = ml
+							break
+						}
+					}
+				}
+				break
+			}
+			cv = next
+		}
+
+		// A 4-byte match has been found. We'll later see if more than 4 bytes
+		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+		// them as literal bytes.
+
+		// Extend the 4-byte match as long as possible.
+		if l == 0 {
+			l = e.matchlenLong(s+4, t+4, src) + 4
+		} else if l == maxMatchLength {
+			l += e.matchlenLong(s+l, t+l, src)
+		}
+
+		// Try to locate a better match by checking the end-of-match...
+		if sAt := s + l; sAt < sLimit {
+			// Allow some bytes at the beginning to mismatch.
+			// Sweet spot is 2/3 bytes depending on input.
+			// 3 is only a little better when it is but sometimes a lot worse.
+			// The skipped bytes are tested in Extend backwards,
+			// and still picked up as part of the match if they do.
+			const skipBeginning = 2
+			eLong := &e.bTable[hash7(load6432(src, sAt), tableBits)]
+			// Test current
+			t2 := eLong.Cur.offset - e.cur - l + skipBeginning
+			s2 := s + skipBeginning
+			off := s2 - t2
+			if off < maxMatchOffset {
+				if off > 0 && t2 >= 0 {
+					if l2 := e.matchlenLong(s2, t2, src); l2 > l {
+						t = t2
+						l = l2
+						s = s2
+					}
+				}
+				// Test next:
+				t2 = eLong.Prev.offset - e.cur - l + skipBeginning
+				off := s2 - t2
+				if off > 0 && off < maxMatchOffset && t2 >= 0 {
+					if l2 := e.matchlenLong(s2, t2, src); l2 > l {
+						t = t2
+						l = l2
+						s = s2
+					}
+				}
+			}
+		}
+
+		// Extend backwards
+		for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+			s--
+			t--
+			l++
+		}
+		if nextEmit < s {
+			if false {
+				emitLiteral(dst, src[nextEmit:s])
+			} else {
+				for _, v := range src[nextEmit:s] {
+					dst.tokens[dst.n] = token(v)
+					dst.litHist[v]++
+					dst.n++
+				}
+			}
+		}
+		if false {
+			if t >= s {
+				panic(fmt.Sprintln("s-t", s, t))
+			}
+			if (s - t) > maxMatchOffset {
+				panic(fmt.Sprintln("mmo", s-t))
+			}
+			if l < baseMatchLength {
+				panic("bml")
+			}
+		}
+
+		dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+		repeat = s - t
+		s += l
+		nextEmit = s
+		if nextS >= s {
+			s = nextS + 1
+		}
+
+		if s >= sLimit {
+			// Index after match end.
+			for i := nextS + 1; i < int32(len(src))-8; i += 2 {
+				cv := load6432(src, i)
+				e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: i + e.cur}
+				eLong := &e.bTable[hash7(cv, tableBits)]
+				eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur
+			}
+			goto emitRemainder
+		}
+
+		// Store every long hash in-between and every second short.
+		if true {
+			for i := nextS + 1; i < s-1; i += 2 {
+				cv := load6432(src, i)
+				t := tableEntry{offset: i + e.cur}
+				t2 := tableEntry{offset: t.offset + 1}
+				eLong := &e.bTable[hash7(cv, tableBits)]
+				eLong2 := &e.bTable[hash7(cv>>8, tableBits)]
+				e.table[hashLen(cv, tableBits, hashShortBytes)] = t
+				eLong.Cur, eLong.Prev = t, eLong.Cur
+				eLong2.Cur, eLong2.Prev = t2, eLong2.Cur
+			}
+		}
+
+		// We could immediately start working at s now, but to improve
+		// compression we first update the hash table at s-1 and at s.
+		cv = load6432(src, s)
+	}
+
+emitRemainder:
+	if int(nextEmit) < len(src) {
+		// If nothing was added, don't encode literals.
+		if dst.n == 0 {
+			return
+		}
+
+		emitLiteral(dst, src[nextEmit:])
+	}
+}
diff --git a/vendor/github.com/klauspost/compress/flate/regmask_amd64.go b/vendor/github.com/klauspost/compress/flate/regmask_amd64.go
new file mode 100644
index 00000000000..6ed28061b2b
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/regmask_amd64.go
@@ -0,0 +1,37 @@
+package flate
+
+const (
+	// Masks for shifts with register sizes of the shift value.
+	// This can be used to work around the x86 design of shifting by mod register size.
+	// It can be used when a variable shift is always smaller than the register size.
+
+	// reg8SizeMaskX - shift value is 8 bits, shifted is X
+	reg8SizeMask8  = 7
+	reg8SizeMask16 = 15
+	reg8SizeMask32 = 31
+	reg8SizeMask64 = 63
+
+	// reg16SizeMaskX - shift value is 16 bits, shifted is X
+	reg16SizeMask8  = reg8SizeMask8
+	reg16SizeMask16 = reg8SizeMask16
+	reg16SizeMask32 = reg8SizeMask32
+	reg16SizeMask64 = reg8SizeMask64
+
+	// reg32SizeMaskX - shift value is 32 bits, shifted is X
+	reg32SizeMask8  = reg8SizeMask8
+	reg32SizeMask16 = reg8SizeMask16
+	reg32SizeMask32 = reg8SizeMask32
+	reg32SizeMask64 = reg8SizeMask64
+
+	// reg64SizeMaskX - shift value is 64 bits, shifted is X
+	reg64SizeMask8  = reg8SizeMask8
+	reg64SizeMask16 = reg8SizeMask16
+	reg64SizeMask32 = reg8SizeMask32
+	reg64SizeMask64 = reg8SizeMask64
+
+	// regSizeMaskUintX - shift value is uint, shifted is X
+	regSizeMaskUint8  = reg8SizeMask8
+	regSizeMaskUint16 = reg8SizeMask16
+	regSizeMaskUint32 = reg8SizeMask32
+	regSizeMaskUint64 = reg8SizeMask64
+)
diff --git a/vendor/github.com/klauspost/compress/flate/regmask_other.go b/vendor/github.com/klauspost/compress/flate/regmask_other.go
new file mode 100644
index 00000000000..1b7a2cbd793
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/regmask_other.go
@@ -0,0 +1,40 @@
+//go:build !amd64
+// +build !amd64
+
+package flate
+
+const (
+	// Masks for shifts with register sizes of the shift value.
+	// This can be used to work around the x86 design of shifting by mod register size.
+	// It can be used when a variable shift is always smaller than the register size.
+
+	// reg8SizeMaskX - shift value is 8 bits, shifted is X
+	reg8SizeMask8  = 0xff
+	reg8SizeMask16 = 0xff
+	reg8SizeMask32 = 0xff
+	reg8SizeMask64 = 0xff
+
+	// reg16SizeMaskX - shift value is 16 bits, shifted is X
+	reg16SizeMask8  = 0xffff
+	reg16SizeMask16 = 0xffff
+	reg16SizeMask32 = 0xffff
+	reg16SizeMask64 = 0xffff
+
+	// reg32SizeMaskX - shift value is 32 bits, shifted is X
+	reg32SizeMask8  = 0xffffffff
+	reg32SizeMask16 = 0xffffffff
+	reg32SizeMask32 = 0xffffffff
+	reg32SizeMask64 = 0xffffffff
+
+	// reg64SizeMaskX - shift value is 64 bits, shifted is X
+	reg64SizeMask8  = 0xffffffffffffffff
+	reg64SizeMask16 = 0xffffffffffffffff
+	reg64SizeMask32 = 0xffffffffffffffff
+	reg64SizeMask64 = 0xffffffffffffffff
+
+	// regSizeMaskUintX - shift value is uint, shifted is X
+	regSizeMaskUint8  = ^uint(0)
+	regSizeMaskUint16 = ^uint(0)
+	regSizeMaskUint32 = ^uint(0)
+	regSizeMaskUint64 = ^uint(0)
+)
diff --git a/vendor/github.com/klauspost/compress/flate/stateless.go b/vendor/github.com/klauspost/compress/flate/stateless.go
new file mode 100644
index 00000000000..f3d4139ef36
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/stateless.go
@@ -0,0 +1,318 @@
+package flate
+
+import (
+	"io"
+	"math"
+	"sync"
+)
+
+const (
+	maxStatelessBlock = math.MaxInt16
+	// dictionary will be taken from maxStatelessBlock, so limit it.
+	maxStatelessDict = 8 << 10
+
+	slTableBits  = 13
+	slTableSize  = 1 << slTableBits
+	slTableShift = 32 - slTableBits
+)
+
+type statelessWriter struct {
+	dst    io.Writer
+	closed bool
+}
+
+func (s *statelessWriter) Close() error {
+	if s.closed {
+		return nil
+	}
+	s.closed = true
+	// Emit EOF block
+	return StatelessDeflate(s.dst, nil, true, nil)
+}
+
+func (s *statelessWriter) Write(p []byte) (n int, err error) {
+	err = StatelessDeflate(s.dst, p, false, nil)
+	if err != nil {
+		return 0, err
+	}
+	return len(p), nil
+}
+
+func (s *statelessWriter) Reset(w io.Writer) {
+	s.dst = w
+	s.closed = false
+}
+
+// NewStatelessWriter will do compression but without maintaining any state
+// between Write calls.
+// There will be no memory kept between Write calls,
+// but compression and speed will be suboptimal.
+// Because of this, the size of actual Write calls will affect output size.
+func NewStatelessWriter(dst io.Writer) io.WriteCloser {
+	return &statelessWriter{dst: dst}
+}
+
+// bitWriterPool contains bit writers that can be reused.
+var bitWriterPool = sync.Pool{
+	New: func() interface{} {
+		return newHuffmanBitWriter(nil)
+	},
+}
+
+// StatelessDeflate allows compressing directly to a Writer without retaining state.
+// When returning everything will be flushed.
+// Up to 8KB of an optional dictionary can be given which is presumed to precede the block.
+// Longer dictionaries will be truncated and will still produce valid output.
+// Sending nil dictionary is perfectly fine.
+func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
+	var dst tokens
+	bw := bitWriterPool.Get().(*huffmanBitWriter)
+	bw.reset(out)
+	defer func() {
+		// don't keep a reference to our output
+		bw.reset(nil)
+		bitWriterPool.Put(bw)
+	}()
+	if eof && len(in) == 0 {
+		// Just write an EOF block.
+		// Could be faster...
+		bw.writeStoredHeader(0, true)
+		bw.flush()
+		return bw.err
+	}
+
+	// Truncate dict
+	if len(dict) > maxStatelessDict {
+		dict = dict[len(dict)-maxStatelessDict:]
+	}
+
+	// For subsequent loops, keep shallow dict reference to avoid alloc+copy.
+	var inDict []byte
+
+	for len(in) > 0 {
+		todo := in
+		if len(inDict) > 0 {
+			if len(todo) > maxStatelessBlock-maxStatelessDict {
+				todo = todo[:maxStatelessBlock-maxStatelessDict]
+			}
+		} else if len(todo) > maxStatelessBlock-len(dict) {
+			todo = todo[:maxStatelessBlock-len(dict)]
+		}
+		inOrg := in
+		in = in[len(todo):]
+		uncompressed := todo
+		if len(dict) > 0 {
+			// combine dict and source
+			bufLen := len(todo) + len(dict)
+			combined := make([]byte, bufLen)
+			copy(combined, dict)
+			copy(combined[len(dict):], todo)
+			todo = combined
+		}
+		// Compress
+		if len(inDict) == 0 {
+			statelessEnc(&dst, todo, int16(len(dict)))
+		} else {
+			statelessEnc(&dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict)
+		}
+		isEof := eof && len(in) == 0
+
+		if dst.n == 0 {
+			bw.writeStoredHeader(len(uncompressed), isEof)
+			if bw.err != nil {
+				return bw.err
+			}
+			bw.writeBytes(uncompressed)
+		} else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 {
+			// If we removed less than 1/16th, huffman compress the block.
+			bw.writeBlockHuff(isEof, uncompressed, len(in) == 0)
+		} else {
+			bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0)
+		}
+		if len(in) > 0 {
+			// Retain a dict if we have more
+			inDict = inOrg[len(uncompressed)-maxStatelessDict:]
+			dict = nil
+			dst.Reset()
+		}
+		if bw.err != nil {
+			return bw.err
+		}
+	}
+	if !eof {
+		// Align, only a stored block can do that.
+		bw.writeStoredHeader(0, false)
+	}
+	bw.flush()
+	return bw.err
+}
+
+func hashSL(u uint32) uint32 {
+	return (u * 0x1e35a7bd) >> slTableShift
+}
+
+func load3216(b []byte, i int16) uint32 {
+	// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
+	b = b[i:]
+	b = b[:4]
+	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
+}
+
+func load6416(b []byte, i int16) uint64 {
+	// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
+	b = b[i:]
+	b = b[:8]
+	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
+		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
+}
+
+func statelessEnc(dst *tokens, src []byte, startAt int16) {
+	const (
+		inputMargin            = 12 - 1
+		minNonLiteralBlockSize = 1 + 1 + inputMargin
+	)
+
+	type tableEntry struct {
+		offset int16
+	}
+
+	var table [slTableSize]tableEntry
+
+	// This check isn't in the Snappy implementation, but there, the caller
+	// instead of the callee handles this case.
+	if len(src)-int(startAt) < minNonLiteralBlockSize {
+		// We do not fill the token table.
+		// This will be picked up by caller.
+		dst.n = 0
+		return
+	}
+	// Index until startAt
+	if startAt > 0 {
+		cv := load3232(src, 0)
+		for i := int16(0); i < startAt; i++ {
+			table[hashSL(cv)] = tableEntry{offset: i}
+			cv = (cv >> 8) | (uint32(src[i+4]) << 24)
+		}
+	}
+
+	s := startAt + 1
+	nextEmit := startAt
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := int16(len(src) - inputMargin)
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	cv := load3216(src, s)
+
+	for {
+		const skipLog = 5
+		const doEvery = 2
+
+		nextS := s
+		var candidate tableEntry
+		for {
+			nextHash := hashSL(cv)
+			candidate = table[nextHash]
+			nextS = s + doEvery + (s-nextEmit)>>skipLog
+			if nextS > sLimit || nextS <= 0 {
+				goto emitRemainder
+			}
+
+			now := load6416(src, nextS)
+			table[nextHash] = tableEntry{offset: s}
+			nextHash = hashSL(uint32(now))
+
+			if cv == load3216(src, candidate.offset) {
+				table[nextHash] = tableEntry{offset: nextS}
+				break
+			}
+
+			// Do one right away...
+			cv = uint32(now)
+			s = nextS
+			nextS++
+			candidate = table[nextHash]
+			now >>= 8
+			table[nextHash] = tableEntry{offset: s}
+
+			if cv == load3216(src, candidate.offset) {
+				table[nextHash] = tableEntry{offset: nextS}
+				break
+			}
+			cv = uint32(now)
+			s = nextS
+		}
+
+		// A 4-byte match has been found. We'll later see if more than 4 bytes
+		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+		// them as literal bytes.
+		for {
+			// Invariant: we have a 4-byte match at s, and no need to emit any
+			// literal bytes prior to s.
+
+			// Extend the 4-byte match as long as possible.
+			t := candidate.offset
+			l := int16(matchLen(src[s+4:], src[t+4:]) + 4)
+
+			// Extend backwards
+			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+				s--
+				t--
+				l++
+			}
+			if nextEmit < s {
+				if false {
+					emitLiteral(dst, src[nextEmit:s])
+				} else {
+					for _, v := range src[nextEmit:s] {
+						dst.tokens[dst.n] = token(v)
+						dst.litHist[v]++
+						dst.n++
+					}
+				}
+			}
+
+			// Save the match found
+			dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset))
+			s += l
+			nextEmit = s
+			if nextS >= s {
+				s = nextS + 1
+			}
+			if s >= sLimit {
+				goto emitRemainder
+			}
+
+			// We could immediately start working at s now, but to improve
+			// compression we first update the hash table at s-2 and at s. If
+			// another emitCopy is not our next move, also calculate nextHash
+			// at s+1. At least on GOARCH=amd64, these three hash calculations
+			// are faster as one load64 call (with some shifts) instead of
+			// three load32 calls.
+			x := load6416(src, s-2)
+			o := s - 2
+			prevHash := hashSL(uint32(x))
+			table[prevHash] = tableEntry{offset: o}
+			x >>= 16
+			currHash := hashSL(uint32(x))
+			candidate = table[currHash]
+			table[currHash] = tableEntry{offset: o + 2}
+
+			if uint32(x) != load3216(src, candidate.offset) {
+				cv = uint32(x >> 8)
+				s++
+				break
+			}
+		}
+	}
+
+emitRemainder:
+	if int(nextEmit) < len(src) {
+		// If nothing was added, don't encode literals.
+		if dst.n == 0 {
+			return
+		}
+		emitLiteral(dst, src[nextEmit:])
+	}
+}
diff --git a/vendor/github.com/klauspost/compress/flate/token.go b/vendor/github.com/klauspost/compress/flate/token.go
new file mode 100644
index 00000000000..d818790c132
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/token.go
@@ -0,0 +1,379 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+import (
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"math"
+)
+
+const (
+	// bits 0-16  	xoffset = offset - MIN_OFFSET_SIZE, or literal - 16 bits
+	// bits 16-22	offsetcode - 5 bits
+	// bits 22-30   xlength = length - MIN_MATCH_LENGTH - 8 bits
+	// bits 30-32   type   0 = literal  1=EOF  2=Match   3=Unused - 2 bits
+	lengthShift         = 22
+	offsetMask          = 1<<lengthShift - 1
+	typeMask            = 3 << 30
+	literalType         = 0 << 30
+	matchType           = 1 << 30
+	matchOffsetOnlyMask = 0xffff
+)
+
+// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
+// is lengthCodes[length - MIN_MATCH_LENGTH]
+var lengthCodes = [256]uint8{
+	0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
+	9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
+	13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
+	15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
+	17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
+	18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
+	19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
+	20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+	21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+	21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
+	22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+	22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
+	23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
+	24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+	24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+	24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+	25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+	25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+	25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+	25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
+	26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+	26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+	26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
+	27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+	27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+	27, 27, 27, 27, 27, 28,
+}
+
+// lengthCodes1 is length codes, but starting at 1.
+var lengthCodes1 = [256]uint8{
+	1, 2, 3, 4, 5, 6, 7, 8, 9, 9,
+	10, 10, 11, 11, 12, 12, 13, 13, 13, 13,
+	14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
+	16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
+	18, 18, 18, 18, 18, 18, 18, 18, 19, 19,
+	19, 19, 19, 19, 19, 19, 20, 20, 20, 20,
+	20, 20, 20, 20, 21, 21, 21, 21, 21, 21,
+	21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+	22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+	22, 22, 22, 22, 22, 22, 23, 23, 23, 23,
+	23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
+	23, 23, 24, 24, 24, 24, 24, 24, 24, 24,
+	24, 24, 24, 24, 24, 24, 24, 24, 25, 25,
+	25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+	25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+	25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+	26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+	26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+	26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+	26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
+	27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+	27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+	27, 27, 27, 27, 28, 28, 28, 28, 28, 28,
+	28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+	28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+	28, 28, 28, 28, 28, 29,
+}
+
+var offsetCodes = [256]uint32{
+	0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
+	8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
+	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+	13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+	13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+}
+
+// offsetCodes14 are offsetCodes, but with 14 added.
+var offsetCodes14 = [256]uint32{
+	14, 15, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21,
+	22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
+	24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+	25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+	26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+	26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+	27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+	27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+	28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+	28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+	28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+	28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+	29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+	29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+	29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+	29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+}
+
+type token uint32
+
+type tokens struct {
+	extraHist [32]uint16  // codes 256->maxnumlit
+	offHist   [32]uint16  // offset codes
+	litHist   [256]uint16 // codes 0->255
+	nFilled   int
+	n         uint16 // Must be able to contain maxStoreBlockSize
+	tokens    [maxStoreBlockSize + 1]token
+}
+
+func (t *tokens) Reset() {
+	if t.n == 0 {
+		return
+	}
+	t.n = 0
+	t.nFilled = 0
+	for i := range t.litHist[:] {
+		t.litHist[i] = 0
+	}
+	for i := range t.extraHist[:] {
+		t.extraHist[i] = 0
+	}
+	for i := range t.offHist[:] {
+		t.offHist[i] = 0
+	}
+}
+
+func (t *tokens) Fill() {
+	if t.n == 0 {
+		return
+	}
+	for i, v := range t.litHist[:] {
+		if v == 0 {
+			t.litHist[i] = 1
+			t.nFilled++
+		}
+	}
+	for i, v := range t.extraHist[:literalCount-256] {
+		if v == 0 {
+			t.nFilled++
+			t.extraHist[i] = 1
+		}
+	}
+	for i, v := range t.offHist[:offsetCodeCount] {
+		if v == 0 {
+			t.offHist[i] = 1
+		}
+	}
+}
+
+func indexTokens(in []token) tokens {
+	var t tokens
+	t.indexTokens(in)
+	return t
+}
+
+func (t *tokens) indexTokens(in []token) {
+	t.Reset()
+	for _, tok := range in {
+		if tok < matchType {
+			t.AddLiteral(tok.literal())
+			continue
+		}
+		t.AddMatch(uint32(tok.length()), tok.offset()&matchOffsetOnlyMask)
+	}
+}
+
+// emitLiteral writes a literal chunk and returns the number of bytes written.
+func emitLiteral(dst *tokens, lit []byte) {
+	for _, v := range lit {
+		dst.tokens[dst.n] = token(v)
+		dst.litHist[v]++
+		dst.n++
+	}
+}
+
+func (t *tokens) AddLiteral(lit byte) {
+	t.tokens[t.n] = token(lit)
+	t.litHist[lit]++
+	t.n++
+}
+
+// from https://stackoverflow.com/a/28730362
+func mFastLog2(val float32) float32 {
+	ux := int32(math.Float32bits(val))
+	log2 := (float32)(((ux >> 23) & 255) - 128)
+	ux &= -0x7f800001
+	ux += 127 << 23
+	uval := math.Float32frombits(uint32(ux))
+	log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759
+	return log2
+}
+
+// EstimatedBits will return an minimum size estimated by an *optimal*
+// compression of the block.
+// The size of the block
+func (t *tokens) EstimatedBits() int {
+	shannon := float32(0)
+	bits := int(0)
+	nMatches := 0
+	total := int(t.n) + t.nFilled
+	if total > 0 {
+		invTotal := 1.0 / float32(total)
+		for _, v := range t.litHist[:] {
+			if v > 0 {
+				n := float32(v)
+				shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
+			}
+		}
+		// Just add 15 for EOB
+		shannon += 15
+		for i, v := range t.extraHist[1 : literalCount-256] {
+			if v > 0 {
+				n := float32(v)
+				shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
+				bits += int(lengthExtraBits[i&31]) * int(v)
+				nMatches += int(v)
+			}
+		}
+	}
+	if nMatches > 0 {
+		invTotal := 1.0 / float32(nMatches)
+		for i, v := range t.offHist[:offsetCodeCount] {
+			if v > 0 {
+				n := float32(v)
+				shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
+				bits += int(offsetExtraBits[i&31]) * int(v)
+			}
+		}
+	}
+	return int(shannon) + bits
+}
+
+// AddMatch adds a match to the tokens.
+// This function is very sensitive to inlining and right on the border.
+func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
+	if debugDeflate {
+		if xlength >= maxMatchLength+baseMatchLength {
+			panic(fmt.Errorf("invalid length: %v", xlength))
+		}
+		if xoffset >= maxMatchOffset+baseMatchOffset {
+			panic(fmt.Errorf("invalid offset: %v", xoffset))
+		}
+	}
+	oCode := offsetCode(xoffset)
+	xoffset |= oCode << 16
+
+	t.extraHist[lengthCodes1[uint8(xlength)]]++
+	t.offHist[oCode&31]++
+	t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset)
+	t.n++
+}
+
+// AddMatchLong adds a match to the tokens, potentially longer than max match length.
+// Length should NOT have the base subtracted, only offset should.
+func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
+	if debugDeflate {
+		if xoffset >= maxMatchOffset+baseMatchOffset {
+			panic(fmt.Errorf("invalid offset: %v", xoffset))
+		}
+	}
+	oc := offsetCode(xoffset)
+	xoffset |= oc << 16
+	for xlength > 0 {
+		xl := xlength
+		if xl > 258 {
+			// We need to have at least baseMatchLength left over for next loop.
+			if xl > 258+baseMatchLength {
+				xl = 258
+			} else {
+				xl = 258 - baseMatchLength
+			}
+		}
+		xlength -= xl
+		xl -= baseMatchLength
+		t.extraHist[lengthCodes1[uint8(xl)]]++
+		t.offHist[oc&31]++
+		t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
+		t.n++
+	}
+}
+
+func (t *tokens) AddEOB() {
+	t.tokens[t.n] = token(endBlockMarker)
+	t.extraHist[0]++
+	t.n++
+}
+
+func (t *tokens) Slice() []token {
+	return t.tokens[:t.n]
+}
+
+// VarInt returns the tokens as varint encoded bytes.
+func (t *tokens) VarInt() []byte {
+	var b = make([]byte, binary.MaxVarintLen32*int(t.n))
+	var off int
+	for _, v := range t.tokens[:t.n] {
+		off += binary.PutUvarint(b[off:], uint64(v))
+	}
+	return b[:off]
+}
+
+// FromVarInt restores t to the varint encoded tokens provided.
+// Any data in t is removed.
+func (t *tokens) FromVarInt(b []byte) error {
+	var buf = bytes.NewReader(b)
+	var toks []token
+	for {
+		r, err := binary.ReadUvarint(buf)
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return err
+		}
+		toks = append(toks, token(r))
+	}
+	t.indexTokens(toks)
+	return nil
+}
+
+// Returns the type of a token
+func (t token) typ() uint32 { return uint32(t) & typeMask }
+
+// Returns the literal of a literal token
+func (t token) literal() uint8 { return uint8(t) }
+
+// Returns the extra offset of a match token
+func (t token) offset() uint32 { return uint32(t) & offsetMask }
+
+func (t token) length() uint8 { return uint8(t >> lengthShift) }
+
+// Convert length to code.
+func lengthCode(len uint8) uint8 { return lengthCodes[len] }
+
+// Returns the offset code corresponding to a specific offset
+func offsetCode(off uint32) uint32 {
+	if false {
+		if off < uint32(len(offsetCodes)) {
+			return offsetCodes[off&255]
+		} else if off>>7 < uint32(len(offsetCodes)) {
+			return offsetCodes[(off>>7)&255] + 14
+		} else {
+			return offsetCodes[(off>>14)&255] + 28
+		}
+	}
+	if off < uint32(len(offsetCodes)) {
+		return offsetCodes[uint8(off)]
+	}
+	return offsetCodes14[uint8(off>>7)]
+}
diff --git a/vendor/github.com/NYTimes/gziphandler/LICENSE b/vendor/github.com/klauspost/compress/gzhttp/LICENSE
similarity index 100%
rename from vendor/github.com/NYTimes/gziphandler/LICENSE
rename to vendor/github.com/klauspost/compress/gzhttp/LICENSE
diff --git a/vendor/github.com/klauspost/compress/gzhttp/README.md b/vendor/github.com/klauspost/compress/gzhttp/README.md
new file mode 100644
index 00000000000..8b784d2baad
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/gzhttp/README.md
@@ -0,0 +1,283 @@
+Gzip Middleware
+===============
+
+This Go package which wraps HTTP *server* handlers to transparently gzip the
+response body, for clients which support it. 
+
+For HTTP *clients* we provide a transport wrapper that will do gzip decompression 
+faster than what the standard library offers.
+
+Both the client and server wrappers are fully compatible with other servers and clients.
+
+This package is forked from the dead [nytimes/gziphandler](https://github.com/nytimes/gziphandler)
+and extends functionality for it.
+
+## Install
+```bash
+go get -u github.com/klauspost/compress
+```
+
+## Documentation
+
+[![Go Reference](https://pkg.go.dev/badge/github.com/klauspost/compress/gzhttp.svg)](https://pkg.go.dev/github.com/klauspost/compress/gzhttp)
+
+
+## Usage
+
+There are 2 main parts, one for http servers and one for http clients.
+
+### Client
+
+The standard library automatically adds gzip compression to most requests 
+and handles decompression of the responses.
+
+However, by wrapping the transport we are able to override this and provide 
+our own (faster) decompressor.
+
+Wrapping is done on the Transport of the http client:
+
+```Go
+func ExampleTransport() {
+	// Get an HTTP client.
+	client := http.Client{
+		// Wrap the transport:
+		Transport: gzhttp.Transport(http.DefaultTransport),
+	}
+
+	resp, err := client.Get("https://google.com")
+	if err != nil {
+		return
+	}
+	defer resp.Body.Close()
+	
+	body, _ := ioutil.ReadAll(resp.Body)
+	fmt.Println("body:", string(body))
+}
+```
+
+Speed compared to standard library `DefaultTransport` for an approximate 127KB JSON payload:
+
+```
+BenchmarkTransport
+
+Single core:
+BenchmarkTransport/gzhttp-32                1995        609791 ns/op     214.14 MB/s       10129 B/op         73 allocs/op
+BenchmarkTransport/stdlib-32                1567        772161 ns/op     169.11 MB/s       53950 B/op         99 allocs/op
+BenchmarkTransport/zstd-32                  4579        238503 ns/op     547.51 MB/s       5775 B/op          69 allocs/op
+
+Multi Core:
+BenchmarkTransport/gzhttp-par-32           29113         36802 ns/op    3548.27 MB/s       11061 B/op         73 allocs/op
+BenchmarkTransport/stdlib-par-32           16114         66442 ns/op    1965.38 MB/s       54971 B/op         99 allocs/op
+BenchmarkTransport/zstd-par-32             90177         13110 ns/op    9960.83 MB/s       5361 B/op          67 allocs/op
+```
+
+This includes both serving the http request, parsing requests and decompressing. 
+
+### Server
+
+For the simplest usage call `GzipHandler` with any handler (an object which implements the
+`http.Handler` interface), and it'll return a new handler which gzips the
+response. For example:
+
+```go
+package main
+
+import (
+	"io"
+	"net/http"
+	"github.com/klauspost/compress/gzhttp"
+)
+
+func main() {
+	handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "text/plain")
+		io.WriteString(w, "Hello, World")
+	})
+    
+	http.Handle("/", gzhttp.GzipHandler(handler))
+	http.ListenAndServe("0.0.0.0:8000", nil)
+}
+```
+
+This will wrap a handler using the default options. 
+
+To specify custom options a reusable wrapper can be created that can be used to wrap
+any number of handlers.
+
+```Go
+package main
+
+import (
+	"io"
+	"log"
+	"net/http"
+	
+	"github.com/klauspost/compress/gzhttp"
+	"github.com/klauspost/compress/gzip"
+)
+
+func main() {
+	handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "text/plain")
+		io.WriteString(w, "Hello, World")
+	})
+	
+	// Create a reusable wrapper with custom options.
+	wrapper, err := gzhttp.NewWrapper(gzhttp.MinSize(2000), gzhttp.CompressionLevel(gzip.BestSpeed))
+	if err != nil {
+		log.Fatalln(err)
+	}
+	
+	http.Handle("/", wrapper(handler))
+	http.ListenAndServe("0.0.0.0:8000", nil)
+}
+
+```
+
+
+### Performance
+
+Speed compared to  [nytimes/gziphandler](https://github.com/nytimes/gziphandler) with default settings, 2KB, 20KB and 100KB:
+
+```
+λ benchcmp before.txt after.txt
+benchmark                         old ns/op     new ns/op     delta
+BenchmarkGzipHandler_S2k-32       51302         23679         -53.84%
+BenchmarkGzipHandler_S20k-32      301426        156331        -48.14%
+BenchmarkGzipHandler_S100k-32     1546203       818981        -47.03%
+BenchmarkGzipHandler_P2k-32       3973          1522          -61.69%
+BenchmarkGzipHandler_P20k-32      20319         9397          -53.75%
+BenchmarkGzipHandler_P100k-32     96079         46361         -51.75%
+
+benchmark                         old MB/s     new MB/s     speedup
+BenchmarkGzipHandler_S2k-32       39.92        86.49        2.17x
+BenchmarkGzipHandler_S20k-32      67.94        131.00       1.93x
+BenchmarkGzipHandler_S100k-32     66.23        125.03       1.89x
+BenchmarkGzipHandler_P2k-32       515.44       1345.31      2.61x
+BenchmarkGzipHandler_P20k-32      1007.92      2179.47      2.16x
+BenchmarkGzipHandler_P100k-32     1065.79      2208.75      2.07x
+
+benchmark                         old allocs     new allocs     delta
+BenchmarkGzipHandler_S2k-32       22             16             -27.27%
+BenchmarkGzipHandler_S20k-32      25             19             -24.00%
+BenchmarkGzipHandler_S100k-32     28             21             -25.00%
+BenchmarkGzipHandler_P2k-32       22             16             -27.27%
+BenchmarkGzipHandler_P20k-32      25             19             -24.00%
+BenchmarkGzipHandler_P100k-32     27             21             -22.22%
+
+benchmark                         old bytes     new bytes     delta
+BenchmarkGzipHandler_S2k-32       8836          2980          -66.27%
+BenchmarkGzipHandler_S20k-32      69034         20562         -70.21%
+BenchmarkGzipHandler_S100k-32     356582        86682         -75.69%
+BenchmarkGzipHandler_P2k-32       9062          2971          -67.21%
+BenchmarkGzipHandler_P20k-32      67799         20051         -70.43%
+BenchmarkGzipHandler_P100k-32     300972        83077         -72.40%
+```
+
+### Stateless compression
+
+In cases where you expect to run many thousands of compressors concurrently, 
+but with very little activity you can use stateless compression. 
+This is not intended for regular web servers serving individual requests.
+
+Use `CompressionLevel(-3)` or `CompressionLevel(gzip.StatelessCompression)` to enable.
+Consider adding a [`bufio.Writer`](https://golang.org/pkg/bufio/#NewWriterSize) with a small buffer.
+
+See [more details on stateless compression](https://github.com/klauspost/compress#stateless-compression).
+
+### Migrating from gziphandler
+
+This package removes some of the extra constructors.
+When replacing, this can be used to find a replacement.
+
+* `GzipHandler(h)` -> `GzipHandler(h)` (keep as-is)
+* `GzipHandlerWithOpts(opts...)` -> `NewWrapper(opts...)`
+* `MustNewGzipLevelHandler(n)` -> `NewWrapper(CompressionLevel(n))`
+* `NewGzipLevelAndMinSize(n, s)` -> `NewWrapper(CompressionLevel(n), MinSize(s))` 
+
+By default, some mime types will now be excluded.
+To re-enable compression of all types, use the `ContentTypeFilter(gzhttp.CompressAllContentTypeFilter)` option.
+
+### Range Requests
+
+Ranged requests are not well supported with compression.
+Therefore any request with a "Content-Range" header is not compressed.
+
+To signify that range requests are not supported any "Accept-Ranges" header set is removed when data is compressed.
+If you do not want this behavior use the `KeepAcceptRanges()` option.
+
+### Flushing data
+
+The wrapper supports the [http.Flusher](https://golang.org/pkg/net/http/#Flusher) interface.
+
+The only caveat is that the writer may not yet have received enough bytes to determine if `MinSize`
+has been reached. In this case it will assume that the minimum size has been reached.
+
+If nothing has been written to the response writer, nothing will be flushed.
+
+## BREACH mitigation
+
+[BREACH](http://css.csail.mit.edu/6.858/2020/readings/breach.pdf) is a specialized attack where attacker controlled data
+is injected alongside secret data in a response body. This can lead to sidechannel attacks, where observing the compressed response
+size can reveal if there are overlaps between the secret data and the injected data.
+
+For more information see https://breachattack.com/
+
+It can be hard to judge if you are vulnerable to BREACH. 
+In general, if you do not include any user provided content in the response body you are safe,
+but if you do, or you are in doubt, you can apply mitigations.
+
+`gzhttp` can apply [Heal the Breach](https://ieeexplore.ieee.org/document/9754554), or improved content aware padding.
+
+```Go
+// RandomJitter adds 1->n random bytes to output based on checksum of payload.
+// Specify the amount of input to buffer before applying jitter.
+// This should cover the sensitive part of your response.
+// This can be used to obfuscate the exact compressed size.
+// Specifying 0 will use a buffer size of 64KB.
+// 'paranoid' will use a slower hashing function, that MAY provide more safety. 
+// If a negative buffer is given, the amount of jitter will not be content dependent.
+// This provides *less* security than applying content based jitter.
+func RandomJitter(n, buffer int, paranoid bool) option
+...	
+```
+
+The jitter is added as a "Comment" field. This field has a 1 byte overhead, so actual extra size will be 2 -> n+1 (inclusive).
+
+A good option would be to apply 32 random bytes, with default 64KB buffer: `gzhttp.RandomJitter(32, 0, false)`.
+
+Note that flushing the data forces the padding to be applied, which means that only data before the flush is considered for content aware padding.
+
+The *padding* in the comment is the text `Padding-Padding-Padding-Padding-Pad....`
+
+The *length* is `1 + crc32c(payload) MOD n` or `1 + sha256(payload) MOD n` (paranoid), or just random from `crypto/rand` if buffer < 0.
+
+### Paranoid?
+
+The padding size is determined by the remainder of a CRC32 of the content. 
+
+Since the payload contains elements unknown to the attacker, there is no reason to believe they can derive any information
+from this remainder, or predict it.
+
+However, for those that feel uncomfortable with a CRC32 being used for this can enable "paranoid" mode which will use SHA256 for determining the padding.
+
+The hashing itself is about 2 orders of magnitude slower, but in overall terms will maybe only reduce speed by 10%.
+
+Paranoid mode has no effect if buffer is < 0 (non-content aware padding).
+
+### Examples
+
+Adding the option `gzhttp.RandomJitter(32, 50000)` will apply from 1 up to 32 bytes of random data to the output.
+
+The number of bytes added depends on the content of the first 50000 bytes, or all of them if the output was less than that.
+
+Adding the option `gzhttp.RandomJitter(32, -1)` will apply from 1 up to 32 bytes of random data to the output.
+Each call will apply a random amount of jitter. This should be considered less secure than content based jitter.
+
+This can be used if responses are very big, deterministic and the buffer size would be too big to cover where the mutation occurs.  
+
+## License
+
+[Apache 2.0](LICENSE)
+
+
diff --git a/vendor/github.com/klauspost/compress/gzhttp/compress.go b/vendor/github.com/klauspost/compress/gzhttp/compress.go
new file mode 100644
index 00000000000..6ca11b1c108
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/gzhttp/compress.go
@@ -0,0 +1,984 @@
+package gzhttp
+
+import (
+	"bufio"
+	"crypto/rand"
+	"crypto/sha256"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"hash/crc32"
+	"io"
+	"math"
+	"math/bits"
+	"mime"
+	"net"
+	"net/http"
+	"strconv"
+	"strings"
+	"sync"
+
+	"github.com/klauspost/compress/gzhttp/writer"
+	"github.com/klauspost/compress/gzhttp/writer/gzkp"
+	"github.com/klauspost/compress/gzip"
+)
+
+const (
+	// HeaderNoCompression can be used to disable compression.
+	// Any header value will disable compression.
+	// The Header is always removed from output.
+	HeaderNoCompression = "No-Gzip-Compression"
+
+	vary            = "Vary"
+	acceptEncoding  = "Accept-Encoding"
+	contentEncoding = "Content-Encoding"
+	contentRange    = "Content-Range"
+	acceptRanges    = "Accept-Ranges"
+	contentType     = "Content-Type"
+	contentLength   = "Content-Length"
+	eTag            = "ETag"
+)
+
+type codings map[string]float64
+
+const (
+	// DefaultQValue is the default qvalue to assign to an encoding if no explicit qvalue is set.
+	// This is actually kind of ambiguous in RFC 2616, so hopefully it's correct.
+	// The examples seem to indicate that it is.
+	DefaultQValue = 1.0
+
+	// DefaultMinSize is the default minimum size until we enable gzip compression.
+	// 1500 bytes is the MTU size for the internet since that is the largest size allowed at the network layer.
+	// If you take a file that is 1300 bytes and compress it to 800 bytes, it’s still transmitted in that same 1500 byte packet regardless, so you’ve gained nothing.
+	// That being the case, you should restrict the gzip compression to files with a size (plus header) greater than a single packet,
+	// 1024 bytes (1KB) is therefore default.
+	DefaultMinSize = 1024
+)
+
+// GzipResponseWriter provides an http.ResponseWriter interface, which gzips
+// bytes before writing them to the underlying response. This doesn't close the
+// writers, so don't forget to do that.
+// It can be configured to skip response smaller than minSize.
+type GzipResponseWriter struct {
+	http.ResponseWriter
+	level     int
+	gwFactory writer.GzipWriterFactory
+	gw        writer.GzipWriter
+
+	code int // Saves the WriteHeader value.
+
+	minSize          int    // Specifies the minimum response size to gzip. If the response length is bigger than this value, it is compressed.
+	buf              []byte // Holds the first part of the write before reaching the minSize or the end of the write.
+	ignore           bool   // If true, then we immediately passthru writes to the underlying ResponseWriter.
+	keepAcceptRanges bool   // Keep "Accept-Ranges" header.
+	setContentType   bool   // Add content type, if missing and detected.
+	suffixETag       string // Suffix to add to ETag header if response is compressed.
+	dropETag         bool   // Drop ETag header if response is compressed (supersedes suffixETag).
+	sha256Jitter     bool   // Use sha256 for jitter.
+	randomJitter     string // Add random bytes to output as header field.
+	jitterBuffer     int    // Maximum buffer to accumulate before doing jitter.
+
+	contentTypeFilter func(ct string) bool // Only compress if the response is one of these content-types. All are accepted if empty.
+}
+
+type GzipResponseWriterWithCloseNotify struct {
+	*GzipResponseWriter
+}
+
+func (w GzipResponseWriterWithCloseNotify) CloseNotify() <-chan bool {
+	return w.ResponseWriter.(http.CloseNotifier).CloseNotify()
+}
+
+// Write appends data to the gzip writer.
+func (w *GzipResponseWriter) Write(b []byte) (int, error) {
+	// GZIP responseWriter is initialized. Use the GZIP responseWriter.
+	if w.gw != nil {
+		return w.gw.Write(b)
+	}
+
+	// If we have already decided not to use GZIP, immediately passthrough.
+	if w.ignore {
+		return w.ResponseWriter.Write(b)
+	}
+
+	// Save the write into a buffer for later use in GZIP responseWriter
+	// (if content is long enough) or at close with regular responseWriter.
+	wantBuf := 512
+	if w.minSize > wantBuf {
+		wantBuf = w.minSize
+	}
+	if w.jitterBuffer > 0 && w.jitterBuffer > wantBuf {
+		wantBuf = w.jitterBuffer
+	}
+	toAdd := len(b)
+	if len(w.buf)+toAdd > wantBuf {
+		toAdd = wantBuf - len(w.buf)
+	}
+	w.buf = append(w.buf, b[:toAdd]...)
+	remain := b[toAdd:]
+	hdr := w.Header()
+
+	// Only continue if they didn't already choose an encoding or a known unhandled content length or type.
+	if len(hdr[HeaderNoCompression]) == 0 && hdr.Get(contentEncoding) == "" && hdr.Get(contentRange) == "" {
+		// Check more expensive parts now.
+		cl, _ := atoi(hdr.Get(contentLength))
+		ct := hdr.Get(contentType)
+		if cl == 0 || cl >= w.minSize && (ct == "" || w.contentTypeFilter(ct)) {
+			// If the current buffer is less than minSize and a Content-Length isn't set, then wait until we have more data.
+			if len(w.buf) < w.minSize && cl == 0 || (w.jitterBuffer > 0 && len(w.buf) < w.jitterBuffer) {
+				return len(b), nil
+			}
+
+			// If the Content-Length is larger than minSize or the current buffer is larger than minSize, then continue.
+			if cl >= w.minSize || len(w.buf) >= w.minSize {
+				// If a Content-Type wasn't specified, infer it from the current buffer.
+				if ct == "" {
+					ct = http.DetectContentType(w.buf)
+				}
+
+				// Handles the intended case of setting a nil Content-Type (as for http/server or http/fs)
+				// Set the header only if the key does not exist
+				if _, ok := hdr[contentType]; w.setContentType && !ok {
+					hdr.Set(contentType, ct)
+				}
+
+				// If the Content-Type is acceptable to GZIP, initialize the GZIP writer.
+				if w.contentTypeFilter(ct) {
+					if err := w.startGzip(remain); err != nil {
+						return 0, err
+					}
+					if len(remain) > 0 {
+						if _, err := w.gw.Write(remain); err != nil {
+							return 0, err
+						}
+					}
+					return len(b), nil
+				}
+			}
+		}
+	}
+	// If we got here, we should not GZIP this response.
+	if err := w.startPlain(); err != nil {
+		return 0, err
+	}
+	if len(remain) > 0 {
+		if _, err := w.ResponseWriter.Write(remain); err != nil {
+			return 0, err
+		}
+	}
+	return len(b), nil
+}
+
+var castagnoliTable = crc32.MakeTable(crc32.Castagnoli)
+
+// startGzip initializes a GZIP writer and writes the buffer.
+func (w *GzipResponseWriter) startGzip(remain []byte) error {
+	// Set the GZIP header.
+	w.Header().Set(contentEncoding, "gzip")
+
+	// if the Content-Length is already set, then calls to Write on gzip
+	// will fail to set the Content-Length header since its already set
+	// See: https://github.com/golang/go/issues/14975.
+	w.Header().Del(contentLength)
+
+	// Delete Accept-Ranges.
+	if !w.keepAcceptRanges {
+		w.Header().Del(acceptRanges)
+	}
+
+	// Suffix ETag.
+	if w.suffixETag != "" && !w.dropETag && w.Header().Get(eTag) != "" {
+		orig := w.Header().Get(eTag)
+		insertPoint := strings.LastIndex(orig, `"`)
+		if insertPoint == -1 {
+			insertPoint = len(orig)
+		}
+		w.Header().Set(eTag, orig[:insertPoint]+w.suffixETag+orig[insertPoint:])
+	}
+
+	// Delete ETag.
+	if w.dropETag {
+		w.Header().Del(eTag)
+	}
+
+	// Write the header to gzip response.
+	if w.code != 0 {
+		w.ResponseWriter.WriteHeader(w.code)
+		// Ensure that no other WriteHeader's happen
+		w.code = 0
+	}
+
+	// Initialize and flush the buffer into the gzip response if there are any bytes.
+	// If there aren't any, we shouldn't initialize it yet because on Close it will
+	// write the gzip header even if nothing was ever written.
+	if len(w.buf) > 0 {
+		// Initialize the GZIP response.
+		w.init()
+
+		// Set random jitter based on CRC or SHA-256 of current buffer.
+		// Before first write.
+		if len(w.randomJitter) > 0 {
+			var jitRNG uint32
+			if w.jitterBuffer > 0 {
+				if w.sha256Jitter {
+					h := sha256.New()
+					h.Write(w.buf)
+					// Use only up to "w.jitterBuffer", otherwise the output depends on write sizes.
+					if len(remain) > 0 && len(w.buf) < w.jitterBuffer {
+						remain := remain
+						if len(remain)+len(w.buf) > w.jitterBuffer {
+							remain = remain[:w.jitterBuffer-len(w.buf)]
+						}
+						h.Write(remain)
+					}
+					var tmp [sha256.Size]byte
+					jitRNG = binary.LittleEndian.Uint32(h.Sum(tmp[:0]))
+				} else {
+					h := crc32.Update(0, castagnoliTable, w.buf)
+					// Use only up to "w.jitterBuffer", otherwise the output depends on write sizes.
+					if len(remain) > 0 && len(w.buf) < w.jitterBuffer {
+						remain := remain
+						if len(remain)+len(w.buf) > w.jitterBuffer {
+							remain = remain[:w.jitterBuffer-len(w.buf)]
+						}
+						h = crc32.Update(h, castagnoliTable, remain)
+					}
+					jitRNG = bits.RotateLeft32(h, 19) ^ 0xab0755de
+				}
+			} else {
+				// Get from rand.Reader
+				var tmp [4]byte
+				_, err := rand.Read(tmp[:])
+				if err != nil {
+					return fmt.Errorf("gzhttp: %w", err)
+				}
+				jitRNG = binary.LittleEndian.Uint32(tmp[:])
+			}
+			jit := w.randomJitter[:1+jitRNG%uint32(len(w.randomJitter)-1)]
+			w.gw.(writer.GzipWriterExt).SetHeader(writer.Header{Comment: jit})
+		}
+		n, err := w.gw.Write(w.buf)
+
+		// This should never happen (per io.Writer docs), but if the write didn't
+		// accept the entire buffer but returned no specific error, we have no clue
+		// what's going on, so abort just to be safe.
+		if err == nil && n < len(w.buf) {
+			err = io.ErrShortWrite
+		}
+		w.buf = w.buf[:0]
+		return err
+	}
+	return nil
+}
+
+// startPlain writes to sent bytes and buffer the underlying ResponseWriter without gzip.
+func (w *GzipResponseWriter) startPlain() error {
+	w.Header().Del(HeaderNoCompression)
+	if w.code != 0 {
+		w.ResponseWriter.WriteHeader(w.code)
+		// Ensure that no other WriteHeader's happen
+		w.code = 0
+	}
+
+	w.ignore = true
+	// If Write was never called then don't call Write on the underlying ResponseWriter.
+	if len(w.buf) == 0 {
+		return nil
+	}
+	n, err := w.ResponseWriter.Write(w.buf)
+	// This should never happen (per io.Writer docs), but if the write didn't
+	// accept the entire buffer but returned no specific error, we have no clue
+	// what's going on, so abort just to be safe.
+	if err == nil && n < len(w.buf) {
+		err = io.ErrShortWrite
+	}
+
+	w.buf = w.buf[:0]
+	return err
+}
+
+// WriteHeader just saves the response code until close or GZIP effective writes.
+func (w *GzipResponseWriter) WriteHeader(code int) {
+	if w.code == 0 {
+		w.code = code
+	}
+}
+
+// init graps a new gzip writer from the gzipWriterPool and writes the correct
+// content encoding header.
+func (w *GzipResponseWriter) init() {
+	// Bytes written during ServeHTTP are redirected to this gzip writer
+	// before being written to the underlying response.
+	w.gw = w.gwFactory.New(w.ResponseWriter, w.level)
+}
+
+// Close will close the gzip.Writer and will put it back in the gzipWriterPool.
+func (w *GzipResponseWriter) Close() error {
+	if w.ignore {
+		return nil
+	}
+	if w.gw == nil {
+		var (
+			ct = w.Header().Get(contentType)
+			ce = w.Header().Get(contentEncoding)
+			cr = w.Header().Get(contentRange)
+		)
+		// fmt.Println(len(w.buf) == 0, len(w.buf) < w.minSize, len(w.Header()[HeaderNoCompression]) != 0, ce != "", cr != "", !w.contentTypeFilter(ct))
+		if len(w.buf) == 0 || len(w.buf) < w.minSize || len(w.Header()[HeaderNoCompression]) != 0 || ce != "" || cr != "" || !w.contentTypeFilter(ct) {
+			// GZIP not triggered, write out regular response.
+			return w.startPlain()
+		}
+		err := w.startGzip(nil)
+		if err != nil {
+			return err
+		}
+	}
+
+	err := w.gw.Close()
+	w.gw = nil
+	return err
+}
+
+// Flush flushes the underlying *gzip.Writer and then the underlying
+// http.ResponseWriter if it is an http.Flusher. This makes GzipResponseWriter
+// an http.Flusher.
+// If not enough bytes has been written to determine if we have reached minimum size,
+// this will be ignored.
+// If nothing has been written yet, nothing will be flushed.
+func (w *GzipResponseWriter) Flush() {
+	if w.gw == nil && !w.ignore {
+		if len(w.buf) == 0 {
+			// Nothing written yet.
+			return
+		}
+		var (
+			cl, _ = atoi(w.Header().Get(contentLength))
+			ct    = w.Header().Get(contentType)
+			ce    = w.Header().Get(contentEncoding)
+			cr    = w.Header().Get(contentRange)
+		)
+
+		if ct == "" {
+			ct = http.DetectContentType(w.buf)
+
+			// Handles the intended case of setting a nil Content-Type (as for http/server or http/fs)
+			// Set the header only if the key does not exist
+			if _, ok := w.Header()[contentType]; w.setContentType && !ok {
+				w.Header().Set(contentType, ct)
+			}
+		}
+		if cl == 0 {
+			// Assume minSize.
+			cl = w.minSize
+		}
+
+		// See if we should compress...
+		if len(w.Header()[HeaderNoCompression]) == 0 && ce == "" && cr == "" && cl >= w.minSize && w.contentTypeFilter(ct) {
+			w.startGzip(nil)
+		} else {
+			w.startPlain()
+		}
+	}
+
+	if w.gw != nil {
+		w.gw.Flush()
+	}
+
+	if fw, ok := w.ResponseWriter.(http.Flusher); ok {
+		fw.Flush()
+	}
+}
+
+// Hijack implements http.Hijacker. If the underlying ResponseWriter is a
+// Hijacker, its Hijack method is returned. Otherwise an error is returned.
+func (w *GzipResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) {
+	if hj, ok := w.ResponseWriter.(http.Hijacker); ok {
+		return hj.Hijack()
+	}
+	return nil, nil, fmt.Errorf("http.Hijacker interface is not supported")
+}
+
+// verify Hijacker interface implementation
+var _ http.Hijacker = &GzipResponseWriter{}
+
+var onceDefault sync.Once
+var defaultWrapper func(http.Handler) http.HandlerFunc
+
+// GzipHandler allows to easily wrap an http handler with default settings.
+func GzipHandler(h http.Handler) http.HandlerFunc {
+	onceDefault.Do(func() {
+		var err error
+		defaultWrapper, err = NewWrapper()
+		if err != nil {
+			panic(err)
+		}
+	})
+
+	return defaultWrapper(h)
+}
+
+var grwPool = sync.Pool{New: func() interface{} { return &GzipResponseWriter{} }}
+
+// NewWrapper returns a reusable wrapper with the supplied options.
+func NewWrapper(opts ...option) (func(http.Handler) http.HandlerFunc, error) {
+	c := &config{
+		level:   gzip.DefaultCompression,
+		minSize: DefaultMinSize,
+		writer: writer.GzipWriterFactory{
+			Levels: gzkp.Levels,
+			New:    gzkp.NewWriter,
+		},
+		contentTypes:   DefaultContentTypeFilter,
+		setContentType: true,
+	}
+
+	for _, o := range opts {
+		o(c)
+	}
+
+	if err := c.validate(); err != nil {
+		return nil, err
+	}
+
+	return func(h http.Handler) http.HandlerFunc {
+		return func(w http.ResponseWriter, r *http.Request) {
+			w.Header().Add(vary, acceptEncoding)
+			if acceptsGzip(r) {
+				gw := grwPool.Get().(*GzipResponseWriter)
+				*gw = GzipResponseWriter{
+					ResponseWriter:    w,
+					gwFactory:         c.writer,
+					level:             c.level,
+					minSize:           c.minSize,
+					contentTypeFilter: c.contentTypes,
+					keepAcceptRanges:  c.keepAcceptRanges,
+					dropETag:          c.dropETag,
+					suffixETag:        c.suffixETag,
+					buf:               gw.buf,
+					setContentType:    c.setContentType,
+					randomJitter:      c.randomJitter,
+					jitterBuffer:      c.jitterBuffer,
+					sha256Jitter:      c.sha256Jitter,
+				}
+				if len(gw.buf) > 0 {
+					gw.buf = gw.buf[:0]
+				}
+				defer func() {
+					gw.Close()
+					gw.ResponseWriter = nil
+					grwPool.Put(gw)
+				}()
+
+				if _, ok := w.(http.CloseNotifier); ok {
+					gwcn := GzipResponseWriterWithCloseNotify{gw}
+					h.ServeHTTP(gwcn, r)
+				} else {
+					h.ServeHTTP(gw, r)
+				}
+				w.Header().Del(HeaderNoCompression)
+			} else {
+				h.ServeHTTP(newNoGzipResponseWriter(w), r)
+				w.Header().Del(HeaderNoCompression)
+			}
+		}
+	}, nil
+}
+
+// Parsed representation of one of the inputs to ContentTypes.
+// See https://golang.org/pkg/mime/#ParseMediaType
+type parsedContentType struct {
+	mediaType string
+	params    map[string]string
+}
+
+// equals returns whether this content type matches another content type.
+func (pct parsedContentType) equals(mediaType string, params map[string]string) bool {
+	if pct.mediaType != mediaType {
+		return false
+	}
+	// if pct has no params, don't care about other's params
+	if len(pct.params) == 0 {
+		return true
+	}
+
+	// if pct has any params, they must be identical to other's.
+	if len(pct.params) != len(params) {
+		return false
+	}
+	for k, v := range pct.params {
+		if w, ok := params[k]; !ok || v != w {
+			return false
+		}
+	}
+	return true
+}
+
+// Used for functional configuration.
+type config struct {
+	minSize          int
+	level            int
+	writer           writer.GzipWriterFactory
+	contentTypes     func(ct string) bool
+	keepAcceptRanges bool
+	setContentType   bool
+	suffixETag       string
+	dropETag         bool
+	jitterBuffer     int
+	randomJitter     string
+	sha256Jitter     bool
+}
+
+func (c *config) validate() error {
+	min, max := c.writer.Levels()
+	if c.level < min || c.level > max {
+		return fmt.Errorf("invalid compression level requested: %d, valid range %d -> %d", c.level, min, max)
+	}
+
+	if c.minSize < 0 {
+		return fmt.Errorf("minimum size must be more than zero")
+	}
+	if len(c.randomJitter) >= math.MaxUint16 {
+		return fmt.Errorf("random jitter size exceeded")
+	}
+	if len(c.randomJitter) > 0 {
+		gzw, ok := c.writer.New(io.Discard, c.level).(writer.GzipWriterExt)
+		if !ok {
+			return errors.New("the custom compressor does not allow setting headers for random jitter")
+		}
+		gzw.Close()
+	}
+	return nil
+}
+
+type option func(c *config)
+
+func MinSize(size int) option {
+	return func(c *config) {
+		c.minSize = size
+	}
+}
+
+// CompressionLevel sets the compression level
+func CompressionLevel(level int) option {
+	return func(c *config) {
+		c.level = level
+	}
+}
+
+// SetContentType sets the content type before returning
+// requests, if unset before returning, and it was detected.
+// Default: true.
+func SetContentType(b bool) option {
+	return func(c *config) {
+		c.setContentType = b
+	}
+}
+
+// Implementation changes the implementation of GzipWriter
+//
+// The default implementation is backed by github.com/klauspost/compress
+// To support RandomJitter, the GzipWriterExt must also be
+// supported by the returned writers.
+func Implementation(writer writer.GzipWriterFactory) option {
+	return func(c *config) {
+		c.writer = writer
+	}
+}
+
+// ContentTypes specifies a list of content types to compare
+// the Content-Type header to before compressing. If none
+// match, the response will be returned as-is.
+//
+// Content types are compared in a case-insensitive, whitespace-ignored
+// manner.
+//
+// A MIME type without any other directive will match a content type
+// that has the same MIME type, regardless of that content type's other
+// directives. I.e., "text/html" will match both "text/html" and
+// "text/html; charset=utf-8".
+//
+// A MIME type with any other directive will only match a content type
+// that has the same MIME type and other directives. I.e.,
+// "text/html; charset=utf-8" will only match "text/html; charset=utf-8".
+//
+// By default common compressed audio, video and archive formats, see DefaultContentTypeFilter.
+//
+// Setting this will override default and any previous Content Type settings.
+func ContentTypes(types []string) option {
+	return func(c *config) {
+		var contentTypes []parsedContentType
+		for _, v := range types {
+			mediaType, params, err := mime.ParseMediaType(v)
+			if err == nil {
+				contentTypes = append(contentTypes, parsedContentType{mediaType, params})
+			}
+		}
+		c.contentTypes = func(ct string) bool {
+			return handleContentType(contentTypes, ct)
+		}
+	}
+}
+
+// ExceptContentTypes specifies a list of content types to compare
+// the Content-Type header to before compressing. If none
+// match, the response will be compressed.
+//
+// Content types are compared in a case-insensitive, whitespace-ignored
+// manner.
+//
+// A MIME type without any other directive will match a content type
+// that has the same MIME type, regardless of that content type's other
+// directives. I.e., "text/html" will match both "text/html" and
+// "text/html; charset=utf-8".
+//
+// A MIME type with any other directive will only match a content type
+// that has the same MIME type and other directives. I.e.,
+// "text/html; charset=utf-8" will only match "text/html; charset=utf-8".
+//
+// By default common compressed audio, video and archive formats, see DefaultContentTypeFilter.
+//
+// Setting this will override default and any previous Content Type settings.
+func ExceptContentTypes(types []string) option {
+	return func(c *config) {
+		var contentTypes []parsedContentType
+		for _, v := range types {
+			mediaType, params, err := mime.ParseMediaType(v)
+			if err == nil {
+				contentTypes = append(contentTypes, parsedContentType{mediaType, params})
+			}
+		}
+		c.contentTypes = func(ct string) bool {
+			return !handleContentType(contentTypes, ct)
+		}
+	}
+}
+
+// KeepAcceptRanges will keep Accept-Ranges header on gzipped responses.
+// This will likely break ranged requests since that cannot be transparently
+// handled by the filter.
+func KeepAcceptRanges() option {
+	return func(c *config) {
+		c.keepAcceptRanges = true
+	}
+}
+
+// ContentTypeFilter allows adding a custom content type filter.
+//
+// The supplied function must return true/false to indicate if content
+// should be compressed.
+//
+// When called no parsing of the content type 'ct' has been done.
+// It may have been set or auto-detected.
+//
+// Setting this will override default and any previous Content Type settings.
+func ContentTypeFilter(compress func(ct string) bool) option {
+	return func(c *config) {
+		c.contentTypes = compress
+	}
+}
+
+// SuffixETag adds the specified suffix to the ETag header (if it exists) of
+// responses which are compressed.
+//
+// Per [RFC 7232 Section 2.3.3](https://www.rfc-editor.org/rfc/rfc7232#section-2.3.3),
+// the ETag of a compressed response must differ from it's uncompressed version.
+//
+// A suffix such as "-gzip" is sometimes used as a workaround for generating a
+// unique new ETag (see https://bz.apache.org/bugzilla/show_bug.cgi?id=39727).
+func SuffixETag(suffix string) option {
+	return func(c *config) {
+		c.suffixETag = suffix
+	}
+}
+
+// DropETag removes the ETag of responses which are compressed. If DropETag is
+// specified in conjunction with SuffixETag, this option will take precedence
+// and the ETag will be dropped.
+//
+// Per [RFC 7232 Section 2.3.3](https://www.rfc-editor.org/rfc/rfc7232#section-2.3.3),
+// the ETag of a compressed response must differ from it's uncompressed version.
+//
+// This workaround eliminates ETag conflicts between the compressed and
+// uncompressed versions by removing the ETag from the compressed version.
+func DropETag() option {
+	return func(c *config) {
+		c.dropETag = true
+	}
+}
+
+// RandomJitter adds 1->n random bytes to output based on checksum of payload.
+// Specify the amount of input to buffer before applying jitter.
+// This should cover the sensitive part of your response.
+// This can be used to obfuscate the exact compressed size.
+// Specifying 0 will use a buffer size of 64KB.
+// 'paranoid' will use a slower hashing function, that MAY provide more safety.
+// See README.md for more information.
+// If a negative buffer is given, the amount of jitter will not be content dependent.
+// This provides *less* security than applying content based jitter.
+func RandomJitter(n, buffer int, paranoid bool) option {
+	return func(c *config) {
+		if n > 0 {
+			c.sha256Jitter = paranoid
+			c.randomJitter = strings.Repeat("Padding-", 1+(n/8))[:n+1]
+			c.jitterBuffer = buffer
+			if c.jitterBuffer == 0 {
+				c.jitterBuffer = 64 << 10
+			}
+		} else {
+			c.randomJitter = ""
+			c.jitterBuffer = 0
+		}
+	}
+}
+
+// acceptsGzip returns true if the given HTTP request indicates that it will
+// accept a gzipped response.
+func acceptsGzip(r *http.Request) bool {
+	// Note that we don't request this for HEAD requests,
+	// due to a bug in nginx:
+	//   https://trac.nginx.org/nginx/ticket/358
+	//   https://golang.org/issue/5522
+	return r.Method != http.MethodHead && parseEncodingGzip(r.Header.Get(acceptEncoding)) > 0
+}
+
+// returns true if we've been configured to compress the specific content type.
+func handleContentType(contentTypes []parsedContentType, ct string) bool {
+	// If contentTypes is empty we handle all content types.
+	if len(contentTypes) == 0 {
+		return true
+	}
+
+	mediaType, params, err := mime.ParseMediaType(ct)
+	if err != nil {
+		return false
+	}
+
+	for _, c := range contentTypes {
+		if c.equals(mediaType, params) {
+			return true
+		}
+	}
+
+	return false
+}
+
+// parseEncodingGzip returns the qvalue of gzip compression.
+func parseEncodingGzip(s string) float64 {
+	s = strings.TrimSpace(s)
+
+	for len(s) > 0 {
+		stop := strings.IndexByte(s, ',')
+		if stop < 0 {
+			stop = len(s)
+		}
+		coding, qvalue, _ := parseCoding(s[:stop])
+
+		if coding == "gzip" {
+			return qvalue
+		}
+		if stop == len(s) {
+			break
+		}
+		s = s[stop+1:]
+	}
+	return 0
+}
+
+func parseEncodings(s string) (codings, error) {
+	split := strings.Split(s, ",")
+	c := make(codings, len(split))
+	var e []string
+
+	for _, ss := range split {
+		coding, qvalue, err := parseCoding(ss)
+
+		if err != nil {
+			e = append(e, err.Error())
+		} else {
+			c[coding] = qvalue
+		}
+	}
+
+	// TODO (adammck): Use a proper multi-error struct, so the individual errors
+	//                 can be extracted if anyone cares.
+	if len(e) > 0 {
+		return c, fmt.Errorf("errors while parsing encodings: %s", strings.Join(e, ", "))
+	}
+
+	return c, nil
+}
+
+var errEmptyEncoding = errors.New("empty content-coding")
+
+// parseCoding parses a single coding (content-coding with an optional qvalue),
+// as might appear in an Accept-Encoding header. It attempts to forgive minor
+// formatting errors.
+func parseCoding(s string) (coding string, qvalue float64, err error) {
+	// Avoid splitting if we can...
+	if len(s) == 0 {
+		return "", 0, errEmptyEncoding
+	}
+	if !strings.ContainsRune(s, ';') {
+		coding = strings.ToLower(strings.TrimSpace(s))
+		if coding == "" {
+			err = errEmptyEncoding
+		}
+		return coding, DefaultQValue, err
+	}
+	for n, part := range strings.Split(s, ";") {
+		part = strings.TrimSpace(part)
+		qvalue = DefaultQValue
+
+		if n == 0 {
+			coding = strings.ToLower(part)
+		} else if strings.HasPrefix(part, "q=") {
+			qvalue, err = strconv.ParseFloat(strings.TrimPrefix(part, "q="), 64)
+
+			if qvalue < 0.0 {
+				qvalue = 0.0
+			} else if qvalue > 1.0 {
+				qvalue = 1.0
+			}
+		}
+	}
+
+	if coding == "" {
+		err = errEmptyEncoding
+	}
+
+	return
+}
+
+// Don't compress any audio/video types.
+var excludePrefixDefault = []string{"video/", "audio/", "image/jp"}
+
+// Skip a bunch of compressed types that contains this string.
+// Curated by supposedly still active formats on https://en.wikipedia.org/wiki/List_of_archive_formats
+var excludeContainsDefault = []string{"compress", "zip", "snappy", "lzma", "xz", "zstd", "brotli", "stuffit"}
+
+// DefaultContentTypeFilter excludes common compressed audio, video and archive formats.
+func DefaultContentTypeFilter(ct string) bool {
+	ct = strings.TrimSpace(strings.ToLower(ct))
+	if ct == "" {
+		return true
+	}
+	for _, s := range excludeContainsDefault {
+		if strings.Contains(ct, s) {
+			return false
+		}
+	}
+
+	for _, prefix := range excludePrefixDefault {
+		if strings.HasPrefix(ct, prefix) {
+			return false
+		}
+	}
+	return true
+}
+
+// CompressAllContentTypeFilter will compress all mime types.
+func CompressAllContentTypeFilter(ct string) bool {
+	return true
+}
+
+const intSize = 32 << (^uint(0) >> 63)
+
+// atoi is equivalent to ParseInt(s, 10, 0), converted to type int.
+func atoi(s string) (int, bool) {
+	if len(s) == 0 {
+		return 0, false
+	}
+	sLen := len(s)
+	if intSize == 32 && (0 < sLen && sLen < 10) ||
+		intSize == 64 && (0 < sLen && sLen < 19) {
+		// Fast path for small integers that fit int type.
+		s0 := s
+		if s[0] == '-' || s[0] == '+' {
+			s = s[1:]
+			if len(s) < 1 {
+				return 0, false
+			}
+		}
+
+		n := 0
+		for _, ch := range []byte(s) {
+			ch -= '0'
+			if ch > 9 {
+				return 0, false
+			}
+			n = n*10 + int(ch)
+		}
+		if s0[0] == '-' {
+			n = -n
+		}
+		return n, true
+	}
+
+	// Slow path for invalid, big, or underscored integers.
+	i64, err := strconv.ParseInt(s, 10, 0)
+	return int(i64), err == nil
+}
+
+// newNoGzipResponseWriter will return a response writer that
+// cleans up compression artifacts.
+// Depending on whether http.Hijacker is supported the returned will as well.
+func newNoGzipResponseWriter(w http.ResponseWriter) http.ResponseWriter {
+	n := &NoGzipResponseWriter{ResponseWriter: w}
+	if hj, ok := w.(http.Hijacker); ok {
+		x := struct {
+			http.ResponseWriter
+			http.Hijacker
+			http.Flusher
+		}{
+			ResponseWriter: n,
+			Hijacker:       hj,
+			Flusher:        n,
+		}
+		return x
+	}
+
+	return n
+}
+
+// NoGzipResponseWriter filters out HeaderNoCompression.
+type NoGzipResponseWriter struct {
+	http.ResponseWriter
+	hdrCleaned bool
+}
+
+func (n *NoGzipResponseWriter) CloseNotify() <-chan bool {
+	if cn, ok := n.ResponseWriter.(http.CloseNotifier); ok {
+		return cn.CloseNotify()
+	}
+	return nil
+}
+
+func (n *NoGzipResponseWriter) Flush() {
+	if !n.hdrCleaned {
+		n.ResponseWriter.Header().Del(HeaderNoCompression)
+		n.hdrCleaned = true
+	}
+	if f, ok := n.ResponseWriter.(http.Flusher); ok {
+		f.Flush()
+	}
+}
+
+func (n *NoGzipResponseWriter) Header() http.Header {
+	return n.ResponseWriter.Header()
+}
+
+func (n *NoGzipResponseWriter) Write(bytes []byte) (int, error) {
+	if !n.hdrCleaned {
+		n.ResponseWriter.Header().Del(HeaderNoCompression)
+		n.hdrCleaned = true
+	}
+	return n.ResponseWriter.Write(bytes)
+}
+
+func (n *NoGzipResponseWriter) WriteHeader(statusCode int) {
+	if !n.hdrCleaned {
+		n.ResponseWriter.Header().Del(HeaderNoCompression)
+		n.hdrCleaned = true
+	}
+	n.ResponseWriter.WriteHeader(statusCode)
+}
diff --git a/vendor/github.com/klauspost/compress/gzhttp/transport.go b/vendor/github.com/klauspost/compress/gzhttp/transport.go
new file mode 100644
index 00000000000..a199fbc6e85
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/gzhttp/transport.go
@@ -0,0 +1,211 @@
+// Copyright (c) 2021 Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gzhttp
+
+import (
+	"io"
+	"net/http"
+	"strings"
+	"sync"
+
+	"github.com/klauspost/compress/gzip"
+	"github.com/klauspost/compress/zstd"
+)
+
+// Transport will wrap a transport with a custom handler
+// that will request gzip and automatically decompress it.
+// Using this is significantly faster than using the default transport.
+func Transport(parent http.RoundTripper, opts ...transportOption) http.RoundTripper {
+	g := gzRoundtripper{parent: parent, withZstd: true, withGzip: true}
+	for _, o := range opts {
+		o(&g)
+	}
+	var ae []string
+	if g.withZstd {
+		ae = append(ae, "zstd")
+	}
+	if g.withGzip {
+		ae = append(ae, "gzip")
+	}
+	g.acceptEncoding = strings.Join(ae, ",")
+	return &g
+}
+
+type transportOption func(c *gzRoundtripper)
+
+// TransportEnableZstd will send Zstandard as a compression option to the server.
+// Enabled by default, but may be disabled if future problems arise.
+func TransportEnableZstd(b bool) transportOption {
+	return func(c *gzRoundtripper) {
+		c.withZstd = b
+	}
+}
+
+// TransportEnableGzip will send Gzip as a compression option to the server.
+// Enabled by default.
+func TransportEnableGzip(b bool) transportOption {
+	return func(c *gzRoundtripper) {
+		c.withGzip = b
+	}
+}
+
+type gzRoundtripper struct {
+	parent             http.RoundTripper
+	acceptEncoding     string
+	withZstd, withGzip bool
+}
+
+func (g *gzRoundtripper) RoundTrip(req *http.Request) (*http.Response, error) {
+	var requestedComp bool
+	if req.Header.Get("Accept-Encoding") == "" &&
+		req.Header.Get("Range") == "" &&
+		req.Method != "HEAD" {
+		// Request gzip only, not deflate. Deflate is ambiguous and
+		// not as universally supported anyway.
+		// See: https://zlib.net/zlib_faq.html#faq39
+		//
+		// Note that we don't request this for HEAD requests,
+		// due to a bug in nginx:
+		//   https://trac.nginx.org/nginx/ticket/358
+		//   https://golang.org/issue/5522
+		//
+		// We don't request gzip if the request is for a range, since
+		// auto-decoding a portion of a gzipped document will just fail
+		// anyway. See https://golang.org/issue/8923
+		requestedComp = len(g.acceptEncoding) > 0
+		req.Header.Set("Accept-Encoding", g.acceptEncoding)
+	}
+
+	resp, err := g.parent.RoundTrip(req)
+	if err != nil || !requestedComp {
+		return resp, err
+	}
+
+	// Decompress
+	if g.withGzip && asciiEqualFold(resp.Header.Get("Content-Encoding"), "gzip") {
+		resp.Body = &gzipReader{body: resp.Body}
+		resp.Header.Del("Content-Encoding")
+		resp.Header.Del("Content-Length")
+		resp.ContentLength = -1
+		resp.Uncompressed = true
+	}
+	if g.withZstd && asciiEqualFold(resp.Header.Get("Content-Encoding"), "zstd") {
+		resp.Body = &zstdReader{body: resp.Body}
+		resp.Header.Del("Content-Encoding")
+		resp.Header.Del("Content-Length")
+		resp.ContentLength = -1
+		resp.Uncompressed = true
+	}
+
+	return resp, nil
+}
+
+var gzReaderPool sync.Pool
+
+// gzipReader wraps a response body so it can lazily
+// call gzip.NewReader on the first call to Read
+type gzipReader struct {
+	body io.ReadCloser // underlying HTTP/1 response body framing
+	zr   *gzip.Reader  // lazily-initialized gzip reader
+	zerr error         // any error from gzip.NewReader; sticky
+}
+
+func (gz *gzipReader) Read(p []byte) (n int, err error) {
+	if gz.zr == nil {
+		if gz.zerr == nil {
+			zr, ok := gzReaderPool.Get().(*gzip.Reader)
+			if ok {
+				gz.zr, gz.zerr = zr, zr.Reset(gz.body)
+			} else {
+				gz.zr, gz.zerr = gzip.NewReader(gz.body)
+			}
+		}
+		if gz.zerr != nil {
+			return 0, gz.zerr
+		}
+	}
+
+	return gz.zr.Read(p)
+}
+
+func (gz *gzipReader) Close() error {
+	if gz.zr != nil {
+		gzReaderPool.Put(gz.zr)
+		gz.zr = nil
+	}
+	return gz.body.Close()
+}
+
+// asciiEqualFold is strings.EqualFold, ASCII only. It reports whether s and t
+// are equal, ASCII-case-insensitively.
+func asciiEqualFold(s, t string) bool {
+	if len(s) != len(t) {
+		return false
+	}
+	for i := 0; i < len(s); i++ {
+		if lower(s[i]) != lower(t[i]) {
+			return false
+		}
+	}
+	return true
+}
+
+// lower returns the ASCII lowercase version of b.
+func lower(b byte) byte {
+	if 'A' <= b && b <= 'Z' {
+		return b + ('a' - 'A')
+	}
+	return b
+}
+
+// zstdReaderPool pools zstd decoders.
+var zstdReaderPool sync.Pool
+
+// zstdReader wraps a response body so it can lazily
+// call gzip.NewReader on the first call to Read
+type zstdReader struct {
+	body io.ReadCloser // underlying HTTP/1 response body framing
+	zr   *zstd.Decoder // lazily-initialized gzip reader
+	zerr error         // any error from zstd.NewReader; sticky
+}
+
+func (zr *zstdReader) Read(p []byte) (n int, err error) {
+	if zr.zerr != nil {
+		return 0, zr.zerr
+	}
+	if zr.zr == nil {
+		if zr.zerr == nil {
+			reader, ok := zstdReaderPool.Get().(*zstd.Decoder)
+			if ok {
+				zr.zerr = reader.Reset(zr.body)
+				zr.zr = reader
+			} else {
+				zr.zr, zr.zerr = zstd.NewReader(zr.body, zstd.WithDecoderLowmem(true), zstd.WithDecoderMaxWindow(32<<20), zstd.WithDecoderConcurrency(1))
+			}
+		}
+		if zr.zerr != nil {
+			return 0, zr.zerr
+		}
+	}
+	n, err = zr.zr.Read(p)
+	if err != nil {
+		// Usually this will be io.EOF,
+		// stash the decoder and keep the error.
+		zr.zr.Reset(nil)
+		zstdReaderPool.Put(zr.zr)
+		zr.zr = nil
+		zr.zerr = err
+	}
+	return
+}
+
+func (zr *zstdReader) Close() error {
+	if zr.zr != nil {
+		zr.zr.Reset(nil)
+		zstdReaderPool.Put(zr.zr)
+		zr.zr = nil
+	}
+	return zr.body.Close()
+}
diff --git a/vendor/github.com/klauspost/compress/gzhttp/writer/gzkp/gzkp.go b/vendor/github.com/klauspost/compress/gzhttp/writer/gzkp/gzkp.go
new file mode 100644
index 00000000000..e31c46c4cf1
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/gzhttp/writer/gzkp/gzkp.go
@@ -0,0 +1,79 @@
+// package gzkp provides gzip compression through github.com/klauspost/compress/gzip.
+
+package gzkp
+
+import (
+	"io"
+	"sync"
+
+	"github.com/klauspost/compress/gzhttp/writer"
+	"github.com/klauspost/compress/gzip"
+)
+
+// gzipWriterPools stores a sync.Pool for each compression level for reuse of
+// gzip.Writers. Use poolIndex to covert a compression level to an index into
+// gzipWriterPools.
+var gzipWriterPools [gzip.BestCompression - gzip.StatelessCompression + 1]*sync.Pool
+
+func init() {
+	for i := gzip.StatelessCompression; i <= gzip.BestCompression; i++ {
+		addLevelPool(i)
+	}
+}
+
+// poolIndex maps a compression level to its index into gzipWriterPools. It
+// assumes that level is a valid gzip compression level.
+func poolIndex(level int) int {
+	return level - gzip.StatelessCompression
+}
+
+func addLevelPool(level int) {
+	gzipWriterPools[poolIndex(level)] = &sync.Pool{
+		New: func() interface{} {
+			// NewWriterLevel only returns error on a bad level, we are guaranteeing
+			// that this will be a valid level so it is okay to ignore the returned
+			// error.
+			w, _ := gzip.NewWriterLevel(nil, level)
+			return w
+		},
+	}
+}
+
+type pooledWriter struct {
+	*gzip.Writer
+	index int
+}
+
+func (pw *pooledWriter) Close() error {
+	err := pw.Writer.Close()
+	gzipWriterPools[pw.index].Put(pw.Writer)
+	pw.Writer = nil
+	return err
+}
+
+func NewWriter(w io.Writer, level int) writer.GzipWriter {
+	index := poolIndex(level)
+	gzw := gzipWriterPools[index].Get().(*gzip.Writer)
+	gzw.Reset(w)
+	return &pooledWriter{
+		Writer: gzw,
+		index:  index,
+	}
+}
+
+// SetHeader will override the gzip header on pw.
+func (pw *pooledWriter) SetHeader(h writer.Header) {
+	pw.Name = h.Name
+	pw.Extra = h.Extra
+	pw.Comment = h.Comment
+	pw.ModTime = h.ModTime
+	pw.OS = h.OS
+}
+
+func Levels() (min, max int) {
+	return gzip.StatelessCompression, gzip.BestCompression
+}
+
+func ImplementationInfo() string {
+	return "klauspost/compress/gzip"
+}
diff --git a/vendor/github.com/klauspost/compress/gzhttp/writer/interface.go b/vendor/github.com/klauspost/compress/gzhttp/writer/interface.go
new file mode 100644
index 00000000000..1ad16806f27
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/gzhttp/writer/interface.go
@@ -0,0 +1,41 @@
+package writer
+
+import (
+	"io"
+	"time"
+)
+
+// GzipWriter implements the functions needed for compressing content.
+type GzipWriter interface {
+	Write(p []byte) (int, error)
+	Close() error
+	Flush() error
+}
+
+// GzipWriterExt implements the functions needed for compressing content
+// and optional extensions.
+type GzipWriterExt interface {
+	GzipWriter
+
+	// SetHeader will populate header fields with non-nil values in h.
+	SetHeader(h Header)
+}
+
+// Header is a gzip header.
+type Header struct {
+	Comment string    // comment
+	Extra   []byte    // "extra data"
+	ModTime time.Time // modification time
+	Name    string    // file name
+	OS      byte      // operating system type
+}
+
+// GzipWriterFactory contains the information needed for custom gzip implementations.
+type GzipWriterFactory struct {
+	// Must return the minimum and maximum supported level.
+	Levels func() (min, max int)
+
+	// New must return a new GzipWriter.
+	// level will always be within the return limits above.
+	New func(writer io.Writer, level int) GzipWriter
+}
diff --git a/vendor/github.com/klauspost/compress/gzip/gunzip.go b/vendor/github.com/klauspost/compress/gzip/gunzip.go
new file mode 100644
index 00000000000..66fe5ddf72c
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/gzip/gunzip.go
@@ -0,0 +1,349 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package gzip implements reading and writing of gzip format compressed files,
+// as specified in RFC 1952.
+package gzip
+
+import (
+	"bufio"
+	"compress/gzip"
+	"encoding/binary"
+	"hash/crc32"
+	"io"
+	"time"
+
+	"github.com/klauspost/compress/flate"
+)
+
+const (
+	gzipID1     = 0x1f
+	gzipID2     = 0x8b
+	gzipDeflate = 8
+	flagText    = 1 << 0
+	flagHdrCrc  = 1 << 1
+	flagExtra   = 1 << 2
+	flagName    = 1 << 3
+	flagComment = 1 << 4
+)
+
+var (
+	// ErrChecksum is returned when reading GZIP data that has an invalid checksum.
+	ErrChecksum = gzip.ErrChecksum
+	// ErrHeader is returned when reading GZIP data that has an invalid header.
+	ErrHeader = gzip.ErrHeader
+)
+
+var le = binary.LittleEndian
+
+// noEOF converts io.EOF to io.ErrUnexpectedEOF.
+func noEOF(err error) error {
+	if err == io.EOF {
+		return io.ErrUnexpectedEOF
+	}
+	return err
+}
+
+// The gzip file stores a header giving metadata about the compressed file.
+// That header is exposed as the fields of the Writer and Reader structs.
+//
+// Strings must be UTF-8 encoded and may only contain Unicode code points
+// U+0001 through U+00FF, due to limitations of the GZIP file format.
+type Header struct {
+	Comment string    // comment
+	Extra   []byte    // "extra data"
+	ModTime time.Time // modification time
+	Name    string    // file name
+	OS      byte      // operating system type
+}
+
+// A Reader is an io.Reader that can be read to retrieve
+// uncompressed data from a gzip-format compressed file.
+//
+// In general, a gzip file can be a concatenation of gzip files,
+// each with its own header. Reads from the Reader
+// return the concatenation of the uncompressed data of each.
+// Only the first header is recorded in the Reader fields.
+//
+// Gzip files store a length and checksum of the uncompressed data.
+// The Reader will return a ErrChecksum when Read
+// reaches the end of the uncompressed data if it does not
+// have the expected length or checksum. Clients should treat data
+// returned by Read as tentative until they receive the io.EOF
+// marking the end of the data.
+type Reader struct {
+	Header       // valid after NewReader or Reader.Reset
+	r            flate.Reader
+	br           *bufio.Reader
+	decompressor io.ReadCloser
+	digest       uint32 // CRC-32, IEEE polynomial (section 8)
+	size         uint32 // Uncompressed size (section 2.3.1)
+	buf          [512]byte
+	err          error
+	multistream  bool
+}
+
+// NewReader creates a new Reader reading the given reader.
+// If r does not also implement io.ByteReader,
+// the decompressor may read more data than necessary from r.
+//
+// It is the caller's responsibility to call Close on the Reader when done.
+//
+// The Reader.Header fields will be valid in the Reader returned.
+func NewReader(r io.Reader) (*Reader, error) {
+	z := new(Reader)
+	if err := z.Reset(r); err != nil {
+		return nil, err
+	}
+	return z, nil
+}
+
+// Reset discards the Reader z's state and makes it equivalent to the
+// result of its original state from NewReader, but reading from r instead.
+// This permits reusing a Reader rather than allocating a new one.
+func (z *Reader) Reset(r io.Reader) error {
+	*z = Reader{
+		decompressor: z.decompressor,
+		multistream:  true,
+	}
+	if rr, ok := r.(flate.Reader); ok {
+		z.r = rr
+	} else {
+		// Reuse if we can.
+		if z.br != nil {
+			z.br.Reset(r)
+		} else {
+			z.br = bufio.NewReader(r)
+		}
+		z.r = z.br
+	}
+	z.Header, z.err = z.readHeader()
+	return z.err
+}
+
+// Multistream controls whether the reader supports multistream files.
+//
+// If enabled (the default), the Reader expects the input to be a sequence
+// of individually gzipped data streams, each with its own header and
+// trailer, ending at EOF. The effect is that the concatenation of a sequence
+// of gzipped files is treated as equivalent to the gzip of the concatenation
+// of the sequence. This is standard behavior for gzip readers.
+//
+// Calling Multistream(false) disables this behavior; disabling the behavior
+// can be useful when reading file formats that distinguish individual gzip
+// data streams or mix gzip data streams with other data streams.
+// In this mode, when the Reader reaches the end of the data stream,
+// Read returns io.EOF. If the underlying reader implements io.ByteReader,
+// it will be left positioned just after the gzip stream.
+// To start the next stream, call z.Reset(r) followed by z.Multistream(false).
+// If there is no next stream, z.Reset(r) will return io.EOF.
+func (z *Reader) Multistream(ok bool) {
+	z.multistream = ok
+}
+
+// readString reads a NUL-terminated string from z.r.
+// It treats the bytes read as being encoded as ISO 8859-1 (Latin-1) and
+// will output a string encoded using UTF-8.
+// This method always updates z.digest with the data read.
+func (z *Reader) readString() (string, error) {
+	var err error
+	needConv := false
+	for i := 0; ; i++ {
+		if i >= len(z.buf) {
+			return "", ErrHeader
+		}
+		z.buf[i], err = z.r.ReadByte()
+		if err != nil {
+			return "", err
+		}
+		if z.buf[i] > 0x7f {
+			needConv = true
+		}
+		if z.buf[i] == 0 {
+			// Digest covers the NUL terminator.
+			z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:i+1])
+
+			// Strings are ISO 8859-1, Latin-1 (RFC 1952, section 2.3.1).
+			if needConv {
+				s := make([]rune, 0, i)
+				for _, v := range z.buf[:i] {
+					s = append(s, rune(v))
+				}
+				return string(s), nil
+			}
+			return string(z.buf[:i]), nil
+		}
+	}
+}
+
+// readHeader reads the GZIP header according to section 2.3.1.
+// This method does not set z.err.
+func (z *Reader) readHeader() (hdr Header, err error) {
+	if _, err = io.ReadFull(z.r, z.buf[:10]); err != nil {
+		// RFC 1952, section 2.2, says the following:
+		//	A gzip file consists of a series of "members" (compressed data sets).
+		//
+		// Other than this, the specification does not clarify whether a
+		// "series" is defined as "one or more" or "zero or more". To err on the
+		// side of caution, Go interprets this to mean "zero or more".
+		// Thus, it is okay to return io.EOF here.
+		return hdr, err
+	}
+	if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate {
+		return hdr, ErrHeader
+	}
+	flg := z.buf[3]
+	hdr.ModTime = time.Unix(int64(le.Uint32(z.buf[4:8])), 0)
+	// z.buf[8] is XFL and is currently ignored.
+	hdr.OS = z.buf[9]
+	z.digest = crc32.ChecksumIEEE(z.buf[:10])
+
+	if flg&flagExtra != 0 {
+		if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil {
+			return hdr, noEOF(err)
+		}
+		z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:2])
+		data := make([]byte, le.Uint16(z.buf[:2]))
+		if _, err = io.ReadFull(z.r, data); err != nil {
+			return hdr, noEOF(err)
+		}
+		z.digest = crc32.Update(z.digest, crc32.IEEETable, data)
+		hdr.Extra = data
+	}
+
+	var s string
+	if flg&flagName != 0 {
+		if s, err = z.readString(); err != nil {
+			return hdr, err
+		}
+		hdr.Name = s
+	}
+
+	if flg&flagComment != 0 {
+		if s, err = z.readString(); err != nil {
+			return hdr, err
+		}
+		hdr.Comment = s
+	}
+
+	if flg&flagHdrCrc != 0 {
+		if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil {
+			return hdr, noEOF(err)
+		}
+		digest := le.Uint16(z.buf[:2])
+		if digest != uint16(z.digest) {
+			return hdr, ErrHeader
+		}
+	}
+
+	z.digest = 0
+	if z.decompressor == nil {
+		z.decompressor = flate.NewReader(z.r)
+	} else {
+		z.decompressor.(flate.Resetter).Reset(z.r, nil)
+	}
+	return hdr, nil
+}
+
+// Read implements io.Reader, reading uncompressed bytes from its underlying Reader.
+func (z *Reader) Read(p []byte) (n int, err error) {
+	if z.err != nil {
+		return 0, z.err
+	}
+
+	for n == 0 {
+		n, z.err = z.decompressor.Read(p)
+		z.digest = crc32.Update(z.digest, crc32.IEEETable, p[:n])
+		z.size += uint32(n)
+		if z.err != io.EOF {
+			// In the normal case we return here.
+			return n, z.err
+		}
+
+		// Finished file; check checksum and size.
+		if _, err := io.ReadFull(z.r, z.buf[:8]); err != nil {
+			z.err = noEOF(err)
+			return n, z.err
+		}
+		digest := le.Uint32(z.buf[:4])
+		size := le.Uint32(z.buf[4:8])
+		if digest != z.digest || size != z.size {
+			z.err = ErrChecksum
+			return n, z.err
+		}
+		z.digest, z.size = 0, 0
+
+		// File is ok; check if there is another.
+		if !z.multistream {
+			return n, io.EOF
+		}
+		z.err = nil // Remove io.EOF
+
+		if _, z.err = z.readHeader(); z.err != nil {
+			return n, z.err
+		}
+	}
+
+	return n, nil
+}
+
+// Support the io.WriteTo interface for io.Copy and friends.
+func (z *Reader) WriteTo(w io.Writer) (int64, error) {
+	total := int64(0)
+	crcWriter := crc32.NewIEEE()
+	for {
+		if z.err != nil {
+			if z.err == io.EOF {
+				return total, nil
+			}
+			return total, z.err
+		}
+
+		// We write both to output and digest.
+		mw := io.MultiWriter(w, crcWriter)
+		n, err := z.decompressor.(io.WriterTo).WriteTo(mw)
+		total += n
+		z.size += uint32(n)
+		if err != nil {
+			z.err = err
+			return total, z.err
+		}
+
+		// Finished file; check checksum + size.
+		if _, err := io.ReadFull(z.r, z.buf[0:8]); err != nil {
+			if err == io.EOF {
+				err = io.ErrUnexpectedEOF
+			}
+			z.err = err
+			return total, err
+		}
+		z.digest = crcWriter.Sum32()
+		digest := le.Uint32(z.buf[:4])
+		size := le.Uint32(z.buf[4:8])
+		if digest != z.digest || size != z.size {
+			z.err = ErrChecksum
+			return total, z.err
+		}
+		z.digest, z.size = 0, 0
+
+		// File is ok; check if there is another.
+		if !z.multistream {
+			return total, nil
+		}
+		crcWriter.Reset()
+		z.err = nil // Remove io.EOF
+
+		if _, z.err = z.readHeader(); z.err != nil {
+			if z.err == io.EOF {
+				return total, nil
+			}
+			return total, z.err
+		}
+	}
+}
+
+// Close closes the Reader. It does not close the underlying io.Reader.
+// In order for the GZIP checksum to be verified, the reader must be
+// fully consumed until the io.EOF.
+func (z *Reader) Close() error { return z.decompressor.Close() }
diff --git a/vendor/github.com/klauspost/compress/gzip/gzip.go b/vendor/github.com/klauspost/compress/gzip/gzip.go
new file mode 100644
index 00000000000..26203851bdf
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/gzip/gzip.go
@@ -0,0 +1,269 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gzip
+
+import (
+	"errors"
+	"fmt"
+	"hash/crc32"
+	"io"
+
+	"github.com/klauspost/compress/flate"
+)
+
+// These constants are copied from the flate package, so that code that imports
+// "compress/gzip" does not also have to import "compress/flate".
+const (
+	NoCompression       = flate.NoCompression
+	BestSpeed           = flate.BestSpeed
+	BestCompression     = flate.BestCompression
+	DefaultCompression  = flate.DefaultCompression
+	ConstantCompression = flate.ConstantCompression
+	HuffmanOnly         = flate.HuffmanOnly
+
+	// StatelessCompression will do compression but without maintaining any state
+	// between Write calls.
+	// There will be no memory kept between Write calls,
+	// but compression and speed will be suboptimal.
+	// Because of this, the size of actual Write calls will affect output size.
+	StatelessCompression = -3
+)
+
+// A Writer is an io.WriteCloser.
+// Writes to a Writer are compressed and written to w.
+type Writer struct {
+	Header      // written at first call to Write, Flush, or Close
+	w           io.Writer
+	level       int
+	err         error
+	compressor  *flate.Writer
+	digest      uint32 // CRC-32, IEEE polynomial (section 8)
+	size        uint32 // Uncompressed size (section 2.3.1)
+	wroteHeader bool
+	closed      bool
+	buf         [10]byte
+}
+
+// NewWriter returns a new Writer.
+// Writes to the returned writer are compressed and written to w.
+//
+// It is the caller's responsibility to call Close on the WriteCloser when done.
+// Writes may be buffered and not flushed until Close.
+//
+// Callers that wish to set the fields in Writer.Header must do so before
+// the first call to Write, Flush, or Close.
+func NewWriter(w io.Writer) *Writer {
+	z, _ := NewWriterLevel(w, DefaultCompression)
+	return z
+}
+
+// NewWriterLevel is like NewWriter but specifies the compression level instead
+// of assuming DefaultCompression.
+//
+// The compression level can be DefaultCompression, NoCompression, or any
+// integer value between BestSpeed and BestCompression inclusive. The error
+// returned will be nil if the level is valid.
+func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
+	if level < StatelessCompression || level > BestCompression {
+		return nil, fmt.Errorf("gzip: invalid compression level: %d", level)
+	}
+	z := new(Writer)
+	z.init(w, level)
+	return z, nil
+}
+
+func (z *Writer) init(w io.Writer, level int) {
+	compressor := z.compressor
+	if level != StatelessCompression {
+		if compressor != nil {
+			compressor.Reset(w)
+		}
+	}
+
+	*z = Writer{
+		Header: Header{
+			OS: 255, // unknown
+		},
+		w:          w,
+		level:      level,
+		compressor: compressor,
+	}
+}
+
+// Reset discards the Writer z's state and makes it equivalent to the
+// result of its original state from NewWriter or NewWriterLevel, but
+// writing to w instead. This permits reusing a Writer rather than
+// allocating a new one.
+func (z *Writer) Reset(w io.Writer) {
+	z.init(w, z.level)
+}
+
+// writeBytes writes a length-prefixed byte slice to z.w.
+func (z *Writer) writeBytes(b []byte) error {
+	if len(b) > 0xffff {
+		return errors.New("gzip.Write: Extra data is too large")
+	}
+	le.PutUint16(z.buf[:2], uint16(len(b)))
+	_, err := z.w.Write(z.buf[:2])
+	if err != nil {
+		return err
+	}
+	_, err = z.w.Write(b)
+	return err
+}
+
+// writeString writes a UTF-8 string s in GZIP's format to z.w.
+// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
+func (z *Writer) writeString(s string) (err error) {
+	// GZIP stores Latin-1 strings; error if non-Latin-1; convert if non-ASCII.
+	needconv := false
+	for _, v := range s {
+		if v == 0 || v > 0xff {
+			return errors.New("gzip.Write: non-Latin-1 header string")
+		}
+		if v > 0x7f {
+			needconv = true
+		}
+	}
+	if needconv {
+		b := make([]byte, 0, len(s))
+		for _, v := range s {
+			b = append(b, byte(v))
+		}
+		_, err = z.w.Write(b)
+	} else {
+		_, err = io.WriteString(z.w, s)
+	}
+	if err != nil {
+		return err
+	}
+	// GZIP strings are NUL-terminated.
+	z.buf[0] = 0
+	_, err = z.w.Write(z.buf[:1])
+	return err
+}
+
+// Write writes a compressed form of p to the underlying io.Writer. The
+// compressed bytes are not necessarily flushed until the Writer is closed.
+func (z *Writer) Write(p []byte) (int, error) {
+	if z.err != nil {
+		return 0, z.err
+	}
+	var n int
+	// Write the GZIP header lazily.
+	if !z.wroteHeader {
+		z.wroteHeader = true
+		z.buf[0] = gzipID1
+		z.buf[1] = gzipID2
+		z.buf[2] = gzipDeflate
+		z.buf[3] = 0
+		if z.Extra != nil {
+			z.buf[3] |= 0x04
+		}
+		if z.Name != "" {
+			z.buf[3] |= 0x08
+		}
+		if z.Comment != "" {
+			z.buf[3] |= 0x10
+		}
+		le.PutUint32(z.buf[4:8], uint32(z.ModTime.Unix()))
+		if z.level == BestCompression {
+			z.buf[8] = 2
+		} else if z.level == BestSpeed {
+			z.buf[8] = 4
+		} else {
+			z.buf[8] = 0
+		}
+		z.buf[9] = z.OS
+		n, z.err = z.w.Write(z.buf[:10])
+		if z.err != nil {
+			return n, z.err
+		}
+		if z.Extra != nil {
+			z.err = z.writeBytes(z.Extra)
+			if z.err != nil {
+				return n, z.err
+			}
+		}
+		if z.Name != "" {
+			z.err = z.writeString(z.Name)
+			if z.err != nil {
+				return n, z.err
+			}
+		}
+		if z.Comment != "" {
+			z.err = z.writeString(z.Comment)
+			if z.err != nil {
+				return n, z.err
+			}
+		}
+
+		if z.compressor == nil && z.level != StatelessCompression {
+			z.compressor, _ = flate.NewWriter(z.w, z.level)
+		}
+	}
+	z.size += uint32(len(p))
+	z.digest = crc32.Update(z.digest, crc32.IEEETable, p)
+	if z.level == StatelessCompression {
+		return len(p), flate.StatelessDeflate(z.w, p, false, nil)
+	}
+	n, z.err = z.compressor.Write(p)
+	return n, z.err
+}
+
+// Flush flushes any pending compressed data to the underlying writer.
+//
+// It is useful mainly in compressed network protocols, to ensure that
+// a remote reader has enough data to reconstruct a packet. Flush does
+// not return until the data has been written. If the underlying
+// writer returns an error, Flush returns that error.
+//
+// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
+func (z *Writer) Flush() error {
+	if z.err != nil {
+		return z.err
+	}
+	if z.closed || z.level == StatelessCompression {
+		return nil
+	}
+	if !z.wroteHeader {
+		z.Write(nil)
+		if z.err != nil {
+			return z.err
+		}
+	}
+	z.err = z.compressor.Flush()
+	return z.err
+}
+
+// Close closes the Writer, flushing any unwritten data to the underlying
+// io.Writer, but does not close the underlying io.Writer.
+func (z *Writer) Close() error {
+	if z.err != nil {
+		return z.err
+	}
+	if z.closed {
+		return nil
+	}
+	z.closed = true
+	if !z.wroteHeader {
+		z.Write(nil)
+		if z.err != nil {
+			return z.err
+		}
+	}
+	if z.level == StatelessCompression {
+		z.err = flate.StatelessDeflate(z.w, nil, true, nil)
+	} else {
+		z.err = z.compressor.Close()
+	}
+	if z.err != nil {
+		return z.err
+	}
+	le.PutUint32(z.buf[:4], z.digest)
+	le.PutUint32(z.buf[4:8], z.size)
+	_, z.err = z.w.Write(z.buf[:8])
+	return z.err
+}
diff --git a/vendor/modules.txt b/vendor/modules.txt
index 18f3aa7ab56..d3ee677681d 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -90,9 +90,6 @@ github.com/AzureAD/microsoft-authentication-library-for-go/apps/public
 # github.com/Masterminds/squirrel v1.5.4
 ## explicit; go 1.14
 github.com/Masterminds/squirrel
-# github.com/NYTimes/gziphandler v1.1.1
-## explicit; go 1.11
-github.com/NYTimes/gziphandler
 # github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751
 ## explicit
 github.com/alecthomas/template
@@ -532,7 +529,12 @@ github.com/julienschmidt/httprouter
 # github.com/klauspost/compress v1.16.3
 ## explicit; go 1.18
 github.com/klauspost/compress
+github.com/klauspost/compress/flate
 github.com/klauspost/compress/fse
+github.com/klauspost/compress/gzhttp
+github.com/klauspost/compress/gzhttp/writer
+github.com/klauspost/compress/gzhttp/writer/gzkp
+github.com/klauspost/compress/gzip
 github.com/klauspost/compress/huff0
 github.com/klauspost/compress/internal/cpuinfo
 github.com/klauspost/compress/internal/snapref