Skip to content

Commit 0845880

Browse files
subtle-byte authored and gopherbot committed
compress/flate, archive/zip: reduce memory allocations
The existing implementation allocates a new 4KB buffer each time it opens a flate-encoded file in a zip archive. This commit allows the flate reader to reuse the buffer when Reset is called instead of allocating a new one. The difference is noticeable when a zip archive contains a huge number of files: e.g., if a zip archive has 50_000 files, a 4KB buffer is allocated for each file, which adds up to 200MB of memory allocations. If the files are read sequentially, only one buffer is needed.

Fixes #59774

Change-Id: Ib16336b101ba58e8f0f30a45dc5fd4eeebc801a1
GitHub-Last-Rev: f3f395b
GitHub-Pull-Request: #59775
Reviewed-on: https://go-review.googlesource.com/c/go/+/487675
Run-TryBot: Ian Lance Taylor <[email protected]>
Auto-Submit: Ian Lance Taylor <[email protected]>
Run-TryBot: Ian Lance Taylor <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
Reviewed-by: Ian Lance Taylor <[email protected]>
Reviewed-by: Matthew Dempsky <[email protected]>
1 parent a965318 commit 0845880

File tree

3 files changed

+58
-7
lines changed

3 files changed

+58
-7
lines changed

src/archive/zip/zip_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ func TestOver65kFiles(t *testing.T) {
3030
for i := 0; i < nFiles; i++ {
3131
_, err := w.CreateHeader(&FileHeader{
3232
Name: fmt.Sprintf("%d.dat", i),
33-
Method: Store, // avoid Issue 6136 and Issue 6138
33+
Method: Store, // Deflate is too slow when it is compiled with -race flag
3434
})
3535
if err != nil {
3636
t.Fatalf("creating file %d: %v", i, err)

src/compress/flate/inflate.go

+17-6
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@ type Reader interface {
267267
type decompressor struct {
268268
// Input source.
269269
r Reader
270+
rBuf *bufio.Reader // created if provided io.Reader does not implement io.ByteReader
270271
roffset int64
271272

272273
// Input bits, in top of b.
@@ -746,11 +747,20 @@ func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
746747
}
747748
}
748749

749-
func makeReader(r io.Reader) Reader {
750+
func (f *decompressor) makeReader(r io.Reader) {
750751
if rr, ok := r.(Reader); ok {
751-
return rr
752+
f.rBuf = nil
753+
f.r = rr
754+
return
755+
}
756+
// Reuse rBuf if possible. Invariant: rBuf is always created (and owned) by decompressor.
757+
if f.rBuf != nil {
758+
f.rBuf.Reset(r)
759+
} else {
760+
// bufio.NewReader will not return r, as r does not implement flate.Reader, so it is not bufio.Reader.
761+
f.rBuf = bufio.NewReader(r)
752762
}
753-
return bufio.NewReader(r)
763+
f.r = f.rBuf
754764
}
755765

756766
func fixedHuffmanDecoderInit() {
@@ -775,12 +785,13 @@ func fixedHuffmanDecoderInit() {
775785

776786
func (f *decompressor) Reset(r io.Reader, dict []byte) error {
777787
*f = decompressor{
778-
r: makeReader(r),
788+
rBuf: f.rBuf,
779789
bits: f.bits,
780790
codebits: f.codebits,
781791
dict: f.dict,
782792
step: (*decompressor).nextBlock,
783793
}
794+
f.makeReader(r)
784795
f.dict.init(maxMatchOffset, dict)
785796
return nil
786797
}
@@ -797,7 +808,7 @@ func NewReader(r io.Reader) io.ReadCloser {
797808
fixedHuffmanDecoderInit()
798809

799810
var f decompressor
800-
f.r = makeReader(r)
811+
f.makeReader(r)
801812
f.bits = new([maxNumLit + maxNumDist]int)
802813
f.codebits = new([numCodes]int)
803814
f.step = (*decompressor).nextBlock
@@ -816,7 +827,7 @@ func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
816827
fixedHuffmanDecoderInit()
817828

818829
var f decompressor
819-
f.r = makeReader(r)
830+
f.makeReader(r)
820831
f.bits = new([maxNumLit + maxNumDist]int)
821832
f.codebits = new([numCodes]int)
822833
f.step = (*decompressor).nextBlock

src/compress/flate/inflate_test.go

+40
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
package flate
66

77
import (
8+
"bufio"
89
"bytes"
910
"io"
1011
"strings"
@@ -95,3 +96,42 @@ func TestResetDict(t *testing.T) {
9596
}
9697
}
9798
}
99+
100+
func TestReaderReusesReaderBuffer(t *testing.T) {
101+
encodedReader := bytes.NewReader([]byte{})
102+
encodedNotByteReader := struct{ io.Reader }{encodedReader}
103+
104+
t.Run("BufferIsReused", func(t *testing.T) {
105+
f := NewReader(encodedNotByteReader).(*decompressor)
106+
bufioR, ok := f.r.(*bufio.Reader)
107+
if !ok {
108+
t.Fatalf("bufio.Reader should be created")
109+
}
110+
f.Reset(encodedNotByteReader, nil)
111+
if bufioR != f.r {
112+
t.Fatalf("bufio.Reader was not reused")
113+
}
114+
})
115+
t.Run("BufferIsNotReusedWhenGotByteReader", func(t *testing.T) {
116+
f := NewReader(encodedNotByteReader).(*decompressor)
117+
if _, ok := f.r.(*bufio.Reader); !ok {
118+
t.Fatalf("bufio.Reader should be created")
119+
}
120+
f.Reset(encodedReader, nil)
121+
if f.r != encodedReader {
122+
t.Fatalf("provided io.ByteReader should be used directly")
123+
}
124+
})
125+
t.Run("BufferIsCreatedAfterByteReader", func(t *testing.T) {
126+
for i, r := range []io.Reader{encodedReader, bufio.NewReader(encodedReader)} {
127+
f := NewReader(r).(*decompressor)
128+
if f.r != r {
129+
t.Fatalf("provided io.ByteReader should be used directly, i=%d", i)
130+
}
131+
f.Reset(encodedNotByteReader, nil)
132+
if _, ok := f.r.(*bufio.Reader); !ok {
133+
t.Fatalf("bufio.Reader should be created, i=%d", i)
134+
}
135+
}
136+
})
137+
}

0 commit comments

Comments
 (0)