Skip to content

Commit d0e8d3a

Browse files
dsnetianlancetaylor
authored andcommitted
compress/gzip: fix Reader to properly check FHCRC
RFC 1952, section 3.2.3 says: >>> If FHCRC is set, a CRC16 for the gzip header is present, immediately before the compressed data. The CRC16 consists of the two least significant bytes of the CRC32 for all bytes of the gzip header up to and not including the CRC16. <<< Thus, instead of computing the CRC only over the first 10 bytes of the header, we compute it over the whole header (minus CRC16). Fixes #15070 Change-Id: I55703fd30b535b12abeb5e3962d4da0a86ed615a Reviewed-on: https://go-review.googlesource.com/21466 Reviewed-by: Brad Fitzpatrick <[email protected]> Run-TryBot: Brad Fitzpatrick <[email protected]> TryBot-Result: Gobot Gobot <[email protected]>
1 parent d093a62 commit d0e8d3a

File tree

3 files changed

+107
-82
lines changed

3 files changed

+107
-82
lines changed

src/compress/gzip/gunzip.go

Lines changed: 56 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ package gzip
99
import (
1010
"bufio"
1111
"compress/flate"
12+
"encoding/binary"
1213
"errors"
1314
"hash/crc32"
1415
"io"
@@ -33,6 +34,16 @@ var (
3334
ErrHeader = errors.New("gzip: invalid header")
3435
)
3536

37+
var le = binary.LittleEndian
38+
39+
// noEOF converts io.EOF to io.ErrUnexpectedEOF.
40+
func noEOF(err error) error {
41+
if err == io.EOF {
42+
return io.ErrUnexpectedEOF
43+
}
44+
return err
45+
}
46+
3647
// The gzip file stores a header giving metadata about the compressed file.
3748
// That header is exposed as the fields of the Writer and Reader structs.
3849
//
@@ -99,7 +110,8 @@ func (z *Reader) Reset(r io.Reader) error {
99110
} else {
100111
z.r = bufio.NewReader(r)
101112
}
102-
return z.readHeader(true)
113+
z.Header, z.err = z.readHeader()
114+
return z.err
103115
}
104116

105117
// Multistream controls whether the reader supports multistream files.
@@ -122,14 +134,13 @@ func (z *Reader) Multistream(ok bool) {
122134
z.multistream = ok
123135
}
124136

125-
// GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950).
126-
func get4(p []byte) uint32 {
127-
return uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
128-
}
129-
137+
// readString reads a NUL-terminated string from z.r.
138+
// It treats the bytes read as being encoded as ISO 8859-1 (Latin-1) and
139+
// will output a string encoded using UTF-8.
140+
// This method always updates z.digest with the data read.
130141
func (z *Reader) readString() (string, error) {
131142
var err error
132-
needconv := false
143+
needConv := false
133144
for i := 0; ; i++ {
134145
if i >= len(z.buf) {
135146
return "", ErrHeader
@@ -139,11 +150,14 @@ func (z *Reader) readString() (string, error) {
139150
return "", err
140151
}
141152
if z.buf[i] > 0x7f {
142-
needconv = true
153+
needConv = true
143154
}
144155
if z.buf[i] == 0 {
145-
// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
146-
if needconv {
156+
// Digest covers the NUL terminator.
157+
z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:i+1])
158+
159+
// Strings are ISO 8859-1, Latin-1 (RFC 1952, section 2.3.1).
160+
if needConv {
147161
s := make([]rune, 0, i)
148162
for _, v := range z.buf[:i] {
149163
s = append(s, rune(v))
@@ -155,84 +169,63 @@ func (z *Reader) readString() (string, error) {
155169
}
156170
}
157171

158-
func (z *Reader) read2() (uint32, error) {
159-
_, err := io.ReadFull(z.r, z.buf[:2])
160-
if err != nil {
161-
if err == io.EOF {
162-
err = io.ErrUnexpectedEOF
163-
}
164-
return 0, err
165-
}
166-
return uint32(z.buf[0]) | uint32(z.buf[1])<<8, nil
167-
}
168-
169-
func (z *Reader) readHeader(save bool) error {
170-
_, err := io.ReadFull(z.r, z.buf[:10])
171-
if err != nil {
172+
// readHeader reads the GZIP header according to section 2.3.1.
173+
// This method does not set z.err.
174+
func (z *Reader) readHeader() (hdr Header, err error) {
175+
if _, err = io.ReadFull(z.r, z.buf[:10]); err != nil {
172176
// RFC 1952, section 2.2, says the following:
173177
// A gzip file consists of a series of "members" (compressed data sets).
174178
//
175179
// Other than this, the specification does not clarify whether a
176180
// "series" is defined as "one or more" or "zero or more". To err on the
177181
// side of caution, Go interprets this to mean "zero or more".
178182
// Thus, it is okay to return io.EOF here.
179-
return err
183+
return hdr, err
180184
}
181185
if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate {
182-
return ErrHeader
186+
return hdr, ErrHeader
183187
}
184188
flg := z.buf[3]
185-
if save {
186-
z.ModTime = time.Unix(int64(get4(z.buf[4:8])), 0)
187-
// z.buf[8] is xfl, ignored
188-
z.OS = z.buf[9]
189-
}
190-
z.digest = crc32.Update(0, crc32.IEEETable, z.buf[:10])
189+
hdr.ModTime = time.Unix(int64(le.Uint32(z.buf[4:8])), 0)
190+
// z.buf[8] is XFL and is currently ignored.
191+
hdr.OS = z.buf[9]
192+
z.digest = crc32.ChecksumIEEE(z.buf[:10])
191193

192194
if flg&flagExtra != 0 {
193-
n, err := z.read2()
194-
if err != nil {
195-
return err
195+
if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil {
196+
return hdr, noEOF(err)
196197
}
197-
data := make([]byte, n)
198+
z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:2])
199+
data := make([]byte, le.Uint16(z.buf[:2]))
198200
if _, err = io.ReadFull(z.r, data); err != nil {
199-
if err == io.EOF {
200-
err = io.ErrUnexpectedEOF
201-
}
202-
return err
203-
}
204-
if save {
205-
z.Extra = data
201+
return hdr, noEOF(err)
206202
}
203+
z.digest = crc32.Update(z.digest, crc32.IEEETable, data)
204+
hdr.Extra = data
207205
}
208206

209207
var s string
210208
if flg&flagName != 0 {
211209
if s, err = z.readString(); err != nil {
212-
return err
213-
}
214-
if save {
215-
z.Name = s
210+
return hdr, err
216211
}
212+
hdr.Name = s
217213
}
218214

219215
if flg&flagComment != 0 {
220216
if s, err = z.readString(); err != nil {
221-
return err
222-
}
223-
if save {
224-
z.Comment = s
217+
return hdr, err
225218
}
219+
hdr.Comment = s
226220
}
227221

228222
if flg&flagHdrCrc != 0 {
229-
n, err := z.read2()
230-
if err != nil {
231-
return err
223+
if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil {
224+
return hdr, noEOF(err)
232225
}
233-
sum := z.digest & 0xFFFF
234-
if n != sum {
235-
return ErrHeader
226+
digest := le.Uint16(z.buf[:2])
227+
if digest != uint16(z.digest) {
228+
return hdr, ErrHeader
236229
}
237230
}
238231

@@ -242,7 +235,7 @@ func (z *Reader) readHeader(save bool) error {
242235
} else {
243236
z.decompressor.(flate.Resetter).Reset(z.r, nil)
244237
}
245-
return nil
238+
return hdr, nil
246239
}
247240

248241
func (z *Reader) Read(p []byte) (n int, err error) {
@@ -260,13 +253,11 @@ func (z *Reader) Read(p []byte) (n int, err error) {
260253

261254
// Finished file; check checksum and size.
262255
if _, err := io.ReadFull(z.r, z.buf[:8]); err != nil {
263-
if err == io.EOF {
264-
err = io.ErrUnexpectedEOF
265-
}
266-
z.err = err
267-
return n, err
256+
z.err = noEOF(err)
257+
return n, z.err
268258
}
269-
digest, size := get4(z.buf[:4]), get4(z.buf[4:8])
259+
digest := le.Uint32(z.buf[:4])
260+
size := le.Uint32(z.buf[4:8])
270261
if digest != z.digest || size != z.size {
271262
z.err = ErrChecksum
272263
return n, z.err
@@ -279,7 +270,7 @@ func (z *Reader) Read(p []byte) (n int, err error) {
279270
}
280271
z.err = nil // Remove io.EOF
281272

282-
if z.err = z.readHeader(false); z.err != nil {
273+
if _, z.err = z.readHeader(); z.err != nil {
283274
return n, z.err
284275
}
285276

src/compress/gzip/gunzip_test.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,53 @@ var gunzipTests = []gunzipTest{
292292
},
293293
ErrChecksum,
294294
},
295+
{
296+
"f1l3n4m3.tXt",
297+
"header with all fields used",
298+
"",
299+
[]byte{
300+
0x1f, 0x8b, 0x08, 0x1e, 0x70, 0xf0, 0xf9, 0x4a,
301+
0x00, 0xaa, 0x09, 0x00, 0x7a, 0x7a, 0x05, 0x00,
302+
0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x31, 0x6c,
303+
0x33, 0x6e, 0x34, 0x6d, 0x33, 0x2e, 0x74, 0x58,
304+
0x74, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
305+
0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
306+
0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
307+
0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e,
308+
0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26,
309+
0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e,
310+
0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
311+
0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e,
312+
0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46,
313+
0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e,
314+
0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
315+
0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e,
316+
0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66,
317+
0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e,
318+
0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76,
319+
0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e,
320+
0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86,
321+
0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
322+
0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
323+
0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e,
324+
0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6,
325+
0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae,
326+
0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
327+
0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe,
328+
0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6,
329+
0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce,
330+
0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
331+
0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde,
332+
0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
333+
0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee,
334+
0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6,
335+
0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe,
336+
0xff, 0x00, 0x92, 0xfd, 0x01, 0x00, 0x00, 0xff,
337+
0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
338+
0x00,
339+
},
340+
nil,
341+
},
295342
}
296343

297344
func TestDecompressor(t *testing.T) {

src/compress/gzip/gzip.go

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -87,25 +87,12 @@ func (z *Writer) Reset(w io.Writer) {
8787
z.init(w, z.level)
8888
}
8989

90-
// GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950).
91-
func put2(p []byte, v uint16) {
92-
p[0] = uint8(v >> 0)
93-
p[1] = uint8(v >> 8)
94-
}
95-
96-
func put4(p []byte, v uint32) {
97-
p[0] = uint8(v >> 0)
98-
p[1] = uint8(v >> 8)
99-
p[2] = uint8(v >> 16)
100-
p[3] = uint8(v >> 24)
101-
}
102-
10390
// writeBytes writes a length-prefixed byte slice to z.w.
10491
func (z *Writer) writeBytes(b []byte) error {
10592
if len(b) > 0xffff {
10693
return errors.New("gzip.Write: Extra data is too large")
10794
}
108-
put2(z.buf[:2], uint16(len(b)))
95+
le.PutUint16(z.buf[:2], uint16(len(b)))
10996
_, err := z.w.Write(z.buf[:2])
11097
if err != nil {
11198
return err
@@ -168,7 +155,7 @@ func (z *Writer) Write(p []byte) (int, error) {
168155
if z.Comment != "" {
169156
z.buf[3] |= 0x10
170157
}
171-
put4(z.buf[4:8], uint32(z.ModTime.Unix()))
158+
le.PutUint32(z.buf[4:8], uint32(z.ModTime.Unix()))
172159
if z.level == BestCompression {
173160
z.buf[8] = 2
174161
} else if z.level == BestSpeed {
@@ -254,8 +241,8 @@ func (z *Writer) Close() error {
254241
if z.err != nil {
255242
return z.err
256243
}
257-
put4(z.buf[:4], z.digest)
258-
put4(z.buf[4:8], z.size)
244+
le.PutUint32(z.buf[:4], z.digest)
245+
le.PutUint32(z.buf[4:8], z.size)
259246
_, z.err = z.w.Write(z.buf[:8])
260247
return z.err
261248
}

0 commit comments

Comments
 (0)