Skip to content

Commit e32baeb

Browse files
[3.8] gh-109858: Protect zipfile from "quoted-overlap" zipbomb (GH-110016)
Raise BadZipFile when trying to read an entry that overlaps with another entry or the central directory. (cherry picked from commit 66363b9) Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent fb57c39 commit e32baeb

File tree

3 files changed

+75
-0
lines changed

3 files changed

+75
-0
lines changed

Lib/test/test_zipfile.py

+60
Original file line numberDiff line numberDiff line change
@@ -1998,6 +1998,66 @@ def test_decompress_without_3rd_party_library(self):
19981998
with zipfile.ZipFile(zip_file) as zf:
19991999
self.assertRaises(RuntimeError, zf.extract, 'a.txt')
20002000

2001+
@requires_zlib()
def test_full_overlap(self):
    # Hand-built archive: the central directory lists two names, 'a' and
    # 'b', and both records carry header_offset 0, i.e. they refer to the
    # very same local file header and compressed stream.
    payload = (
        b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
        b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed'
        b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P'
        b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2'
        b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00'
        b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK'
        b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
        b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00'
        b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00bPK\x05'
        b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00'
        b'\x00\x00\x00'
    )
    with zipfile.ZipFile(io.BytesIO(payload), 'r') as archive:
        self.assertEqual(archive.namelist(), ['a', 'b'])
        # The two directory records must report identical metadata.
        for member in ('a', 'b'):
            info = archive.getinfo(member)
            self.assertEqual(info.header_offset, 0)
            self.assertEqual(info.compress_size, 16)
            self.assertEqual(info.file_size, 1033)
        # 'a' is readable; 'b' is rejected because the name stored in the
        # shared local header does not match the directory record.
        self.assertEqual(len(archive.read('a')), 1033)
        with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'):
            archive.read('b')
2029+
2030+
@requires_zlib()
def test_quoted_overlap(self):
    # Hand-built archive in which entry 'a' (offset 0, 52 compressed
    # bytes) extends past the local header of entry 'b' (offset 36), so
    # the compressed stream of 'a' "quotes" the whole of 'b'.
    payload = (
        b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05Y\xfc'
        b'8\x044\x00\x00\x00(\x04\x00\x00\x01\x00\x00\x00a\x00'
        b'\x1f\x00\xe0\xffPK\x03\x04\x14\x00\x00\x00\x08\x00\xa0l'
        b'H\x05\xe2\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00'
        b'\x00\x00b\xed\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\'
        b'd\x0b`PK\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0'
        b'lH\x05Y\xfc8\x044\x00\x00\x00(\x04\x00\x00\x01'
        b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
        b'\x00aPK\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0l'
        b'H\x05\xe2\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00'
        b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00$\x00\x00\x00'
        b'bPK\x05\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00'
        b'\x00S\x00\x00\x00\x00\x00'
    )
    # (name, header_offset, compress_size, file_size) for each entry.
    expected_entries = (
        ('a', 0, 52, 1064),
        ('b', 36, 16, 1033),
    )
    with zipfile.ZipFile(io.BytesIO(payload), 'r') as archive:
        self.assertEqual(archive.namelist(), ['a', 'b'])
        for member, offset, packed_size, unpacked_size in expected_entries:
            info = archive.getinfo(member)
            self.assertEqual(info.header_offset, offset)
            self.assertEqual(info.compress_size, packed_size)
            self.assertEqual(info.file_size, unpacked_size)
        # Reading the outer, overlapping entry must be refused ...
        with self.assertRaisesRegex(zipfile.BadZipFile, 'Overlapped entries'):
            archive.read('a')
        # ... while the inner entry remains readable.
        self.assertEqual(len(archive.read('b')), 1033)
2060+
20012061
def tearDown(self):
20022062
unlink(TESTFN)
20032063
unlink(TESTFN2)

Lib/zipfile.py

+12
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,7 @@ class ZipInfo (object):
339339
'compress_size',
340340
'file_size',
341341
'_raw_time',
342+
'_end_offset',
342343
)
343344

344345
def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
@@ -378,6 +379,7 @@ def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
378379
self.volume = 0 # Volume number of file header
379380
self.internal_attr = 0 # Internal attributes
380381
self.external_attr = 0 # External file attributes
382+
self._end_offset = None # Start of the next local header or central directory
381383
# Other attributes are set by class ZipFile:
382384
# header_offset Byte offset to the file header
383385
# CRC CRC-32 of the uncompressed file
@@ -1402,6 +1404,12 @@ def _RealGetContents(self):
14021404
if self.debug > 2:
14031405
print("total", total)
14041406

1407+
end_offset = self.start_dir
1408+
for zinfo in sorted(self.filelist,
1409+
key=lambda zinfo: zinfo.header_offset,
1410+
reverse=True):
1411+
zinfo._end_offset = end_offset
1412+
end_offset = zinfo.header_offset
14051413

14061414
def namelist(self):
14071415
"""Return a list of file names in the archive."""
@@ -1557,6 +1565,10 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
15571565
'File name in directory %r and header %r differ.'
15581566
% (zinfo.orig_filename, fname))
15591567

1568+
if (zinfo._end_offset is not None and
1569+
zef_file.tell() + zinfo.compress_size > zinfo._end_offset):
1570+
raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)")
1571+
15601572
# check for encrypted flag & handle password
15611573
is_encrypted = zinfo.flag_bits & 0x1
15621574
if is_encrypted:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Protect :mod:`zipfile` from "quoted-overlap" zipbomb. It now raises
2+
BadZipFile when trying to read an entry that overlaps with another entry or
3+
the central directory.

0 commit comments

Comments
 (0)