Skip to content

Commit b725b2f

Browse files
gh-109858: Protect zipfile from "quoted-overlap" zipbomb
Raise BadZipFile when trying to read an entry that overlaps with another entry or the central directory.
1 parent 98c0c1d commit b725b2f

File tree

3 files changed

+73
-0
lines changed

3 files changed

+73
-0
lines changed

Lib/test/test_zipfile/test_core.py

+58
Original file line numberDiff line numberDiff line change
@@ -2245,6 +2245,64 @@ def test_decompress_without_3rd_party_library(self):
22452245
with zipfile.ZipFile(zip_file) as zf:
22462246
self.assertRaises(RuntimeError, zf.extract, 'a.txt')
22472247

2248+
def test_full_overlap(self):
# "Full overlap" archive: the central directory lists two entries,
# 'a' and 'b', that both point at the same local file header at
# offset 0.  That local header carries the name 'a'.
2249+
data = (
2250+
b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
2251+
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed'
2252+
b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P'
2253+
b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2'
2254+
b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00'
2255+
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK'
2256+
b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
2257+
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00'
2258+
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00bPK\x05'
2259+
b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00'
2260+
b'\x00\x00\x00'
2261+
)
2262+
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
# Opening the archive succeeds; both directory entries are listed.
2263+
self.assertEqual(zipf.namelist(), ['a', 'b'])
# The two entries report identical header offset and sizes.
2264+
zi = zipf.getinfo('a')
2265+
self.assertEqual(zi.header_offset, 0)
2266+
self.assertEqual(zi.compress_size, 16)
2267+
self.assertEqual(zi.file_size, 1033)
2268+
zi = zipf.getinfo('b')
2269+
self.assertEqual(zi.header_offset, 0)
2270+
self.assertEqual(zi.compress_size, 16)
2271+
self.assertEqual(zi.file_size, 1033)
# 'a' matches the name stored in the local header, so it can be read.
2272+
self.assertEqual(len(zipf.read('a')), 1033)
# 'b' is rejected because the directory name and the local header
# name differ.
2273+
with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'):
2274+
zipf.read('b')
2275+
2276+
def test_quoted_overlap(self):
# "Quoted overlap" archive: entry 'a' starts at offset 0 and declares
# 52 bytes of compressed data, a range that extends past the local
# header of entry 'b' at offset 36 and so overlaps that entry.
2277+
data = (
2278+
b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05Y\xfc'
2279+
b'8\x044\x00\x00\x00(\x04\x00\x00\x01\x00\x00\x00a\x00'
2280+
b'\x1f\x00\xe0\xffPK\x03\x04\x14\x00\x00\x00\x08\x00\xa0l'
2281+
b'H\x05\xe2\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00'
2282+
b'\x00\x00b\xed\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\'
2283+
b'd\x0b`PK\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0'
2284+
b'lH\x05Y\xfc8\x044\x00\x00\x00(\x04\x00\x00\x01'
2285+
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
2286+
b'\x00aPK\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0l'
2287+
b'H\x05\xe2\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00'
2288+
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00$\x00\x00\x00'
2289+
b'bPK\x05\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00'
2290+
b'\x00S\x00\x00\x00\x00\x00'
2291+
)
2292+
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
# Opening the archive succeeds; the overlap is only detected on read.
2293+
self.assertEqual(zipf.namelist(), ['a', 'b'])
# 'a': header at offset 0, 52 compressed bytes.
2294+
zi = zipf.getinfo('a')
2295+
self.assertEqual(zi.header_offset, 0)
2296+
self.assertEqual(zi.compress_size, 52)
2297+
self.assertEqual(zi.file_size, 1064)
# 'b': header at offset 36, i.e. inside the range claimed by 'a'.
2298+
zi = zipf.getinfo('b')
2299+
self.assertEqual(zi.header_offset, 36)
2300+
self.assertEqual(zi.compress_size, 16)
2301+
self.assertEqual(zi.file_size, 1033)
# Reading the enclosing entry must be refused as an overlap ...
2302+
with self.assertRaisesRegex(zipfile.BadZipFile, 'Overlapped entries'):
2303+
zipf.read('a')
# ... while the enclosed entry 'b' is still readable.
2304+
self.assertEqual(len(zipf.read('b')), 1033)
2305+
22482306
def tearDown(self):
22492307
unlink(TESTFN)
22502308
unlink(TESTFN2)

Lib/zipfile/__init__.py

+12
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,7 @@ class ZipInfo (object):
395395
'compress_size',
396396
'file_size',
397397
'_raw_time',
398+
'_end_offset',
398399
)
399400

400401
def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
@@ -429,6 +430,7 @@ def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
429430
self.external_attr = 0 # External file attributes
430431
self.compress_size = 0 # Size of the compressed file
431432
self.file_size = 0 # Size of the uncompressed file
433+
self._end_offset = None # Start of the next local header or central directory
432434
# Other attributes are set by class ZipFile:
433435
# header_offset Byte offset to the file header
434436
# CRC CRC-32 of the uncompressed file
@@ -1487,6 +1489,12 @@ def _RealGetContents(self):
14871489
if self.debug > 2:
14881490
print("total", total)
14891491

1492+
end_offset = self.start_dir
1493+
for zinfo in sorted(self.filelist,
1494+
key=lambda zinfo: zinfo.header_offset,
1495+
reverse=True):
1496+
zinfo._end_offset = end_offset
1497+
end_offset = zinfo.header_offset
14901498

14911499
def namelist(self):
14921500
"""Return a list of file names in the archive."""
@@ -1643,6 +1651,10 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
16431651
'File name in directory %r and header %r differ.'
16441652
% (zinfo.orig_filename, fname))
16451653

1654+
if (zinfo._end_offset is not None and
1655+
zef_file.tell() + zinfo.compress_size > zinfo._end_offset):
1656+
raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r}")
1657+
16461658
# check for encrypted flag & handle password
16471659
is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED
16481660
if is_encrypted:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Protect :mod:`zipfile` from the "quoted-overlap" zipbomb. It now raises
2+
BadZipFile when trying to read an entry that overlaps with another entry or
3+
the central directory.

0 commit comments

Comments
 (0)