Skip to content

Commit 38b9607

Browse files
serhiy-storchaka authored and mcepl committed
[3.8] pythongh-109858: Protect zipfile from "quoted-overlap" zipbomb (pythonGH-110016) (pythonGH-113916)
Raise BadZipFile when trying to read an entry that overlaps with another entry or the central directory. (cherry picked from commit 66363b9)
1 parent 76499e5 commit 38b9607

File tree

3 files changed

+123
-0
lines changed

3 files changed

+123
-0
lines changed

Lib/test/test_zipfile.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1610,6 +1610,114 @@ def test_open_conflicting_handles(self):
16101610
self.assertEqual(zipf.read('baz'), msg3)
16111611
self.assertEqual(zipf.namelist(), ['foo', 'bar', 'baz'])
16121612

1613+
def test_seek_tell(self):
    """Check seek()/tell() on an archive member, on disk and in memory."""
    payload = b"Where's Bruce?"
    pos = payload.find(b"Bruce")

    def exercise(target):
        # Store one member in a fresh archive, then reopen it for reading
        # and move the cursor around with every supported whence value.
        with zipfile.ZipFile(target, "w") as archive:
            archive.writestr("foo.txt", payload)
        with zipfile.ZipFile(target, "r") as archive:
            with archive.open("foo.txt", "r") as member:
                # Absolute seek to the marker.
                member.seek(pos, os.SEEK_SET)
                self.assertEqual(member.tell(), pos)
                # Relative seek back to the start ...
                member.seek(-pos, os.SEEK_CUR)
                self.assertEqual(member.tell(), 0)
                # ... and forward to the marker again.
                member.seek(pos, os.SEEK_CUR)
                self.assertEqual(member.tell(), pos)
                self.assertEqual(member.read(5), payload[pos:pos+5])
                # Seeking relative to the end reports the full length.
                member.seek(0, os.SEEK_END)
                self.assertEqual(member.tell(), len(payload))
                member.seek(0, os.SEEK_SET)
                self.assertEqual(member.tell(), 0)

    # Archive stored in a real file first, then in a memory buffer.
    exercise(TESTFN)
    exercise(io.BytesIO())
1650+
1651+
@requires_bz2
def test_decompress_without_3rd_party_library(self):
    """Extracting a bzip2 member must raise RuntimeError when bz2 is missing."""
    # Buffer pre-filled with the bytes of an end-of-central-directory record.
    data = b'PK\x05\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    buffer = io.BytesIO(data)
    with zipfile.ZipFile(buffer, 'w', compression=zipfile.ZIP_BZIP2) as archive:
        archive.writestr('a.txt', b'a')
    # Hide the bz2 module from zipfile, then try to extract the member.
    with mock.patch('zipfile.bz2', None), zipfile.ZipFile(buffer) as archive:
        with self.assertRaises(RuntimeError):
            archive.extract('a.txt')
1660+
1661+
@requires_zlib
def test_full_overlap(self):
    """Two directory entries, 'a' and 'b', that both point at offset 0.

    Reading 'a' succeeds; reading 'b' must fail because the file name in
    the local header differs from the one in the central directory.
    """
    data = (
        b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
        b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed'
        b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P'
        b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2'
        b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00'
        b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK'
        b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
        b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00'
        b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00bPK\x05'
        b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00'
        b'\x00\x00\x00'
    )
    with zipfile.ZipFile(io.BytesIO(data), 'r') as archive:
        self.assertEqual(archive.namelist(), ['a', 'b'])
        # Both entries are expected to report exactly the same metadata.
        for name in ('a', 'b'):
            info = archive.getinfo(name)
            self.assertEqual(info.header_offset, 0)
            self.assertEqual(info.compress_size, 16)
            self.assertEqual(info.file_size, 1033)
        self.assertEqual(len(archive.read('a')), 1033)
        with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'):
            archive.read('b')
1689+
1690+
@requires_zlib
def test_quoted_overlap(self):
    """Entry 'a' (offset 0, 52 compressed bytes) spans entry 'b' (offset 36).

    Reading 'a' must be rejected as an overlapped entry, while 'b' is
    still readable.
    """
    data = (
        b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05Y\xfc'
        b'8\x044\x00\x00\x00(\x04\x00\x00\x01\x00\x00\x00a\x00'
        b'\x1f\x00\xe0\xffPK\x03\x04\x14\x00\x00\x00\x08\x00\xa0l'
        b'H\x05\xe2\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00'
        b'\x00\x00b\xed\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\'
        b'd\x0b`PK\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0'
        b'lH\x05Y\xfc8\x044\x00\x00\x00(\x04\x00\x00\x01'
        b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
        b'\x00aPK\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0l'
        b'H\x05\xe2\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00'
        b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00$\x00\x00\x00'
        b'bPK\x05\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00'
        b'\x00S\x00\x00\x00\x00\x00'
    )
    # (name, header_offset, compress_size, file_size) for each entry,
    # checked in directory order.
    expected = (
        ('a', 0, 52, 1064),
        ('b', 36, 16, 1033),
    )
    with zipfile.ZipFile(io.BytesIO(data), 'r') as archive:
        self.assertEqual(archive.namelist(), ['a', 'b'])
        for name, offset, packed, unpacked in expected:
            info = archive.getinfo(name)
            self.assertEqual(info.header_offset, offset)
            self.assertEqual(info.compress_size, packed)
            self.assertEqual(info.file_size, unpacked)
        with self.assertRaisesRegex(zipfile.BadZipFile, 'Overlapped entries'):
            archive.read('a')
        self.assertEqual(len(archive.read('b')), 1033)
1720+
16131721
def tearDown(self):
16141722
unlink(TESTFN)
16151723
unlink(TESTFN2)

Lib/zipfile.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ class ZipInfo (object):
338338
'compress_size',
339339
'file_size',
340340
'_raw_time',
341+
'_end_offset',
341342
)
342343

343344
def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
@@ -376,6 +377,7 @@ def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
376377
self.volume = 0 # Volume number of file header
377378
self.internal_attr = 0 # Internal attributes
378379
self.external_attr = 0 # External file attributes
380+
self._end_offset = None # Start of the next local header or central directory
379381
# Other attributes are set by class ZipFile:
380382
# header_offset Byte offset to the file header
381383
# CRC CRC-32 of the uncompressed file
@@ -1264,6 +1266,12 @@ def _RealGetContents(self):
12641266
if self.debug > 2:
12651267
print("total", total)
12661268

1269+
end_offset = self.start_dir
1270+
for zinfo in sorted(self.filelist,
1271+
key=lambda zinfo: zinfo.header_offset,
1272+
reverse=True):
1273+
zinfo._end_offset = end_offset
1274+
end_offset = zinfo.header_offset
12671275

12681276
def namelist(self):
12691277
"""Return a list of file names in the archive."""
@@ -1418,6 +1426,10 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
14181426
'File name in directory %r and header %r differ.'
14191427
% (zinfo.orig_filename, fname))
14201428

1429+
if (zinfo._end_offset is not None and
1430+
zef_file.tell() + zinfo.compress_size > zinfo._end_offset):
1431+
raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)")
1432+
14211433
# check for encrypted flag & handle password
14221434
is_encrypted = zinfo.flag_bits & 0x1
14231435
zd = None
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Protect :mod:`zipfile` from "quoted-overlap" zipbomb. It now raises
2+
BadZipFile when trying to read an entry that overlaps with another entry or
3+
central directory.

0 commit comments

Comments
 (0)