Skip to content

Commit 0f04f24

Browse files
gh-117779: Fix reading duplicated entries in zipfile by name (GH-129254)
1 parent ac3c439 commit 0f04f24

File tree

3 files changed

+121
-6
lines changed

3 files changed

+121
-6
lines changed

Lib/test/test_zipfile/test_core.py

Lines changed: 106 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2415,7 +2415,36 @@ def test_decompress_without_3rd_party_library(self):
24152415
self.assertRaises(RuntimeError, zf.extract, 'a.txt')
24162416

24172417
@requires_zlib()
2418-
def test_full_overlap(self):
2418+
def test_full_overlap_different_names(self):
2419+
data = (
2420+
b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
2421+
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00b\xed'
2422+
b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P'
2423+
b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2'
2424+
b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00'
2425+
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK'
2426+
b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
2427+
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00'
2428+
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00bPK\x05'
2429+
b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00'
2430+
b'\x00\x00\x00'
2431+
)
2432+
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
2433+
self.assertEqual(zipf.namelist(), ['a', 'b'])
2434+
zi = zipf.getinfo('a')
2435+
self.assertEqual(zi.header_offset, 0)
2436+
self.assertEqual(zi.compress_size, 16)
2437+
self.assertEqual(zi.file_size, 1033)
2438+
zi = zipf.getinfo('b')
2439+
self.assertEqual(zi.header_offset, 0)
2440+
self.assertEqual(zi.compress_size, 16)
2441+
self.assertEqual(zi.file_size, 1033)
2442+
self.assertEqual(len(zipf.read('b')), 1033)
2443+
with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'):
2444+
zipf.read('a')
2445+
2446+
@requires_zlib()
2447+
def test_full_overlap_different_names2(self):
24192448
data = (
24202449
b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
24212450
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed'
@@ -2439,9 +2468,43 @@ def test_full_overlap(self):
24392468
self.assertEqual(zi.header_offset, 0)
24402469
self.assertEqual(zi.compress_size, 16)
24412470
self.assertEqual(zi.file_size, 1033)
2442-
self.assertEqual(len(zipf.read('a')), 1033)
24432471
with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'):
24442472
zipf.read('b')
2473+
with self.assertWarnsRegex(UserWarning, 'Overlapped entries') as cm:
2474+
self.assertEqual(len(zipf.read('a')), 1033)
2475+
self.assertEqual(cm.filename, __file__)
2476+
2477+
@requires_zlib()
2478+
def test_full_overlap_same_name(self):
2479+
data = (
2480+
b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
2481+
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed'
2482+
b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P'
2483+
b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2'
2484+
b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00'
2485+
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK'
2486+
b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
2487+
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00'
2488+
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK\x05'
2489+
b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00'
2490+
b'\x00\x00\x00'
2491+
)
2492+
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
2493+
self.assertEqual(zipf.namelist(), ['a', 'a'])
2494+
self.assertEqual(len(zipf.infolist()), 2)
2495+
zi = zipf.getinfo('a')
2496+
self.assertEqual(zi.header_offset, 0)
2497+
self.assertEqual(zi.compress_size, 16)
2498+
self.assertEqual(zi.file_size, 1033)
2499+
self.assertEqual(len(zipf.read('a')), 1033)
2500+
self.assertEqual(len(zipf.read(zi)), 1033)
2501+
self.assertEqual(len(zipf.read(zipf.infolist()[1])), 1033)
2502+
with self.assertWarnsRegex(UserWarning, 'Overlapped entries') as cm:
2503+
self.assertEqual(len(zipf.read(zipf.infolist()[0])), 1033)
2504+
self.assertEqual(cm.filename, __file__)
2505+
with self.assertWarnsRegex(UserWarning, 'Overlapped entries') as cm:
2506+
zipf.open(zipf.infolist()[0]).close()
2507+
self.assertEqual(cm.filename, __file__)
24452508

24462509
@requires_zlib()
24472510
def test_quoted_overlap(self):
@@ -2474,6 +2537,47 @@ def test_quoted_overlap(self):
24742537
zipf.read('a')
24752538
self.assertEqual(len(zipf.read('b')), 1033)
24762539

2540+
@requires_zlib()
2541+
def test_overlap_with_central_dir(self):
2542+
data = (
2543+
b'PK\x01\x02\x14\x03\x14\x00\x00\x00\x08\x00G_|Z'
2544+
b'\xe2\x1e8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00'
2545+
b'\x00\x00\x00\x00\x00\x00\x00\x00\xb4\x81\x00\x00\x00\x00aP'
2546+
b'K\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00/\x00\x00\x00\x00'
2547+
b'\x00\x00\x00\x00\x00'
2548+
)
2549+
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
2550+
self.assertEqual(zipf.namelist(), ['a'])
2551+
self.assertEqual(len(zipf.infolist()), 1)
2552+
zi = zipf.getinfo('a')
2553+
self.assertEqual(zi.header_offset, 0)
2554+
self.assertEqual(zi.compress_size, 11)
2555+
self.assertEqual(zi.file_size, 1033)
2556+
with self.assertRaisesRegex(zipfile.BadZipFile, 'Bad magic number'):
2557+
zipf.read('a')
2558+
2559+
@requires_zlib()
2560+
def test_overlap_with_archive_comment(self):
2561+
data = (
2562+
b'PK\x01\x02\x14\x03\x14\x00\x00\x00\x08\x00G_|Z'
2563+
b'\xe2\x1e8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00'
2564+
b'\x00\x00\x00\x00\x00\x00\x00\x00\xb4\x81E\x00\x00\x00aP'
2565+
b'K\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00/\x00\x00\x00\x00'
2566+
b'\x00\x00\x00*\x00'
2567+
b'PK\x03\x04\x14\x00\x00\x00\x08\x00G_|Z\xe2\x1e'
2568+
b'8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00aK'
2569+
b'L\x1c\x05\xa3`\x14\x8cx\x00\x00'
2570+
)
2571+
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
2572+
self.assertEqual(zipf.namelist(), ['a'])
2573+
self.assertEqual(len(zipf.infolist()), 1)
2574+
zi = zipf.getinfo('a')
2575+
self.assertEqual(zi.header_offset, 69)
2576+
self.assertEqual(zi.compress_size, 11)
2577+
self.assertEqual(zi.file_size, 1033)
2578+
with self.assertRaisesRegex(zipfile.BadZipFile, 'Overlapped entries'):
2579+
zipf.read('a')
2580+
24772581
def tearDown(self):
24782582
unlink(TESTFN)
24792583
unlink(TESTFN2)

Lib/zipfile/__init__.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1552,9 +1552,8 @@ def _RealGetContents(self):
15521552
print("total", total)
15531553

15541554
end_offset = self.start_dir
1555-
for zinfo in sorted(self.filelist,
1556-
key=lambda zinfo: zinfo.header_offset,
1557-
reverse=True):
1555+
for zinfo in reversed(sorted(self.filelist,
1556+
key=lambda zinfo: zinfo.header_offset)):
15581557
zinfo._end_offset = end_offset
15591558
end_offset = zinfo.header_offset
15601559

@@ -1722,7 +1721,16 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
17221721

17231722
if (zinfo._end_offset is not None and
17241723
zef_file.tell() + zinfo.compress_size > zinfo._end_offset):
1725-
raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)")
1724+
if zinfo._end_offset == zinfo.header_offset:
1725+
import warnings
1726+
warnings.warn(
1727+
f"Overlapped entries: {zinfo.orig_filename!r} "
1728+
f"(possible zip bomb)",
1729+
skip_file_prefixes=(os.path.dirname(__file__),))
1730+
else:
1731+
raise BadZipFile(
1732+
f"Overlapped entries: {zinfo.orig_filename!r} "
1733+
f"(possible zip bomb)")
17261734

17271735
# check for encrypted flag & handle password
17281736
is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED
(Third changed file: a newly added news entry; its path was not captured in this extract.)

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix reading duplicated entries in :mod:`zipfile` by name.
2+
Reading duplicated entries (except the last one) by ``ZipInfo``
3+
now emits a warning instead of raising an exception.

0 commit comments

Comments
 (0)