Skip to content

Commit 2231286

Browse files
authored
gh-123270: Replaced SanitizedNames with a more surgical fix. (#123354)
Applies changes from zipp 3.20.1 and jaraco/zipp#124
1 parent 7e38e67 commit 2231286

File tree

3 files changed

+87
-71
lines changed

3 files changed

+87
-71
lines changed

Lib/test/test_zipfile/_path/test_path.py

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pickle
66
import stat
77
import sys
8+
import time
89
import unittest
910
import zipfile
1011
import zipfile._path
@@ -592,7 +593,11 @@ def test_getinfo_missing(self, alpharep):
592593

593594
def test_malformed_paths(self):
594595
"""
595-
Path should handle malformed paths.
596+
Path should handle malformed paths gracefully.
597+
598+
Paths with leading slashes are not visible.
599+
600+
Paths with dots are treated like regular files.
596601
"""
597602
data = io.BytesIO()
598603
zf = zipfile.ZipFile(data, "w")
@@ -601,15 +606,71 @@ def test_malformed_paths(self):
601606
zf.writestr("../parent.txt", b"content")
602607
zf.filename = ''
603608
root = zipfile.Path(zf)
604-
assert list(map(str, root.iterdir())) == [
605-
'one-slash.txt',
606-
'two-slash.txt',
607-
'parent.txt',
608-
]
609+
assert list(map(str, root.iterdir())) == ['../']
610+
assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content'
611+
612+
def test_unsupported_names(self):
613+
"""
614+
Path segments with special characters are readable.
615+
616+
On some platforms or file systems, characters like
617+
``:`` and ``?`` are not allowed, but they are valid
618+
in the zip file.
619+
"""
620+
data = io.BytesIO()
621+
zf = zipfile.ZipFile(data, "w")
622+
zf.writestr("path?", b"content")
623+
zf.writestr("V: NMS.flac", b"fLaC...")
624+
zf.filename = ''
625+
root = zipfile.Path(zf)
626+
contents = root.iterdir()
627+
assert next(contents).name == 'path?'
628+
assert next(contents).name == 'V: NMS.flac'
629+
assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..."
630+
631+
def test_backslash_not_separator(self):
632+
"""
633+
In a zip file, backslashes are not separators.
634+
"""
635+
data = io.BytesIO()
636+
zf = zipfile.ZipFile(data, "w")
637+
zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content")
638+
zf.filename = ''
639+
root = zipfile.Path(zf)
640+
(first,) = root.iterdir()
641+
assert not first.is_dir()
642+
assert first.name == 'foo\\bar'
609643

610644
@pass_alpharep
611645
def test_interface(self, alpharep):
612646
from importlib.resources.abc import Traversable
613647

614648
zf = zipfile.Path(alpharep)
615649
assert isinstance(zf, Traversable)
650+
651+
652+
class DirtyZipInfo(zipfile.ZipInfo):
653+
"""
654+
Bypass name sanitization.
655+
"""
656+
657+
def __init__(self, filename, *args, **kwargs):
658+
super().__init__(filename, *args, **kwargs)
659+
self.filename = filename
660+
661+
@classmethod
662+
def for_name(cls, name, archive):
663+
"""
664+
Construct the same way that ZipFile.writestr does.
665+
666+
TODO: extract this functionality and re-use
667+
"""
668+
self = cls(filename=name, date_time=time.localtime(time.time())[:6])
669+
self.compress_type = archive.compression
670+
self.compress_level = archive.compresslevel
671+
if self.filename.endswith('/'): # pragma: no cover
672+
self.external_attr = 0o40775 << 16 # drwxrwxr-x
673+
self.external_attr |= 0x10 # MS-DOS directory flag
674+
else:
675+
self.external_attr = 0o600 << 16 # ?rw-------
676+
return self

Lib/zipfile/_path/__init__.py

Lines changed: 17 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
"""
2+
A Path-like interface for zipfiles.
3+
4+
This codebase is shared between zipfile.Path in the stdlib
5+
and zipp in PyPI. See
6+
https://github.com/python/importlib_metadata/wiki/Development-Methodology
7+
for more detail.
8+
"""
9+
110
import io
211
import posixpath
312
import zipfile
@@ -36,7 +45,7 @@ def _parents(path):
3645
def _ancestry(path):
3746
"""
3847
Given a path with elements separated by
39-
posixpath.sep, generate all elements of that path
48+
posixpath.sep, generate all elements of that path.
4049
4150
>>> list(_ancestry('b/d'))
4251
['b/d', 'b']
@@ -48,9 +57,14 @@ def _ancestry(path):
4857
['b']
4958
>>> list(_ancestry(''))
5059
[]
60+
61+
Multiple separators are treated like a single.
62+
63+
>>> list(_ancestry('//b//d///f//'))
64+
['//b//d///f', '//b//d', '//b']
5165
"""
5266
path = path.rstrip(posixpath.sep)
53-
while path and path != posixpath.sep:
67+
while path.rstrip(posixpath.sep):
5468
yield path
5569
path, tail = posixpath.split(path)
5670

@@ -85,69 +99,7 @@ def __setstate__(self, state):
8599
super().__init__(*args, **kwargs)
86100

87101

88-
class SanitizedNames:
89-
"""
90-
ZipFile mix-in to ensure names are sanitized.
91-
"""
92-
93-
def namelist(self):
94-
return list(map(self._sanitize, super().namelist()))
95-
96-
@staticmethod
97-
def _sanitize(name):
98-
r"""
99-
Ensure a relative path with posix separators and no dot names.
100-
101-
Modeled after
102-
https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813
103-
but provides consistent cross-platform behavior.
104-
105-
>>> san = SanitizedNames._sanitize
106-
>>> san('/foo/bar')
107-
'foo/bar'
108-
>>> san('//foo.txt')
109-
'foo.txt'
110-
>>> san('foo/.././bar.txt')
111-
'foo/bar.txt'
112-
>>> san('foo../.bar.txt')
113-
'foo../.bar.txt'
114-
>>> san('\\foo\\bar.txt')
115-
'foo/bar.txt'
116-
>>> san('D:\\foo.txt')
117-
'D/foo.txt'
118-
>>> san('\\\\server\\share\\file.txt')
119-
'server/share/file.txt'
120-
>>> san('\\\\?\\GLOBALROOT\\Volume3')
121-
'?/GLOBALROOT/Volume3'
122-
>>> san('\\\\.\\PhysicalDrive1\\root')
123-
'PhysicalDrive1/root'
124-
125-
Retain any trailing slash.
126-
>>> san('abc/')
127-
'abc/'
128-
129-
Raises a ValueError if the result is empty.
130-
>>> san('../..')
131-
Traceback (most recent call last):
132-
...
133-
ValueError: Empty filename
134-
"""
135-
136-
def allowed(part):
137-
return part and part not in {'..', '.'}
138-
139-
# Remove the drive letter.
140-
# Don't use ntpath.splitdrive, because that also strips UNC paths
141-
bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE)
142-
clean = bare.replace('\\', '/')
143-
parts = clean.split('/')
144-
joined = '/'.join(filter(allowed, parts))
145-
if not joined:
146-
raise ValueError("Empty filename")
147-
return joined + '/' * name.endswith('/')
148-
149-
150-
class CompleteDirs(InitializedState, SanitizedNames, zipfile.ZipFile):
102+
class CompleteDirs(InitializedState, zipfile.ZipFile):
151103
"""
152104
A ZipFile subclass that ensures that implied directories
153105
are always included in the namelist.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Applied a more surgical fix for malformed payloads in :class:`zipfile.Path`
2+
causing infinite loops (gh-122905) without breaking contents using
3+
legitimate characters.

0 commit comments

Comments
 (0)