Skip to content

Commit 95b073b

Browse files
[3.12] gh-123270: Replaced SanitizedNames with a more surgical fix. (GH-123354) (#123411)
gh-123270: Replaced SanitizedNames with a more surgical fix. (GH-123354) Applies changes from zipp 3.20.1 and jaraco/zippGH-124 (cherry picked from commit 2231286) Co-authored-by: Jason R. Coombs <[email protected]>
1 parent 514787a commit 95b073b

File tree

3 files changed

+87
-71
lines changed

3 files changed

+87
-71
lines changed

Lib/test/test_zipfile/_path/test_path.py

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import pathlib
55
import pickle
66
import sys
7+
import time
78
import unittest
89
import zipfile
910

@@ -592,7 +593,11 @@ def test_getinfo_missing(self, alpharep):
592593

593594
def test_malformed_paths(self):
594595
"""
595-
Path should handle malformed paths.
596+
Path should handle malformed paths gracefully.
597+
598+
Paths with leading slashes are not visible.
599+
600+
Paths with dots are treated like regular files.
596601
"""
597602
data = io.BytesIO()
598603
zf = zipfile.ZipFile(data, "w")
@@ -601,15 +606,71 @@ def test_malformed_paths(self):
601606
zf.writestr("../parent.txt", b"content")
602607
zf.filename = ''
603608
root = zipfile.Path(zf)
604-
assert list(map(str, root.iterdir())) == [
605-
'one-slash.txt',
606-
'two-slash.txt',
607-
'parent.txt',
608-
]
609+
assert list(map(str, root.iterdir())) == ['../']
610+
assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content'
611+
612+
def test_unsupported_names(self):
613+
"""
614+
Path segments with special characters are readable.
615+
616+
On some platforms or file systems, characters like
617+
``:`` and ``?`` are not allowed, but they are valid
618+
in the zip file.
619+
"""
620+
data = io.BytesIO()
621+
zf = zipfile.ZipFile(data, "w")
622+
zf.writestr("path?", b"content")
623+
zf.writestr("V: NMS.flac", b"fLaC...")
624+
zf.filename = ''
625+
root = zipfile.Path(zf)
626+
contents = root.iterdir()
627+
assert next(contents).name == 'path?'
628+
assert next(contents).name == 'V: NMS.flac'
629+
assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..."
630+
631+
def test_backslash_not_separator(self):
632+
"""
633+
In a zip file, backslashes are not separators.
634+
"""
635+
data = io.BytesIO()
636+
zf = zipfile.ZipFile(data, "w")
637+
zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content")
638+
zf.filename = ''
639+
root = zipfile.Path(zf)
640+
(first,) = root.iterdir()
641+
assert not first.is_dir()
642+
assert first.name == 'foo\\bar'
609643

610644
@pass_alpharep
611645
def test_interface(self, alpharep):
612646
from importlib.resources.abc import Traversable
613647

614648
zf = zipfile.Path(alpharep)
615649
assert isinstance(zf, Traversable)
650+
651+
652+
class DirtyZipInfo(zipfile.ZipInfo):
653+
"""
654+
Bypass name sanitization.
655+
"""
656+
657+
def __init__(self, filename, *args, **kwargs):
658+
super().__init__(filename, *args, **kwargs)
659+
self.filename = filename
660+
661+
@classmethod
662+
def for_name(cls, name, archive):
663+
"""
664+
Construct the same way that ZipFile.writestr does.
665+
666+
TODO: extract this functionality and re-use
667+
"""
668+
self = cls(filename=name, date_time=time.localtime(time.time())[:6])
669+
self.compress_type = archive.compression
670+
self.compress_level = archive.compresslevel
671+
if self.filename.endswith('/'): # pragma: no cover
672+
self.external_attr = 0o40775 << 16 # drwxrwxr-x
673+
self.external_attr |= 0x10 # MS-DOS directory flag
674+
else:
675+
self.external_attr = 0o600 << 16 # ?rw-------
676+
return self

Lib/zipfile/_path/__init__.py

Lines changed: 17 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
"""
2+
A Path-like interface for zipfiles.
3+
4+
This codebase is shared between zipfile.Path in the stdlib
5+
and zipp in PyPI. See
6+
https://github.com/python/importlib_metadata/wiki/Development-Methodology
7+
for more detail.
8+
"""
9+
110
import io
211
import posixpath
312
import zipfile
@@ -34,7 +43,7 @@ def _parents(path):
3443
def _ancestry(path):
3544
"""
3645
Given a path with elements separated by
37-
posixpath.sep, generate all elements of that path
46+
posixpath.sep, generate all elements of that path.
3847
3948
>>> list(_ancestry('b/d'))
4049
['b/d', 'b']
@@ -46,9 +55,14 @@ def _ancestry(path):
4655
['b']
4756
>>> list(_ancestry(''))
4857
[]
58+
59+
Multiple separators are treated like a single.
60+
61+
>>> list(_ancestry('//b//d///f//'))
62+
['//b//d///f', '//b//d', '//b']
4963
"""
5064
path = path.rstrip(posixpath.sep)
51-
while path and path != posixpath.sep:
65+
while path.rstrip(posixpath.sep):
5266
yield path
5367
path, tail = posixpath.split(path)
5468

@@ -83,69 +97,7 @@ def __setstate__(self, state):
8397
super().__init__(*args, **kwargs)
8498

8599

86-
class SanitizedNames:
87-
"""
88-
ZipFile mix-in to ensure names are sanitized.
89-
"""
90-
91-
def namelist(self):
92-
return list(map(self._sanitize, super().namelist()))
93-
94-
@staticmethod
95-
def _sanitize(name):
96-
r"""
97-
Ensure a relative path with posix separators and no dot names.
98-
99-
Modeled after
100-
https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813
101-
but provides consistent cross-platform behavior.
102-
103-
>>> san = SanitizedNames._sanitize
104-
>>> san('/foo/bar')
105-
'foo/bar'
106-
>>> san('//foo.txt')
107-
'foo.txt'
108-
>>> san('foo/.././bar.txt')
109-
'foo/bar.txt'
110-
>>> san('foo../.bar.txt')
111-
'foo../.bar.txt'
112-
>>> san('\\foo\\bar.txt')
113-
'foo/bar.txt'
114-
>>> san('D:\\foo.txt')
115-
'D/foo.txt'
116-
>>> san('\\\\server\\share\\file.txt')
117-
'server/share/file.txt'
118-
>>> san('\\\\?\\GLOBALROOT\\Volume3')
119-
'?/GLOBALROOT/Volume3'
120-
>>> san('\\\\.\\PhysicalDrive1\\root')
121-
'PhysicalDrive1/root'
122-
123-
Retain any trailing slash.
124-
>>> san('abc/')
125-
'abc/'
126-
127-
Raises a ValueError if the result is empty.
128-
>>> san('../..')
129-
Traceback (most recent call last):
130-
...
131-
ValueError: Empty filename
132-
"""
133-
134-
def allowed(part):
135-
return part and part not in {'..', '.'}
136-
137-
# Remove the drive letter.
138-
# Don't use ntpath.splitdrive, because that also strips UNC paths
139-
bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE)
140-
clean = bare.replace('\\', '/')
141-
parts = clean.split('/')
142-
joined = '/'.join(filter(allowed, parts))
143-
if not joined:
144-
raise ValueError("Empty filename")
145-
return joined + '/' * name.endswith('/')
146-
147-
148-
class CompleteDirs(InitializedState, SanitizedNames, zipfile.ZipFile):
100+
class CompleteDirs(InitializedState, zipfile.ZipFile):
149101
"""
150102
A ZipFile subclass that ensures that implied directories
151103
are always included in the namelist.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Applied a more surgical fix for malformed payloads in :class:`zipfile.Path`
2+
causing infinite loops (gh-122905) without breaking contents using
3+
legitimate characters.

0 commit comments

Comments
 (0)