Skip to content

Commit 2b1df88

Browse files
[3.10] gh-67693: Fix urlunparse() and urlunsplit() for URIs with path starting with multiple slashes and no authority (GH-113563)
(cherry picked from commit e237b25)
Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent 333c7dc commit 2b1df88

File tree

3 files changed: +70 -6 lines changed

3 files changed

+70
-6
lines changed

Lib/test/test_urlparse.py

Lines changed: 67 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,16 +70,17 @@
7070

7171
class UrlParseTestCase(unittest.TestCase):
7272

73-
def checkRoundtrips(self, url, parsed, split):
73+
def checkRoundtrips(self, url, parsed, split, url2=None):
74+
if url2 is None:
75+
url2 = url
7476
result = urllib.parse.urlparse(url)
7577
self.assertEqual(result, parsed)
7678
t = (result.scheme, result.netloc, result.path,
7779
result.params, result.query, result.fragment)
7880
self.assertEqual(t, parsed)
7981
# put it back together and it should be the same
8082
result2 = urllib.parse.urlunparse(result)
81-
self.assertEqual(result2, url)
82-
self.assertEqual(result2, result.geturl())
83+
self.assertEqual(result2, url2)
8384

8485
# the result of geturl() is a fixpoint; we can always parse it
8586
# again to get the same result:
@@ -104,8 +105,7 @@ def checkRoundtrips(self, url, parsed, split):
104105
result.query, result.fragment)
105106
self.assertEqual(t, split)
106107
result2 = urllib.parse.urlunsplit(result)
107-
self.assertEqual(result2, url)
108-
self.assertEqual(result2, result.geturl())
108+
self.assertEqual(result2, url2)
109109

110110
# check the fixpoint property of re-parsing the result of geturl()
111111
result3 = urllib.parse.urlsplit(result.geturl())
@@ -142,9 +142,39 @@ def test_qs(self):
142142

143143
def test_roundtrips(self):
144144
str_cases = [
145+
('path/to/file',
146+
('', '', 'path/to/file', '', '', ''),
147+
('', '', 'path/to/file', '', '')),
148+
('/path/to/file',
149+
('', '', '/path/to/file', '', '', ''),
150+
('', '', '/path/to/file', '', '')),
151+
('//path/to/file',
152+
('', 'path', '/to/file', '', '', ''),
153+
('', 'path', '/to/file', '', '')),
154+
('////path/to/file',
155+
('', '', '//path/to/file', '', '', ''),
156+
('', '', '//path/to/file', '', '')),
157+
('scheme:path/to/file',
158+
('scheme', '', 'path/to/file', '', '', ''),
159+
('scheme', '', 'path/to/file', '', '')),
160+
('scheme:/path/to/file',
161+
('scheme', '', '/path/to/file', '', '', ''),
162+
('scheme', '', '/path/to/file', '', '')),
163+
('scheme://path/to/file',
164+
('scheme', 'path', '/to/file', '', '', ''),
165+
('scheme', 'path', '/to/file', '', '')),
166+
('scheme:////path/to/file',
167+
('scheme', '', '//path/to/file', '', '', ''),
168+
('scheme', '', '//path/to/file', '', '')),
145169
('file:///tmp/junk.txt',
146170
('file', '', '/tmp/junk.txt', '', '', ''),
147171
('file', '', '/tmp/junk.txt', '', '')),
172+
('file:////tmp/junk.txt',
173+
('file', '', '//tmp/junk.txt', '', '', ''),
174+
('file', '', '//tmp/junk.txt', '', '')),
175+
('file://///tmp/junk.txt',
176+
('file', '', '///tmp/junk.txt', '', '', ''),
177+
('file', '', '///tmp/junk.txt', '', '')),
148178
('imap://mail.python.org/mbox1',
149179
('imap', 'mail.python.org', '/mbox1', '', '', ''),
150180
('imap', 'mail.python.org', '/mbox1', '', '')),
@@ -175,6 +205,38 @@ def _encode(t):
175205
for url, parsed, split in str_cases + bytes_cases:
176206
self.checkRoundtrips(url, parsed, split)
177207

208+
def test_roundtrips_normalization(self):
209+
str_cases = [
210+
('///path/to/file',
211+
'/path/to/file',
212+
('', '', '/path/to/file', '', '', ''),
213+
('', '', '/path/to/file', '', '')),
214+
('scheme:///path/to/file',
215+
'scheme:/path/to/file',
216+
('scheme', '', '/path/to/file', '', '', ''),
217+
('scheme', '', '/path/to/file', '', '')),
218+
('file:/tmp/junk.txt',
219+
'file:///tmp/junk.txt',
220+
('file', '', '/tmp/junk.txt', '', '', ''),
221+
('file', '', '/tmp/junk.txt', '', '')),
222+
('http:/tmp/junk.txt',
223+
'http:///tmp/junk.txt',
224+
('http', '', '/tmp/junk.txt', '', '', ''),
225+
('http', '', '/tmp/junk.txt', '', '')),
226+
('https:/tmp/junk.txt',
227+
'https:///tmp/junk.txt',
228+
('https', '', '/tmp/junk.txt', '', '', ''),
229+
('https', '', '/tmp/junk.txt', '', '')),
230+
]
231+
def _encode(t):
232+
return (t[0].encode('ascii'),
233+
t[1].encode('ascii'),
234+
tuple(x.encode('ascii') for x in t[2]),
235+
tuple(x.encode('ascii') for x in t[3]))
236+
bytes_cases = [_encode(x) for x in str_cases]
237+
for url, url2, parsed, split in str_cases + bytes_cases:
238+
self.checkRoundtrips(url, parsed, split, url2)
239+
178240
def test_http_roundtrips(self):
179241
# urllib.parse.urlsplit treats 'http:' as an optimized special case,
180242
# so we test both 'http:' and 'https:' in all the following.

Lib/urllib/parse.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,7 @@ def urlunsplit(components):
521521
empty query; the RFC states that these are equivalent)."""
522522
scheme, netloc, url, query, fragment, _coerce_result = (
523523
_coerce_args(*components))
524-
if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
524+
if netloc or (scheme and scheme in uses_netloc) or url[:2] == '//':
525525
if url and url[:1] != '/': url = '/' + url
526526
url = '//' + (netloc or '') + url
527527
if scheme:
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix :func:`urllib.parse.urlunparse` and :func:`urllib.parse.urlunsplit` for URIs with path starting with multiple slashes and no authority.
2+
Based on patch by Ashwin Ramaswami.

0 commit comments

Comments
 (0)