From 05e3b187baf847f67b40d54b5d854dbbfa647709 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 22 Nov 2024 02:31:00 +0000 Subject: [PATCH 1/2] GH-126766: `url2pathname()`: handle 'localhost' authority Discard any 'localhost' authority from the beginning of a `file:` URI. As a result, file URIs like `//localhost/etc/hosts` are correctly decoded as `/etc/hosts`. --- Lib/nturl2path.py | 3 +++ Lib/test/test_urllib.py | 4 +++- Lib/urllib/request.py | 3 +++ .../Library/2024-11-22-02-31-55.gh-issue-126766.jfkhBH.rst | 2 ++ 4 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-22-02-31-55.gh-issue-126766.jfkhBH.rst diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py index ed7880fd1a775f..8a757a35d46484 100644 --- a/Lib/nturl2path.py +++ b/Lib/nturl2path.py @@ -23,6 +23,9 @@ def url2pathname(url): # URL has an empty authority section, so the path begins on the # third character. url = url[2:] + elif url[:12] == '//localhost/': + # Skip past 'localhost' authority. + url = url[11:] # make sure not to convert quoted slashes :-) return urllib.parse.unquote(url.replace('/', '\\')) comp = url.split('|') diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 3e5dc256d317a7..e1c1d3170d9807 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1496,6 +1496,8 @@ def test_url2pathname_win(self): # Localhost paths self.assertEqual(fn('//localhost/C:/path/to/file'), 'C:\\path\\to\\file') self.assertEqual(fn('//localhost/C|/path/to/file'), 'C:\\path\\to\\file') + self.assertEqual(fn('//localhost/path/to/file'), '\\path\\to\\file') + self.assertEqual(fn('//localhost//server/path/to/file'), '\\\\server\\path\\to\\file') # Percent-encoded forward slashes are preserved for backwards compatibility self.assertEqual(fn('C:/foo%2fbar'), 'C:\\foo/bar') self.assertEqual(fn('//server/share/foo%2fbar'), '\\\\server\\share\\foo/bar') @@ -1514,7 +1516,7 @@ def test_url2pathname_posix(self): self.assertEqual(fn('//foo/bar'), '//foo/bar') self.assertEqual(fn('///foo/bar'), '/foo/bar') self.assertEqual(fn('////foo/bar'), '//foo/bar') - self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar') + self.assertEqual(fn('//localhost/foo/bar'), '/foo/bar') @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII') def test_url2pathname_nonascii(self): diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index bcfdcc51fac369..80be65c613e971 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1657,6 +1657,9 @@ def url2pathname(pathname): # URL has an empty authority section, so the path begins on the # third character. pathname = pathname[2:] + elif pathname[:12] == '//localhost/': + # Skip past 'localhost' authority. + pathname = pathname[11:] encoding = sys.getfilesystemencoding() errors = sys.getfilesystemencodeerrors() return unquote(pathname, encoding=encoding, errors=errors) diff --git a/Misc/NEWS.d/next/Library/2024-11-22-02-31-55.gh-issue-126766.jfkhBH.rst b/Misc/NEWS.d/next/Library/2024-11-22-02-31-55.gh-issue-126766.jfkhBH.rst new file mode 100644 index 00000000000000..998c99bf4358d5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-22-02-31-55.gh-issue-126766.jfkhBH.rst @@ -0,0 +1,2 @@ +Fix issue where :func:`urllib.request.url2pathname` failed to discard any +'localhost' authority present in the URL. From fb5e8af06b581bfa622dcf81fb39b65631d4032e Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 22 Nov 2024 02:50:42 +0000 Subject: [PATCH 2/2] Align Windows and POSIX implementations a little more. --- Lib/nturl2path.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py index 8a757a35d46484..3308ee7c1c784e 100644 --- a/Lib/nturl2path.py +++ b/Lib/nturl2path.py @@ -15,17 +15,17 @@ def url2pathname(url): # become # C:\foo\bar\spam.foo import string, urllib.parse + if url[:3] == '///': + # URL has an empty authority section, so the path begins on the third + # character. + url = url[2:] + elif url[:12] == '//localhost/': + # Skip past 'localhost' authority. + url = url[11:] # Windows itself uses ":" even in URLs. url = url.replace(':', '|') if not '|' in url: # No drive specifier, just convert slashes - if url[:3] == '///': - # URL has an empty authority section, so the path begins on the - # third character. - url = url[2:] - elif url[:12] == '//localhost/': - # Skip past 'localhost' authority. - url = url[11:] # make sure not to convert quoted slashes :-) return urllib.parse.unquote(url.replace('/', '\\')) comp = url.split('|')