Skip to content

Commit 6dc661b

Browse files
authored
gh-77102: site: try utf-8 and fallback to locale encoding when reading .pth file (#117802)
1 parent 3831144 commit 6dc661b

File tree

4 files changed

+49
-26
lines changed

4 files changed

+49
-26
lines changed

Doc/library/site.rst

+4
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ with ``import`` (followed by space or tab) are executed.
7474
Limiting a code chunk to a single line is a deliberate measure
7575
to discourage putting anything more complex here.
7676

77+
.. versionchanged:: 3.13
78+
The :file:`.pth` files are now decoded with UTF-8 first, and then with the
79+
:term:`locale encoding` if UTF-8 decoding fails.
80+
7781
.. index::
7882
single: package
7983
triple: path; configuration; file

Doc/whatsnew/3.13.rst

+7
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,13 @@ re
630630
* Rename :exc:`!re.error` to :exc:`re.PatternError` for improved clarity.
631631
:exc:`!re.error` is kept for backward compatibility.
632632

633+
site
634+
----
635+
636+
* :file:`.pth` files are now decoded by UTF-8 first, and then by the
637+
:term:`locale encoding` if the UTF-8 decoding fails.
638+
(Contributed by Inada Naoki in :gh:`117802`.)
639+
633640
sqlite3
634641
-------
635642

Lib/site.py

+35-26
Original file line numberDiff line numberDiff line change
@@ -179,35 +179,44 @@ def addpackage(sitedir, name, known_paths):
179179
return
180180
_trace(f"Processing .pth file: {fullname!r}")
181181
try:
182-
# locale encoding is not ideal especially on Windows. But we have used
183-
# it for a long time. setuptools uses the locale encoding too.
184-
f = io.TextIOWrapper(io.open_code(fullname), encoding="locale")
182+
with io.open_code(fullname) as f:
183+
pth_content = f.read()
185184
except OSError:
186185
return
187-
with f:
188-
for n, line in enumerate(f):
189-
if line.startswith("#"):
190-
continue
191-
if line.strip() == "":
186+
187+
try:
188+
pth_content = pth_content.decode()
189+
except UnicodeDecodeError:
190+
# Fallback to locale encoding for backward compatibility.
191+
# We will deprecate this fallback in the future.
192+
import locale
193+
pth_content = pth_content.decode(locale.getencoding())
194+
_trace(f"Cannot read {fullname!r} as UTF-8. "
195+
f"Using fallback encoding {locale.getencoding()!r}")
196+
197+
for n, line in enumerate(pth_content.splitlines(), 1):
198+
if line.startswith("#"):
199+
continue
200+
if line.strip() == "":
201+
continue
202+
try:
203+
if line.startswith(("import ", "import\t")):
204+
exec(line)
192205
continue
193-
try:
194-
if line.startswith(("import ", "import\t")):
195-
exec(line)
196-
continue
197-
line = line.rstrip()
198-
dir, dircase = makepath(sitedir, line)
199-
if not dircase in known_paths and os.path.exists(dir):
200-
sys.path.append(dir)
201-
known_paths.add(dircase)
202-
except Exception as exc:
203-
print("Error processing line {:d} of {}:\n".format(n+1, fullname),
204-
file=sys.stderr)
205-
import traceback
206-
for record in traceback.format_exception(exc):
207-
for line in record.splitlines():
208-
print(' '+line, file=sys.stderr)
209-
print("\nRemainder of file ignored", file=sys.stderr)
210-
break
206+
line = line.rstrip()
207+
dir, dircase = makepath(sitedir, line)
208+
if dircase not in known_paths and os.path.exists(dir):
209+
sys.path.append(dir)
210+
known_paths.add(dircase)
211+
except Exception as exc:
212+
print(f"Error processing line {n:d} of {fullname}:\n",
213+
file=sys.stderr)
214+
import traceback
215+
for record in traceback.format_exception(exc):
216+
for line in record.splitlines():
217+
print(' '+line, file=sys.stderr)
218+
print("\nRemainder of file ignored", file=sys.stderr)
219+
break
211220
if reset:
212221
known_paths = None
213222
return known_paths
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
The :mod:`site` module now parses ``.pth`` files with UTF-8 first, and
2+
falls back to the :term:`locale encoding` if a ``UnicodeDecodeError`` occurs. It supported
3+
only the locale encoding before.

0 commit comments

Comments
 (0)