Skip to content

Commit fdcc46d

Browse files
authored
bpo-45848: Allow the parser to get error lines from encoded files (GH-29646)
1 parent 6d430ef commit fdcc46d

File tree

5 files changed

+42
-11
lines changed

5 files changed

+42
-11
lines changed

Include/cpython/pyerrors.h

+5
Original file line number | Diff line number | Diff line change
@@ -149,6 +149,11 @@ PyAPI_FUNC(PyObject *) PyErr_ProgramTextObject(
149149
PyObject *filename,
150150
int lineno);
151151

152+
PyAPI_FUNC(PyObject *) _PyErr_ProgramDecodedTextObject(
153+
PyObject *filename,
154+
int lineno,
155+
const char* encoding);
156+
152157
PyAPI_FUNC(PyObject *) _PyUnicodeTranslateError_Create(
153158
PyObject *object,
154159
Py_ssize_t start,

Lib/test/test_exceptions.py

+13
Original file line number | Diff line number | Diff line change
@@ -2353,6 +2353,19 @@ def test_encodings(self):
23532353
finally:
23542354
unlink(TESTFN)
23552355

2356+
# Check backwards tokenizer errors
2357+
source = '# -*- coding: ascii -*-\n\n(\n'
2358+
try:
2359+
with open(TESTFN, 'w', encoding='ascii') as testfile:
2360+
testfile.write(source)
2361+
rc, out, err = script_helper.assert_python_failure('-Wd', '-X', 'utf8', TESTFN)
2362+
err = err.decode('utf-8').splitlines()
2363+
2364+
self.assertEqual(err[-3], ' (')
2365+
self.assertEqual(err[-2], ' ^')
2366+
finally:
2367+
unlink(TESTFN)
2368+
23562369
def test_attributes_new_constructor(self):
23572370
args = ("bad.py", 1, 2, "abcdefg", 1, 100)
23582371
the_exception = SyntaxError("bad bad", args)
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
Allow the parser to obtain error lines directly from encoded files. Patch by
2+
Pablo Galindo.

Parser/pegen.c

+8-7
Original file line number | Diff line number | Diff line change
@@ -482,14 +482,12 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
482482
goto error;
483483
}
484484

485-
// PyErr_ProgramTextObject assumes that the text is utf-8 so we cannot call it with a file
486-
// with an arbitrary encoding or otherwise we could get some badly decoded text.
487-
int uses_utf8_codec = (!p->tok->encoding || strcmp(p->tok->encoding, "utf-8") == 0);
488485
if (p->tok->fp_interactive) {
489486
error_line = get_error_line(p, lineno);
490487
}
491-
else if (uses_utf8_codec && p->start_rule == Py_file_input) {
492-
error_line = PyErr_ProgramTextObject(p->tok->filename, (int) lineno);
488+
else if (p->start_rule == Py_file_input) {
489+
error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename,
490+
(int) lineno, p->tok->encoding);
493491
}
494492

495493
if (!error_line) {
@@ -500,15 +498,18 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
500498
we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
501499
`PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
502500
does not physically exist */
503-
assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF || !uses_utf8_codec);
501+
assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
504502

505503
if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
506504
Py_ssize_t size = p->tok->inp - p->tok->buf;
507505
error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
508506
}
509-
else {
507+
else if (p->tok->fp == NULL || p->tok->fp == stdin) {
510508
error_line = get_error_line(p, lineno);
511509
}
510+
else {
511+
error_line = PyUnicode_FromStringAndSize("", 0);
512+
}
512513
if (!error_line) {
513514
goto error;
514515
}

Python/errors.c

+14-4
Original file line number | Diff line number | Diff line change
@@ -1692,7 +1692,7 @@ PyErr_SyntaxLocationEx(const char *filename, int lineno, int col_offset)
16921692
functionality in tb_displayline() in traceback.c. */
16931693

16941694
static PyObject *
1695-
err_programtext(PyThreadState *tstate, FILE *fp, int lineno)
1695+
err_programtext(PyThreadState *tstate, FILE *fp, int lineno, const char* encoding)
16961696
{
16971697
int i;
16981698
char linebuf[1000];
@@ -1720,7 +1720,11 @@ err_programtext(PyThreadState *tstate, FILE *fp, int lineno)
17201720
fclose(fp);
17211721
if (i == lineno) {
17221722
PyObject *res;
1723-
res = PyUnicode_FromString(linebuf);
1723+
if (encoding != NULL) {
1724+
res = PyUnicode_Decode(linebuf, strlen(linebuf), encoding, "replace");
1725+
} else {
1726+
res = PyUnicode_FromString(linebuf);
1727+
}
17241728
if (res == NULL)
17251729
_PyErr_Clear(tstate);
17261730
return res;
@@ -1746,7 +1750,7 @@ PyErr_ProgramText(const char *filename, int lineno)
17461750
}
17471751

17481752
PyObject *
1749-
PyErr_ProgramTextObject(PyObject *filename, int lineno)
1753+
_PyErr_ProgramDecodedTextObject(PyObject *filename, int lineno, const char* encoding)
17501754
{
17511755
if (filename == NULL || lineno <= 0) {
17521756
return NULL;
@@ -1758,7 +1762,13 @@ PyErr_ProgramTextObject(PyObject *filename, int lineno)
17581762
_PyErr_Clear(tstate);
17591763
return NULL;
17601764
}
1761-
return err_programtext(tstate, fp, lineno);
1765+
return err_programtext(tstate, fp, lineno, encoding);
1766+
}
1767+
1768+
PyObject *
1769+
PyErr_ProgramTextObject(PyObject *filename, int lineno)
1770+
{
1771+
return _PyErr_ProgramDecodedTextObject(filename, lineno, NULL);
17621772
}
17631773

17641774
#ifdef __cplusplus

0 commit comments

Comments
 (0)