Skip to content

Commit 0cc6b5e

Browse files
bpo-39219: Fix SyntaxError attributes in the tokenizer. (GH-17828)
* Always set the text attribute.
* Correct the offset attribute for non-ASCII sources.
1 parent f4f445b commit 0cc6b5e

File tree

3 files changed

+47
-5
lines changed

3 files changed

+47
-5
lines changed

Lib/test/test_exceptions.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,17 +179,25 @@ def ckmsg(src, msg, exception=SyntaxError):
179179
ckmsg(s, "inconsistent use of tabs and spaces in indentation", TabError)
180180

181181
def testSyntaxErrorOffset(self):
182-
def check(src, lineno, offset):
182+
def check(src, lineno, offset, encoding='utf-8'):
183183
with self.assertRaises(SyntaxError) as cm:
184184
compile(src, '<fragment>', 'exec')
185185
self.assertEqual(cm.exception.lineno, lineno)
186186
self.assertEqual(cm.exception.offset, offset)
187+
if cm.exception.text is not None:
188+
if not isinstance(src, str):
189+
src = src.decode(encoding, 'replace')
190+
line = src.split('\n')[lineno-1]
191+
self.assertEqual(cm.exception.text.rstrip('\n'), line)
187192

188193
check('def fact(x):\n\treturn x!\n', 2, 10)
189194
check('1 +\n', 1, 4)
190195
check('def spam():\n print(1)\n print(2)', 3, 10)
191196
check('Python = "Python" +', 1, 20)
192197
check('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', 1, 20)
198+
check(b'# -*- coding: cp1251 -*-\nPython = "\xcf\xb3\xf2\xee\xed" +',
199+
2, 19, encoding='cp1251')
200+
check(b'Python = "\xcf\xb3\xf2\xee\xed" +', 1, 18)
193201
check('x = "a', 1, 7)
194202
check('lambda x: x = 2', 1, 1)
195203

@@ -205,6 +213,10 @@ def check(src, lineno, offset):
205213
check('0010 + 2', 1, 4)
206214
check('x = 32e-+4', 1, 8)
207215
check('x = 0o9', 1, 6)
216+
check('\u03b1 = 0xI', 1, 6)
217+
check(b'\xce\xb1 = 0xI', 1, 6)
218+
check(b'# -*- coding: iso8859-7 -*-\n\xe1 = 0xI', 2, 6,
219+
encoding='iso8859-7')
208220

209221
# Errors thrown by symtable.c
210222
check('x = [(yield i) for i in range(3)]', 1, 5)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Syntax errors raised in the tokenizer now always set correct "text" and
2+
"offset" attributes.

Parser/tokenizer.c

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11

22
/* Tokenizer implementation */
33

4+
#define PY_SSIZE_T_CLEAN
45
#include "Python.h"
56

67
#include <ctype.h>
@@ -1034,17 +1035,44 @@ tok_backup(struct tok_state *tok, int c)
10341035
static int
10351036
syntaxerror(struct tok_state *tok, const char *format, ...)
10361037
{
1038+
PyObject *errmsg, *errtext, *args;
10371039
va_list vargs;
10381040
#ifdef HAVE_STDARG_PROTOTYPES
10391041
va_start(vargs, format);
10401042
#else
10411043
va_start(vargs);
10421044
#endif
1043-
PyErr_FormatV(PyExc_SyntaxError, format, vargs);
1045+
errmsg = PyUnicode_FromFormatV(format, vargs);
10441046
va_end(vargs);
1045-
PyErr_SyntaxLocationObject(tok->filename,
1046-
tok->lineno,
1047-
(int)(tok->cur - tok->line_start));
1047+
if (!errmsg) {
1048+
goto error;
1049+
}
1050+
1051+
errtext = PyUnicode_DecodeUTF8(tok->line_start, tok->cur - tok->line_start,
1052+
"replace");
1053+
if (!errtext) {
1054+
goto error;
1055+
}
1056+
int offset = (int)PyUnicode_GET_LENGTH(errtext);
1057+
Py_ssize_t line_len = strcspn(tok->line_start, "\n");
1058+
if (line_len != tok->cur - tok->line_start) {
1059+
Py_DECREF(errtext);
1060+
errtext = PyUnicode_DecodeUTF8(tok->line_start, line_len,
1061+
"replace");
1062+
}
1063+
if (!errtext) {
1064+
goto error;
1065+
}
1066+
1067+
args = Py_BuildValue("(O(OiiN))", errmsg,
1068+
tok->filename, tok->lineno, offset, errtext);
1069+
if (args) {
1070+
PyErr_SetObject(PyExc_SyntaxError, args);
1071+
Py_DECREF(args);
1072+
}
1073+
1074+
error:
1075+
Py_XDECREF(errmsg);
10481076
tok->done = E_ERROR;
10491077
return ERRORTOKEN;
10501078
}

0 commit comments

Comments
 (0)