Skip to content

Commit 5e8a9eb

Browse files
authored
[3.12] gh-116042: Fix location for SyntaxErrors of invalid escapes in the tokenizer (GH-116049) (#130065)
(cherry picked from commit 56eda25)
1 parent 719d08c commit 5e8a9eb

File tree

5 files changed

+82
-18
lines changed

5 files changed

+82
-18
lines changed

Lib/test/test_cmd_line_script.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -652,7 +652,7 @@ def test_syntaxerror_invalid_escape_sequence_multi_line(self):
652652
self.assertEqual(
653653
stderr.splitlines()[-3:],
654654
[ b' foo = """\\q"""',
655-
b' ^^^^^^^^',
655+
b' ^^',
656656
b'SyntaxError: invalid escape sequence \'\\q\''
657657
],
658658
)

Lib/test/test_string_literals.py

+31-8
Original file line number | Diff line number | Diff line change
@@ -118,7 +118,7 @@ def test_eval_str_invalid_escape(self):
118118
self.assertEqual(len(w), 1)
119119
self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
120120
self.assertEqual(w[0].filename, '<string>')
121-
self.assertEqual(w[0].lineno, 1)
121+
self.assertEqual(w[0].lineno, 2)
122122

123123
with warnings.catch_warnings(record=True) as w:
124124
warnings.simplefilter('error', category=SyntaxWarning)
@@ -128,7 +128,7 @@ def test_eval_str_invalid_escape(self):
128128
self.assertEqual(w, [])
129129
self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
130130
self.assertEqual(exc.filename, '<string>')
131-
self.assertEqual(exc.lineno, 1)
131+
self.assertEqual(exc.lineno, 2)
132132
self.assertEqual(exc.offset, 1)
133133

134134
# Check that the warning is raised only once if there are syntax errors
@@ -155,7 +155,7 @@ def test_eval_str_invalid_octal_escape(self):
155155
self.assertEqual(str(w[0].message),
156156
r"invalid octal escape sequence '\407'")
157157
self.assertEqual(w[0].filename, '<string>')
158-
self.assertEqual(w[0].lineno, 1)
158+
self.assertEqual(w[0].lineno, 2)
159159

160160
with warnings.catch_warnings(record=True) as w:
161161
warnings.simplefilter('error', category=SyntaxWarning)
@@ -165,9 +165,32 @@ def test_eval_str_invalid_octal_escape(self):
165165
self.assertEqual(w, [])
166166
self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
167167
self.assertEqual(exc.filename, '<string>')
168-
self.assertEqual(exc.lineno, 1)
168+
self.assertEqual(exc.lineno, 2)
169169
self.assertEqual(exc.offset, 1)
170170

171+
def test_invalid_escape_locations_with_offset(self):
172+
with warnings.catch_warnings(record=True) as w:
173+
warnings.simplefilter('error', category=SyntaxWarning)
174+
with self.assertRaises(SyntaxError) as cm:
175+
eval("\"'''''''''''''''''''''invalid\ Escape\"")
176+
exc = cm.exception
177+
self.assertEqual(w, [])
178+
self.assertEqual(exc.msg, r"invalid escape sequence '\ '")
179+
self.assertEqual(exc.filename, '<string>')
180+
self.assertEqual(exc.lineno, 1)
181+
self.assertEqual(exc.offset, 30)
182+
183+
with warnings.catch_warnings(record=True) as w:
184+
warnings.simplefilter('error', category=SyntaxWarning)
185+
with self.assertRaises(SyntaxError) as cm:
186+
eval("\"''Incorrect \ logic?\"")
187+
exc = cm.exception
188+
self.assertEqual(w, [])
189+
self.assertEqual(exc.msg, r"invalid escape sequence '\ '")
190+
self.assertEqual(exc.filename, '<string>')
191+
self.assertEqual(exc.lineno, 1)
192+
self.assertEqual(exc.offset, 14)
193+
171194
def test_eval_str_raw(self):
172195
self.assertEqual(eval(""" r'x' """), 'x')
173196
self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
@@ -207,7 +230,7 @@ def test_eval_bytes_invalid_escape(self):
207230
self.assertEqual(len(w), 1)
208231
self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
209232
self.assertEqual(w[0].filename, '<string>')
210-
self.assertEqual(w[0].lineno, 1)
233+
self.assertEqual(w[0].lineno, 2)
211234

212235
with warnings.catch_warnings(record=True) as w:
213236
warnings.simplefilter('error', category=SyntaxWarning)
@@ -217,7 +240,7 @@ def test_eval_bytes_invalid_escape(self):
217240
self.assertEqual(w, [])
218241
self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
219242
self.assertEqual(exc.filename, '<string>')
220-
self.assertEqual(exc.lineno, 1)
243+
self.assertEqual(exc.lineno, 2)
221244

222245
def test_eval_bytes_invalid_octal_escape(self):
223246
for i in range(0o400, 0o1000):
@@ -231,7 +254,7 @@ def test_eval_bytes_invalid_octal_escape(self):
231254
self.assertEqual(str(w[0].message),
232255
r"invalid octal escape sequence '\407'")
233256
self.assertEqual(w[0].filename, '<string>')
234-
self.assertEqual(w[0].lineno, 1)
257+
self.assertEqual(w[0].lineno, 2)
235258

236259
with warnings.catch_warnings(record=True) as w:
237260
warnings.simplefilter('error', category=SyntaxWarning)
@@ -241,7 +264,7 @@ def test_eval_bytes_invalid_octal_escape(self):
241264
self.assertEqual(w, [])
242265
self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
243266
self.assertEqual(exc.filename, '<string>')
244-
self.assertEqual(exc.lineno, 1)
267+
self.assertEqual(exc.lineno, 2)
245268

246269
def test_eval_bytes_raw(self):
247270
self.assertEqual(eval(""" br'x' """), b'x')
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
Fix location for SyntaxErrors of invalid escapes in the tokenizer. Patch by
2+
Pablo Galindo.

Parser/pegen_errors.c

+2-2
Original file line number | Diff line number | Diff line change
@@ -350,8 +350,8 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
350350
assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
351351

352352
if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
353-
Py_ssize_t size = p->tok->inp - p->tok->buf;
354-
error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
353+
Py_ssize_t size = p->tok->inp - p->tok->line_start;
354+
error_line = PyUnicode_DecodeUTF8(p->tok->line_start, size, "replace");
355355
}
356356
else if (p->tok->fp == NULL || p->tok->fp == stdin) {
357357
error_line = get_error_line_from_tokenizer_buffers(p, lineno);

Parser/string_parser.c

+46-7
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
//// STRING HANDLING FUNCTIONS ////
1010

1111
static int
12-
warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t)
12+
warn_invalid_escape_sequence(Parser *p, const char* buffer, const char *first_invalid_escape, Token *t)
1313
{
1414
if (p->call_invalid_rules) {
1515
// Do not report warnings if we are in the second pass of the parser
@@ -38,8 +38,46 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token
3838
else {
3939
category = PyExc_DeprecationWarning;
4040
}
41+
42+
// Calculate the lineno and the col_offset of the invalid escape sequence
43+
const char *start = buffer;
44+
const char *end = first_invalid_escape;
45+
int lineno = t->lineno;
46+
int col_offset = t->col_offset;
47+
while (start < end) {
48+
if (*start == '\n') {
49+
lineno++;
50+
col_offset = 0;
51+
}
52+
else {
53+
col_offset++;
54+
}
55+
start++;
56+
}
57+
58+
// Count the number of quotes in the token
59+
char first_quote = 0;
60+
if (lineno == t->lineno) {
61+
int quote_count = 0;
62+
char* tok = PyBytes_AsString(t->bytes);
63+
for (int i = 0; i < PyBytes_Size(t->bytes); i++) {
64+
if (tok[i] == '\'' || tok[i] == '\"') {
65+
if (quote_count == 0) {
66+
first_quote = tok[i];
67+
}
68+
if (tok[i] == first_quote) {
69+
quote_count++;
70+
}
71+
} else {
72+
break;
73+
}
74+
}
75+
76+
col_offset += quote_count;
77+
}
78+
4179
if (PyErr_WarnExplicitObject(category, msg, p->tok->filename,
42-
t->lineno, NULL, NULL) < 0) {
80+
lineno, NULL, NULL) < 0) {
4381
if (PyErr_ExceptionMatches(category)) {
4482
/* Replace the Syntax/DeprecationWarning exception with a SyntaxError
4583
to get a more accurate error report */
@@ -50,11 +88,12 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token
5088
error location, if p->known_err_token is not set. */
5189
p->known_err_token = t;
5290
if (octal) {
53-
RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'",
54-
first_invalid_escape);
91+
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1,
92+
"invalid octal escape sequence '\\%.3s'", first_invalid_escape);
5593
}
5694
else {
57-
RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c);
95+
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1,
96+
"invalid escape sequence '\\%c'", c);
5897
}
5998
}
6099
Py_DECREF(msg);
@@ -148,7 +187,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
148187
// HACK: later we can simply pass the line no, since we don't preserve the tokens
149188
// when we are decoding the string but we preserve the line numbers.
150189
if (v != NULL && first_invalid_escape != NULL && t != NULL) {
151-
if (warn_invalid_escape_sequence(parser, first_invalid_escape, t) < 0) {
190+
if (warn_invalid_escape_sequence(parser, s, first_invalid_escape, t) < 0) {
152191
/* We have not decref u before because first_invalid_escape points
153192
inside u. */
154193
Py_XDECREF(u);
@@ -170,7 +209,7 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
170209
}
171210

172211
if (first_invalid_escape != NULL) {
173-
if (warn_invalid_escape_sequence(p, first_invalid_escape, t) < 0) {
212+
if (warn_invalid_escape_sequence(p, s, first_invalid_escape, t) < 0) {
174213
Py_DECREF(result);
175214
return NULL;
176215
}

0 commit comments

Comments
 (0)