Skip to content

Commit 8d1d36b

Browse files
authored
[3.13] gh-116042: Fix location for SyntaxErrors of invalid escapes in the tokenizer (GH-116049) (#130066)
(cherry picked from commit 56eda25) (cherry picked from commit 369704b)
1 parent 4c2a59b commit 8d1d36b

File tree

5 files changed: +82 -18 lines changed

Lib/test/test_cmd_line_script.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -660,7 +660,7 @@ def test_syntaxerror_invalid_escape_sequence_multi_line(self):
660660
self.assertEqual(
661661
stderr.splitlines()[-3:],
662662
[ b' foo = """\\q"""',
663-
b' ^^^^^^^^',
663+
b' ^^',
664664
b'SyntaxError: invalid escape sequence \'\\q\''
665665
],
666666
)

Lib/test/test_string_literals.py

+31-8
Original file line number | Diff line number | Diff line change
@@ -118,7 +118,7 @@ def test_eval_str_invalid_escape(self):
118118
self.assertEqual(len(w), 1)
119119
self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
120120
self.assertEqual(w[0].filename, '<string>')
121-
self.assertEqual(w[0].lineno, 1)
121+
self.assertEqual(w[0].lineno, 2)
122122

123123
with warnings.catch_warnings(record=True) as w:
124124
warnings.simplefilter('error', category=SyntaxWarning)
@@ -128,7 +128,7 @@ def test_eval_str_invalid_escape(self):
128128
self.assertEqual(w, [])
129129
self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
130130
self.assertEqual(exc.filename, '<string>')
131-
self.assertEqual(exc.lineno, 1)
131+
self.assertEqual(exc.lineno, 2)
132132
self.assertEqual(exc.offset, 1)
133133

134134
# Check that the warning is raised only once if there are syntax errors
@@ -155,7 +155,7 @@ def test_eval_str_invalid_octal_escape(self):
155155
self.assertEqual(str(w[0].message),
156156
r"invalid octal escape sequence '\407'")
157157
self.assertEqual(w[0].filename, '<string>')
158-
self.assertEqual(w[0].lineno, 1)
158+
self.assertEqual(w[0].lineno, 2)
159159

160160
with warnings.catch_warnings(record=True) as w:
161161
warnings.simplefilter('error', category=SyntaxWarning)
@@ -165,9 +165,32 @@ def test_eval_str_invalid_octal_escape(self):
165165
self.assertEqual(w, [])
166166
self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
167167
self.assertEqual(exc.filename, '<string>')
168-
self.assertEqual(exc.lineno, 1)
168+
self.assertEqual(exc.lineno, 2)
169169
self.assertEqual(exc.offset, 1)
170170

171+
def test_invalid_escape_locations_with_offset(self):
172+
with warnings.catch_warnings(record=True) as w:
173+
warnings.simplefilter('error', category=SyntaxWarning)
174+
with self.assertRaises(SyntaxError) as cm:
175+
eval("\"'''''''''''''''''''''invalid\ Escape\"")
176+
exc = cm.exception
177+
self.assertEqual(w, [])
178+
self.assertEqual(exc.msg, r"invalid escape sequence '\ '")
179+
self.assertEqual(exc.filename, '<string>')
180+
self.assertEqual(exc.lineno, 1)
181+
self.assertEqual(exc.offset, 30)
182+
183+
with warnings.catch_warnings(record=True) as w:
184+
warnings.simplefilter('error', category=SyntaxWarning)
185+
with self.assertRaises(SyntaxError) as cm:
186+
eval("\"''Incorrect \ logic?\"")
187+
exc = cm.exception
188+
self.assertEqual(w, [])
189+
self.assertEqual(exc.msg, r"invalid escape sequence '\ '")
190+
self.assertEqual(exc.filename, '<string>')
191+
self.assertEqual(exc.lineno, 1)
192+
self.assertEqual(exc.offset, 14)
193+
171194
def test_eval_str_raw(self):
172195
self.assertEqual(eval(""" r'x' """), 'x')
173196
self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
@@ -207,7 +230,7 @@ def test_eval_bytes_invalid_escape(self):
207230
self.assertEqual(len(w), 1)
208231
self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
209232
self.assertEqual(w[0].filename, '<string>')
210-
self.assertEqual(w[0].lineno, 1)
233+
self.assertEqual(w[0].lineno, 2)
211234

212235
with warnings.catch_warnings(record=True) as w:
213236
warnings.simplefilter('error', category=SyntaxWarning)
@@ -217,7 +240,7 @@ def test_eval_bytes_invalid_escape(self):
217240
self.assertEqual(w, [])
218241
self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
219242
self.assertEqual(exc.filename, '<string>')
220-
self.assertEqual(exc.lineno, 1)
243+
self.assertEqual(exc.lineno, 2)
221244

222245
def test_eval_bytes_invalid_octal_escape(self):
223246
for i in range(0o400, 0o1000):
@@ -231,7 +254,7 @@ def test_eval_bytes_invalid_octal_escape(self):
231254
self.assertEqual(str(w[0].message),
232255
r"invalid octal escape sequence '\407'")
233256
self.assertEqual(w[0].filename, '<string>')
234-
self.assertEqual(w[0].lineno, 1)
257+
self.assertEqual(w[0].lineno, 2)
235258

236259
with warnings.catch_warnings(record=True) as w:
237260
warnings.simplefilter('error', category=SyntaxWarning)
@@ -241,7 +264,7 @@ def test_eval_bytes_invalid_octal_escape(self):
241264
self.assertEqual(w, [])
242265
self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
243266
self.assertEqual(exc.filename, '<string>')
244-
self.assertEqual(exc.lineno, 1)
267+
self.assertEqual(exc.lineno, 2)
245268

246269
def test_eval_bytes_raw(self):
247270
self.assertEqual(eval(""" br'x' """), b'x')
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
Fix location for SyntaxErrors of invalid escapes in the tokenizer. Patch by
2+
Pablo Galindo.

Parser/pegen_errors.c

+2-2
Original file line number | Diff line number | Diff line change
@@ -352,8 +352,8 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
352352
assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
353353

354354
if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
355-
Py_ssize_t size = p->tok->inp - p->tok->buf;
356-
error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
355+
Py_ssize_t size = p->tok->inp - p->tok->line_start;
356+
error_line = PyUnicode_DecodeUTF8(p->tok->line_start, size, "replace");
357357
}
358358
else if (p->tok->fp == NULL || p->tok->fp == stdin) {
359359
error_line = get_error_line_from_tokenizer_buffers(p, lineno);

Parser/string_parser.c

+46-7
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
//// STRING HANDLING FUNCTIONS ////
1212

1313
static int
14-
warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t)
14+
warn_invalid_escape_sequence(Parser *p, const char* buffer, const char *first_invalid_escape, Token *t)
1515
{
1616
if (p->call_invalid_rules) {
1717
// Do not report warnings if we are in the second pass of the parser
@@ -41,8 +41,46 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token
4141
else {
4242
category = PyExc_DeprecationWarning;
4343
}
44+
45+
// Calculate the lineno and the col_offset of the invalid escape sequence
46+
const char *start = buffer;
47+
const char *end = first_invalid_escape;
48+
int lineno = t->lineno;
49+
int col_offset = t->col_offset;
50+
while (start < end) {
51+
if (*start == '\n') {
52+
lineno++;
53+
col_offset = 0;
54+
}
55+
else {
56+
col_offset++;
57+
}
58+
start++;
59+
}
60+
61+
// Count the number of quotes in the token
62+
char first_quote = 0;
63+
if (lineno == t->lineno) {
64+
int quote_count = 0;
65+
char* tok = PyBytes_AsString(t->bytes);
66+
for (int i = 0; i < PyBytes_Size(t->bytes); i++) {
67+
if (tok[i] == '\'' || tok[i] == '\"') {
68+
if (quote_count == 0) {
69+
first_quote = tok[i];
70+
}
71+
if (tok[i] == first_quote) {
72+
quote_count++;
73+
}
74+
} else {
75+
break;
76+
}
77+
}
78+
79+
col_offset += quote_count;
80+
}
81+
4482
if (PyErr_WarnExplicitObject(category, msg, p->tok->filename,
45-
t->lineno, NULL, NULL) < 0) {
83+
lineno, NULL, NULL) < 0) {
4684
if (PyErr_ExceptionMatches(category)) {
4785
/* Replace the Syntax/DeprecationWarning exception with a SyntaxError
4886
to get a more accurate error report */
@@ -53,11 +91,12 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token
5391
error location, if p->known_err_token is not set. */
5492
p->known_err_token = t;
5593
if (octal) {
56-
RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'",
57-
first_invalid_escape);
94+
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1,
95+
"invalid octal escape sequence '\\%.3s'", first_invalid_escape);
5896
}
5997
else {
60-
RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c);
98+
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1,
99+
"invalid escape sequence '\\%c'", c);
61100
}
62101
}
63102
Py_DECREF(msg);
@@ -151,7 +190,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
151190
// HACK: later we can simply pass the line no, since we don't preserve the tokens
152191
// when we are decoding the string but we preserve the line numbers.
153192
if (v != NULL && first_invalid_escape != NULL && t != NULL) {
154-
if (warn_invalid_escape_sequence(parser, first_invalid_escape, t) < 0) {
193+
if (warn_invalid_escape_sequence(parser, s, first_invalid_escape, t) < 0) {
155194
/* We have not decref u before because first_invalid_escape points
156195
inside u. */
157196
Py_XDECREF(u);
@@ -173,7 +212,7 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
173212
}
174213

175214
if (first_invalid_escape != NULL) {
176-
if (warn_invalid_escape_sequence(p, first_invalid_escape, t) < 0) {
215+
if (warn_invalid_escape_sequence(p, s, first_invalid_escape, t) < 0) {
177216
Py_DECREF(result);
178217
return NULL;
179218
}

0 commit comments

Comments
 (0)