Skip to content

Commit 82f7429

Browse files
committed
bpo-45811: Improve error message when source code contains invisible control characters
1 parent 546cefc commit 82f7429

File tree

3 files changed

+11
-0
lines changed

3 files changed

+11
-0
lines changed

Lib/test/test_syntax.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1528,6 +1528,9 @@ def test_error_parenthesis(self):
15281528
for paren in ")]}":
15291529
self._check_error(paren + "1 + 2", f"unmatched '\\{paren}'")
15301530

1531+
def test_invisible_characters(self):
1532+
self._check_error('print\x17("Hello")', "invalid non-printable character")
1533+
15311534
def test_match_call_does_not_raise_syntax_error(self):
15321535
code = """
15331536
def match(x):
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Improve the tokenizer errors when encountering invisible control characters
2+
in the parser. Patch by Pablo Galindo.

Parser/tokenizer.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2045,6 +2045,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
20452045
break;
20462046
}
20472047

2048+
if (!Py_UNICODE_ISPRINTABLE(c)) {
2049+
char hex[9];
2050+
(void)PyOS_snprintf(hex, sizeof(hex), "%04X", c);
2051+
return syntaxerror(tok, "invalid non-printable character U+%s", hex);
2052+
}
2053+
20482054
/* Punctuation character */
20492055
*p_start = tok->start;
20502056
*p_end = tok->cur;

0 commit comments

Comments
 (0)