Skip to content

Commit bf3a0a1

Browse files
pablogsal and ambv authored
gh-132449: Improve syntax error messages for keywords with typos (#132450)
Signed-off-by: Pablo Galindo <[email protected]>
Co-authored-by: Łukasz Langa <[email protected]>
1 parent 3cfab44 commit bf3a0a1

File tree

12 files changed: 818 additions and 450 deletions

Grammar/python.gram

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,12 +94,18 @@ func_type[mod_ty]: '(' a=[type_expressions] ')' '->' b=expression NEWLINE* ENDMA
9494
# GENERAL STATEMENTS
9595
# ==================
9696

97-
statements[asdl_stmt_seq*]: a=statement+ { (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a) }
97+
statements[asdl_stmt_seq*]: a=statement+ { _PyPegen_register_stmts(p, (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a)) }
9898

99-
statement[asdl_stmt_seq*]: a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) } | a[asdl_stmt_seq*]=simple_stmts { a }
99+
statement[asdl_stmt_seq*]:
100+
| a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) }
101+
| a[asdl_stmt_seq*]=simple_stmts { a }
102+
103+
single_compound_stmt[asdl_stmt_seq*]:
104+
| a=compound_stmt {
105+
_PyPegen_register_stmts(p, (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a)) }
100106

101107
statement_newline[asdl_stmt_seq*]:
102-
| a=compound_stmt NEWLINE { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) }
108+
| a=single_compound_stmt NEWLINE { a }
103109
| simple_stmts
104110
| NEWLINE { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, CHECK(stmt_ty, _PyAST_Pass(EXTRA))) }
105111
| ENDMARKER { _PyPegen_interactive_exit(p) }

Include/cpython/pyerrors.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ typedef struct {
3030
PyObject *end_offset;
3131
PyObject *text;
3232
PyObject *print_file_and_line;
33+
PyObject *metadata;
3334
} PySyntaxErrorObject;
3435

3536
typedef struct {

Lib/codeop.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
PyCF_ONLY_AST = 0x400
4848
PyCF_ALLOW_INCOMPLETE_INPUT = 0x4000
4949

50-
def _maybe_compile(compiler, source, filename, symbol):
50+
def _maybe_compile(compiler, source, filename, symbol, flags):
5151
# Check for source consisting of only blank lines and comments.
5252
for line in source.split("\n"):
5353
line = line.strip()
@@ -61,10 +61,10 @@ def _maybe_compile(compiler, source, filename, symbol):
6161
with warnings.catch_warnings():
6262
warnings.simplefilter("ignore", (SyntaxWarning, DeprecationWarning))
6363
try:
64-
compiler(source, filename, symbol)
64+
compiler(source, filename, symbol, flags=flags)
6565
except SyntaxError: # Let other compile() errors propagate.
6666
try:
67-
compiler(source + "\n", filename, symbol)
67+
compiler(source + "\n", filename, symbol, flags=flags)
6868
return None
6969
except _IncompleteInputError as e:
7070
return None
@@ -74,14 +74,13 @@ def _maybe_compile(compiler, source, filename, symbol):
7474

7575
return compiler(source, filename, symbol, incomplete_input=False)
7676

77-
def _compile(source, filename, symbol, incomplete_input=True):
78-
flags = 0
77+
def _compile(source, filename, symbol, incomplete_input=True, *, flags=0):
7978
if incomplete_input:
8079
flags |= PyCF_ALLOW_INCOMPLETE_INPUT
8180
flags |= PyCF_DONT_IMPLY_DEDENT
8281
return compile(source, filename, symbol, flags)
8382

84-
def compile_command(source, filename="<input>", symbol="single"):
83+
def compile_command(source, filename="<input>", symbol="single", flags=0):
8584
r"""Compile a command and determine whether it is incomplete.
8685
8786
Arguments:
@@ -100,7 +99,7 @@ def compile_command(source, filename="<input>", symbol="single"):
10099
syntax error (OverflowError and ValueError can be produced by
101100
malformed literals).
102101
"""
103-
return _maybe_compile(_compile, source, filename, symbol)
102+
return _maybe_compile(_compile, source, filename, symbol, flags)
104103

105104
class Compile:
106105
"""Instances of this class behave much like the built-in compile
@@ -152,4 +151,4 @@ def __call__(self, source, filename="<input>", symbol="single"):
152151
syntax error (OverflowError and ValueError can be produced by
153152
malformed literals).
154153
"""
155-
return _maybe_compile(self.compiler, source, filename, symbol)
154+
return _maybe_compile(self.compiler, source, filename, symbol, flags=self.compiler.flags)

Lib/test/test_exceptions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2462,7 +2462,7 @@ def test_incorrect_constructor(self):
24622462
args = ("bad.py", 1, 2)
24632463
self.assertRaises(TypeError, SyntaxError, "bad bad", args)
24642464

2465-
args = ("bad.py", 1, 2, 4, 5, 6, 7)
2465+
args = ("bad.py", 1, 2, 4, 5, 6, 7, 8)
24662466
self.assertRaises(TypeError, SyntaxError, "bad bad", args)
24672467

24682468
args = ("bad.py", 1, 2, "abcdefg", 1)

Lib/test/test_syntax.py

Lines changed: 125 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1189,7 +1189,7 @@
11891189
>>> with block ad something:
11901190
... pass
11911191
Traceback (most recent call last):
1192-
SyntaxError: invalid syntax
1192+
SyntaxError: invalid syntax. Did you mean 'and'?
11931193
11941194
>>> try
11951195
... pass
@@ -1713,6 +1713,130 @@
17131713
Traceback (most recent call last):
17141714
SyntaxError: expected one or more exception types
17151715
1716+
Check custom exceptions for keywords with typos
1717+
1718+
>>> fur a in b:
1719+
... pass
1720+
Traceback (most recent call last):
1721+
SyntaxError: invalid syntax. Did you mean 'for'?
1722+
1723+
>>> for a in b:
1724+
... pass
1725+
... elso:
1726+
... pass
1727+
Traceback (most recent call last):
1728+
SyntaxError: invalid syntax. Did you mean 'else'?
1729+
1730+
>>> whille True:
1731+
... pass
1732+
Traceback (most recent call last):
1733+
SyntaxError: invalid syntax. Did you mean 'while'?
1734+
1735+
>>> while True:
1736+
... pass
1737+
... elso:
1738+
... pass
1739+
Traceback (most recent call last):
1740+
SyntaxError: invalid syntax. Did you mean 'else'?
1741+
1742+
>>> iff x > 5:
1743+
... pass
1744+
Traceback (most recent call last):
1745+
SyntaxError: invalid syntax. Did you mean 'if'?
1746+
1747+
>>> if x:
1748+
... pass
1749+
... elseif y:
1750+
... pass
1751+
Traceback (most recent call last):
1752+
SyntaxError: invalid syntax. Did you mean 'elif'?
1753+
1754+
>>> if x:
1755+
... pass
1756+
... elif y:
1757+
... pass
1758+
... elso:
1759+
... pass
1760+
Traceback (most recent call last):
1761+
SyntaxError: invalid syntax. Did you mean 'else'?
1762+
1763+
>>> tyo:
1764+
... pass
1765+
... except y:
1766+
... pass
1767+
Traceback (most recent call last):
1768+
SyntaxError: invalid syntax. Did you mean 'try'?
1769+
1770+
>>> classe MyClass:
1771+
... pass
1772+
Traceback (most recent call last):
1773+
SyntaxError: invalid syntax. Did you mean 'class'?
1774+
1775+
>>> impor math
1776+
Traceback (most recent call last):
1777+
SyntaxError: invalid syntax. Did you mean 'import'?
1778+
1779+
>>> form x import y
1780+
Traceback (most recent call last):
1781+
SyntaxError: invalid syntax. Did you mean 'from'?
1782+
1783+
>>> defn calculate_sum(a, b):
1784+
... return a + b
1785+
Traceback (most recent call last):
1786+
SyntaxError: invalid syntax. Did you mean 'def'?
1787+
1788+
>>> def foo():
1789+
... returm result
1790+
Traceback (most recent call last):
1791+
SyntaxError: invalid syntax. Did you mean 'return'?
1792+
1793+
>>> lamda x: x ** 2
1794+
Traceback (most recent call last):
1795+
SyntaxError: invalid syntax. Did you mean 'lambda'?
1796+
1797+
>>> def foo():
1798+
... yeld i
1799+
Traceback (most recent call last):
1800+
SyntaxError: invalid syntax. Did you mean 'yield'?
1801+
1802+
>>> def foo():
1803+
... globel counter
1804+
Traceback (most recent call last):
1805+
SyntaxError: invalid syntax. Did you mean 'global'?
1806+
1807+
>>> frum math import sqrt
1808+
Traceback (most recent call last):
1809+
SyntaxError: invalid syntax. Did you mean 'from'?
1810+
1811+
>>> asynch def fetch_data():
1812+
... pass
1813+
Traceback (most recent call last):
1814+
SyntaxError: invalid syntax. Did you mean 'async'?
1815+
1816+
>>> async def foo():
1817+
... awaid fetch_data()
1818+
Traceback (most recent call last):
1819+
SyntaxError: invalid syntax. Did you mean 'await'?
1820+
1821+
>>> raisee ValueError("Error")
1822+
Traceback (most recent call last):
1823+
SyntaxError: invalid syntax. Did you mean 'raise'?
1824+
1825+
>>> [
1826+
... x for x
1827+
... in range(3)
1828+
... of x
1829+
... ]
1830+
Traceback (most recent call last):
1831+
SyntaxError: invalid syntax. Did you mean 'if'?
1832+
1833+
>>> [
1834+
... 123 fur x
1835+
... in range(3)
1836+
... if x
1837+
... ]
1838+
Traceback (most recent call last):
1839+
SyntaxError: invalid syntax. Did you mean 'for'?
17161840
17171841
>>> f(a=23, a=234)
17181842
Traceback (most recent call last):

Lib/traceback.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@
66
import sys
77
import textwrap
88
import warnings
9+
import codeop
10+
import keyword
11+
import tokenize
12+
import io
913
from contextlib import suppress
1014
import _colorize
1115
from _colorize import ANSIColors
@@ -1090,6 +1094,7 @@ def __init__(self, exc_type, exc_value, exc_traceback, *, limit=None,
10901094
self.end_offset = exc_value.end_offset
10911095
self.msg = exc_value.msg
10921096
self._is_syntax_error = True
1097+
self._exc_metadata = getattr(exc_value, "_metadata", None)
10931098
elif exc_type and issubclass(exc_type, ImportError) and \
10941099
getattr(exc_value, "name_from", None) is not None:
10951100
wrong_name = getattr(exc_value, "name_from", None)
@@ -1273,6 +1278,98 @@ def format_exception_only(self, *, show_group=False, _depth=0, **kwargs):
12731278
for ex in self.exceptions:
12741279
yield from ex.format_exception_only(show_group=show_group, _depth=_depth+1, colorize=colorize)
12751280

1281+
def _find_keyword_typos(self):
1282+
assert self._is_syntax_error
1283+
try:
1284+
import _suggestions
1285+
except ImportError:
1286+
_suggestions = None
1287+
1288+
# Only try to find keyword typos if there is no custom message
1289+
if self.msg != "invalid syntax" and "Perhaps you forgot a comma" not in self.msg:
1290+
return
1291+
1292+
if not self._exc_metadata:
1293+
return
1294+
1295+
line, offset, source = self._exc_metadata
1296+
end_line = int(self.lineno) if self.lineno is not None else 0
1297+
lines = None
1298+
from_filename = False
1299+
1300+
if source is None:
1301+
if self.filename:
1302+
try:
1303+
with open(self.filename) as f:
1304+
lines = f.read().splitlines()
1305+
except Exception:
1306+
line, end_line, offset = 0,1,0
1307+
else:
1308+
from_filename = True
1309+
lines = lines if lines is not None else self.text.splitlines()
1310+
else:
1311+
lines = source.splitlines()
1312+
1313+
error_code = lines[line -1 if line > 0 else 0:end_line]
1314+
error_code[0] = error_code[0][offset:]
1315+
error_code = textwrap.dedent('\n'.join(error_code))
1316+
1317+
# Do not continue if the source is too large
1318+
if len(error_code) > 1024:
1319+
return
1320+
1321+
error_lines = error_code.splitlines()
1322+
tokens = tokenize.generate_tokens(io.StringIO(error_code).readline)
1323+
tokens_left_to_process = 10
1324+
import difflib
1325+
for token in tokens:
1326+
start, end = token.start, token.end
1327+
if token.type != tokenize.NAME:
1328+
continue
1329+
# Only consider NAME tokens on the same line as the error
1330+
if from_filename and token.start[0]+line != end_line+1:
1331+
continue
1332+
wrong_name = token.string
1333+
if wrong_name in keyword.kwlist:
1334+
continue
1335+
1336+
# Limit the number of valid tokens to consider to not spend
1337+
# too much time in this function
1338+
tokens_left_to_process -= 1
1339+
if tokens_left_to_process < 0:
1340+
break
1341+
# Limit the number of possible matches to try
1342+
matches = difflib.get_close_matches(wrong_name, keyword.kwlist, n=3)
1343+
if not matches and _suggestions is not None:
1344+
suggestion = _suggestions._generate_suggestions(keyword.kwlist, wrong_name)
1345+
matches = [suggestion] if suggestion is not None else matches
1346+
for suggestion in matches:
1347+
if not suggestion or suggestion == wrong_name:
1348+
continue
1349+
# Try to replace the token with the keyword
1350+
the_lines = error_lines.copy()
1351+
the_line = the_lines[start[0] - 1][:]
1352+
chars = list(the_line)
1353+
chars[token.start[1]:token.end[1]] = suggestion
1354+
the_lines[start[0] - 1] = ''.join(chars)
1355+
code = '\n'.join(the_lines)
1356+
1357+
# Check if it works
1358+
try:
1359+
codeop.compile_command(code, symbol="exec", flags=codeop.PyCF_ONLY_AST)
1360+
except SyntaxError:
1361+
continue
1362+
1363+
# Keep token.line but handle offsets correctly
1364+
self.text = token.line
1365+
self.offset = token.start[1] + 1
1366+
self.end_offset = token.end[1] + 1
1367+
self.lineno = start[0]
1368+
self.end_lineno = end[0]
1369+
self.msg = f"invalid syntax. Did you mean '{suggestion}'?"
1370+
return
1371+
1372+
12761373
def _format_syntax_error(self, stype, **kwargs):
12771374
"""Format SyntaxError exceptions (internal helper)."""
12781375
# Show exactly where the problem was found.
@@ -1299,6 +1396,9 @@ def _format_syntax_error(self, stype, **kwargs):
12991396
# text = " foo\n"
13001397
# rtext = " foo"
13011398
# ltext = "foo"
1399+
with suppress(Exception):
1400+
self._find_keyword_typos()
1401+
text = self.text
13021402
rtext = text.rstrip('\n')
13031403
ltext = rtext.lstrip(' \n\f')
13041404
spaces = len(rtext) - len(ltext)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Syntax errors that look like misspellings of Python keywords now provide a
2+
helpful fix suggestion for the typo. Contributed by Pablo Galindo Salgado.

0 commit comments

Comments
 (0)