Skip to content

Commit a77aac4

Browse files
authored
bpo-43914: Highlight invalid ranges in SyntaxErrors (#25525)
To improve the user experience understanding what part of the error messages associated with SyntaxErrors is wrong, we can highlight the whole error range and not only place the caret at the first character. In this way: >>> foo(x, z for z in range(10), t, w) File "<stdin>", line 1 foo(x, z for z in range(10), t, w) ^ SyntaxError: Generator expression must be parenthesized becomes >>> foo(x, z for z in range(10), t, w) File "<stdin>", line 1 foo(x, z for z in range(10), t, w) ^^^^^^^^^^^^^^^^^^^^ SyntaxError: Generator expression must be parenthesized
1 parent 91b69b7 commit a77aac4

File tree

17 files changed

+1060
-592
lines changed

17 files changed

+1060
-592
lines changed

Doc/library/exceptions.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,18 @@ The following exceptions are the exceptions that are usually raised.
436436

437437
The source code text involved in the error.
438438

439+
.. attribute:: end_lineno
440+
441+
The line number in the file on which the error ends. This is
442+
1-indexed: the first line in the file has a ``lineno`` of 1.
443+
444+
.. attribute:: end_offset
445+
446+
The column in the end line at which the error ends. This is
447+
1-indexed: the first character in the line has an ``offset`` of 1.
448+
449+
.. versionchanged:: 3.10
450+
Added the :attr:`end_lineno` and :attr:`end_offset` attributes.
439451

440452
.. exception:: IndentationError
441453

Doc/whatsnew/3.10.rst

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,31 @@ These improvements are inspired by previous work in the PyPy interpreter.
171171
(Contributed by Pablo Galindo in :issue:`42864` and Batuhan Taskaya in
172172
:issue:`40176`.)
173173
174+
:exc:`SyntaxError` exceptions raised by the interpreter will now highlight the
175+
full error range of the expression that constitutes the syntax error itself,
176+
instead of just where the problem is detected. In this way, instead of displaying
177+
(before Python 3.10):
178+
179+
.. code-block:: python
180+
181+
>>> foo(x, z for z in range(10), t, w)
182+
File "<stdin>", line 1
183+
foo(x, z for z in range(10), t, w)
184+
^
185+
SyntaxError: Generator expression must be parenthesized
186+
187+
now Python 3.10 will display the exception as:
188+
189+
.. code-block:: python
190+
191+
>>> foo(x, z for z in range(10), t, w)
192+
File "<stdin>", line 1
193+
foo(x, z for z in range(10), t, w)
194+
^^^^^^^^^^^^^^^^^^^^
195+
SyntaxError: Generator expression must be parenthesized
196+
197+
This improvement has been contributed by Pablo Galindo in :issue:`43914`.
198+
174199
A considerable amount of new specialized messages for :exc:`SyntaxError` exceptions
175200
have been incorporated. Some of the most notable ones:
176201

Grammar/python.gram

Lines changed: 27 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -779,32 +779,32 @@ t_atom[expr_ty]:
779779

780780
# From here on, there are rules for invalid syntax with specialised error messages
781781
invalid_arguments:
782-
| args ',' '*' { RAISE_SYNTAX_ERROR("iterable argument unpacking follows keyword argument unpacking") }
783-
| a=expression for_if_clauses ',' [args | expression for_if_clauses] {
784-
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "Generator expression must be parenthesized") }
782+
| a=args ',' '*' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "iterable argument unpacking follows keyword argument unpacking") }
783+
| a=expression b=for_if_clauses ',' [args | expression for_if_clauses] {
784+
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, PyPegen_last_item(b, comprehension_ty)->target, "Generator expression must be parenthesized") }
785785
| a=args for_if_clauses { _PyPegen_nonparen_genexp_in_call(p, a) }
786-
| args ',' a=expression for_if_clauses {
787-
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "Generator expression must be parenthesized") }
786+
| args ',' a=expression b=for_if_clauses {
787+
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, asdl_seq_GET(b, b->size-1)->target, "Generator expression must be parenthesized") }
788788
| a=args ',' args { _PyPegen_arguments_parsing_error(p, a) }
789789
invalid_kwarg:
790-
| expression a='=' {
791-
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
792-
a, "expression cannot contain assignment, perhaps you meant \"==\"?") }
790+
| a=expression b='=' {
791+
RAISE_SYNTAX_ERROR_KNOWN_RANGE(
792+
a, b, "expression cannot contain assignment, perhaps you meant \"==\"?") }
793793

794794
invalid_expression:
795795
# !(NAME STRING) is not matched so we don't show this error with some invalid string prefixes like: kf"dsfsdf"
796796
# Soft keywords need to also be ignored because they can be parsed as NAME NAME
797-
| !(NAME STRING | SOFT_KEYWORD) a=disjunction expression {
798-
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, a->lineno, a->end_col_offset - 1, "invalid syntax. Perhaps you forgot a comma?") }
797+
| !(NAME STRING | SOFT_KEYWORD) a=disjunction b=expression {
798+
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Perhaps you forgot a comma?") }
799799

800800
invalid_named_expression:
801801
| a=expression ':=' expression {
802802
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
803803
a, "cannot use assignment expressions with %s", _PyPegen_get_expr_name(a)) }
804-
| a=NAME b='=' bitwise_or !('='|':='|',') {
805-
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(b, "invalid syntax. Maybe you meant '==' or ':=' instead of '='?") }
804+
| a=NAME '=' b=bitwise_or !('='|':='|',') {
805+
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Maybe you meant '==' or ':=' instead of '='?") }
806806
| !(list|tuple|genexp|'True'|'None'|'False') a=bitwise_or b='=' bitwise_or !('='|':='|',') {
807-
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(b, "cannot assign to %s here. Maybe you meant '==' instead of '='?",
807+
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot assign to %s here. Maybe you meant '==' instead of '='?",
808808
_PyPegen_get_expr_name(a)) }
809809

810810
invalid_assignment:
@@ -841,25 +841,28 @@ invalid_primary:
841841
invalid_comprehension:
842842
| ('[' | '(' | '{') a=starred_expression for_if_clauses {
843843
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "iterable unpacking cannot be used in comprehension") }
844-
| ('[' | '{') a=star_named_expression ',' [star_named_expressions] for_if_clauses {
845-
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "did you forget parentheses around the comprehension target?") }
844+
| ('[' | '{') a=star_named_expression ',' b=star_named_expressions for_if_clauses {
845+
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, PyPegen_last_item(b, expr_ty),
846+
"did you forget parentheses around the comprehension target?") }
847+
| ('[' | '{') a=star_named_expression b=',' for_if_clauses {
848+
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "did you forget parentheses around the comprehension target?") }
846849
invalid_dict_comprehension:
847850
| '{' a='**' bitwise_or for_if_clauses '}' {
848851
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "dict unpacking cannot be used in dict comprehension") }
849852
invalid_parameters:
850-
| param_no_default* invalid_parameters_helper param_no_default {
851-
RAISE_SYNTAX_ERROR("non-default argument follows default argument") }
853+
| param_no_default* invalid_parameters_helper a=param_no_default {
854+
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "non-default argument follows default argument") }
852855
invalid_parameters_helper: # This is only there to avoid type errors
853856
| a=slash_with_default { _PyPegen_singleton_seq(p, a) }
854857
| param_with_default+
855858
invalid_lambda_parameters:
856-
| lambda_param_no_default* invalid_lambda_parameters_helper lambda_param_no_default {
857-
RAISE_SYNTAX_ERROR("non-default argument follows default argument") }
859+
| lambda_param_no_default* invalid_lambda_parameters_helper a=lambda_param_no_default {
860+
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "non-default argument follows default argument") }
858861
invalid_lambda_parameters_helper:
859862
| a=lambda_slash_with_default { _PyPegen_singleton_seq(p, a) }
860863
| lambda_param_with_default+
861864
invalid_star_etc:
862-
| '*' (')' | ',' (')' | '**')) { RAISE_SYNTAX_ERROR("named arguments must follow bare *") }
865+
| a='*' (')' | ',' (')' | '**')) { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "named arguments must follow bare *") }
863866
| '*' ',' TYPE_COMMENT { RAISE_SYNTAX_ERROR("bare * has associated type comment") }
864867
invalid_lambda_star_etc:
865868
| '*' (':' | ',' (':' | '**')) { RAISE_SYNTAX_ERROR("named arguments must follow bare *") }
@@ -897,7 +900,7 @@ invalid_try_stmt:
897900
RAISE_INDENTATION_ERROR("expected an indented block after 'try' statement on line %d", a->lineno) }
898901
invalid_except_stmt:
899902
| 'except' a=expression ',' expressions ['as' NAME ] ':' {
900-
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "exception group must be parenthesized") }
903+
RAISE_SYNTAX_ERROR_STARTING_FROM(a, "exception group must be parenthesized") }
901904
| a='except' expression ['as' NAME ] NEWLINE { RAISE_SYNTAX_ERROR("expected ':'") }
902905
| a='except' NEWLINE { RAISE_SYNTAX_ERROR("expected ':'") }
903906
invalid_finally_stmt:
@@ -942,10 +945,10 @@ invalid_class_def_raw:
942945

943946
invalid_double_starred_kvpairs:
944947
| ','.double_starred_kvpair+ ',' invalid_kvpair
945-
| expression ':' a='*' bitwise_or { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot use a starred expression in a dictionary value") }
948+
| expression ':' a='*' bitwise_or { RAISE_SYNTAX_ERROR_STARTING_FROM(a, "cannot use a starred expression in a dictionary value") }
946949
| expression a=':' &('}'|',') { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "expression expected after dictionary key and ':'") }
947950
invalid_kvpair:
948951
| a=expression !(':') {
949-
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, a->lineno, a->end_col_offset - 1, "':' expected after dictionary key") }
950-
| expression ':' a='*' bitwise_or { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot use a starred expression in a dictionary value") }
952+
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, a->lineno, a->end_col_offset - 1, a->end_lineno, -1, "':' expected after dictionary key") }
953+
| expression ':' a='*' bitwise_or { RAISE_SYNTAX_ERROR_STARTING_FROM(a, "cannot use a starred expression in a dictionary value") }
951954
| expression a=':' {RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "expression expected after dictionary key and ':'") }

Include/cpython/pyerrors.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ typedef struct {
2020
PyObject *filename;
2121
PyObject *lineno;
2222
PyObject *offset;
23+
PyObject *end_lineno;
24+
PyObject *end_offset;
2325
PyObject *text;
2426
PyObject *print_file_and_line;
2527
} PySyntaxErrorObject;
@@ -148,6 +150,13 @@ PyAPI_FUNC(void) PyErr_SyntaxLocationObject(
148150
int lineno,
149151
int col_offset);
150152

153+
PyAPI_FUNC(void) PyErr_RangedSyntaxLocationObject(
154+
PyObject *filename,
155+
int lineno,
156+
int col_offset,
157+
int end_lineno,
158+
int end_col_offset);
159+
151160
PyAPI_FUNC(PyObject *) PyErr_ProgramTextObject(
152161
PyObject *filename,
153162
int lineno);

Include/internal/pycore_symtable.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ typedef struct _symtable_entry {
6262
int ste_comp_iter_expr; /* non-zero if visiting a comprehension range expression */
6363
int ste_lineno; /* first line of block */
6464
int ste_col_offset; /* offset of first line of block */
65+
int ste_end_lineno; /* end line of block */
66+
int ste_end_col_offset; /* end offset of first line of block */
6567
int ste_opt_lineno; /* lineno of last exec or import * */
6668
int ste_opt_col_offset; /* offset of last exec or import * */
6769
struct symtable *ste_table;

Lib/test/test_cmd_line_script.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -601,7 +601,7 @@ def test_syntaxerror_unindented_caret_position(self):
601601
exitcode, stdout, stderr = assert_python_failure(script_name)
602602
text = io.TextIOWrapper(io.BytesIO(stderr), 'ascii').read()
603603
# Confirm that the caret is located under the '=' sign
604-
self.assertIn("\n 1 + 1 = 2\n ^\n", text)
604+
self.assertIn("\n ^^^^^\n", text)
605605

606606
def test_syntaxerror_indented_caret_position(self):
607607
script = textwrap.dedent("""\
@@ -612,8 +612,8 @@ def test_syntaxerror_indented_caret_position(self):
612612
script_name = _make_test_script(script_dir, 'script', script)
613613
exitcode, stdout, stderr = assert_python_failure(script_name)
614614
text = io.TextIOWrapper(io.BytesIO(stderr), 'ascii').read()
615-
# Confirm that the caret is located under the first 1 character
616-
self.assertIn("\n 1 + 1 = 2\n ^\n", text)
615+
# Confirm that the caret starts under the first 1 character
616+
self.assertIn("\n 1 + 1 = 2\n ^^^^^\n", text)
617617

618618
# Try the same with a form feed at the start of the indented line
619619
script = (
@@ -624,7 +624,7 @@ def test_syntaxerror_indented_caret_position(self):
624624
exitcode, stdout, stderr = assert_python_failure(script_name)
625625
text = io.TextIOWrapper(io.BytesIO(stderr), "ascii").read()
626626
self.assertNotIn("\f", text)
627-
self.assertIn("\n 1 + 1 = 2\n ^\n", text)
627+
self.assertIn("\n 1 + 1 = 2\n ^^^^^\n", text)
628628

629629
def test_syntaxerror_multi_line_fstring(self):
630630
script = 'foo = f"""{}\nfoo"""\n'
@@ -650,7 +650,7 @@ def test_syntaxerror_invalid_escape_sequence_multi_line(self):
650650
self.assertEqual(
651651
stderr.splitlines()[-3:],
652652
[ b' foo = """\\q"""',
653-
b' ^',
653+
b' ^^^^^^^^',
654654
b'SyntaxError: invalid escape sequence \\q'
655655
],
656656
)

0 commit comments

Comments
 (0)