From 6a6ee5a6df6bcaffbb004db44bc74a438416b97e Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Tue, 6 Dec 2022 23:09:56 +0000 Subject: [PATCH] gh-100050: Fix an assertion error when raising unclosed parenthesis errors in the tokenizer (GH-100065) (cherry picked from commit 97e7004cfe48305bcd642c653b406dc7470e196d) Co-authored-by: Pablo Galindo Salgado Automerge-Triggered-By: GH:pablogsal --- Lib/test/test_syntax.py | 16 ++++++++++++++++ ...022-12-06-22-24-01.gh-issue-100050.lcrPqQ.rst | 2 ++ Parser/pegen_errors.c | 4 ++++ 3 files changed, 22 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-12-06-22-24-01.gh-issue-100050.lcrPqQ.rst diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index 400092ee2c896f..42d36e0b9d81bb 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -2095,6 +2095,22 @@ def test_error_parenthesis(self): for paren in ")]}": self._check_error(paren + "1 + 2", f"unmatched '\\{paren}'") + # Some more complex examples: + code = """\ +func( + a=["unclosed], # Need a quote in this comment: " + b=2, +) +""" + self._check_error(code, "parenthesis '\\)' does not match opening parenthesis '\\['") + + def test_error_string_literal(self): + + self._check_error("'blech", "unterminated string literal") + self._check_error('"blech', "unterminated string literal") + self._check_error("'''blech", "unterminated triple-quoted string literal") + self._check_error('"""blech', "unterminated triple-quoted string literal") + def test_invisible_characters(self): self._check_error('print\x17("Hello")', "invalid non-printable character") diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-12-06-22-24-01.gh-issue-100050.lcrPqQ.rst b/Misc/NEWS.d/next/Core and Builtins/2022-12-06-22-24-01.gh-issue-100050.lcrPqQ.rst new file mode 100644 index 00000000000000..8e7c72d804f82f --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-12-06-22-24-01.gh-issue-100050.lcrPqQ.rst @@ -0,0 +1,2 @@ +Honor existing errors obtained when searching for mismatching parentheses in +the tokenizer. Patch by Pablo Galindo diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c index a0f4b9809e21a7..3d8cccb0a9749c 100644 --- a/Parser/pegen_errors.c +++ b/Parser/pegen_errors.c @@ -170,6 +170,10 @@ _PyPegen_tokenize_full_source_to_check_for_errors(Parser *p) { const char *end; switch (_PyTokenizer_Get(p->tok, &start, &end)) { case ERRORTOKEN: + if (PyErr_Occurred()) { + ret = -1; + goto exit; + } if (p->tok->level != 0) { int error_lineno = p->tok->parenlinenostack[p->tok->level-1]; if (current_err_line > error_lineno) {