From c120bc2d354ca3d27d0c7a53bf65574ddaabaf3a Mon Sep 17 00:00:00 2001 From: Radislav Chugunov Date: Sat, 29 Apr 2023 16:55:13 +0300 Subject: [PATCH 1/4] Fix use-after-free in tokenizer.c `tok_get_fstring_mode`: ensure that token in the f-string middle is properly decoded, i.e. `tok_nextc` doesn't return EOF with `tok->decoding_erred` set to 1 --- Parser/tokenizer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 8de0572a1fc459..9c2e79e09fa2f9 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -2552,6 +2552,9 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct while (end_quote_size != current_tok->f_string_quote_size) { int c = tok_nextc(tok); if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) { + if (tok->decoding_erred) + return MAKE_TOKEN(ERRORTOKEN); + assert(tok->multi_line_start != NULL); // shift the tok_state's location into // the start of string, and report the error From a36418cd3a7871444340911aac61a5258cabfe28 Mon Sep 17 00:00:00 2001 From: Radislav Chugunov Date: Sun, 30 Apr 2023 04:03:10 +0300 Subject: [PATCH 2/4] added test --- Lib/test/test_tokenize.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 283a7c23609e67..3fb2485dd4dcda 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1470,6 +1470,18 @@ def test_comment_at_the_end_of_the_source_without_newline(self): self.assertEqual(tok_name[tokens[i + 1].exact_type], tok_name[expected_tokens[i]]) self.assertEqual(tok_name[tokens[-1].exact_type], tok_name[token.ENDMARKER]) + def test_invalid_character_in_fstring_middle(self): + # See gh-103824 + script = b'''F""" + \xe5"""''' + + with os_helper.temp_dir() as temp_dir: + filename = os.path.join(temp_dir, "script.py") + with open(filename, 'wb') as file: + file.write(script) + run_test_script(filename) + + class UntokenizeTest(TestCase): def test_bad_input_order(self): From 0ef4f2318247d22077ff1964636c51d979706ee0 Mon Sep 17 00:00:00 2001 From: Radislav Chugunov Date: Sun, 30 Apr 2023 04:49:38 +0300 Subject: [PATCH 3/4] fixed test --- Lib/test/test_tokenize.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 3fb2485dd4dcda..911b53e5816588 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -11,7 +11,7 @@ from test.test_grammar import (VALID_UNDERSCORE_LITERALS, INVALID_UNDERSCORE_LITERALS) from test.support import os_helper -from test.support.script_helper import run_test_script, make_script +from test.support.script_helper import run_test_script, make_script, run_python_until_end import os import token @@ -1479,7 +1479,8 @@ def test_invalid_character_in_fstring_middle(self): filename = os.path.join(temp_dir, "script.py") with open(filename, 'wb') as file: file.write(script) - run_test_script(filename) + rs, _ = run_python_until_end(filename) + self.assertIn(b"SyntaxError", rs.err) class UntokenizeTest(TestCase): From b198bc8ccaaedf91dd032c7f590f54dfafe8ac59 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Mon, 1 May 2023 08:52:15 +0100 Subject: [PATCH 4/4] Update Parser/tokenizer.c --- Parser/tokenizer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 9c2e79e09fa2f9..8fb9be7bfd0182 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -2552,8 +2552,9 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct while (end_quote_size != current_tok->f_string_quote_size) { int c = tok_nextc(tok); if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) { - if (tok->decoding_erred) + if (tok->decoding_erred) { return MAKE_TOKEN(ERRORTOKEN); + } assert(tok->multi_line_start != NULL); // shift the tok_state's location into