Skip to content

Commit 6d65087

Browse files
bpo-40334: Disallow invalid single statements in the new parser (GH-19774)
After parsing is done in single statement mode, the tokenizer buffer has to be checked for additional lines, and a `SyntaxError` must be raised if there are any. Co-authored-by: Pablo Galindo <[email protected]>
1 parent a4dfe8e commit 6d65087

File tree

2 files changed

+51
-1
lines changed

2 files changed

+51
-1
lines changed

Lib/test/test_compile.py

-1
Original file line numberDiff line numberDiff line change
@@ -501,7 +501,6 @@ def test_single_statement(self):
501501
self.compile_single("if x:\n f(x)\nelse:\n g(x)")
502502
self.compile_single("class T:\n pass")
503503

504-
@support.skip_if_new_parser('Pegen does not disallow multiline single stmts')
505504
def test_bad_single_statement(self):
506505
self.assertInvalidSingle('1\n2')
507506
self.assertInvalidSingle('def f(): pass')

Parser/pegen/pegen.c

+51
Original file line numberDiff line numberDiff line change
@@ -911,6 +911,52 @@ _PyPegen_number_token(Parser *p)
911911
p->arena);
912912
}
913913

914+
static int // bool
915+
newline_in_string(Parser *p, const char *cur)
916+
{
917+
for (char c = *cur; cur >= p->tok->buf; c = *--cur) {
918+
if (c == '\'' || c == '"') {
919+
return 1;
920+
}
921+
}
922+
return 0;
923+
}
924+
925+
/* Check that the source for a single input statement really is a single
926+
statement by looking at what is left in the buffer after parsing.
927+
Trailing whitespace and comments are OK. */
928+
static int // bool
929+
bad_single_statement(Parser *p)
930+
{
931+
const char *cur = strchr(p->tok->buf, '\n');
932+
933+
/* Newlines are allowed if preceded by a line continuation character
934+
or if they appear inside a string. */
935+
if (!cur || *(cur - 1) == '\\' || newline_in_string(p, cur)) {
936+
return 0;
937+
}
938+
char c = *cur;
939+
940+
for (;;) {
941+
while (c == ' ' || c == '\t' || c == '\n' || c == '\014') {
942+
c = *++cur;
943+
}
944+
945+
if (!c) {
946+
return 0;
947+
}
948+
949+
if (c != '#') {
950+
return 1;
951+
}
952+
953+
/* Suck up comment. */
954+
while (c && c != '\n') {
955+
c = *++cur;
956+
}
957+
}
958+
}
959+
914960
void
915961
_PyPegen_Parser_Free(Parser *p)
916962
{
@@ -1014,6 +1060,11 @@ _PyPegen_run_parser(Parser *p)
10141060
return NULL;
10151061
}
10161062

1063+
if (p->start_rule == Py_single_input && bad_single_statement(p)) {
1064+
p->tok->done = E_BADSINGLE; // This is not necessary for now, but might be in the future
1065+
return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement");
1066+
}
1067+
10171068
return res;
10181069
}
10191070

0 commit comments

Comments
 (0)