Skip to content

Commit 1fb6b9e

Browse files
bpo-44201: Avoid side effects of "invalid_*" rules in the REPL (GH-26298) (GH-26313)
When the parser does a second pass to check for errors, the "invalid_*" rules can have small side effects, because they may advance the parser past the point reached in the first pass. In interactive mode this can make the tokenizer ask for extra tokens, so it shows the prompt again instead of failing immediately. To avoid this, add a new tokenizer mode that is activated in the second pass and stops the tokenizer from asking for new tokens once the interactive line is finished. Since the first pass should already have reached the last line, the second pass should not need to ask for more tokens. (cherry picked from commit bd7476d) Co-authored-by: Pablo Galindo <[email protected]>
1 parent 413df57 commit 1fb6b9e

File tree

5 files changed

+26
-0
lines changed

5 files changed

+26
-0
lines changed

Include/errcode.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ extern "C" {
2828
#define E_DECODE 22 /* Error in decoding into Unicode */
2929
#define E_LINECONT 25 /* Unexpected characters after a line continuation */
3030
#define E_BADSINGLE 27 /* Ill-formed single statement input */
31+
#define E_INTERACT_STOP 28 /* Interactive mode stopped tokenization */
3132

3233
#ifdef __cplusplus
3334
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Avoid side effects of checking for specialized syntax errors in the REPL
2+
that were causing it to ask for extra tokens after a syntax error had been
3+
detected. Patch by Pablo Galindo.

Parser/pegen.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1234,6 +1234,9 @@ reset_parser_state(Parser *p)
12341234
}
12351235
p->mark = 0;
12361236
p->call_invalid_rules = 1;
1237+
// Don't try to get extra tokens in interactive mode when trying to
1238+
// raise specialized errors in the second pass.
1239+
p->tok->interactive_underflow = IUNDERFLOW_STOP;
12371240
}
12381241

12391242
static int

Parser/tokenizer.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ tok_new(void)
8585
tok->async_def = 0;
8686
tok->async_def_indent = 0;
8787
tok->async_def_nl = 0;
88+
tok->interactive_underflow = IUNDERFLOW_NORMAL;
8889

8990
return tok;
9091
}
@@ -845,6 +846,10 @@ tok_underflow_string(struct tok_state *tok) {
845846

846847
static int
847848
tok_underflow_interactive(struct tok_state *tok) {
849+
if (tok->interactive_underflow == IUNDERFLOW_STOP) {
850+
tok->done = E_INTERACT_STOP;
851+
return 1;
852+
}
848853
char *newtok = PyOS_Readline(stdin, stdout, tok->prompt);
849854
if (newtok != NULL) {
850855
char *translated = translate_newlines(newtok, 0, tok);
@@ -1399,6 +1404,10 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
13991404
}
14001405
}
14011406

1407+
if (tok->done == E_INTERACT_STOP) {
1408+
return ENDMARKER;
1409+
}
1410+
14021411
/* Check for EOF and errors now */
14031412
if (c == EOF) {
14041413
if (tok->level) {

Parser/tokenizer.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,14 @@ enum decoding_state {
1919
STATE_NORMAL
2020
};
2121

22+
enum interactive_underflow_t {
23+
/* Normal mode of operation: return a new token when asked in interactive mode */
24+
IUNDERFLOW_NORMAL,
25+
/* Forcefully return ENDMARKER when asked for a new token in interactive mode. This
26+
* can be used to prevent the tokenizer from prompting the user for new tokens */
27+
IUNDERFLOW_STOP,
28+
};
29+
2230
/* Tokenizer state */
2331
struct tok_state {
2432
/* Input state; buf <= cur <= inp <= end */
@@ -74,6 +82,8 @@ struct tok_state {
7482
int async_def_indent; /* Indentation level of the outermost 'async def'. */
7583
int async_def_nl; /* =1 if the outermost 'async def' had at least one
7684
NEWLINE token after it. */
85+
/* How to proceed when asked for a new token in interactive mode */
86+
enum interactive_underflow_t interactive_underflow;
7787
};
7888

7989
extern struct tok_state *PyTokenizer_FromString(const char *, int);

0 commit comments

Comments
 (0)