Skip to content

Commit d49c99f

Browse files
gh-94360: Fix a tokenizer crash when reading encoded files with syntax errors from stdin (GH-94386)
* gh-94360: Fix a tokenizer crash when reading encoded files with syntax errors from stdin Signed-off-by: Pablo Galindo <[email protected]> * nitty nit Co-authored-by: Łukasz Langa <[email protected]> (cherry picked from commit 36fcde6) Co-authored-by: Pablo Galindo Salgado <[email protected]>
1 parent 9bd97a2 commit d49c99f

File tree

3 files changed

+15
-5
lines changed

3 files changed

+15
-5
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fixed a tokenizer crash when reading encoded files with syntax errors from
2+
``stdin`` with non-UTF-8 encoded text. Patch by Pablo Galindo.

Parser/pegen_errors.c

+4-4
Original file line numberDiff line numberDiff line change
@@ -259,15 +259,15 @@ get_error_line_from_tokenizer_buffers(Parser *p, Py_ssize_t lineno)
259259
const char* buf_end = p->tok->fp_interactive ? p->tok->interactive_src_end : p->tok->inp;
260260

261261
for (int i = 0; i < relative_lineno - 1; i++) {
262-
char *new_line = strchr(cur_line, '\n') + 1;
262+
char *new_line = strchr(cur_line, '\n');
263263
// The assert is here for debug builds but the conditional that
264264
// follows is there so in release builds we do not crash at the cost
265265
// to report a potentially wrong line.
266-
assert(new_line != NULL && new_line <= buf_end);
267-
if (new_line == NULL || new_line > buf_end) {
266+
assert(new_line != NULL && new_line + 1 < buf_end);
267+
if (new_line == NULL || new_line + 1 > buf_end) {
268268
break;
269269
}
270-
cur_line = new_line;
270+
cur_line = new_line + 1;
271271
}
272272

273273
char *next_newline;

Parser/tokenizer.c

+9-1
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,10 @@ tok_concatenate_interactive_new_line(struct tok_state *tok, const char *line) {
305305

306306
Py_ssize_t current_size = tok->interactive_src_end - tok->interactive_src_start;
307307
Py_ssize_t line_size = strlen(line);
308+
char last_char = line[line_size > 0 ? line_size - 1 : line_size];
309+
if (last_char != '\n') {
310+
line_size += 1;
311+
}
308312
char* new_str = tok->interactive_src_start;
309313

310314
new_str = PyMem_Realloc(new_str, current_size + line_size + 1);
@@ -318,7 +322,11 @@ tok_concatenate_interactive_new_line(struct tok_state *tok, const char *line) {
318322
return -1;
319323
}
320324
strcpy(new_str + current_size, line);
321-
325+
if (last_char != '\n') {
326+
/* Last line does not end in \n, fake one */
327+
new_str[current_size + line_size - 1] = '\n';
328+
new_str[current_size + line_size] = '\0';
329+
}
322330
tok->interactive_src_start = new_str;
323331
tok->interactive_src_end = new_str + current_size + line_size;
324332
return 0;

0 commit comments

Comments
 (0)