Skip to content

Commit f381644

Browse files
gh-99581: Fix a buffer overflow in the tokenizer when copying lines that fill the available buffer (GH-99605)
(cherry picked from commit e13d1d9) Co-authored-by: Pablo Galindo Salgado <[email protected]>
1 parent 152a437 commit f381644

File tree

3 files changed

+25
-1
lines changed

3 files changed

+25
-1
lines changed

Lib/test/test_tokenize.py

+16
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
from unittest import TestCase, mock
1111
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
1212
INVALID_UNDERSCORE_LITERALS)
13+
from test.support import os_helper
14+
from test.support.script_helper import run_test_script, make_script
1315
import os
1416
import token
1517

@@ -2631,5 +2633,19 @@ def fib(n):
26312633
self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
26322634

26332635

2636+
class CTokenizerBufferTests(unittest.TestCase):
2637+
def test_newline_at_the_end_of_buffer(self):
2638+
# See issue 99581: Make sure that if we need to add a new line at the
2639+
# end of the buffer, we have enough space in the buffer, especially when
2640+
# the current line is as long as the buffer space available.
2641+
test_script = f"""\
2642+
#coding: latin-1
2643+
#{"a"*10000}
2644+
#{"a"*10002}"""
2645+
with os_helper.temp_dir() as temp_dir:
2646+
file_name = make_script(temp_dir, 'foo', test_script)
2647+
run_test_script(file_name)
2648+
2649+
26342650
if __name__ == "__main__":
26352651
unittest.main()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fixed a bug that was causing a buffer overflow if the tokenizer copies a
2+
line missing the newline character from a file that is as long as the
3+
available tokenizer buffer. Patch by Pablo Galindo.

Parser/tokenizer.c

+6-1
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,11 @@ tok_readline_recode(struct tok_state *tok) {
396396
error_ret(tok);
397397
goto error;
398398
}
399-
if (!tok_reserve_buf(tok, buflen + 1)) {
399+
// Make room for the null terminator *and* potentially
400+
// an extra newline character that we may need to artificially
401+
// add.
402+
size_t buffer_size = buflen + 2;
403+
if (!tok_reserve_buf(tok, buffer_size)) {
400404
goto error;
401405
}
402406
memcpy(tok->inp, buf, buflen);
@@ -983,6 +987,7 @@ tok_underflow_file(struct tok_state *tok) {
983987
return 0;
984988
}
985989
if (tok->inp[-1] != '\n') {
990+
assert(tok->inp + 1 < tok->end);
986991
/* Last line does not end in \n, fake one */
987992
*tok->inp++ = '\n';
988993
*tok->inp = '\0';

0 commit comments

Comments
 (0)