
Commit 7279fb6

Pablo Galindo authored
gh-105435: Fix spurious NEWLINE token if file ends with comment without a newline (#105442)
1 parent a24a780 commit 7279fb6
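
For context, the user-visible effect is what the tokenize module reports for a source that ends with a comment and no trailing newline. A minimal repro, assuming a build that includes this fix (the expected values come straight from the updated test below):

import io
import tokenize

# Source ends with a comment and no trailing '\n'.
source = "b = 1\n\n#test"
tokens = list(tokenize.tokenize(io.BytesIO(source.encode("utf-8")).readline))
for tok in tokens:
    print(tok)

# The NL token emitted for the tokenizer's implicit trailing newline now
# carries an empty string instead of a '\n' that is not in the source.
assert tokens[-2].type == tokenize.NL
assert tokens[-2].string == ""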

File tree

3 files changed: +39 -5 lines changed

Lib/test/test_tokenize.py

Lines changed: 26 additions & 5 deletions

@@ -1631,13 +1631,34 @@ def test_pathological_trailing_whitespace(self):
     def test_comment_at_the_end_of_the_source_without_newline(self):
         # See http://bugs.python.org/issue44667
         source = 'b = 1\n\n#test'
-        expected_tokens = [token.NAME, token.EQUAL, token.NUMBER, token.NEWLINE, token.NL, token.COMMENT]
+        expected_tokens = [
+            TokenInfo(type=token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''),
+            TokenInfo(type=token.NAME, string='b', start=(1, 0), end=(1, 1), line='b = 1\n'),
+            TokenInfo(type=token.OP, string='=', start=(1, 2), end=(1, 3), line='b = 1\n'),
+            TokenInfo(type=token.NUMBER, string='1', start=(1, 4), end=(1, 5), line='b = 1\n'),
+            TokenInfo(type=token.NEWLINE, string='\n', start=(1, 5), end=(1, 6), line='b = 1\n'),
+            TokenInfo(type=token.NL, string='\n', start=(2, 0), end=(2, 1), line='\n'),
+            TokenInfo(type=token.COMMENT, string='#test', start=(3, 0), end=(3, 5), line='#test\n'),
+            TokenInfo(type=token.NL, string='', start=(3, 5), end=(3, 6), line='#test\n'),
+            TokenInfo(type=token.ENDMARKER, string='', start=(4, 0), end=(4, 0), line='')
+        ]
+
+        tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
+        self.assertEqual(tokens, expected_tokens)
+
+    def test_newline_and_space_at_the_end_of_the_source_without_newline(self):
+        # See https://github.com/python/cpython/issues/105435
+        source = 'a\n '
+        expected_tokens = [
+            TokenInfo(token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''),
+            TokenInfo(token.NAME, string='a', start=(1, 0), end=(1, 1), line='a\n'),
+            TokenInfo(token.NEWLINE, string='\n', start=(1, 1), end=(1, 2), line='a\n'),
+            TokenInfo(token.NL, string='', start=(2, 1), end=(2, 2), line=' \n'),
+            TokenInfo(token.ENDMARKER, string='', start=(3, 0), end=(3, 0), line='')
+        ]

         tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
-        self.assertEqual(tok_name[tokens[0].exact_type], tok_name[ENCODING])
-        for i in range(6):
-            self.assertEqual(tok_name[tokens[i + 1].exact_type], tok_name[expected_tokens[i]])
-        self.assertEqual(tok_name[tokens[-1].exact_type], tok_name[token.ENDMARKER])
+        self.assertEqual(tokens, expected_tokens)

     def test_invalid_character_in_fstring_middle(self):
         # See gh-103824
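
To run just these tests from a CPython checkout, the test runner's -m/--match filter should work, e.g.:

./python -m test test_tokenize -m test_comment_at_the_end_of_the_source_without_newline
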
Misc/NEWS.d/… (news entry)

Lines changed: 2 additions & 0 deletions

@@ -0,0 +1,2 @@
+Fix spurious newline character if file ends on a comment without a newline.
+Patch by Pablo Galindo

Python/Python-tokenize.c

Lines changed: 11 additions & 0 deletions

@@ -247,6 +247,17 @@ tokenizeriter_next(tokenizeriterobject *it)
             }
             end_col_offset++;
         }
+        else if (type == NL) {
+            if (it->tok->implicit_newline) {
+                Py_DECREF(str);
+                str = PyUnicode_FromString("");
+            }
+        }
+
+        if (str == NULL) {
+            Py_DECREF(line);
+            goto exit;
+        }
     }

     result = Py_BuildValue("(iN(nn)(nn)N)", type, str, lineno, col_offset, end_lineno, end_col_offset, line);
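
The change above: when the NL token corresponds to the newline the tokenizer inserted implicitly (because the file does not actually end with one), its string is replaced with an empty string, and the added NULL check bails out cleanly if PyUnicode_FromString fails. A small sketch of the observable behavior for the second test case, assuming a build with this fix:

import io
import tokenize

# 'a\n ' ends in a space with no final newline; the tokenizer synthesizes
# one internally, but the emitted NL token should not claim a '\n' exists.
src = "a\n "
toks = list(tokenize.tokenize(io.BytesIO(src.encode("utf-8")).readline))
assert toks[-2] == tokenize.TokenInfo(tokenize.NL, "", (2, 1), (2, 2), " \n")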
