diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 75710db7d05375..53b2032467d9f5 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1786,11 +1786,11 @@ def test_bad_input_order(self):
         u.prev_row = 2
         u.prev_col = 2
         with self.assertRaises(ValueError) as cm:
-            u.add_whitespace((1,3))
+            u.add_whitespace((1,3), line=' ')
         self.assertEqual(cm.exception.args[0],
                 'start (1,3) precedes previous end (2,2)')
         # raise if previous column in row
-        self.assertRaises(ValueError, u.add_whitespace, (2,1))
+        self.assertRaises(ValueError, u.add_whitespace, (2,1), ' ')
 
     def test_backslash_continuation(self):
         # The problem is that \<newline> leaves no token
@@ -1798,10 +1798,10 @@ def test_backslash_continuation(self):
         u.prev_row = 1
         u.prev_col = 1
         u.tokens = []
-        u.add_whitespace((2, 0))
+        u.add_whitespace((2, 0), line=' \n')
         self.assertEqual(u.tokens, ['\\\n'])
         u.prev_row = 2
-        u.add_whitespace((4, 4))
+        u.add_whitespace((4, 4), line='    ')
         self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])
         TestRoundtrip.check_roundtrip(self, 'a\n  b\n    c\n  \\\n  c\n')
 
@@ -1985,6 +1985,11 @@ def test_string_concatenation(self):
         # Two string literals on the same line
         self.check_roundtrip("'' ''")
 
+    def test_tabs(self):
+        # Tabs should be preserved
+        self.check_roundtrip("a +\tb")
+        self.check_roundtrip("a + b\t# comment")
+
     def test_random_files(self):
         # Test roundtrip on random python modules.
         # pass the '-ucpu' option to process the full directory.
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 7ece4e9b70d31b..8f161a082694e2 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -171,7 +171,7 @@ def __init__(self):
         self.prev_type = None
         self.encoding = None
 
-    def add_whitespace(self, start):
+    def add_whitespace(self, start, line):
         row, col = start
         if row < self.prev_row or row == self.prev_row and col < self.prev_col:
             raise ValueError("start ({},{}) precedes previous end ({},{})"
@@ -182,7 +182,7 @@ def add_whitespace(self, start):
             self.prev_col = 0
         col_offset = col - self.prev_col
         if col_offset:
-            self.tokens.append(" " * col_offset)
+            self.tokens.append(line[self.prev_col:col])
 
     def escape_brackets(self, token):
         characters = []
@@ -246,7 +246,7 @@ def untokenize(self, iterable):
             elif tok_type in (STRING, FSTRING_START) and self.prev_type in (STRING, FSTRING_END):
                 self.tokens.append(" ")
 
-            self.add_whitespace(start)
+            self.add_whitespace(start, line)
             self.tokens.append(token)
             self.prev_row, self.prev_col = end
             if tok_type in (NEWLINE, NL):
diff --git a/Misc/NEWS.d/next/Library/2024-12-17-16-16-30.gh-issue-128031.WoEQqM.rst b/Misc/NEWS.d/next/Library/2024-12-17-16-16-30.gh-issue-128031.WoEQqM.rst
new file mode 100644
index 00000000000000..b25065bcb5ccd4
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-12-17-16-16-30.gh-issue-128031.WoEQqM.rst
@@ -0,0 +1 @@
+Preserve tab characters in :func:`tokenize.untokenize`.
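
For reference, a minimal sketch of the round trip this patch is meant to fix (not part of the diff; assumes the patch above is applied):

import io
import tokenize

# Before this change, Untokenizer.add_whitespace regenerated the gap
# between tokens as " " * col_offset, so a tab in the source came back
# as spaces and the round trip below failed. With the change, the gap
# is sliced out of the original source line (line[self.prev_col:col]),
# so the tab character survives.
source = "a +\tb"
tokens = tokenize.generate_tokens(io.StringIO(source).readline)
assert tokenize.untokenize(tokens) == source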