Skip to content

Commit 0315fdc

Browse files
miss-islington, lysnikolaou and blurb-it[bot]
authored
[3.12] gh-120343: Do not reset byte_col_offset_diff after multiline tokens (GH-120352) (#120356)
(cherry picked from commit 1b62bce) Co-authored-by: Lysandros Nikolaou <[email protected]> Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
1 parent 92e1c13 commit 0315fdc

File tree

3 files changed

+18
-1
lines changed

3 files changed

+18
-1
lines changed

Lib/test/test_tokenize.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1204,6 +1204,17 @@ def test_closing_parenthesis_from_different_line(self):
12041204
NAME 'x' (1, 3) (1, 4)
12051205
""")
12061206

1207+
def test_multiline_non_ascii_fstring(self):
1208+
self.check_tokenize("""\
1209+
a = f'''
1210+
Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli'''""", """\
1211+
NAME 'a' (1, 0) (1, 1)
1212+
OP '=' (1, 2) (1, 3)
1213+
FSTRING_START "f\'\'\'" (1, 4) (1, 8)
1214+
FSTRING_MIDDLE '\\n Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli' (1, 8) (2, 68)
1215+
FSTRING_END "\'\'\'" (2, 68) (2, 71)
1216+
""")
1217+
12071218
class GenerateTokensTest(TokenizeTest):
12081219
def check_tokenize(self, s, expected):
12091220
# Format the tokens in s in a table format.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix column offset reporting for tokens that come after multiline f-strings in the :mod:`tokenize` module.

Python/Python-tokenize.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ typedef struct
3535
/* Needed to cache line for performance */
3636
PyObject *last_line;
3737
Py_ssize_t last_lineno;
38+
Py_ssize_t last_end_lineno;
3839
Py_ssize_t byte_col_offset_diff;
3940
} tokenizeriterobject;
4041

@@ -76,6 +77,7 @@ tokenizeriter_new_impl(PyTypeObject *type, PyObject *readline,
7677
self->last_line = NULL;
7778
self->byte_col_offset_diff = 0;
7879
self->last_lineno = 0;
80+
self->last_end_lineno = 0;
7981

8082
return (PyObject *)self;
8183
}
@@ -226,7 +228,9 @@ tokenizeriter_next(tokenizeriterobject *it)
226228
Py_XDECREF(it->last_line);
227229
line = PyUnicode_DecodeUTF8(line_start, size, "replace");
228230
it->last_line = line;
229-
it->byte_col_offset_diff = 0;
231+
if (it->tok->lineno != it->last_end_lineno) {
232+
it->byte_col_offset_diff = 0;
233+
}
230234
} else {
231235
// Line hasn't changed so we reuse the cached one.
232236
line = it->last_line;
@@ -240,6 +244,7 @@ tokenizeriter_next(tokenizeriterobject *it)
240244
Py_ssize_t lineno = ISSTRINGLIT(type) ? it->tok->first_lineno : it->tok->lineno;
241245
Py_ssize_t end_lineno = it->tok->lineno;
242246
it->last_lineno = lineno;
247+
it->last_end_lineno = end_lineno;
243248

244249
Py_ssize_t col_offset = -1;
245250
Py_ssize_t end_col_offset = -1;

0 commit comments

Comments
 (0)