Skip to content

Commit 51bcb67

Browse files
miss-islington, lysnikolaou, and blurb-it[bot]
authored
[3.13] gh-120343: Do not reset byte_col_offset_diff after multiline tokens (GH-120352) (#120355)
(cherry picked from commit 1b62bce) Co-authored-by: Lysandros Nikolaou <[email protected]> Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
1 parent f465dfb commit 51bcb67

File tree

3 files changed

+18
-1
lines changed

3 files changed

+18
-1
lines changed

Lib/test/test_tokenize.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1199,6 +1199,17 @@ def test_closing_parenthesis_from_different_line(self):
11991199
NAME 'x' (1, 3) (1, 4)
12001200
""")
12011201

1202+
def test_multiline_non_ascii_fstring(self):
1203+
self.check_tokenize("""\
1204+
a = f'''
1205+
Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli'''""", """\
1206+
NAME 'a' (1, 0) (1, 1)
1207+
OP '=' (1, 2) (1, 3)
1208+
FSTRING_START "f\'\'\'" (1, 4) (1, 8)
1209+
FSTRING_MIDDLE '\\n Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli' (1, 8) (2, 68)
1210+
FSTRING_END "\'\'\'" (2, 68) (2, 71)
1211+
""")
1212+
12021213
class GenerateTokensTest(TokenizeTest):
12031214
def check_tokenize(self, s, expected):
12041215
# Format the tokens in s in a table format.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix column offset reporting for tokens that come after multiline f-strings in the :mod:`tokenize` module.

Python/Python-tokenize.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ typedef struct
3636
/* Needed to cache line for performance */
3737
PyObject *last_line;
3838
Py_ssize_t last_lineno;
39+
Py_ssize_t last_end_lineno;
3940
Py_ssize_t byte_col_offset_diff;
4041
} tokenizeriterobject;
4142

@@ -77,6 +78,7 @@ tokenizeriter_new_impl(PyTypeObject *type, PyObject *readline,
7778
self->last_line = NULL;
7879
self->byte_col_offset_diff = 0;
7980
self->last_lineno = 0;
81+
self->last_end_lineno = 0;
8082

8183
return (PyObject *)self;
8284
}
@@ -227,7 +229,9 @@ tokenizeriter_next(tokenizeriterobject *it)
227229
Py_XDECREF(it->last_line);
228230
line = PyUnicode_DecodeUTF8(line_start, size, "replace");
229231
it->last_line = line;
230-
it->byte_col_offset_diff = 0;
232+
if (it->tok->lineno != it->last_end_lineno) {
233+
it->byte_col_offset_diff = 0;
234+
}
231235
} else {
232236
// Line hasn't changed so we reuse the cached one.
233237
line = it->last_line;
@@ -241,6 +245,7 @@ tokenizeriter_next(tokenizeriterobject *it)
241245
Py_ssize_t lineno = ISSTRINGLIT(type) ? it->tok->first_lineno : it->tok->lineno;
242246
Py_ssize_t end_lineno = it->tok->lineno;
243247
it->last_lineno = lineno;
248+
it->last_end_lineno = end_lineno;
244249

245250
Py_ssize_t col_offset = -1;
246251
Py_ssize_t end_col_offset = -1;

0 commit comments

Comments (0)