Skip to content

Commit 0315fdc

Browse files
miss-islington, lysnikolaou and blurb-it[bot]
authored
[3.12] gh-120343: Do not reset byte_col_offset_diff after multiline tokens (GH-120352) (#120356)
(cherry picked from commit 1b62bce) Co-authored-by: Lysandros Nikolaou <[email protected]> Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
1 parent 92e1c13 commit 0315fdc

File tree

3 files changed

+18
-1
lines changed

3 files changed

+18
-1
lines changed

Lib/test/test_tokenize.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1204,6 +1204,17 @@ def test_closing_parenthesis_from_different_line(self):
12041204
NAME 'x' (1, 3) (1, 4)
12051205
""")
12061206

1207+
def test_multiline_non_ascii_fstring(self):
1208+
self.check_tokenize("""\
1209+
a = f'''
1210+
Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli'''""", """\
1211+
NAME 'a' (1, 0) (1, 1)
1212+
OP '=' (1, 2) (1, 3)
1213+
FSTRING_START "f\'\'\'" (1, 4) (1, 8)
1214+
FSTRING_MIDDLE '\\n Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli' (1, 8) (2, 68)
1215+
FSTRING_END "\'\'\'" (2, 68) (2, 71)
1216+
""")
1217+
12071218
class GenerateTokensTest(TokenizeTest):
12081219
def check_tokenize(self, s, expected):
12091220
# Format the tokens in s in a table format.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix column offset reporting for tokens that come after multiline f-strings in the :mod:`tokenize` module.

Python/Python-tokenize.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ typedef struct
3535
/* Needed to cache line for performance */
3636
PyObject *last_line;
3737
Py_ssize_t last_lineno;
38+
Py_ssize_t last_end_lineno;
3839
Py_ssize_t byte_col_offset_diff;
3940
} tokenizeriterobject;
4041

@@ -76,6 +77,7 @@ tokenizeriter_new_impl(PyTypeObject *type, PyObject *readline,
7677
self->last_line = NULL;
7778
self->byte_col_offset_diff = 0;
7879
self->last_lineno = 0;
80+
self->last_end_lineno = 0;
7981

8082
return (PyObject *)self;
8183
}
@@ -226,7 +228,9 @@ tokenizeriter_next(tokenizeriterobject *it)
226228
Py_XDECREF(it->last_line);
227229
line = PyUnicode_DecodeUTF8(line_start, size, "replace");
228230
it->last_line = line;
229-
it->byte_col_offset_diff = 0;
231+
if (it->tok->lineno != it->last_end_lineno) {
232+
it->byte_col_offset_diff = 0;
233+
}
230234
} else {
231235
// Line hasn't changed so we reuse the cached one.
232236
line = it->last_line;
@@ -240,6 +244,7 @@ tokenizeriter_next(tokenizeriterobject *it)
240244
Py_ssize_t lineno = ISSTRINGLIT(type) ? it->tok->first_lineno : it->tok->lineno;
241245
Py_ssize_t end_lineno = it->tok->lineno;
242246
it->last_lineno = lineno;
247+
it->last_end_lineno = end_lineno;
243248

244249
Py_ssize_t col_offset = -1;
245250
Py_ssize_t end_col_offset = -1;

0 commit comments

Comments
 (0)