From a13e65b6586c356c6f7ae0c5c4d22387733a9f2d Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Tue, 11 Jun 2024 18:26:40 +0200 Subject: [PATCH 1/2] gh-120343: Do not reset byte_col_offset_diff after multiline tokens --- Lib/test/test_tokenize.py | 11 +++++++++++ Python/Python-tokenize.c | 7 ++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 4428e8cea1964c..36dba71766cc20 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1199,6 +1199,17 @@ def test_closing_parenthesis_from_different_line(self): NAME 'x' (1, 3) (1, 4) """) + def test_multiline_non_ascii_fstring(self): + self.check_tokenize("""\ +a = f''' + Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli'''""", """\ + NAME 'a' (1, 0) (1, 1) + OP '=' (1, 2) (1, 3) + FSTRING_START "f\'\'\'" (1, 4) (1, 8) + FSTRING_MIDDLE '\\n Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli' (1, 8) (2, 68) + FSTRING_END "\'\'\'" (2, 68) (2, 71) + """) + class GenerateTokensTest(TokenizeTest): def check_tokenize(self, s, expected): # Format the tokens in s in a table format. diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c index 09fad18b5b4df7..2591dae35736ba 100644 --- a/Python/Python-tokenize.c +++ b/Python/Python-tokenize.c @@ -36,6 +36,7 @@ typedef struct /* Needed to cache line for performance */ PyObject *last_line; Py_ssize_t last_lineno; + Py_ssize_t last_end_lineno; Py_ssize_t byte_col_offset_diff; } tokenizeriterobject; @@ -77,6 +78,7 @@ tokenizeriter_new_impl(PyTypeObject *type, PyObject *readline, self->last_line = NULL; self->byte_col_offset_diff = 0; self->last_lineno = 0; + self->last_end_lineno = 0; return (PyObject *)self; } @@ -227,7 +229,9 @@ tokenizeriter_next(tokenizeriterobject *it) Py_XDECREF(it->last_line); line = PyUnicode_DecodeUTF8(line_start, size, "replace"); it->last_line = line; - it->byte_col_offset_diff = 0; + if (it->tok->lineno != it->last_end_lineno) { + it->byte_col_offset_diff = 0; + } } else { // Line hasn't changed so we reuse the cached one. line = it->last_line; @@ -241,6 +245,7 @@ tokenizeriter_next(tokenizeriterobject *it) Py_ssize_t lineno = ISSTRINGLIT(type) ? it->tok->first_lineno : it->tok->lineno; Py_ssize_t end_lineno = it->tok->lineno; it->last_lineno = lineno; + it->last_end_lineno = end_lineno; Py_ssize_t col_offset = -1; Py_ssize_t end_col_offset = -1; From 013027889c841aa11c03e89e616c98d04847bfa8 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Tue, 11 Jun 2024 16:34:48 +0000 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst diff --git a/Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst b/Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst new file mode 100644 index 00000000000000..76714b0c394eef --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst @@ -0,0 +1 @@ +Fix column offset reporting for tokens that come after multiline f-strings in the :mod:`tokenize` module.