diff --git a/Lib/idlelib/autocomplete.py b/Lib/idlelib/autocomplete.py index 032d31225315fb..4d4b71c46e8ecc 100644 --- a/Lib/idlelib/autocomplete.py +++ b/Lib/idlelib/autocomplete.py @@ -28,9 +28,8 @@ TRY_A = False, False, False, ATTRS # '.' for attributes. TRY_F = False, False, False, FILES # '/' in quotes for file name. -# This string includes all chars that may be in an identifier. -# TODO Update this here and elsewhere. -ID_CHARS = string.ascii_letters + string.digits + "_" +# all ASCII chars that may be in an identifier +_ASCII_ID_CHARS = frozenset(string.ascii_letters + string.digits + "_") SEPS = f"{os.sep}{os.altsep if os.altsep else ''}" TRIGGERS = f".{SEPS}" @@ -134,7 +133,11 @@ def open_completions(self, args): elif hp.is_in_code() and (not mode or mode==ATTRS): self._remove_autocomplete_window() mode = ATTRS - while i and (curline[i-1] in ID_CHARS or ord(curline[i-1]) > 127): + while i: + c = curline[i-1] + if c not in _ASCII_ID_CHARS: + if c <= '\x7f' or not ('a' + c).isidentifier(): + break i -= 1 comp_start = curline[i:j] if i and curline[i-1] == '.': # Need object with attributes. diff --git a/Lib/idlelib/autoexpand.py b/Lib/idlelib/autoexpand.py index 92f5c84eb6f401..206d36994ce9e8 100644 --- a/Lib/idlelib/autoexpand.py +++ b/Lib/idlelib/autoexpand.py @@ -13,12 +13,10 @@ There is only one instance of Autoexpand. ''' import re -import string +_LAST_WORD_RE = re.compile(r'\b\w+\Z') class AutoExpand: - wordchars = string.ascii_letters + string.digits + "_" - def __init__(self, editwin): self.text = editwin.text self.bell = self.text.bell @@ -85,10 +83,8 @@ def getwords(self): def getprevword(self): "Return the word prefix before the cursor." 
line = self.text.get("insert linestart", "insert") - i = len(line) - while i > 0 and line[i-1] in self.wordchars: - i = i-1 - return line[i:] + m = _LAST_WORD_RE.search(line) + return m[0] if m else '' if __name__ == '__main__': diff --git a/Lib/idlelib/editor.py b/Lib/idlelib/editor.py index 08d6aa2efde22a..154eaf7bf53ecf 100644 --- a/Lib/idlelib/editor.py +++ b/Lib/idlelib/editor.py @@ -3,7 +3,6 @@ import os import platform import re -import string import sys import tokenize import traceback @@ -817,14 +816,12 @@ def ResetColorizer(self): if self.line_numbers is not None: self.line_numbers.update_colors() - IDENTCHARS = string.ascii_letters + string.digits + "_" - def colorize_syntax_error(self, text, pos): text.tag_add("ERROR", pos) char = text.get(pos) - if char and char in self.IDENTCHARS: + if char and ('a' + char).isidentifier(): text.tag_add("ERROR", pos + " wordstart", pos) - if '\n' == text.get(pos): # error at line end + if char == '\n': # error at line end text.mark_set("insert", pos) else: text.mark_set("insert", pos + "+1c") diff --git a/Lib/idlelib/hyperparser.py b/Lib/idlelib/hyperparser.py index 76144ee8fb30f5..c510f4dc79d93f 100644 --- a/Lib/idlelib/hyperparser.py +++ b/Lib/idlelib/hyperparser.py @@ -14,13 +14,6 @@ # all ASCII chars that may be the first char of an identifier _ASCII_ID_FIRST_CHARS = frozenset(string.ascii_letters + "_") -# lookup table for whether 7-bit ASCII chars are valid in a Python identifier -_IS_ASCII_ID_CHAR = [(chr(x) in _ASCII_ID_CHARS) for x in range(128)] -# lookup table for whether 7-bit ASCII chars are valid as the first -# char in a Python identifier -_IS_ASCII_ID_FIRST_CHAR = \ - [(chr(x) in _ASCII_ID_FIRST_CHARS) for x in range(128)] - class HyperParser: def __init__(self, editwin, index): @@ -166,8 +159,6 @@ def _eat_identifier(cls, str, limit, pos): This ignores non-identifier eywords are not identifiers. """ - is_ascii_id_char = _IS_ASCII_ID_CHAR - # Start at the end (pos) and work backwards. 
i = pos @@ -175,44 +166,40 @@ def _eat_identifier(cls, str, limit, pos): # identifier characters. This is an optimization, since it # is faster in the common case where most of the characters # are ASCII. - while i > limit and ( - ord(str[i - 1]) < 128 and - is_ascii_id_char[ord(str[i - 1])] - ): + while i > limit and str[i - 1] in _ASCII_ID_CHARS: i -= 1 # If the above loop ended due to reaching a non-ASCII # character, continue going backwards using the most generic # test for whether a string contains only valid identifier # characters. - if i > limit and ord(str[i - 1]) >= 128: - while i - 4 >= limit and ('a' + str[i - 4:pos]).isidentifier(): + if i > limit and str[i - 1] > '\x7f': + while i - 4 >= limit and ('a' + str[i - 4:i]).isidentifier(): i -= 4 - if i - 2 >= limit and ('a' + str[i - 2:pos]).isidentifier(): + if i - 2 >= limit and ('a' + str[i - 2:i]).isidentifier(): i -= 2 - if i - 1 >= limit and ('a' + str[i - 1:pos]).isidentifier(): + if i - 1 >= limit and ('a' + str[i - 1]).isidentifier(): i -= 1 # The identifier candidate starts here. If it isn't a valid # identifier, don't eat anything. At this point that is only # possible if the first character isn't a valid first # character for an identifier. - if not str[i:pos].isidentifier(): + if i < pos and not str[i].isidentifier(): return 0 elif i < pos: # All characters in str[i:pos] are valid ASCII identifier # characters, so it is enough to check that the first is # valid as the first character of an identifier. - if not _IS_ASCII_ID_FIRST_CHAR[ord(str[i])]: + if str[i] not in _ASCII_ID_FIRST_CHARS: return 0 # All keywords are valid identifiers, but should not be # considered identifiers here, except for True, False and None. 
- if i < pos and ( - iskeyword(str[i:pos]) and - str[i:pos] not in cls._ID_KEYWORDS - ): - return 0 + if i < pos: + word = str[i:pos] + if iskeyword(word) and word not in cls._ID_KEYWORDS: + return 0 return pos - i diff --git a/Lib/idlelib/undo.py b/Lib/idlelib/undo.py index 85ecffecb4cbcb..70230d2bea1a89 100644 --- a/Lib/idlelib/undo.py +++ b/Lib/idlelib/undo.py @@ -1,5 +1,3 @@ -import string - from idlelib.delegator import Delegator # tkinter import not needed because module does not create widgets, @@ -251,10 +249,8 @@ def merge(self, cmd): self.chars = self.chars + cmd.chars return True - alphanumeric = string.ascii_letters + string.digits + "_" - def classify(self, c): - if c in self.alphanumeric: + if ('a' + c).isidentifier(): return "alphanumeric" if c == "\n": return "newline" diff --git a/Misc/NEWS.d/next/IDLE/2021-11-03-10-37-29.bpo-45692.QSuHbM.rst b/Misc/NEWS.d/next/IDLE/2021-11-03-10-37-29.bpo-45692.QSuHbM.rst new file mode 100644 index 00000000000000..bde8b2eea248ff --- /dev/null +++ b/Misc/NEWS.d/next/IDLE/2021-11-03-10-37-29.bpo-45692.QSuHbM.rst @@ -0,0 +1,3 @@ +Improve support of non-ASCII identifiers in IDLE +(autoexpanding, autocompletion, undo, etc). +