Skip to content

Commit 654e179

Browse files
committed
typeops: extend make_simplified_union fast path to enums
In PR #9192 a fast path was added to address the slowness reported in issue #9169, wherein large Unions of Literal types would dramatically slow down typechecking. It is desirable to extend this fast path to cover Enum types, as these can also leverage the O(n) set-based fast path instead of the O(n**2) fallback. This brings the typechecking of a single, fairly simple chain of `if` statements operating on a large enum (~3k members) down from ~40min to 12s in real-world code! Note that the timings were taken from a pure-Python run of mypy, as opposed to a compiled version.
1 parent 5906a5d commit 654e179

File tree

1 file changed

+56
-27
lines changed

1 file changed

+56
-27
lines changed

mypy/typeops.py

+56-27
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
since these may assume that MROs are ready.
66
"""
77

8-
from typing import cast, Optional, List, Sequence, Set, Iterable, TypeVar
8+
from typing import cast, Optional, List, Sequence, Set, Iterable, TypeVar, Tuple
99
from typing_extensions import Type as TypingType
1010
import sys
1111

@@ -311,6 +311,17 @@ def callable_corresponding_argument(typ: CallableType,
311311
return by_name if by_name is not None else by_pos
312312

313313

314+
def is_simple_literal(t: ProperType) -> bool:
    """Tell whether a type qualifies for the fast path of Union simplification.

    At present a type qualifies only when it is a LiteralType whose fallback
    type is either an enum or 'builtins.str'.
    """
    # Anything that is not a literal never takes the fast path.
    if not isinstance(t, LiteralType):
        return False
    fallback_info = t.fallback.type
    return fallback_info.is_enum or fallback_info.fullname == 'builtins.str'
323+
324+
314325
def make_simplified_union(items: Sequence[Type],
315326
line: int = -1, column: int = -1,
316327
*, keep_erased: bool = False) -> ProperType:
@@ -344,35 +355,53 @@ def make_simplified_union(items: Sequence[Type],
344355
from mypy.subtypes import is_proper_subtype
345356

346357
removed = set() # type: Set[int]
347-
348-
# Avoid slow nested for loop for Union of Literal of strings (issue #9169)
349-
if all((isinstance(item, LiteralType) and
350-
item.fallback.type.fullname == 'builtins.str')
351-
for item in items):
352-
seen = set() # type: Set[str]
353-
for index, item in enumerate(items):
358+
seen = set() # type: Set[Tuple[str, str]]
359+
360+
# NB: having a separate fast path for Union of Literal and slow path for other things
361+
# would arguably be cleaner, however it breaks down when simplifying the Union of two
362+
# different enum types as try_expanding_enum_to_union works recursively and will
363+
# trigger intermediate simplifications that would render the fast path useless
364+
for i, item in enumerate(items):
365+
if i in removed:
366+
continue
367+
# Avoid slow nested for loop for Union of Literal of strings/enums (issue #9169)
368+
if is_simple_literal(item):
354369
assert isinstance(item, LiteralType)
355370
assert isinstance(item.value, str)
356-
if item.value in seen:
357-
removed.add(index)
358-
seen.add(item.value)
371+
k = (item.value, item.fallback.type.fullname)
372+
if k in seen:
373+
removed.add(i)
374+
continue
359375

360-
else:
361-
for i, ti in enumerate(items):
362-
if i in removed: continue
363-
# Keep track of the truishness info for deleted subtypes which can be relevant
364-
cbt = cbf = False
365-
for j, tj in enumerate(items):
366-
if i != j and is_proper_subtype(tj, ti, keep_erased_types=keep_erased):
367-
# We found a redundant item in the union.
368-
removed.add(j)
369-
cbt = cbt or tj.can_be_true
370-
cbf = cbf or tj.can_be_false
371-
# if deleted subtypes had more general truthiness, use that
372-
if not ti.can_be_true and cbt:
373-
items[i] = true_or_false(ti)
374-
elif not ti.can_be_false and cbf:
375-
items[i] = true_or_false(ti)
376+
# NB: one would naively expect that it would be safe to skip the slow path
377+
# always for literals. One would be sorely mistaken. Indeed, some simplifications
378+
# such as that of None/Optional when strict optional is false, do require that we
379+
# proceed with the slow path. Thankfully, all literals will have the same subtype
380+
# relationship to non-literal types, so we only need to do that walk for the first
381+
# literal, which keeps the fast path fast even in the presence of a mixture of
382+
# literals and other types.
383+
safe_skip = len(seen) > 0
384+
seen.add(k)
385+
if safe_skip:
386+
continue
387+
# Keep track of the truishness info for deleted subtypes which can be relevant
388+
cbt = cbf = False
389+
for j, tj in enumerate(items):
390+
# NB: we don't need to check literals as the fast path above takes care of that
391+
if (
392+
i != j
393+
and not is_simple_literal(tj)
394+
and is_proper_subtype(tj, item, keep_erased_types=keep_erased)
395+
):
396+
# We found a redundant item in the union.
397+
removed.add(j)
398+
cbt = cbt or tj.can_be_true
399+
cbf = cbf or tj.can_be_false
400+
# if deleted subtypes had more general truthiness, use that
401+
if not item.can_be_true and cbt:
402+
items[i] = true_or_false(item)
403+
elif not item.can_be_false and cbf:
404+
items[i] = true_or_false(item)
376405

377406
simplified_set = [items[i] for i in range(len(items)) if i not in removed]
378407
return UnionType.make_union(simplified_set, line, column)

0 commit comments

Comments
 (0)