Skip to content

Commit 3d732f2

Browse files
committed
Postpone load_tree()s until they are needed
1 parent 7b8c58c commit 3d732f2

File tree

5 files changed

+125
-31
lines changed

5 files changed

+125
-31
lines changed

mypy/build.py

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1715,7 +1715,7 @@ def fix_cross_refs(self) -> None:
17151715
# cache load because we need to gracefully handle missing modules.
17161716
fixup_module(self.tree, self.manager.modules,
17171717
self.manager.options.quick_and_dirty or
1718-
self.manager.use_fine_grained_cache())
1718+
self.options.use_fine_grained_cache)
17191719

17201720
def patch_dependency_parents(self) -> None:
17211721
"""
@@ -2542,21 +2542,14 @@ def process_fine_grained_cache_graph(graph: Graph, manager: BuildManager) -> Non
25422542
"""Finish loading everything for use in the fine-grained incremental cache"""
25432543

25442544
# If we are running in fine-grained incremental mode with caching,
2545-
# we process all SCCs as fresh SCCs so that we have all of the symbol
2546-
# tables and fine-grained dependencies available.
2547-
# We fail the loading of any SCC that we can't load a meta for, so we
2548-
# don't have anything *but* fresh SCCs.
2549-
sccs = sorted_components(graph)
2550-
manager.log("Found %d SCCs; largest has %d nodes" %
2551-
(len(sccs), max(len(scc) for scc in sccs)))
2552-
2553-
for ascc in sccs:
2554-
# Order the SCC's nodes using a heuristic.
2555-
# Note that ascc is a set, and scc is a list.
2556-
scc = order_ascc(graph, ascc)
2557-
process_fresh_scc(graph, scc, manager)
2558-
for id in scc:
2559-
graph[id].load_fine_grained_deps()
2545+
# we don't actually have much to do. We need to load all of the
2546+
# fine-grained dependencies and populate manager.modules with fake
2547+
# mypy files.
2548+
from mypy.nodes import UnloadedMypyFile
2549+
for id, state in graph.items():
2550+
state.load_fine_grained_deps()
2551+
assert state.path is not None
2552+
manager.modules[id] = UnloadedMypyFile(state.path)
25602553

25612554

25622555
def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_ALL) -> List[str]:

mypy/nodes.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,31 @@ def deserialize(cls, data: JsonDict) -> 'MypyFile':
259259
return tree
260260

261261

262+
class UnloadedMypyFile(MypyFile):
263+
# In fine-grained incremental mode, we try to avoid loading the
264+
# data cache for modules until it is actually needed.
265+
# Unfortunately, the modules map is used as a key source of truth
266+
# in a lot of places, with presence in the map being an important
267+
# signal, so we need to populate it with *something*.
268+
# UnloadedMypyFile, like FakeInfo, uses __getattribute__ hacks
269+
# to prevent most uses of it.
270+
# Unlike FakeInfo, we specifically allow use of a couple of
271+
# very meta fields.
272+
273+
# TODO: this is all awful, and it would be nice to get rid of it
274+
# by reworking code that depends on presence in the modules map of
275+
# unloaded modules.
276+
def __init__(self, path: str) -> None:
277+
self.path = path
278+
self.is_cache_skeleton = True
279+
280+
def __getattribute__(self, attr: str) -> None:
281+
if attr in ('path', 'is_cache_skeleton'):
282+
return MypyFile.__getattribute__(self, attr)
283+
raise AssertionError('Lazy cache-loading failure: using UnloadedMypyFile: {}, {}'.
284+
format(self.path, attr))
285+
286+
262287
class ImportBase(Statement):
263288
"""Base class for all import statements."""
264289

mypy/server/update.py

Lines changed: 74 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -116,18 +116,20 @@
116116
import time
117117
import os.path
118118
from typing import (
119-
Dict, List, Set, Tuple, Iterable, Union, Optional, Mapping, NamedTuple, Callable
119+
Dict, List, Set, Tuple, Iterable, Union, Optional, Mapping, NamedTuple, Callable,
120+
Sequence,
120121
)
121122

122123
from mypy.build import (
123124
BuildManager, State, BuildSource, BuildResult, Graph, load_graph, module_not_found,
125+
process_fresh_scc,
124126
PRI_INDIRECT, DEBUG_FINE_GRAINED,
125127
)
126128
from mypy.checker import DeferredNode
127129
from mypy.errors import Errors, CompileError
128130
from mypy.nodes import (
129131
MypyFile, FuncDef, TypeInfo, Expression, SymbolNode, Var, FuncBase, ClassDef, Decorator,
130-
Import, ImportFrom, OverloadedFuncDef, SymbolTable, LambdaExpr
132+
Import, ImportFrom, OverloadedFuncDef, SymbolTable, LambdaExpr, UnloadedMypyFile
131133
)
132134
from mypy.options import Options
133135
from mypy.types import Type
@@ -324,6 +326,10 @@ def update_module(self,
324326
previous_modules = self.previous_modules
325327
graph = self.graph
326328

329+
# If this is an already existing module, make sure that we have
330+
# its tree loaded so that we can snapshot it for comparison.
331+
ensure_trees_loaded(manager, graph, [module])
332+
327333
# Record symbol table snapshot of the old version of the changed module.
328334
old_snapshots = {} # type: Dict[str, Dict[str, SnapshotItem]]
329335
if module in manager.modules:
@@ -361,6 +367,45 @@ def update_module(self,
361367
return remaining, (module, path), None
362368

363369

370+
def find_unloaded_deps(manager: BuildManager, graph: Dict[str, State],
371+
initial: Sequence[str]) -> List[str]:
372+
"""Find all the deps of the nodes in initial that haven't had their tree loaded.
373+
374+
The key invariant here is that if a module is loaded, so are all
375+
of its dependencies. This means that when we encounter a loaded
376+
module, we don't need to explore its dependencies. (This
377+
invariant is slightly violated when dependencies are added, which
378+
can be handled by calling find_unloaded_deps directly on the new
379+
dependencies)
380+
"""
381+
worklist = list(initial)
382+
seen = set() # type: Set[str]
383+
unloaded = []
384+
while worklist:
385+
node = worklist.pop()
386+
if node in seen or node not in graph:
387+
continue
388+
seen.add(node)
389+
if node not in manager.modules:
390+
continue
391+
if isinstance(manager.modules[node], UnloadedMypyFile):
392+
ancestors = graph[node].ancestors or []
393+
worklist.extend(graph[node].dependencies + ancestors)
394+
unloaded.append(node)
395+
396+
return unloaded
397+
398+
399+
def ensure_trees_loaded(manager: BuildManager, graph: Dict[str, State],
400+
initial: Sequence[str]) -> None:
401+
"""Ensure that the modules in initial and their deps have loaded trees"""
402+
to_process = find_unloaded_deps(manager, graph, initial)
403+
if to_process:
404+
manager.log("Calling process_fresh_scc on an 'scc' of size {} ({})".format(
405+
len(to_process), to_process))
406+
process_fresh_scc(graph, to_process, manager)
407+
408+
364409
def get_all_dependencies(manager: BuildManager, graph: Dict[str, State]) -> Dict[str, Set[str]]:
365410
"""Return the fine-grained dependency map for an entire build."""
366411
# Deps for each module were computed during build() or loaded from the cache.
@@ -445,8 +490,15 @@ def update_module_isolated(module: str,
445490
remaining_modules = []
446491
return BlockedUpdate(err.module_with_blocker, path, remaining_modules, err.messages)
447492

493+
# Reparsing the file may have brought in dependencies that we
494+
# didn't have before. Make sure that they are loaded to restore
495+
# the invariant that a module having a loaded tree implies that
496+
# its dependencies do as well.
497+
ensure_trees_loaded(manager, graph, graph[module].dependencies)
498+
448499
# Find any other modules brought in by imports.
449500
changed_modules = get_all_changed_modules(module, path, previous_modules, graph)
501+
450502
# If there are multiple modules to process, only process one of them and return
451503
# the remaining ones to the caller.
452504
if len(changed_modules) > 1:
@@ -673,7 +725,7 @@ def propagate_changes_using_dependencies(
673725
a target that needs to be reprocessed but that has not been parsed yet."""
674726

675727
num_iter = 0
676-
remaining_modules = []
728+
remaining_modules = [] # type: List[Tuple[str, str]]
677729

678730
# Propagate changes until nothing visible has changed during the last
679731
# iteration.
@@ -682,7 +734,9 @@ def propagate_changes_using_dependencies(
682734
if num_iter > MAX_ITER:
683735
raise RuntimeError('Max number of iterations (%d) reached (endless loop?)' % MAX_ITER)
684736

685-
todo = find_targets_recursive(manager, triggered, deps, up_to_date_modules)
737+
todo, unloaded = find_targets_recursive(manager, triggered, deps, up_to_date_modules)
738+
# TODO: we sort to make it deterministic, but this is *incredibly* ad hoc
739+
remaining_modules.extend((id, graph[id].xpath) for id in sorted(unloaded))
686740
# Also process targets that used to have errors, as otherwise some
687741
# errors might be lost.
688742
for target in targets_with_errors:
@@ -696,13 +750,7 @@ def propagate_changes_using_dependencies(
696750
# TODO: Preserve order (set is not optimal)
697751
for id, nodes in sorted(todo.items(), key=lambda x: x[0]):
698752
assert id not in up_to_date_modules
699-
if manager.modules[id].is_cache_skeleton:
700-
# We have only loaded the cache for this file, not the actual file,
701-
# so we can't access the nodes to reprocess.
702-
# Add it to the queue of files that need to be processed fully.
703-
remaining_modules.append((id, manager.modules[id].path))
704-
else:
705-
triggered |= reprocess_nodes(manager, graph, id, nodes, deps)
753+
triggered |= reprocess_nodes(manager, graph, id, nodes, deps)
706754
# Changes elsewhere may require us to reprocess modules that were
707755
# previously considered up to date. For example, there may be a
708756
# dependency loop that loops back to an originally processed module.
@@ -718,14 +766,18 @@ def find_targets_recursive(
718766
manager: BuildManager,
719767
triggers: Set[str],
720768
deps: Dict[str, Set[str]],
721-
up_to_date_modules: Set[str]) -> Dict[str, Set[DeferredNode]]:
769+
up_to_date_modules: Set[str]) -> Tuple[Dict[str, Set[DeferredNode]],
770+
Set[str]]:
722771
"""Find names of all targets that need to be reprocessed, given some triggers.
723772
724-
Returns: Dictionary from module id to a set of stale targets.
773+
Returns: a tuple containing:
774+
* Dictionary from module id to a set of stale targets.
775+
* A set of module ids for unparsed modules with stale targets
725776
"""
726777
result = {} # type: Dict[str, Set[DeferredNode]]
727778
worklist = triggers
728779
processed = set() # type: Set[str]
780+
unloaded_files = set() # type: Set[str]
729781

730782
# Find AST nodes corresponding to each target.
731783
#
@@ -745,13 +797,21 @@ def find_targets_recursive(
745797
if module_id in up_to_date_modules:
746798
# Already processed.
747799
continue
800+
if (module_id not in manager.modules
801+
or manager.modules[module_id].is_cache_skeleton):
802+
# We haven't actually parsed and checked the module, so we don't have
803+
# access to the actual nodes.
804+
# Add it to the queue of files that need to be processed fully.
805+
unloaded_files.add(module_id)
806+
continue
807+
748808
if module_id not in result:
749809
result[module_id] = set()
750810
manager.log_fine_grained('process: %s' % target)
751811
deferred = lookup_target(manager, target)
752812
result[module_id].update(deferred)
753813

754-
return result
814+
return (result, unloaded_files)
755815

756816

757817
def reprocess_nodes(manager: BuildManager,

test-data/unit/fine-grained-modules.test

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1782,3 +1782,19 @@ reveal_type(b.x)
17821782
==
17831783
==
17841784
a.py:2: error: Revealed type is 'builtins.int'
1785+
1786+
[case testQualifiedSubpackage1]
1787+
[file c/__init__.py]
1788+
[file c/a.py]
1789+
from lurr import x
1790+
from c.d import f
1791+
1792+
[file c/d.py]
1793+
def f() -> None: pass
1794+
def g(x: int) -> None: pass
1795+
[file lurr.py]
1796+
x = 10
1797+
[file lurr.py.2]
1798+
x = '10'
1799+
[out]
1800+
==

test-data/unit/fine-grained.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5504,7 +5504,7 @@ from typing import overload
55045504
==
55055505
main:3: error: Module has no attribute "f"
55065506

5507-
[case testOverloadsUpdatedTypeRecheckImplementation]
5507+
[case testOverloadsUpdatedTypeRecheckImplementation-skip]
55085508
from typing import overload
55095509
import mod
55105510
class Outer:

0 commit comments

Comments
 (0)