Skip to content

Commit 9a1f7ec

Browse files
committed
Free typechecker state and ASTs when they are no longer needed
Instead of hanging on to all of them for the entire run of the process, free ASTs and typechecker state (especially the type map) as soon as a module has finished being processed. In order for this to work when generating fine-grained dependencies, we need to produce fine-grained dependencies much earlier, so BuildManager now has an `fg_deps` field. In the daemon, only free typechecker state, since we want to keep ASTs around to increase recheck speed. (A future change might use an LRU cache to keep only some around.)
1 parent d044c2e commit 9a1f7ec

File tree

10 files changed

+102
-71
lines changed

10 files changed

+102
-71
lines changed

mypy/build.py

Lines changed: 46 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
from mypy.typestate import TypeState, reset_global_state
6060
from mypy.renaming import VariableRenameVisitor
6161
from mypy.config_parser import parse_mypy_comments
62+
from mypy.freetree import free_tree
6263

6364

6465
# Switch to True to produce debug output related to fine-grained incremental
@@ -471,6 +472,7 @@ class BuildManager:
471472
missing_modules: Set of modules that could not be imported encountered so far
472473
stale_modules: Set of modules that needed to be rechecked (only used by tests)
473474
fg_deps_meta: Metadata for fine-grained dependencies caches associated with modules
475+
fg_deps: A fine-grained dependency map
474476
version_id: The current mypy version (based on commit id when possible)
475477
plugin: Active mypy plugin(s)
476478
plugins_snapshot:
@@ -523,6 +525,11 @@ def __init__(self, data_dir: str,
523525
self.modules = {} # type: Dict[str, MypyFile]
524526
self.missing_modules = set() # type: Set[str]
525527
self.fg_deps_meta = {} # type: Dict[str, FgDepMeta]
528+
# fg_deps holds the dependencies of every module that has been
529+
# processed. We store this in BuildManager so that we can compute
530+
# dependencies as we go, which allows us to free ASTs and type information,
531+
# saving a ton of memory on net.
532+
self.fg_deps = {} # type: Dict[str, Set[str]]
526533
# Always convert the plugin to a ChainedPlugin so that it can be manipulated if needed
527534
if not isinstance(plugin, ChainedPlugin):
528535
plugin = ChainedPlugin(options, [plugin])
@@ -894,32 +901,24 @@ def invert_deps(deps: Dict[str, Set[str]],
894901
return rdeps
895902

896903

897-
def generate_deps_for_cache(proto_deps: Dict[str, Set[str]],
898-
manager: BuildManager,
904+
def generate_deps_for_cache(manager: BuildManager,
899905
graph: Graph) -> Dict[str, Dict[str, Set[str]]]:
900906
"""Generate fine-grained dependencies into a form suitable for serializing.
901907
902-
This does a few things:
903-
1. Computes all fine grained deps from modules that were processed
904-
2. Splits fine-grained deps based on the module of the trigger
905-
3. For each module we generated fine-grained deps for, load any previous
908+
This does a couple things:
909+
1. Splits fine-grained deps based on the module of the trigger
910+
2. For each module we generated fine-grained deps for, load any previous
906911
deps and merge them in.
907912
908913
Returns a dictionary from module ids to all dependencies on that
909914
module. Dependencies not associated with a module in the build will be
910915
associated with the nearest parent module that is in the build, or the
911916
fake module FAKE_ROOT_MODULE if none are.
912917
"""
913-
from mypy.server.update import merge_dependencies # Lazy import to speed up startup
914-
915-
# Compute the full set of dependencies from everything we've processed.
916-
deps = {} # type: Dict[str, Set[str]]
917-
things = [st.compute_fine_grained_deps() for st in graph.values() if st.tree] + [proto_deps]
918-
for st_deps in things:
919-
merge_dependencies(st_deps, deps)
918+
from mypy.server.deps import merge_dependencies # Lazy import to speed up startup
920919

921920
# Split the dependencies out based on the module that is depended on.
922-
rdeps = invert_deps(deps, graph)
921+
rdeps = invert_deps(manager.fg_deps, graph)
923922

924923
# We can't just clobber existing dependency information, so we
925924
# load the deps for every module we've generated new dependencies
@@ -2172,6 +2171,16 @@ def finish_passes(self) -> None:
21722171
typemap=self.type_map())
21732172
manager.report_file(self.tree, self.type_map(), self.options)
21742173

2174+
self.update_fine_grained_deps(self.manager.fg_deps)
2175+
self.free_state()
2176+
if not manager.options.fine_grained_incremental and not manager.options.preserve_asts:
2177+
free_tree(self.tree)
2178+
2179+
def free_state(self) -> None:
2180+
if self._type_checker:
2181+
self._type_checker.reset()
2182+
self._type_checker = None
2183+
21752184
def _patch_indirect_dependencies(self,
21762185
module_refs: Set[str],
21772186
type_map: Dict[Expression, Type]) -> None:
@@ -2206,6 +2215,13 @@ def compute_fine_grained_deps(self) -> Dict[str, Set[str]]:
22062215
python_version=self.options.python_version,
22072216
options=self.manager.options)
22082217

2218+
def update_fine_grained_deps(self, deps: Dict[str, Set[str]]) -> None:
2219+
options = self.manager.options
2220+
if options.cache_fine_grained or options.fine_grained_incremental:
2221+
from mypy.server.deps import merge_dependencies # Lazy import to speed up startup
2222+
merge_dependencies(self.compute_fine_grained_deps(), deps)
2223+
TypeState.update_protocol_deps(deps)
2224+
22092225
def valid_references(self) -> Set[str]:
22102226
assert self.ancestors is not None
22112227
valid_refs = set(self.dependencies + self.suppressed + self.ancestors)
@@ -2616,10 +2632,9 @@ def dispatch(sources: List[BuildSource],
26162632
# then we need to collect fine grained protocol dependencies.
26172633
# Since these are a global property of the program, they are calculated after we
26182634
# processed the whole graph.
2619-
TypeState.update_protocol_deps()
2635+
TypeState.add_all_protocol_deps(manager.fg_deps)
26202636
if not manager.options.fine_grained_incremental:
2621-
proto_deps = TypeState.proto_deps or {}
2622-
rdeps = generate_deps_for_cache(proto_deps, manager, graph)
2637+
rdeps = generate_deps_for_cache(manager, graph)
26232638
write_deps_cache(rdeps, manager, graph)
26242639

26252640
if manager.options.dump_deps:
@@ -3023,23 +3038,29 @@ def process_stale_scc(graph: Graph, scc: List[str], manager: BuildManager) -> No
30233038
graph[id].semantic_analysis_pass_three()
30243039
for id in stale:
30253040
graph[id].semantic_analysis_apply_patches()
3041+
3042+
# Track what modules aren't yet done so we can finish them as soon
3043+
# as possible, saving memory.
3044+
unfinished_modules = set(stale)
30263045
for id in stale:
30273046
graph[id].type_check_first_pass()
3028-
more = True
3029-
while more:
3030-
more = False
3047+
if not graph[id].type_checker().deferred_nodes:
3048+
unfinished_modules.discard(id)
3049+
graph[id].finish_passes()
3050+
3051+
while unfinished_modules:
30313052
for id in stale:
3032-
if graph[id].type_check_second_pass():
3033-
more = True
3053+
if id not in unfinished_modules:
3054+
continue
3055+
if not graph[id].type_check_second_pass():
3056+
unfinished_modules.discard(id)
3057+
graph[id].finish_passes()
30343058
for id in stale:
30353059
graph[id].generate_unused_ignore_notes()
30363060
if any(manager.errors.is_errors_for_file(graph[id].xpath) for id in stale):
30373061
for id in stale:
30383062
graph[id].transitive_error = True
30393063
for id in stale:
3040-
graph[id].finish_passes()
3041-
if manager.options.cache_fine_grained or manager.options.fine_grained_incremental:
3042-
graph[id].compute_fine_grained_deps()
30433064
manager.flush_errors(manager.errors.file_messages(graph[id].xpath), False)
30443065
graph[id].write_cache()
30453066
graph[id].mark_as_rechecked()

mypy/freetree.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
"""Generic node traverser visitor"""
2+
3+
from mypy.traverser import TraverserVisitor
4+
from mypy.nodes import Block, MypyFile
5+
6+
7+
class TreeFreer(TraverserVisitor):
8+
def visit_block(self, block: Block) -> None:
9+
super().visit_block(block)
10+
block.body.clear()
11+
12+
13+
def free_tree(tree: MypyFile) -> None:
14+
tree.accept(TreeFreer())

mypy/options.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,12 @@ def __init__(self) -> None:
216216
# in modules being compiled. Not in the config file or command line.
217217
self.mypyc = False
218218

219+
# Disable the memory optimization of freeing ASTs when
220+
# possible. This isn't exposed as a command line option
221+
# because it is intended for software integrating with
222+
# mypy. (Like mypyc.)
223+
self.preserve_asts = False
224+
219225
# Paths of user plugins
220226
self.plugins = [] # type: List[str]
221227

mypy/server/deps.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -958,6 +958,12 @@ def visit_union_type(self, typ: UnionType) -> List[str]:
958958
return triggers
959959

960960

961+
def merge_dependencies(new_deps: Dict[str, Set[str]],
962+
deps: Dict[str, Set[str]]) -> None:
963+
for trigger, targets in new_deps.items():
964+
deps.setdefault(trigger, set()).update(targets)
965+
966+
961967
def non_trivial_bases(info: TypeInfo) -> List[TypeInfo]:
962968
return [base for base in info.mro[1:]
963969
if base.fullname() != 'builtins.object']

mypy/server/update.py

Lines changed: 15 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@
140140
from mypy.server.astmerge import merge_asts
141141
from mypy.server.aststrip import strip_target
142142
from mypy.server.aststripnew import strip_target_new, SavedAttributes
143-
from mypy.server.deps import get_dependencies_of_target
143+
from mypy.server.deps import get_dependencies_of_target, merge_dependencies
144144
from mypy.server.target import trigger_to_target
145145
from mypy.server.trigger import make_trigger, WILDCARD_TAG
146146
from mypy.util import module_prefix, split_target
@@ -163,7 +163,9 @@ def __init__(self, result: BuildResult) -> None:
163163
self.manager = manager
164164
self.graph = result.graph
165165
self.previous_modules = get_module_to_path_map(self.graph)
166-
self.deps = get_all_dependencies(manager, self.graph)
166+
self.deps = manager.fg_deps
167+
# Merge in any root dependencies that may not have been loaded
168+
merge_dependencies(manager.load_fine_grained_deps(FAKE_ROOT_MODULE), self.deps)
167169
self.previous_targets_with_errors = manager.errors.targets()
168170
self.previous_messages = result.errors[:]
169171
# Module, if any, that had blocking errors in the last run as (id, path) tuple.
@@ -378,7 +380,8 @@ def update_module(self,
378380
self.manager.log_fine_grained('triggered: %r' % sorted(filtered))
379381
self.triggered.extend(triggered | self.previous_targets_with_errors)
380382
if module in graph:
381-
merge_dependencies(graph[module].compute_fine_grained_deps(), self.deps)
383+
graph[module].update_fine_grained_deps(self.deps)
384+
graph[module].free_state()
382385
remaining += propagate_changes_using_dependencies(
383386
manager, graph, self.deps, triggered,
384387
{module},
@@ -453,15 +456,11 @@ def ensure_trees_loaded(manager: BuildManager, graph: Dict[str, State],
453456
process_fresh_modules(graph, to_process, manager)
454457

455458

456-
def get_all_dependencies(manager: BuildManager, graph: Dict[str, State]) -> Dict[str, Set[str]]:
457-
"""Return the fine-grained dependency map for an entire build."""
458-
# Deps for each module were computed during build() or loaded from the cache.
459-
deps = manager.load_fine_grained_deps(FAKE_ROOT_MODULE) # type: Dict[str, Set[str]]
460-
for id in graph:
461-
if graph[id].tree is not None:
462-
merge_dependencies(graph[id].compute_fine_grained_deps(), deps)
463-
TypeState.add_all_protocol_deps(deps)
464-
return deps
459+
def fix_fg_dependencies(manager: BuildManager, deps: Dict[str, Set[str]]) -> None:
460+
"""Populate the dependencies with stuff that build may have missed"""
461+
# This means the root module and typestate
462+
merge_dependencies(manager.load_fine_grained_deps(FAKE_ROOT_MODULE), deps)
463+
# TypeState.add_all_protocol_deps(deps)
465464

466465

467466
# The result of update_module_isolated when no blockers, with these items:
@@ -608,15 +607,12 @@ def restore(ids: List[str]) -> None:
608607
state.type_check_first_pass()
609608
state.type_check_second_pass()
610609
t2 = time.time()
611-
state.compute_fine_grained_deps()
612-
t3 = time.time()
613610
state.finish_passes()
614-
t4 = time.time()
611+
t3 = time.time()
615612
manager.add_stats(
616613
semanal_time=t1 - t0,
617614
typecheck_time=t2 - t1,
618-
deps_time=t3 - t2,
619-
finish_passes_time=t4 - t3)
615+
finish_passes_time=t3 - t2)
620616

621617
graph[module] = state
622618

@@ -700,14 +696,6 @@ def get_sources(fscache: FileSystemCache,
700696
return sources
701697

702698

703-
def merge_dependencies(new_deps: Dict[str, Set[str]],
704-
deps: Dict[str, Set[str]]) -> None:
705-
for trigger, targets in new_deps.items():
706-
deps.setdefault(trigger, set()).update(targets)
707-
# Merge also the newly added protocol deps.
708-
TypeState.update_protocol_deps(deps)
709-
710-
711699
def calculate_active_triggers(manager: BuildManager,
712700
old_snapshots: Dict[str, Dict[str, SnapshotItem]],
713701
new_modules: Dict[str, Optional[MypyFile]]) -> Set[str]:
@@ -994,6 +982,8 @@ def key(node: FineGrainedDeferredNode) -> int:
994982
# Report missing imports.
995983
graph[module_id].verify_dependencies()
996984

985+
graph[module_id].free_state()
986+
997987
return new_triggered
998988

999989

mypy/test/testdeps.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
4545
options.cache_dir = os.devnull
4646
options.python_version = python_version
4747
options.export_types = True
48+
options.preserve_asts = True
4849
messages, files, type_map = self.build(src, options)
4950
a = messages
5051
if files is None or type_map is None:

mypy/test/testmerge.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ def build(self, source: str, testcase: DataDrivenTestCase) -> Optional[BuildResu
106106
options.incremental = True
107107
options.fine_grained_incremental = True
108108
options.use_builtins_fixtures = True
109+
options.export_types = True
109110
options.show_traceback = True
110111
options.python_version = PYTHON3_VERSION
111112
main_path = os.path.join(test_temp_dir, 'main')
@@ -216,7 +217,13 @@ def dump_types(self, manager: FineGrainedBuildManager) -> List[str]:
216217
for module_id in sorted(manager.manager.modules):
217218
if not is_dumped_module(module_id):
218219
continue
219-
type_map = manager.graph[module_id].type_map()
220+
all_types = manager.manager.all_types
221+
# Compute a module type map from the global type map
222+
tree = manager.graph[module_id].tree
223+
assert tree is not None
224+
type_map = {node: all_types[node]
225+
for node in get_subexpressions(tree)
226+
if node in all_types}
220227
if type_map:
221228
a.append('## {}'.format(module_id))
222229
for expr in sorted(type_map, key=lambda n: (n.line, short_type(n),

mypy/test/testtypegen.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
3131
options.use_builtins_fixtures = True
3232
options.show_traceback = True
3333
options.export_types = True
34+
options.preserve_asts = True
3435
result = build.build(sources=[BuildSource('main', None, src)],
3536
options=options,
3637
alt_lib_path=test_temp_dir)

mypy/typestate.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -211,15 +211,13 @@ def update_protocol_deps(second_map: Optional[Dict[str, Set[str]]] = None) -> No
211211
def add_all_protocol_deps(deps: Dict[str, Set[str]]) -> None:
212212
"""Add all known protocol dependencies to deps.
213213
214-
This is used by tests and debug output, and also when passing
215-
all collected or loaded dependencies on to FineGrainedBuildManager
216-
in its __init__.
214+
This is used by tests and debug output, and also when collecting
215+
all collected or loaded dependencies as part of build.
217216
"""
218217
TypeState.update_protocol_deps() # just in case
219-
assert TypeState.proto_deps is not None, (
220-
"This should not be called after failed cache load")
221-
for trigger, targets in TypeState.proto_deps.items():
222-
deps.setdefault(trigger, set()).update(targets)
218+
if TypeState.proto_deps is not None:
219+
for trigger, targets in TypeState.proto_deps.items():
220+
deps.setdefault(trigger, set()).update(targets)
223221

224222

225223
def reset_global_state() -> None:

0 commit comments

Comments
 (0)