Skip to content

Commit eb34a7b

Browse files
authored
Free typechecker state and ASTs when they are no longer needed (#7280)
Instead of hanging on to all of them for the entire run of the process, free ASTs and typechecker state (especially the type map) as soon as a module has finished being processed. In order to have this work when generating fine-grained dependencies, we need to produce fine-grained dependencies much earlier, so BuildManager now has an `fg_deps` field. In the daemon, only free typechecker state, since we want to keep ASTs around to increase recheck speed. (A future change might use an LRU cache to keep only some around.)
1 parent d044c2e commit eb34a7b

File tree

10 files changed

+109
-71
lines changed

10 files changed

+109
-71
lines changed

mypy/build.py

Lines changed: 46 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
from mypy.typestate import TypeState, reset_global_state
6060
from mypy.renaming import VariableRenameVisitor
6161
from mypy.config_parser import parse_mypy_comments
62+
from mypy.freetree import free_tree
6263

6364

6465
# Switch to True to produce debug output related to fine-grained incremental
@@ -471,6 +472,7 @@ class BuildManager:
471472
missing_modules: Set of modules that could not be imported encountered so far
472473
stale_modules: Set of modules that needed to be rechecked (only used by tests)
473474
fg_deps_meta: Metadata for fine-grained dependencies caches associated with modules
475+
fg_deps: A fine-grained dependency map
474476
version_id: The current mypy version (based on commit id when possible)
475477
plugin: Active mypy plugin(s)
476478
plugins_snapshot:
@@ -523,6 +525,11 @@ def __init__(self, data_dir: str,
523525
self.modules = {} # type: Dict[str, MypyFile]
524526
self.missing_modules = set() # type: Set[str]
525527
self.fg_deps_meta = {} # type: Dict[str, FgDepMeta]
528+
# fg_deps holds the dependencies of every module that has been
529+
# processed. We store this in BuildManager so that we can compute
530+
# dependencies as we go, which allows us to free ASTs and type information,
531+
# saving a ton of memory on net.
532+
self.fg_deps = {} # type: Dict[str, Set[str]]
526533
# Always convert the plugin to a ChainedPlugin so that it can be manipulated if needed
527534
if not isinstance(plugin, ChainedPlugin):
528535
plugin = ChainedPlugin(options, [plugin])
@@ -894,32 +901,24 @@ def invert_deps(deps: Dict[str, Set[str]],
894901
return rdeps
895902

896903

897-
def generate_deps_for_cache(proto_deps: Dict[str, Set[str]],
898-
manager: BuildManager,
904+
def generate_deps_for_cache(manager: BuildManager,
899905
graph: Graph) -> Dict[str, Dict[str, Set[str]]]:
900906
"""Generate fine-grained dependencies into a form suitable for serializing.
901907
902-
This does a few things:
903-
1. Computes all fine grained deps from modules that were processed
904-
2. Splits fine-grained deps based on the module of the trigger
905-
3. For each module we generated fine-grained deps for, load any previous
908+
This does a couple things:
909+
1. Splits fine-grained deps based on the module of the trigger
910+
2. For each module we generated fine-grained deps for, load any previous
906911
deps and merge them in.
907912
908913
Returns a dictionary from module ids to all dependencies on that
909914
module. Dependencies not associated with a module in the build will be
910915
associated with the nearest parent module that is in the build, or the
911916
fake module FAKE_ROOT_MODULE if none are.
912917
"""
913-
from mypy.server.update import merge_dependencies # Lazy import to speed up startup
914-
915-
# Compute the full set of dependencies from everything we've processed.
916-
deps = {} # type: Dict[str, Set[str]]
917-
things = [st.compute_fine_grained_deps() for st in graph.values() if st.tree] + [proto_deps]
918-
for st_deps in things:
919-
merge_dependencies(st_deps, deps)
918+
from mypy.server.deps import merge_dependencies # Lazy import to speed up startup
920919

921920
# Split the dependencies out based on the module that is depended on.
922-
rdeps = invert_deps(deps, graph)
921+
rdeps = invert_deps(manager.fg_deps, graph)
923922

924923
# We can't just clobber existing dependency information, so we
925924
# load the deps for every module we've generated new dependencies
@@ -2172,6 +2171,16 @@ def finish_passes(self) -> None:
21722171
typemap=self.type_map())
21732172
manager.report_file(self.tree, self.type_map(), self.options)
21742173

2174+
self.update_fine_grained_deps(self.manager.fg_deps)
2175+
self.free_state()
2176+
if not manager.options.fine_grained_incremental and not manager.options.preserve_asts:
2177+
free_tree(self.tree)
2178+
2179+
def free_state(self) -> None:
2180+
if self._type_checker:
2181+
self._type_checker.reset()
2182+
self._type_checker = None
2183+
21752184
def _patch_indirect_dependencies(self,
21762185
module_refs: Set[str],
21772186
type_map: Dict[Expression, Type]) -> None:
@@ -2206,6 +2215,13 @@ def compute_fine_grained_deps(self) -> Dict[str, Set[str]]:
22062215
python_version=self.options.python_version,
22072216
options=self.manager.options)
22082217

2218+
def update_fine_grained_deps(self, deps: Dict[str, Set[str]]) -> None:
2219+
options = self.manager.options
2220+
if options.cache_fine_grained or options.fine_grained_incremental:
2221+
from mypy.server.deps import merge_dependencies # Lazy import to speed up startup
2222+
merge_dependencies(self.compute_fine_grained_deps(), deps)
2223+
TypeState.update_protocol_deps(deps)
2224+
22092225
def valid_references(self) -> Set[str]:
22102226
assert self.ancestors is not None
22112227
valid_refs = set(self.dependencies + self.suppressed + self.ancestors)
@@ -2616,10 +2632,9 @@ def dispatch(sources: List[BuildSource],
26162632
# then we need to collect fine grained protocol dependencies.
26172633
# Since these are a global property of the program, they are calculated after we
26182634
# processed the whole graph.
2619-
TypeState.update_protocol_deps()
2635+
TypeState.add_all_protocol_deps(manager.fg_deps)
26202636
if not manager.options.fine_grained_incremental:
2621-
proto_deps = TypeState.proto_deps or {}
2622-
rdeps = generate_deps_for_cache(proto_deps, manager, graph)
2637+
rdeps = generate_deps_for_cache(manager, graph)
26232638
write_deps_cache(rdeps, manager, graph)
26242639

26252640
if manager.options.dump_deps:
@@ -3023,23 +3038,29 @@ def process_stale_scc(graph: Graph, scc: List[str], manager: BuildManager) -> No
30233038
graph[id].semantic_analysis_pass_three()
30243039
for id in stale:
30253040
graph[id].semantic_analysis_apply_patches()
3041+
3042+
# Track what modules aren't yet done so we can finish them as soon
3043+
# as possible, saving memory.
3044+
unfinished_modules = set(stale)
30263045
for id in stale:
30273046
graph[id].type_check_first_pass()
3028-
more = True
3029-
while more:
3030-
more = False
3047+
if not graph[id].type_checker().deferred_nodes:
3048+
unfinished_modules.discard(id)
3049+
graph[id].finish_passes()
3050+
3051+
while unfinished_modules:
30313052
for id in stale:
3032-
if graph[id].type_check_second_pass():
3033-
more = True
3053+
if id not in unfinished_modules:
3054+
continue
3055+
if not graph[id].type_check_second_pass():
3056+
unfinished_modules.discard(id)
3057+
graph[id].finish_passes()
30343058
for id in stale:
30353059
graph[id].generate_unused_ignore_notes()
30363060
if any(manager.errors.is_errors_for_file(graph[id].xpath) for id in stale):
30373061
for id in stale:
30383062
graph[id].transitive_error = True
30393063
for id in stale:
3040-
graph[id].finish_passes()
3041-
if manager.options.cache_fine_grained or manager.options.fine_grained_incremental:
3042-
graph[id].compute_fine_grained_deps()
30433064
manager.flush_errors(manager.errors.file_messages(graph[id].xpath), False)
30443065
graph[id].write_cache()
30453066
graph[id].mark_as_rechecked()

mypy/freetree.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
"""Generic node traverser visitor"""
2+
3+
from mypy.traverser import TraverserVisitor
4+
from mypy.nodes import Block, MypyFile
5+
6+
7+
class TreeFreer(TraverserVisitor):
8+
def visit_block(self, block: Block) -> None:
9+
super().visit_block(block)
10+
block.body.clear()
11+
12+
13+
def free_tree(tree: MypyFile) -> None:
14+
"""Free all the ASTs associated with a module.
15+
16+
This needs to be done recursively, since symbol tables contain
17+
references to definitions, so those won't be freed but we want their
18+
contents to be.
19+
"""
20+
tree.accept(TreeFreer())
21+
tree.defs.clear()

mypy/options.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,12 @@ def __init__(self) -> None:
216216
# in modules being compiled. Not in the config file or command line.
217217
self.mypyc = False
218218

219+
# Disable the memory optimization of freeing ASTs when
220+
# possible. This isn't exposed as a command line option
221+
# because it is intended for software integrating with
222+
# mypy. (Like mypyc.)
223+
self.preserve_asts = False
224+
219225
# Paths of user plugins
220226
self.plugins = [] # type: List[str]
221227

mypy/server/deps.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -958,6 +958,12 @@ def visit_union_type(self, typ: UnionType) -> List[str]:
958958
return triggers
959959

960960

961+
def merge_dependencies(new_deps: Dict[str, Set[str]],
962+
deps: Dict[str, Set[str]]) -> None:
963+
for trigger, targets in new_deps.items():
964+
deps.setdefault(trigger, set()).update(targets)
965+
966+
961967
def non_trivial_bases(info: TypeInfo) -> List[TypeInfo]:
962968
return [base for base in info.mro[1:]
963969
if base.fullname() != 'builtins.object']

mypy/server/update.py

Lines changed: 15 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@
140140
from mypy.server.astmerge import merge_asts
141141
from mypy.server.aststrip import strip_target
142142
from mypy.server.aststripnew import strip_target_new, SavedAttributes
143-
from mypy.server.deps import get_dependencies_of_target
143+
from mypy.server.deps import get_dependencies_of_target, merge_dependencies
144144
from mypy.server.target import trigger_to_target
145145
from mypy.server.trigger import make_trigger, WILDCARD_TAG
146146
from mypy.util import module_prefix, split_target
@@ -163,7 +163,9 @@ def __init__(self, result: BuildResult) -> None:
163163
self.manager = manager
164164
self.graph = result.graph
165165
self.previous_modules = get_module_to_path_map(self.graph)
166-
self.deps = get_all_dependencies(manager, self.graph)
166+
self.deps = manager.fg_deps
167+
# Merge in any root dependencies that may not have been loaded
168+
merge_dependencies(manager.load_fine_grained_deps(FAKE_ROOT_MODULE), self.deps)
167169
self.previous_targets_with_errors = manager.errors.targets()
168170
self.previous_messages = result.errors[:]
169171
# Module, if any, that had blocking errors in the last run as (id, path) tuple.
@@ -378,7 +380,8 @@ def update_module(self,
378380
self.manager.log_fine_grained('triggered: %r' % sorted(filtered))
379381
self.triggered.extend(triggered | self.previous_targets_with_errors)
380382
if module in graph:
381-
merge_dependencies(graph[module].compute_fine_grained_deps(), self.deps)
383+
graph[module].update_fine_grained_deps(self.deps)
384+
graph[module].free_state()
382385
remaining += propagate_changes_using_dependencies(
383386
manager, graph, self.deps, triggered,
384387
{module},
@@ -453,15 +456,11 @@ def ensure_trees_loaded(manager: BuildManager, graph: Dict[str, State],
453456
process_fresh_modules(graph, to_process, manager)
454457

455458

456-
def get_all_dependencies(manager: BuildManager, graph: Dict[str, State]) -> Dict[str, Set[str]]:
457-
"""Return the fine-grained dependency map for an entire build."""
458-
# Deps for each module were computed during build() or loaded from the cache.
459-
deps = manager.load_fine_grained_deps(FAKE_ROOT_MODULE) # type: Dict[str, Set[str]]
460-
for id in graph:
461-
if graph[id].tree is not None:
462-
merge_dependencies(graph[id].compute_fine_grained_deps(), deps)
463-
TypeState.add_all_protocol_deps(deps)
464-
return deps
459+
def fix_fg_dependencies(manager: BuildManager, deps: Dict[str, Set[str]]) -> None:
460+
"""Populate the dependencies with stuff that build may have missed"""
461+
# This means the root module and typestate
462+
merge_dependencies(manager.load_fine_grained_deps(FAKE_ROOT_MODULE), deps)
463+
# TypeState.add_all_protocol_deps(deps)
465464

466465

467466
# The result of update_module_isolated when no blockers, with these items:
@@ -608,15 +607,12 @@ def restore(ids: List[str]) -> None:
608607
state.type_check_first_pass()
609608
state.type_check_second_pass()
610609
t2 = time.time()
611-
state.compute_fine_grained_deps()
612-
t3 = time.time()
613610
state.finish_passes()
614-
t4 = time.time()
611+
t3 = time.time()
615612
manager.add_stats(
616613
semanal_time=t1 - t0,
617614
typecheck_time=t2 - t1,
618-
deps_time=t3 - t2,
619-
finish_passes_time=t4 - t3)
615+
finish_passes_time=t3 - t2)
620616

621617
graph[module] = state
622618

@@ -700,14 +696,6 @@ def get_sources(fscache: FileSystemCache,
700696
return sources
701697

702698

703-
def merge_dependencies(new_deps: Dict[str, Set[str]],
704-
deps: Dict[str, Set[str]]) -> None:
705-
for trigger, targets in new_deps.items():
706-
deps.setdefault(trigger, set()).update(targets)
707-
# Merge also the newly added protocol deps.
708-
TypeState.update_protocol_deps(deps)
709-
710-
711699
def calculate_active_triggers(manager: BuildManager,
712700
old_snapshots: Dict[str, Dict[str, SnapshotItem]],
713701
new_modules: Dict[str, Optional[MypyFile]]) -> Set[str]:
@@ -994,6 +982,8 @@ def key(node: FineGrainedDeferredNode) -> int:
994982
# Report missing imports.
995983
graph[module_id].verify_dependencies()
996984

985+
graph[module_id].free_state()
986+
997987
return new_triggered
998988

999989

mypy/test/testdeps.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
4545
options.cache_dir = os.devnull
4646
options.python_version = python_version
4747
options.export_types = True
48+
options.preserve_asts = True
4849
messages, files, type_map = self.build(src, options)
4950
a = messages
5051
if files is None or type_map is None:

mypy/test/testmerge.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ def build(self, source: str, testcase: DataDrivenTestCase) -> Optional[BuildResu
106106
options.incremental = True
107107
options.fine_grained_incremental = True
108108
options.use_builtins_fixtures = True
109+
options.export_types = True
109110
options.show_traceback = True
110111
options.python_version = PYTHON3_VERSION
111112
main_path = os.path.join(test_temp_dir, 'main')
@@ -216,7 +217,13 @@ def dump_types(self, manager: FineGrainedBuildManager) -> List[str]:
216217
for module_id in sorted(manager.manager.modules):
217218
if not is_dumped_module(module_id):
218219
continue
219-
type_map = manager.graph[module_id].type_map()
220+
all_types = manager.manager.all_types
221+
# Compute a module type map from the global type map
222+
tree = manager.graph[module_id].tree
223+
assert tree is not None
224+
type_map = {node: all_types[node]
225+
for node in get_subexpressions(tree)
226+
if node in all_types}
220227
if type_map:
221228
a.append('## {}'.format(module_id))
222229
for expr in sorted(type_map, key=lambda n: (n.line, short_type(n),

mypy/test/testtypegen.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
3131
options.use_builtins_fixtures = True
3232
options.show_traceback = True
3333
options.export_types = True
34+
options.preserve_asts = True
3435
result = build.build(sources=[BuildSource('main', None, src)],
3536
options=options,
3637
alt_lib_path=test_temp_dir)

mypy/typestate.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -211,15 +211,13 @@ def update_protocol_deps(second_map: Optional[Dict[str, Set[str]]] = None) -> No
211211
def add_all_protocol_deps(deps: Dict[str, Set[str]]) -> None:
212212
"""Add all known protocol dependencies to deps.
213213
214-
This is used by tests and debug output, and also when passing
215-
all collected or loaded dependencies on to FineGrainedBuildManager
216-
in its __init__.
214+
This is used by tests and debug output, and also when gathering
215+
all collected or loaded dependencies as part of build.
217216
"""
218217
TypeState.update_protocol_deps() # just in case
219-
assert TypeState.proto_deps is not None, (
220-
"This should not be called after failed cache load")
221-
for trigger, targets in TypeState.proto_deps.items():
222-
deps.setdefault(trigger, set()).update(targets)
218+
if TypeState.proto_deps is not None:
219+
for trigger, targets in TypeState.proto_deps.items():
220+
deps.setdefault(trigger, set()).update(targets)
223221

224222

225223
def reset_global_state() -> None:

0 commit comments

Comments
 (0)