Free typechecker state and ASTs when they are no longer needed #7280

Merged · 2 commits merged on Aug 2, 2019
71 changes: 46 additions & 25 deletions mypy/build.py
@@ -59,6 +59,7 @@
from mypy.typestate import TypeState, reset_global_state
from mypy.renaming import VariableRenameVisitor
from mypy.config_parser import parse_mypy_comments
from mypy.freetree import free_tree


# Switch to True to produce debug output related to fine-grained incremental
@@ -471,6 +472,7 @@ class BuildManager:
missing_modules: Set of modules that could not be imported encountered so far
stale_modules: Set of modules that needed to be rechecked (only used by tests)
fg_deps_meta: Metadata for fine-grained dependencies caches associated with modules
fg_deps: A fine-grained dependency map
version_id: The current mypy version (based on commit id when possible)
plugin: Active mypy plugin(s)
plugins_snapshot:
@@ -523,6 +525,11 @@ def __init__(self, data_dir: str,
self.modules = {} # type: Dict[str, MypyFile]
self.missing_modules = set() # type: Set[str]
self.fg_deps_meta = {} # type: Dict[str, FgDepMeta]
# fg_deps holds the dependencies of every module that has been
# processed. We store this in BuildManager so that we can compute
# dependencies as we go, which allows us to free ASTs and type information,
# saving a ton of memory on net.
self.fg_deps = {} # type: Dict[str, Set[str]]
# Always convert the plugin to a ChainedPlugin so that it can be manipulated if needed
if not isinstance(plugin, ChainedPlugin):
plugin = ChainedPlugin(options, [plugin])
@@ -894,32 +901,24 @@ def invert_deps(deps: Dict[str, Set[str]],
return rdeps


def generate_deps_for_cache(proto_deps: Dict[str, Set[str]],
manager: BuildManager,
def generate_deps_for_cache(manager: BuildManager,
graph: Graph) -> Dict[str, Dict[str, Set[str]]]:
"""Generate fine-grained dependencies into a form suitable for serializing.

This does a few things:
1. Computes all fine grained deps from modules that were processed
2. Splits fine-grained deps based on the module of the trigger
3. For each module we generated fine-grained deps for, load any previous
This does a couple things:
1. Splits fine-grained deps based on the module of the trigger
2. For each module we generated fine-grained deps for, load any previous
deps and merge them in.

Returns a dictionary from module ids to all dependencies on that
module. Dependencies not associated with a module in the build will be
associated with the nearest parent module that is in the build, or the
fake module FAKE_ROOT_MODULE if none are.
"""
from mypy.server.update import merge_dependencies # Lazy import to speed up startup

# Compute the full set of dependencies from everything we've processed.
deps = {} # type: Dict[str, Set[str]]
things = [st.compute_fine_grained_deps() for st in graph.values() if st.tree] + [proto_deps]
for st_deps in things:
merge_dependencies(st_deps, deps)
from mypy.server.deps import merge_dependencies # Lazy import to speed up startup

# Split the dependencies out into based on the module that is depended on.
rdeps = invert_deps(deps, graph)
rdeps = invert_deps(manager.fg_deps, graph)

# We can't just clobber existing dependency information, so we
# load the deps for every module we've generated new dependencies
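As a rough illustration of the splitting step described in the docstring above (a simplified toy helper, not mypy's actual invert_deps; the FAKE_ROOT_MODULE value is a stand-in):

```python
from typing import Dict, Set

FAKE_ROOT_MODULE = '@root'  # stand-in value for illustration


def split_deps_by_module(deps: Dict[str, Set[str]],
                         modules_in_build: Set[str]) -> Dict[str, Dict[str, Set[str]]]:
    """Group trigger->targets deps by the module the trigger belongs to."""
    rdeps = {}  # type: Dict[str, Dict[str, Set[str]]]
    for trigger, targets in deps.items():
        # A trigger like '<pkg.mod.f>' names pkg.mod.f; walk up its dotted
        # prefixes until we find a module that is part of the build.
        parts = trigger.strip('<>').split('.')
        module = FAKE_ROOT_MODULE
        while parts:
            candidate = '.'.join(parts)
            if candidate in modules_in_build:
                module = candidate
                break
            parts.pop()
        rdeps.setdefault(module, {}).setdefault(trigger, set()).update(targets)
    return rdeps


# Deps on pkg.mod.f land in pkg.mod's per-module cache; triggers that match
# nothing in the build fall back to the fake root module.
print(split_deps_by_module({'<pkg.mod.f>': {'m.g'}, '<other.x>': {'m.h'}},
                           {'pkg', 'pkg.mod', 'm'}))
# {'pkg.mod': {'<pkg.mod.f>': {'m.g'}}, '@root': {'<other.x>': {'m.h'}}}
```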
@@ -2172,6 +2171,16 @@ def finish_passes(self) -> None:
typemap=self.type_map())
manager.report_file(self.tree, self.type_map(), self.options)

self.update_fine_grained_deps(self.manager.fg_deps)
self.free_state()
if not manager.options.fine_grained_incremental and not manager.options.preserve_asts:
free_tree(self.tree)
Collaborator:

Is there a reason why we can't do self.tree = None? Or maybe doing self.tree.defs.clear() or something?

Collaborator (author):

The reason we need to go deeper than just clearing defs is that the symbol table contains references to functions and classes and the like, and we need to clear out their bodies.

It would be possible to drive this from the symbol table, and I considered that, but it is more complex for not much gain. (Probably a bit faster, but probably doesn't matter)

(We actually probably should also do self.tree.defs.clear(), which would free a little bit more memory, though probably not a substantial amount.)
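A toy sketch of the situation (made-up classes, not mypy's real AST nodes): the symbol table keeps the definitions alive, so only clearing their bodies actually releases memory.

```python
# Toy sketch, not mypy's real classes: the symbol table holds the same
# definition objects as tree.defs, so tree.defs.clear() alone does not
# release function bodies -- clearing the bodies themselves does.
from typing import Dict, List


class FakeStatement:
    pass


class FakeFuncDef:
    def __init__(self, name: str, body: List[FakeStatement]) -> None:
        self.name = name
        self.body = body  # the bulk of the memory lives here


class FakeModule:
    def __init__(self, defs: List[FakeFuncDef]) -> None:
        self.defs = defs
        # Symbol table entries reference the same definition objects.
        self.names = {d.name: d for d in defs}  # type: Dict[str, FakeFuncDef]


mod = FakeModule([FakeFuncDef('f', [FakeStatement() for _ in range(1000)])])
mod.defs.clear()
# The FuncDef and its body are still reachable through the symbol table:
assert len(mod.names['f'].body) == 1000
# Freeing has to clear the bodies recursively, which is what TreeFreer does:
mod.names['f'].body.clear()
assert not mod.names['f'].body
```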


def free_state(self) -> None:
if self._type_checker:
self._type_checker.reset()
self._type_checker = None

def _patch_indirect_dependencies(self,
module_refs: Set[str],
type_map: Dict[Expression, Type]) -> None:
@@ -2206,6 +2215,13 @@ def compute_fine_grained_deps(self) -> Dict[str, Set[str]]:
python_version=self.options.python_version,
options=self.manager.options)

def update_fine_grained_deps(self, deps: Dict[str, Set[str]]) -> None:
options = self.manager.options
if options.cache_fine_grained or options.fine_grained_incremental:
from mypy.server.deps import merge_dependencies # Lazy import to speed up startup
merge_dependencies(self.compute_fine_grained_deps(), deps)
TypeState.update_protocol_deps(deps)

def valid_references(self) -> Set[str]:
assert self.ancestors is not None
valid_refs = set(self.dependencies + self.suppressed + self.ancestors)
@@ -2616,10 +2632,9 @@ def dispatch(sources: List[BuildSource],
# then we need to collect fine grained protocol dependencies.
# Since these are a global property of the program, they are calculated after we
# processed the whole graph.
TypeState.update_protocol_deps()
TypeState.add_all_protocol_deps(manager.fg_deps)
if not manager.options.fine_grained_incremental:
proto_deps = TypeState.proto_deps or {}
rdeps = generate_deps_for_cache(proto_deps, manager, graph)
rdeps = generate_deps_for_cache(manager, graph)
write_deps_cache(rdeps, manager, graph)

if manager.options.dump_deps:
@@ -3023,23 +3038,29 @@ def process_stale_scc(graph: Graph, scc: List[str], manager: BuildManager) -> None:
graph[id].semantic_analysis_pass_three()
for id in stale:
graph[id].semantic_analysis_apply_patches()

# Track what modules aren't yet done so we can finish them as soon
# as possible, saving memory.
unfinished_modules = set(stale)
for id in stale:
graph[id].type_check_first_pass()
more = True
while more:
more = False
if not graph[id].type_checker().deferred_nodes:
unfinished_modules.discard(id)
graph[id].finish_passes()

while unfinished_modules:
for id in stale:
if graph[id].type_check_second_pass():
more = True
if id not in unfinished_modules:
continue
if not graph[id].type_check_second_pass():
unfinished_modules.discard(id)
graph[id].finish_passes()
for id in stale:
graph[id].generate_unused_ignore_notes()
if any(manager.errors.is_errors_for_file(graph[id].xpath) for id in stale):
for id in stale:
graph[id].transitive_error = True
for id in stale:
graph[id].finish_passes()
if manager.options.cache_fine_grained or manager.options.fine_grained_incremental:
graph[id].compute_fine_grained_deps()
manager.flush_errors(manager.errors.file_messages(graph[id].xpath), False)
graph[id].write_cache()
graph[id].mark_as_rechecked()
21 changes: 21 additions & 0 deletions mypy/freetree.py
@@ -0,0 +1,21 @@
"""Generic node traverser visitor"""

from mypy.traverser import TraverserVisitor
from mypy.nodes import Block, MypyFile


class TreeFreer(TraverserVisitor):
def visit_block(self, block: Block) -> None:
super().visit_block(block)
Collaborator:

Would clearing just the top-level blocks be sufficient here, instead of recursively clearing?

Collaborator (author):

We need to go recursively or we'll miss nested structures that appear in the symbol table.
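For instance, with a hypothetical module like the one below, clearing only the module's top-level block would drop tree.defs, but the symbol table still references C and f, so the bodies of m and the nested g would stay in memory; only a recursive clear of every Block frees them.

```python
# Hypothetical module, just to illustrate. After a non-recursive clear of the
# module's top-level block, the symbol table still references C and f, so the
# bodies of m and the nested g would survive; only recursing into every Block
# (as TreeFreer does) clears them.

class C:
    def m(self) -> None:
        data = [0] * 1000  # reachable via the symbol table entry for C


def f() -> None:
    def g() -> None:
        data = [1] * 1000  # nested body, only freed by the recursive traversal
```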

block.body.clear()


def free_tree(tree: MypyFile) -> None:
"""Free all the ASTs associated with a module.

This needs to be done recursively, since symbol tables contain
references to definitions, so those won't be freed but we want their
contents to be.
Member:

We might fix this in long term, see #5159, but definitely not right now. I don't have any additional comments here, thanks for improving this!

"""
tree.accept(TreeFreer())
tree.defs.clear()
6 changes: 6 additions & 0 deletions mypy/options.py
@@ -216,6 +216,12 @@ def __init__(self) -> None:
# in modules being compiled. Not in the config file or command line.
self.mypyc = False

# Disable the memory optimization of freeing ASTs when
# possible. This isn't exposed as a command line option
# because it is intended for software integrating with
# mypy. (Like mypyc.)
self.preserve_asts = False

# Paths of user plugins
self.plugins = [] # type: List[str]

6 changes: 6 additions & 0 deletions mypy/server/deps.py
@@ -958,6 +958,12 @@ def visit_union_type(self, typ: UnionType) -> List[str]:
return triggers


def merge_dependencies(new_deps: Dict[str, Set[str]],
deps: Dict[str, Set[str]]) -> None:
for trigger, targets in new_deps.items():
deps.setdefault(trigger, set()).update(targets)


def non_trivial_bases(info: TypeInfo) -> List[TypeInfo]:
return [base for base in info.mro[1:]
if base.fullname() != 'builtins.object']
40 changes: 15 additions & 25 deletions mypy/server/update.py
@@ -140,7 +140,7 @@
from mypy.server.astmerge import merge_asts
from mypy.server.aststrip import strip_target
from mypy.server.aststripnew import strip_target_new, SavedAttributes
from mypy.server.deps import get_dependencies_of_target
from mypy.server.deps import get_dependencies_of_target, merge_dependencies
from mypy.server.target import trigger_to_target
from mypy.server.trigger import make_trigger, WILDCARD_TAG
from mypy.util import module_prefix, split_target
Expand All @@ -163,7 +163,9 @@ def __init__(self, result: BuildResult) -> None:
self.manager = manager
self.graph = result.graph
self.previous_modules = get_module_to_path_map(self.graph)
self.deps = get_all_dependencies(manager, self.graph)
self.deps = manager.fg_deps
# Merge in any root dependencies that may not have been loaded
merge_dependencies(manager.load_fine_grained_deps(FAKE_ROOT_MODULE), self.deps)
self.previous_targets_with_errors = manager.errors.targets()
self.previous_messages = result.errors[:]
# Module, if any, that had blocking errors in the last run as (id, path) tuple.
@@ -378,7 +380,8 @@ def update_module(self,
self.manager.log_fine_grained('triggered: %r' % sorted(filtered))
self.triggered.extend(triggered | self.previous_targets_with_errors)
if module in graph:
merge_dependencies(graph[module].compute_fine_grained_deps(), self.deps)
graph[module].update_fine_grained_deps(self.deps)
graph[module].free_state()
remaining += propagate_changes_using_dependencies(
manager, graph, self.deps, triggered,
{module},
@@ -453,15 +456,11 @@ def ensure_trees_loaded(manager: BuildManager, graph: Dict[str, State],
process_fresh_modules(graph, to_process, manager)


def get_all_dependencies(manager: BuildManager, graph: Dict[str, State]) -> Dict[str, Set[str]]:
"""Return the fine-grained dependency map for an entire build."""
# Deps for each module were computed during build() or loaded from the cache.
deps = manager.load_fine_grained_deps(FAKE_ROOT_MODULE) # type: Dict[str, Set[str]]
for id in graph:
if graph[id].tree is not None:
merge_dependencies(graph[id].compute_fine_grained_deps(), deps)
TypeState.add_all_protocol_deps(deps)
return deps
def fix_fg_dependencies(manager: BuildManager, deps: Dict[str, Set[str]]) -> None:
"""Populate the dependencies with stuff that build may have missed"""
# This means the root module and typestate
merge_dependencies(manager.load_fine_grained_deps(FAKE_ROOT_MODULE), deps)
# TypeState.add_all_protocol_deps(deps)


# The result of update_module_isolated when no blockers, with these items:
@@ -608,15 +607,12 @@ def restore(ids: List[str]) -> None:
state.type_check_first_pass()
state.type_check_second_pass()
t2 = time.time()
state.compute_fine_grained_deps()
t3 = time.time()
state.finish_passes()
t4 = time.time()
t3 = time.time()
manager.add_stats(
semanal_time=t1 - t0,
typecheck_time=t2 - t1,
deps_time=t3 - t2,
finish_passes_time=t4 - t3)
finish_passes_time=t3 - t2)

graph[module] = state

@@ -700,14 +696,6 @@ def get_sources(fscache: FileSystemCache,
return sources


def merge_dependencies(new_deps: Dict[str, Set[str]],
deps: Dict[str, Set[str]]) -> None:
for trigger, targets in new_deps.items():
deps.setdefault(trigger, set()).update(targets)
# Merge also the newly added protocol deps.
TypeState.update_protocol_deps(deps)


def calculate_active_triggers(manager: BuildManager,
old_snapshots: Dict[str, Dict[str, SnapshotItem]],
new_modules: Dict[str, Optional[MypyFile]]) -> Set[str]:
@@ -994,6 +982,8 @@ def key(node: FineGrainedDeferredNode) -> int:
# Report missing imports.
graph[module_id].verify_dependencies()

graph[module_id].free_state()

return new_triggered


1 change: 1 addition & 0 deletions mypy/test/testdeps.py
@@ -45,6 +45,7 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
options.cache_dir = os.devnull
options.python_version = python_version
options.export_types = True
options.preserve_asts = True
messages, files, type_map = self.build(src, options)
a = messages
if files is None or type_map is None:
9 changes: 8 additions & 1 deletion mypy/test/testmerge.py
@@ -106,6 +106,7 @@ def build(self, source: str, testcase: DataDrivenTestCase) -> Optional[BuildResult]:
options.incremental = True
options.fine_grained_incremental = True
options.use_builtins_fixtures = True
options.export_types = True
options.show_traceback = True
options.python_version = PYTHON3_VERSION
main_path = os.path.join(test_temp_dir, 'main')
@@ -216,7 +217,13 @@ def dump_types(self, manager: FineGrainedBuildManager) -> List[str]:
for module_id in sorted(manager.manager.modules):
if not is_dumped_module(module_id):
continue
type_map = manager.graph[module_id].type_map()
all_types = manager.manager.all_types
# Compute a module type map from the global type map
tree = manager.graph[module_id].tree
assert tree is not None
type_map = {node: all_types[node]
for node in get_subexpressions(tree)
if node in all_types}
if type_map:
a.append('## {}'.format(module_id))
for expr in sorted(type_map, key=lambda n: (n.line, short_type(n),
1 change: 1 addition & 0 deletions mypy/test/testtypegen.py
@@ -31,6 +31,7 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
options.use_builtins_fixtures = True
options.show_traceback = True
options.export_types = True
options.preserve_asts = True
result = build.build(sources=[BuildSource('main', None, src)],
options=options,
alt_lib_path=test_temp_dir)
12 changes: 5 additions & 7 deletions mypy/typestate.py
@@ -211,15 +211,13 @@ def update_protocol_deps(second_map: Optional[Dict[str, Set[str]]] = None) -> None:
def add_all_protocol_deps(deps: Dict[str, Set[str]]) -> None:
"""Add all known protocol dependencies to deps.

This is used by tests and debug output, and also when passing
all collected or loaded dependencies on to FineGrainedBuildManager
in its __init__.
This is used by tests and debug output, and also when collecting
all collected or loaded dependencies as part of build.
"""
TypeState.update_protocol_deps() # just in case
assert TypeState.proto_deps is not None, (
"This should not be called after failed cache load")
for trigger, targets in TypeState.proto_deps.items():
deps.setdefault(trigger, set()).update(targets)
if TypeState.proto_deps is not None:
for trigger, targets in TypeState.proto_deps.items():
deps.setdefault(trigger, set()).update(targets)


def reset_global_state() -> None: