
Commit f2bc385

msullivan authored and yedpodtrzitko committed
Optimize fine-grained update by using Graph as the cache (python#4622)
Make fine-grained update use a Graph as the source of truth instead of a SavedCache. We modify load_graph to optionally use an existing graph as a starting point. This allows us to skip the expensive full load_graph and preserve_full_cache operations that essentially only convert between Graph and SavedCache.

This requires a nontrivial change to the loading of dependency information in build. Since we are no longer doing a full `load_graph()`, we can't rely on it fully populating `missing_modules` if it is cleared between updates. If `missing_modules` isn't fully populated, then semanal will spuriously reject `from x import y` where `x.y` is a missing module.

We can't just *not* clear `missing_modules`, however, because `parse_file` uses it to decide that modules are suppressed. But this is actually wrong! It can lead to an import failure message not being generated because some other module has already failed to import it (and perhaps even ignored the error).

`parse_file()` shouldn't actually *need* to compute `suppressed`, though. If it is called on a file with no cache, `load_graph()` will handle moving deps to `suppressed`, and if called on a file that has had cache info loaded, then the dependency information has all been loaded from the cache. So we refactor things so that dep information from the cache is used when available and `parse_file` doesn't need to deal with it.

I strongly suspect that this refactor obviates the need for `fix_suppressed_dependencies`, but I was not able to quickly produce a test case from the description in python#2036, so I am not comfortable making that change.
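As a rough illustration of the new flow, a caller that still holds the previous Graph can hand it back to load_graph rather than round-tripping through a SavedCache. This is a minimal sketch assuming the load_graph signature added in this commit; the wrapper itself is hypothetical and is not the actual fine-grained update code.

from typing import List, Optional

from mypy.build import BuildManager, BuildSource, Graph, load_graph


def reload_graph(sources: List[BuildSource], manager: BuildManager,
                 previous: Optional[Graph] = None) -> Graph:
    # Hypothetical wrapper: passing the previous graph lets load_graph start
    # from the existing State objects instead of rebuilding them from a
    # SavedCache between fine-grained updates.
    return load_graph(sources, manager, old_graph=previous)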
1 parent dd117af · commit f2bc385

11 files changed: +335, -228 lines

mypy/build.py

Lines changed: 40 additions & 32 deletions
@@ -389,7 +389,6 @@ def default_lib_path(data_dir: str,
 CacheMeta = NamedTuple('CacheMeta',
                        [('id', str),
                         ('path', str),
-                        ('memory_only', bool),  # no corresponding json files (fine-grained only)
                         ('mtime', int),
                         ('size', int),
                         ('hash', str),
@@ -415,7 +414,6 @@ def cache_meta_from_dict(meta: Dict[str, Any], data_json: str) -> CacheMeta:
     return CacheMeta(
         meta.get('id', sentinel),
         meta.get('path', sentinel),
-        meta.get('memory_only', False),
         int(meta['mtime']) if 'mtime' in meta else sentinel,
         meta.get('size', sentinel),
         meta.get('hash', sentinel),
@@ -569,7 +567,7 @@ class BuildManager:
       plugin:          Active mypy plugin(s)
       errors:          Used for reporting all errors
       flush_errors:    A function for processing errors after each SCC
-      saved_cache:     Dict with saved cache state for dmypy and fine-grained incremental mode
+      saved_cache:     Dict with saved cache state for coarse-grained dmypy
                        (read-write!)
       stats:           Dict with various instrumentation numbers
     """
@@ -626,6 +624,8 @@ def all_imported_modules_in_file(self,
 
         Return list of tuples (priority, module id, import line number)
         for all modules imported in file; lower numbers == higher priority.
+
+        Can generate blocking errors on bogus relative imports.
         """
 
         def correct_rel_imp(imp: Union[ImportFrom, ImportAll]) -> str:
@@ -640,6 +640,12 @@ def correct_rel_imp(imp: Union[ImportFrom, ImportAll]) -> str:
                 file_id = ".".join(file_id.split(".")[:-rel])
             new_id = file_id + "." + imp.id if imp.id else file_id
 
+            if not new_id:
+                self.errors.set_file(file.path, file.name())
+                self.errors.report(imp.line, 0,
+                                   "No parent module -- cannot perform relative import",
+                                   blocker=True)
+
             return new_id
 
         res = []  # type: List[Tuple[int, str, int]]
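For illustration (this example is not part of the commit), the new check fires when a relative import climbs past the top of the package hierarchy, which makes correct_rel_imp produce an empty module id. A hypothetical top-level module like the one below would now get the blocking error during dependency collection:

# standalone.py -- a top-level module with no enclosing package (hypothetical)
from .. import helpers   # error: No parent module -- cannot perform relative import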
@@ -1129,12 +1135,6 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str],
         manager.log('Metadata abandoned for {}: errors were previously ignored'.format(id))
         return None
 
-    if meta.memory_only:
-        # Special case for fine-grained incremental mode when the JSON file is missing but
-        # we want to cache the module anyway.
-        manager.log('Memory-only metadata for {}'.format(id))
-        return meta
-
     assert path is not None, "Internal error: meta was provided without a path"
     # Check data_json; assume if its mtime matches it's good.
     # TODO: stat() errors
@@ -1626,7 +1626,8 @@ def __init__(self,
                 self.ignore_all = True
             else:
                 # In 'error' mode, produce special error messages.
-                manager.log("Skipping %s (%s)" % (path, id))
+                if id not in manager.missing_modules:
+                    manager.log("Skipping %s (%s)" % (path, id))
             if follow_imports == 'error':
                 if ancestor_for:
                     self.skipping_ancestor(id, path, ancestor_for)
@@ -1678,7 +1679,7 @@ def __init__(self,
         else:
             # Parse the file (and then some) to get the dependencies.
             self.parse_file()
-            self.suppressed = []
+            self.compute_dependencies()
             self.child_modules = set()
 
     def skipping_ancestor(self, id: str, path: str, ancestor_for: 'State') -> None:
@@ -1833,6 +1834,8 @@ def fix_suppressed_dependencies(self, graph: Graph) -> None:
         """
         # TODO: See if it's possible to move this check directly into parse_file in some way.
         # TODO: Find a way to write a test case for this fix.
+        # TODO: I suspect that splitting compute_dependencies() out from parse_file
+        # obviates the need for this but lacking a test case for the problem this fixed...
         silent_mode = (self.options.ignore_missing_imports or
                        self.options.follow_imports == 'skip')
         if not silent_mode:
@@ -1899,49 +1902,48 @@ def parse_file(self) -> None:
             # TODO: Why can't SemanticAnalyzerPass1 .analyze() do this?
             self.tree.names = manager.semantic_analyzer.globals
 
+        self.check_blockers()
+
+    def compute_dependencies(self) -> None:
+        """Compute a module's dependencies after parsing it.
+
+        This is used when we parse a file that we didn't have
+        up-to-date cache information for. When we have an up-to-date
+        cache, we just use the cached info.
+        """
+        manager = self.manager
+        assert self.tree is not None
+
         # Compute (direct) dependencies.
         # Add all direct imports (this is why we needed the first pass).
         # Also keep track of each dependency's source line.
         dependencies = []
-        suppressed = []
         priorities = {}  # type: Dict[str, int]  # id -> priority
         dep_line_map = {}  # type: Dict[str, int]  # id -> line
         for pri, id, line in manager.all_imported_modules_in_file(self.tree):
             priorities[id] = min(pri, priorities.get(id, PRI_ALL))
             if id == self.id:
                 continue
-            # Omit missing modules, as otherwise we could not type-check
-            # programs with missing modules.
-            if id in manager.missing_modules:
-                if id not in dep_line_map:
-                    suppressed.append(id)
-                    dep_line_map[id] = line
-                continue
-            if id == '':
-                # Must be from a relative import.
-                manager.errors.set_file(self.xpath, self.id)
-                manager.errors.report(line, 0,
-                                      "No parent module -- cannot perform relative import",
-                                      blocker=True)
-                continue
             if id not in dep_line_map:
                 dependencies.append(id)
                 dep_line_map[id] = line
         # Every module implicitly depends on builtins.
         if self.id != 'builtins' and 'builtins' not in dep_line_map:
             dependencies.append('builtins')
 
-        # If self.dependencies is already set, it was read from the
-        # cache, but for some reason we're re-parsing the file.
         # NOTE: What to do about race conditions (like editing the
         # file while mypy runs)? A previous version of this code
        # explicitly checked for this, but ran afoul of other reasons
         # for differences (e.g. silent mode).
+
+        # Missing dependencies will be moved from dependencies to
+        # suppressed when they fail to be loaded in load_graph.
         self.dependencies = dependencies
-        self.suppressed = suppressed
+        self.suppressed = []
         self.priorities = priorities
         self.dep_line_map = dep_line_map
-        self.check_blockers()
+
+        self.check_blockers()  # Can fail due to bogus relative imports
 
     def semantic_analysis(self) -> None:
         assert self.tree is not None, "Internal error: method must be called on parsed file only"
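To make the new division of labor concrete, here is a hedged sketch of what the comment above describes: compute_dependencies() leaves suppressed empty, and it is the module-loading loop in load_graph that moves an unloadable dependency over to suppressed and records it as missing. This is not mypy's actual load_graph code; find_module and the driver function are illustrative stand-ins.

from typing import Callable, List, Optional, Set


def resolve_deps(state, find_module: Callable[[str], Optional[str]],
                 missing_modules: Set[str]) -> None:
    # Illustrative only: mirror the dependency/suppressed bookkeeping that
    # load_graph performs after a State has run compute_dependencies().
    remaining = []  # type: List[str]
    for dep in state.dependencies:
        if find_module(dep) is None:       # dependency could not be located
            state.suppressed.append(dep)   # moved out of dependencies...
            missing_modules.add(dep)       # ...and recorded for semanal
        else:
            remaining.append(dep)
    state.dependencies = remaining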
@@ -2198,13 +2200,19 @@ def dump_graph(graph: Graph) -> None:
     print("[" + ",\n ".join(node.dumps() for node in nodes) + "\n]")
 
 
-def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph:
+def load_graph(sources: List[BuildSource], manager: BuildManager,
+               old_graph: Optional[Graph] = None) -> Graph:
     """Given some source files, load the full dependency graph.
 
+    If an old_graph is passed in, it is used as the starting point and
+    modified during graph loading.
+
     As this may need to parse files, this can raise CompileError in case
     there are syntax errors.
     """
-    graph = {}  # type: Graph
+
+    graph = old_graph or {}  # type: Graph
+
     # The deque is used to implement breadth-first traversal.
     # TODO: Consider whether to go depth-first instead. This may
     # affect the order in which we process files within import cycles.

mypy/checker.py

Lines changed: 18 additions & 0 deletions
@@ -211,6 +211,24 @@ def __init__(self, errors: Errors, modules: Dict[str, MypyFile], options: Option
         # for processing module top levels in fine-grained incremental mode.
         self.recurse_into_functions = True
 
+    def reset(self) -> None:
+        """Cleanup stale state that might be left over from a typechecking run.
+
+        This allows us to reuse TypeChecker objects in fine-grained
+        incremental mode.
+        """
+        # TODO: verify this is still actually worth it over creating new checkers
+        self.partial_reported.clear()
+        self.module_refs.clear()
+        self.binder = ConditionalTypeBinder()
+        self.type_map.clear()
+
+        assert self.inferred_attribute_types is None
+        assert self.partial_types == []
+        assert self.deferred_nodes == []
+        assert len(self.scope.stack) == 1
+        assert self.partial_types == []
+
     def check_first_pass(self) -> None:
         """Type check the entire file, but defer functions with unresolved references.
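A hedged sketch of how reset() is meant to be used: fine-grained mode can keep one TypeChecker per module and reuse it between updates instead of constructing a new one. The driver function below is hypothetical; reset() and check_first_pass() are the real methods shown in this diff.

from mypy.checker import TypeChecker


def recheck_module(checker: TypeChecker) -> None:
    # Hypothetical driver: clear per-run state, then type check the
    # (updated) module tree again with the same checker instance.
    checker.reset()
    checker.check_first_pass()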

mypy/dmypy_server.py

Lines changed: 5 additions & 3 deletions
@@ -282,10 +282,12 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict
         if self.options.use_fine_grained_cache:
             # Pull times and hashes out of the saved_cache and stick them into
             # the fswatcher, so we pick up the changes.
-            for meta, mypyfile, type_map in manager.saved_cache.values():
-                if meta.mtime is None: continue
+            for state in self.fine_grained_manager.graph.values():
+                meta = state.meta
+                if meta is None: continue
+                assert state.path is not None
                 self.fswatcher.set_file_data(
-                    mypyfile.path,
+                    state.path,
                     FileData(st_mtime=float(meta.mtime), st_size=meta.size, md5=meta.hash))
 
         # Run an update
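As a standalone sketch of the seeding loop above (assuming FileData is the named tuple that dmypy_server already imports from mypy.fswatcher), the watcher is primed with the cached mtime, size, and hash of every graph State that has metadata, so the first fine-grained update only reports files that really changed:

from mypy.fswatcher import FileData


def seed_watcher(fswatcher, graph) -> None:
    # Prime the file watcher from cached metadata so unchanged files are not
    # reported as modified on the first update after startup.
    for state in graph.values():
        meta = state.meta
        if meta is None:
            continue  # no cache metadata for this module
        assert state.path is not None
        fswatcher.set_file_data(
            state.path,
            FileData(st_mtime=float(meta.mtime), st_size=meta.size, md5=meta.hash))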
