
Commit 952a75e
Modifying multiple files per update in fine-grained incremental mode (#4282)
The approach is to split a multi-file update into a sequence of single-file updates, and to report errors only for the final update. If a modified file brings in additional files through an import, we process one of the imported files instead, changing the target file as we go.

The motivation is that this should result in less redundant work than postponing the imported file: a missing imported file produces a lot of stale targets, which would all need to be processed again on the next update. This still doesn't handle blockers well; I'll fix these issues separately.

This is inefficient, as some files can be parsed multiple times, with O(n**2) worst-case behavior (where n is the number of changed files). It should be fine when only a small number of files need to be processed.
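In outline, the driver loop this commit adds works like the following simplified sketch. Names follow the diff below, but blocking-error and cache handling are elided, and the stubbed `update_single` here is only for illustration:

```python
from typing import List, Tuple


def update_single(module: str, path: str) -> Tuple[List[str], List[Tuple[str, str]]]:
    # Stub for illustration: type check one file and return (errors, newly
    # discovered (module id, path) pairs brought in through imports).
    return [], []


def update(changed_modules: List[Tuple[str, str]]) -> List[str]:
    """Process (module id, path) pairs one file at a time.

    Newly imported modules reported by update_single() go back on the work
    list, and only the errors from the final single-file update are kept.
    """
    result = []  # type: List[str]
    while changed_modules:
        id, path = changed_modules.pop(0)
        result, remaining = update_single(id, path)
        changed_modules.extend(remaining)
    return result
```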
1 parent: fe02d38

File tree

3 files changed: +407 −60 lines


mypy/server/update.py

Lines changed: 133 additions & 60 deletions
@@ -93,6 +93,7 @@ def __init__(self,
         # Modules that had blocking errors in the previous run.
         # TODO: Handle blocking errors in the initial build
         self.blocking_errors = []  # type: List[str]
+        mark_all_meta_as_memory_only(graph, manager)
         manager.saved_cache = preserve_full_cache(graph, manager)

     def update(self, changed_modules: List[Tuple[str, str]]) -> List[str]:
@@ -112,58 +113,94 @@ def update(self, changed_modules: List[Tuple[str, str]]) -> List[str]:
         Returns:
             A list of errors.
         """
-        changed_ids = [id for id, _ in changed_modules]
         if DEBUG:
+            changed_ids = [id for id, _ in changed_modules]
             print('==== update %s ====' % changed_ids)
+        while changed_modules:
+            id, path = changed_modules.pop(0)
+            if DEBUG:
+                print('-- %s --' % id)
+            result, remaining = self.update_single(id, path)
+            changed_modules.extend(remaining)
+        return result
+
+    def update_single(self, module: str, path: str) -> Tuple[List[str],
+                                                             List[Tuple[str, str]]]:
+        """Update a single modified module.
+
+        If the module contains imports of previously unseen modules, only process one of
+        the new modules and return the remaining work to be done.
+
+        Returns:
+            Tuple with these items:
+
+            - Error messages
+            - Remaining modules to process as (module id, path) tuples
+        """
+        # TODO: If new module brings in other modules, we parse some files multiple times.
         if self.blocking_errors:
             # TODO: Relax this requirement
-            assert self.blocking_errors == changed_ids
+            assert self.blocking_errors == [module]
         manager = self.manager
         graph = self.graph

-        # Record symbol table snaphots of old versions of changed moduiles.
+        # Record symbol table snaphots of old versions of changed modules.
         old_snapshots = {}
-        for id, _ in changed_modules:
-            if id in manager.modules:
-                snapshot = snapshot_symbol_table(id, manager.modules[id].names)
-                old_snapshots[id] = snapshot
-            else:
-                old_snapshots[id] = {}
+        if module in manager.modules:
+            snapshot = snapshot_symbol_table(module, manager.modules[module].names)
+            old_snapshots[module] = snapshot
+        else:
+            old_snapshots[module] = {}

         manager.errors.reset()
         try:
-            new_modules, graph = build_incremental_step(manager, changed_modules, graph)
+            module, tree, graph, remaining = update_single_isolated(module, path, manager, graph)
         except CompileError as err:
-            self.blocking_errors = changed_ids
-            return err.messages
+            # TODO: Remaining modules
+            self.blocking_errors = [module]
+            return err.messages, []
         self.blocking_errors = []

+        if module not in old_snapshots:
+            old_snapshots[module] = {}
+
         # TODO: What to do with stale dependencies?
-        triggered = calculate_active_triggers(manager, old_snapshots, new_modules)
+        triggered = calculate_active_triggers(manager, old_snapshots, {module: tree})
         if DEBUG:
             print('triggered:', sorted(triggered))
-        update_dependencies(new_modules, self.deps, graph, self.options)
+        update_dependencies({module: tree}, self.deps, graph, self.options)
         propagate_changes_using_dependencies(manager, graph, self.deps, triggered,
-                                             set(changed_ids),
+                                             {module},
                                              self.previous_targets_with_errors,
                                              graph)

         # Preserve state needed for the next update.
         self.previous_targets_with_errors = manager.errors.targets()
-        for id, _ in changed_modules:
-            # If deleted, module won't be in the graph.
-            if id in graph:
-                # Generate metadata so that we can reuse the AST in the next run.
-                graph[id].write_cache()
+        # If deleted, module won't be in the graph.
+        if module in graph:
+            # Generate metadata so that we can reuse the AST in the next run.
+            graph[module].write_cache()
         for id, state in graph.items():
             # Look up missing ASTs from saved cache.
             if state.tree is None and id in manager.saved_cache:
                 meta, tree, type_map = manager.saved_cache[id]
                 state.tree = tree
+        mark_all_meta_as_memory_only(graph, manager)
         manager.saved_cache = preserve_full_cache(graph, manager)
         self.graph = graph

-        return manager.errors.messages()
+        return manager.errors.messages(), remaining
+
+
+def mark_all_meta_as_memory_only(graph: Dict[str, State],
+                                 manager: BuildManager) -> None:
+    for id, state in graph.items():
+        if id in manager.saved_cache:
+            # Don't look at disk.
+            old = manager.saved_cache[id]
+            manager.saved_cache[id] = (old[0]._replace(memory_only=True),
+                                       old[1],
+                                       old[2])


 def get_all_dependencies(manager: BuildManager, graph: Dict[str, State],
@@ -174,70 +211,85 @@ def get_all_dependencies(manager: BuildManager, graph: Dict[str, State],
     return deps


-def build_incremental_step(manager: BuildManager,
-                           changed_modules: List[Tuple[str, str]],
-                           graph: Dict[str, State]) -> Tuple[Dict[str, Optional[MypyFile]],
-                                                             Graph]:
-    """Build new versions of changed modules only.
-
-    Raise CompleError on encountering a blocking error.
-
-    Return the new ASTs for the changed modules and the entire build graph.
+def update_single_isolated(module: str,
+                           path: str,
+                           manager: BuildManager,
+                           graph: Dict[str, State]) -> Tuple[str,
+                                                             Optional[MypyFile],
+                                                             Graph,
+                                                             List[Tuple[str, str]]]:
+    """Build a new version of one changed module only.
+
+    Don't propagate changes to elsewhere in the program. Raise CompleError on
+    encountering a blocking error.
+
+    Args:
+        module: Changed module (modified, created or deleted)
+        path: Path of the changed module
+        manager: Build manager
+        graph: Build graph
+
+    Returns:
+        A 4-tuple with these items:
+
+        - Id of the changed module (can be different from the argument)
+        - New AST for the changed module (None if module was deleted)
+        - The entire build graph
+        - Remaining changed modules that are not processed yet as (module id, path)
+          tuples (non-empty if the original changed module imported other new
+          modules)
     """
-    # TODO: Handle multiple changed modules per step
-    assert len(changed_modules) == 1
-    id, path = changed_modules[0]
-    if id in manager.modules:
-        path1 = os.path.normpath(path)
-        path2 = os.path.normpath(manager.modules[id].path)
-        assert path1 == path2, '%s != %s' % (path1, path2)
+    if module in manager.modules:
+        assert_equivalent_paths(path, manager.modules[module].path)

     old_modules = dict(manager.modules)
-
-    sources = get_sources(graph, changed_modules)
-    changed_set = {id for id, _ in changed_modules}
-
-    invalidate_stale_cache_entries(manager.saved_cache, changed_modules)
+    sources = get_sources(graph, [(module, path)])
+    invalidate_stale_cache_entries(manager.saved_cache, [(module, path)])

     if not os.path.isfile(path):
-        graph = delete_module(id, graph, manager)
-        return {id: None}, graph
+        graph = delete_module(module, graph, manager)
+        return module, None, graph, []

     old_graph = graph
     manager.missing_modules = set()
     graph = load_graph(sources, manager)

     # Find any other modules brought in by imports.
-    for st in graph.values():
-        if st.id not in old_graph and st.id not in changed_set:
-            changed_set.add(st.id)
-            assert st.path
-            changed_modules.append((st.id, st.path))
-    # TODO: Handle multiple changed modules per step
-    assert len(changed_modules) == 1, changed_modules
+    changed_modules = get_all_changed_modules(module, path, old_graph, graph)
+    # If there are multiple modules to process, only process one of them and return the
+    # remaining ones to the caller.
+    if len(changed_modules) > 1:
+        remaining_modules = changed_modules[:-1]
+        # The remaining modules haven't been processed yet so drop them.
+        for id, _ in remaining_modules:
+            del manager.modules[id]
+            del graph[id]
+        module, path = changed_modules[-1]
+        if DEBUG:
+            print('--> %s (newly imported)' % module)
+    else:
+        remaining_modules = []

-    state = graph[id]
+    state = graph[module]

-    # Parse file and run first pass of semantic analysis.
+    # Process the changed file.
     state.parse_file()
-
     # TODO: state.fix_suppressed_dependencies()?
-
-    # Run remaining passes of semantic analysis.
     try:
         state.semantic_analysis()
     except CompileError as err:
         # TODO: What if there are multiple changed modules?
         # There was a blocking error, so module AST is incomplete. Restore old modules.
         manager.modules.clear()
         manager.modules.update(old_modules)
+        # TODO: Propagate remaining modules
         raise err
     state.semantic_analysis_pass_three()
     state.semantic_analysis_apply_patches()

     # Merge old and new ASTs.
     assert state.tree is not None, "file must be at least parsed"
-    new_modules = {id: state.tree}  # type: Dict[str, Optional[MypyFile]]
+    new_modules = {module: state.tree}  # type: Dict[str, Optional[MypyFile]]
     replace_modules_with_new_variants(manager, graph, old_modules, new_modules)

     # Perform type checking.
@@ -246,11 +298,16 @@ def build_incremental_step(manager: BuildManager,
     state.finish_passes()
     # TODO: state.write_cache()?
     # TODO: state.mark_as_rechecked()?
-    # TODO: Store new State in graph, as it has updated dependencies etc.

-    graph[id] = state
+    graph[module] = state
+
+    return module, state.tree, graph, remaining_modules

-    return new_modules, graph
+
+def assert_equivalent_paths(path1: str, path2: str) -> None:
+    path1 = os.path.normpath(path1)
+    path2 = os.path.normpath(path2)
+    assert path1 == path2, '%s != %s' % (path1, path2)


 def delete_module(module_id: str,
@@ -279,6 +336,20 @@ def get_sources(graph: Graph, changed_modules: List[Tuple[str, str]]) -> List[Bu
     return sources


+def get_all_changed_modules(root_module: str,
+                            root_path: str,
+                            old_graph: Dict[str, State],
+                            new_graph: Dict[str, State]) -> List[Tuple[str, str]]:
+    changed_set = {root_module}
+    changed_modules = [(root_module, root_path)]
+    for st in new_graph.values():
+        if st.id not in old_graph and st.id not in changed_set:
+            changed_set.add(st.id)
+            assert st.path
+            changed_modules.append((st.id, st.path))
+    return changed_modules
+
+
 def preserve_full_cache(graph: Graph, manager: BuildManager) -> SavedCache:
     """Preserve every module with an AST in the graph, including modules with errors."""
     saved_cache = {}
@@ -301,6 +372,8 @@ def preserve_full_cache(graph: Graph, manager: BuildManager) -> SavedCache:
                                      state.source_hash,
                                      state.ignore_all,
                                      manager)
+        else:
+            meta = meta._replace(memory_only=True)
         saved_cache[id] = (meta, state.tree, state.type_map())
     return saved_cache
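A note on the `memory_only` flag set in `mark_all_meta_as_memory_only()` and `preserve_full_cache()` above: the cache metadata is a NamedTuple, so `_replace()` returns a copy with one field changed, and the flag tells the next update to reuse the in-memory AST and type map rather than consult cache files on disk (per the "Don't look at disk" comment in the diff). A minimal sketch of the pattern, using a stand-in tuple type rather than mypy's real `CacheMeta`:

```python
from typing import Dict, NamedTuple

# Stand-in for mypy's CacheMeta; the fields here are illustrative only.
Meta = NamedTuple('Meta', [('id', str), ('memory_only', bool)])

saved_cache = {'pkg.mod': Meta('pkg.mod', False)}  # type: Dict[str, Meta]

# The commit's pattern: NamedTuple._replace returns a copy with one field
# changed, so every saved entry can be flipped to memory-only in place.
for mod_id, meta in saved_cache.items():
    saved_cache[mod_id] = meta._replace(memory_only=True)

assert saved_cache['pkg.mod'].memory_only
```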

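The O(n**2) worst case mentioned in the commit message follows from this design: each `update_single_isolated()` call runs `load_graph()` over the files still pending, keeps one module, and drops the rest from `manager.modules` and `graph`, so they get reparsed on later steps. A small sketch of the count, assuming a changed file whose import chain makes n files pending in total:

```python
def total_parses(n: int) -> int:
    """Parses needed when n files are pending after a single-file change.

    Step k reparses the k files still pending and keeps only one of them,
    so the total is n + (n - 1) + ... + 1 = n * (n + 1) / 2 parses.
    """
    return n * (n + 1) // 2


assert total_parses(1) == 1     # a lone changed file is parsed once
assert total_parses(10) == 55   # quadratic growth; fine for small updates
```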