From 14034b9a50e019e220fd7a2b9f835011e86d3653 Mon Sep 17 00:00:00 2001
From: Jukka Lehtosalo
Date: Mon, 22 Feb 2021 12:29:08 +0000
Subject: [PATCH 01/10] [WIP] Add AST cache to speed up mypy daemon

---
 mypy/build.py        | 16 ++++++++++++++++
 mypy/dmypy_server.py | 11 ++++++++---
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/mypy/build.py b/mypy/build.py
index 909d793b0002..11edbcfdb802 100644
--- a/mypy/build.py
+++ b/mypy/build.py
@@ -562,6 +562,7 @@ class BuildManager:
       not only for debugging, but also required for correctness,
       in particular to check consistency of the fine-grained dependency cache.
       fscache: A file system cacher
+      ast_cache: AST cache to speed up mypy daemon
     """

     def __init__(self, data_dir: str,
@@ -645,6 +646,13 @@ def __init__(self, data_dir: str,
         self.processed_targets = []  # type: List[str]
         # Missing stub packages encountered.
         self.missing_stub_packages = set()  # type: Set[str]
+        # Cache for mypy ASTs that have completed semantic analysis pass 1.
+        # When mypy daemon processes an increment where multiple files
+        # are added to the build, only one the files actually gets added and
+        # the others are discarded. This gets repeated until all the files
+        # have been added. This means that the same new file can be parsed
+        # O(n**2) times. We use this cache to avoid this redundant work.
+        self.ast_cache = {}  # type: Dict[str, Tuple[MypyFile, List[ErrorInfo]]]

     def dump_stats(self) -> None:
         if self.options.dump_build_stats:
@@ -1994,6 +2002,12 @@ def parse_file(self) -> None:
             return

         manager = self.manager
+
+        if self.id in manager.ast_cache:
+            manager.log("Using cached AST for %s (%s)" % (self.xpath, self.id))
+            self.tree, self.early_errors = manager.ast_cache[self.id]
+            return
+
         modules = manager.modules
         manager.log("Parsing %s (%s)" % (self.xpath, self.id))

@@ -2041,6 +2055,8 @@ def parse_file(self) -> None:

         self.check_blockers()

+        manager.ast_cache[self.id] = (self.tree, self.early_errors)
+
     def parse_inline_configuration(self, source: str) -> None:
         """Check for inline mypy: options directive and parse them."""
         flags = get_mypy_comments(source)
diff --git a/mypy/dmypy_server.py b/mypy/dmypy_server.py
index 30002c09641d..10afa4abe716 100644
--- a/mypy/dmypy_server.py
+++ b/mypy/dmypy_server.py
@@ -373,7 +373,7 @@ def cmd_recheck(self,
             assert remove is None and update is None
             messages = self.fine_grained_increment_follow_imports(sources)
         res = self.increment_output(messages, sources, is_tty, terminal_width)
-        self.fscache.flush()
+        self.flush_caches()
         self.update_stats(res)
         return res

@@ -392,10 +392,15 @@ def check(self, sources: List[BuildSource],
         else:
             messages = self.fine_grained_increment_follow_imports(sources)
         res = self.increment_output(messages, sources, is_tty, terminal_width)
-        self.fscache.flush()
+        self.flush_caches()
         self.update_stats(res)
         return res

+    def flush_caches(self) -> None:
+        self.fscache.flush()
+        assert self.fine_grained_manager
+        self.fine_grained_manager.manager.ast_cache.clear()
+
     def update_stats(self, res: Dict[str, Any]) -> None:
         if self.fine_grained_manager:
             manager = self.fine_grained_manager.manager
@@ -852,7 +857,7 @@ def cmd_suggest(self,
                 out += "\n"
             return {'out': out, 'err': "", 'status': 0}
         finally:
-            self.fscache.flush()
+            self.flush_caches()

     def cmd_hang(self) -> Dict[str, object]:
         """Hang for 100 seconds, as a debug hack."""
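The comment added to BuildManager above is the heart of the change: when the daemon pulls several new files into the build, each increment actually adds only one of them and discards the rest, so the same file can end up being parsed O(n**2) times over the whole sequence. A minimal, self-contained sketch of the memoization idea follows; the names (parse_with_pass1, parse_module, the driver loop) are illustrative stand-ins, not mypy's real API.

    from typing import Dict, List, Tuple

    ast_cache = {}  # type: Dict[str, Tuple[str, List[str]]]  # mirrors BuildManager.ast_cache


    def parse_with_pass1(module_id):
        # type: (str) -> Tuple[str, List[str]]
        # Stand-in for the expensive work: parsing plus semantic analysis pass 1.
        print("parsing", module_id)
        return "<ast of %s>" % module_id, []


    def parse_module(module_id):
        # type: (str) -> Tuple[str, List[str]]
        if module_id in ast_cache:
            return ast_cache[module_id]    # cache hit: skip the expensive work
        result = parse_with_pass1(module_id)
        ast_cache[module_id] = result
        return result


    new_files = ["pkg.a", "pkg.b", "pkg.c"]
    # The daemon adds one new file per step and discards the rest, so without
    # the cache every step would re-parse all files still waiting: O(n**2).
    for step in range(len(new_files)):
        for module_id in new_files[step:]:
            parse_module(module_id)        # each module is parsed only once

Running the sketch prints "parsing" once per module even though each module is visited on several steps, which is exactly the redundant work the cache removes.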
From 7a49608383ee1924764f237c07e46bfaf8b6d6a9 Mon Sep 17 00:00:00 2001
From: Jukka Lehtosalo
Date: Mon, 22 Feb 2021 12:44:02 +0000
Subject: [PATCH 02/10] [WIP] clear caches in mypy daemon

---
 mypy/dmypy_server.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mypy/dmypy_server.py b/mypy/dmypy_server.py
index 10afa4abe716..df7e1ff44659 100644
--- a/mypy/dmypy_server.py
+++ b/mypy/dmypy_server.py
@@ -270,6 +270,7 @@ def run_command(self, command: str, data: Dict[str, object]) -> Dict[str, object
             del data['is_tty']
             del data['terminal_width']
         return method(self, **data)
+        self.flush_caches()

     # Command functions (run in the server via RPC).

@@ -398,8 +399,8 @@ def check(self, sources: List[BuildSource],

     def flush_caches(self) -> None:
         self.fscache.flush()
-        assert self.fine_grained_manager
-        self.fine_grained_manager.manager.ast_cache.clear()
+        if self.fine_grained_manager:
+            self.fine_grained_manager.manager.ast_cache.clear()

     def update_stats(self, res: Dict[str, Any]) -> None:
         if self.fine_grained_manager:
@@ -432,6 +433,7 @@ def initialize_fine_grained(self, sources: List[BuildSource],
             return {'out': out, 'err': err, 'status': 2}
         messages = result.errors
         self.fine_grained_manager = FineGrainedBuildManager(result)
+        self.fine_grained_manager.manager.ast_cache.clear()

         if self.following_imports():
             sources = find_all_sources_in_build(self.fine_grained_manager.graph, sources)

From 8f28625a9ce71470d8e2298c5c9a6a28786f665d Mon Sep 17 00:00:00 2001
From: Jukka Lehtosalo
Date: Mon, 22 Feb 2021 15:20:16 +0000
Subject: [PATCH 03/10] Fix issues

---
 mypy/build.py | 42 ++++++++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/mypy/build.py b/mypy/build.py
index 11edbcfdb802..2a948862c71a 100644
--- a/mypy/build.py
+++ b/mypy/build.py
@@ -2003,13 +2003,12 @@ def parse_file(self) -> None:

         manager = self.manager

-        if self.id in manager.ast_cache:
-            manager.log("Using cached AST for %s (%s)" % (self.xpath, self.id))
-            self.tree, self.early_errors = manager.ast_cache[self.id]
-            return
-
+        cached = self.id in manager.ast_cache and True
         modules = manager.modules
-        manager.log("Parsing %s (%s)" % (self.xpath, self.id))
+        if not cached:
+            manager.log("Parsing %s (%s)" % (self.xpath, self.id))
+        else:
+            manager.log("Using cached AST for %s (%s)" % (self.xpath, self.id))

         with self.wrap_context():
             source = self.source
@@ -2040,22 +2039,33 @@ def parse_file(self) -> None:

             self.source_hash = compute_hash(source)
             self.parse_inline_configuration(source)
-            self.tree = manager.parse_file(self.id, self.xpath, source,
-                                           self.ignore_all or self.options.ignore_errors,
-                                           self.options)
+            if not cached:
+                self.tree = manager.parse_file(self.id, self.xpath, source,
+                                               self.ignore_all or self.options.ignore_errors,
+                                               self.options)

-        modules[self.id] = self.tree
+            else:
+                self.tree = manager.ast_cache[self.id][0]
+                manager.errors.set_file_ignored_lines(self.xpath, self.tree.ignored_lines,
+                                               self.ignore_all or self.options.ignore_errors)
+
+        if not cached:
+            # Make a copy of any errors produced during parse time so that
+            # fine-grained mode can repeat them when the module is
+            # reprocessed.
+            self.early_errors = list(manager.errors.error_info_map.get(self.xpath, []))
+        else:
+            self.early_errors = manager.ast_cache[self.id][1]

-        # Make a copy of any errors produced during parse time so that
-        # fine-grained mode can repeat them when the module is
-        # reprocessed.
-        self.early_errors = list(manager.errors.error_info_map.get(self.xpath, []))
+        modules[self.id] = self.tree

-        self.semantic_analysis_pass1()
+        if not cached:
+            self.semantic_analysis_pass1()

         self.check_blockers()

-        manager.ast_cache[self.id] = (self.tree, self.early_errors)
+        if source is not None:
+            manager.ast_cache[self.id] = (self.tree, self.early_errors)

     def parse_inline_configuration(self, source: str) -> None:
         """Check for inline mypy: options directive and parse them."""
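Patch 03 reshapes parse_file() so that a cache hit no longer returns early: a cached flag is computed once and each step branches on it, which keeps the per-run bookkeeping (registering the module, re-applying ignored lines, blocker checks, refreshing the cache entry) on both paths. Below is a rough, runnable sketch of that control flow; the plain dicts and helper bodies are invented stand-ins for mypy's real data structures.

    from typing import Dict, List, Tuple

    ast_cache = {}      # type: Dict[str, Tuple[str, List[str]]]  # stand-in for BuildManager.ast_cache
    modules = {}        # type: Dict[str, str]
    ignored_lines = {}  # type: Dict[str, List[int]]


    def parse_file(module_id, source):
        # type: (str, str) -> None
        cached = module_id in ast_cache

        if not cached:
            # Expensive path: parse the source and keep any early errors around.
            tree = "<ast of %s>" % module_id
            early_errors = []  # type: List[str]
        else:
            # Cheap path: reuse the cached tree and its recorded early errors.
            tree, early_errors = ast_cache[module_id]

        # Per-run bookkeeping still happens on both paths, as in the patch
        # (set_file_ignored_lines, modules[self.id] = self.tree, the cache store).
        ignored_lines[module_id] = []
        modules[module_id] = tree
        ast_cache[module_id] = (tree, early_errors)


    parse_file("pkg.mod", "x = 1")  # first call parses
    parse_file("pkg.mod", "x = 1")  # second call reuses the cached AST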
From 3c1efff882e1448b592390a43aebdf525bc9ad8d Mon Sep 17 00:00:00 2001
From: Jukka Lehtosalo
Date: Mon, 22 Feb 2021 15:23:01 +0000
Subject: [PATCH 04/10] Update comment

---
 mypy/build.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/mypy/build.py b/mypy/build.py
index 2a948862c71a..d414626d92ab 100644
--- a/mypy/build.py
+++ b/mypy/build.py
@@ -646,12 +646,13 @@ def __init__(self, data_dir: str,
         self.processed_targets = []  # type: List[str]
         # Missing stub packages encountered.
         self.missing_stub_packages = set()  # type: Set[str]
-        # Cache for mypy ASTs that have completed semantic analysis pass 1.
-        # When mypy daemon processes an increment where multiple files
-        # are added to the build, only one the files actually gets added and
-        # the others are discarded. This gets repeated until all the files
-        # have been added. This means that the same new file can be parsed
-        # O(n**2) times. We use this cache to avoid this redundant work.
+        # Cache for mypy ASTs that have completed semantic analysis
+        # pass 1. When multiple files are added to the build in a
+        # single daemon increment, only one of the files gets added
+        # per step and the others are discarded. This gets repeated
+        # until all the files have been added. This means that a
+        # new file can be processed O(n**2) times. This cache
+        # avoids most of this redundant work.
         self.ast_cache = {}  # type: Dict[str, Tuple[MypyFile, List[ErrorInfo]]]

     def dump_stats(self) -> None:

From dc26e82d0e82a30a3d8b1a896177726a62af99fe Mon Sep 17 00:00:00 2001
From: Jukka Lehtosalo
Date: Mon, 22 Feb 2021 16:31:07 +0000
Subject: [PATCH 05/10] Simplify a bit

---
 mypy/build.py        | 5 ++---
 mypy/dmypy_server.py | 2 --
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/mypy/build.py b/mypy/build.py
index d414626d92ab..e83cbd4d98d6 100644
--- a/mypy/build.py
+++ b/mypy/build.py
@@ -647,7 +647,7 @@ def __init__(self, data_dir: str,
         # Missing stub packages encountered.
         self.missing_stub_packages = set()  # type: Set[str]
         # Cache for mypy ASTs that have completed semantic analysis
-        # pass 1. When multiple files are added to the build in a
+        # pass 1. When multiple files are added to the build in a
         # single daemon increment, only one of the files gets added
         # per step and the others are discarded. This gets repeated
         # until all the files have been added. This means that a
@@ -2065,8 +2065,7 @@ def parse_file(self) -> None:

         self.check_blockers()

-        if source is not None:
-            manager.ast_cache[self.id] = (self.tree, self.early_errors)
+        manager.ast_cache[self.id] = (self.tree, self.early_errors)

     def parse_inline_configuration(self, source: str) -> None:
         """Check for inline mypy: options directive and parse them."""
diff --git a/mypy/dmypy_server.py b/mypy/dmypy_server.py
index df7e1ff44659..c3f21a75e4ed 100644
--- a/mypy/dmypy_server.py
+++ b/mypy/dmypy_server.py
@@ -270,7 +270,6 @@ def run_command(self, command: str, data: Dict[str, object]) -> Dict[str, object
             del data['is_tty']
             del data['terminal_width']
         return method(self, **data)
-        self.flush_caches()

     # Command functions (run in the server via RPC).

@@ -433,7 +432,6 @@ def initialize_fine_grained(self, sources: List[BuildSource],
             return {'out': out, 'err': err, 'status': 2}
         messages = result.errors
         self.fine_grained_manager = FineGrainedBuildManager(result)
-        self.fine_grained_manager.manager.ast_cache.clear()

         if self.following_imports():
             sources = find_all_sources_in_build(self.fine_grained_manager.graph, sources)
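Patches 02 and 05 settle where the caches get dropped: flush_caches() runs on the daemon side around requests, and it has to tolerate the case where no fine-grained manager exists yet, since the very first check is what builds it. A small sketch of that guard follows, with simplified stand-in classes rather than mypy's real Server, FileSystemCache and FineGrainedBuildManager.

    from typing import Dict, List, Optional, Tuple


    class BuildManagerStub:
        def __init__(self):
            # type: () -> None
            self.ast_cache = {}  # type: Dict[str, Tuple[str, List[str]]]


    class FineGrainedManagerStub:
        def __init__(self):
            # type: () -> None
            self.manager = BuildManagerStub()


    class FileSystemCacheStub:
        def flush(self):
            # type: () -> None
            print("fscache flushed")


    class Server:
        def __init__(self):
            # type: () -> None
            self.fscache = FileSystemCacheStub()
            # Not created until the first fine-grained build has run.
            self.fine_grained_manager = None  # type: Optional[FineGrainedManagerStub]

        def flush_caches(self):
            # type: () -> None
            self.fscache.flush()
            # A guard instead of an assert: the first request can arrive before
            # any fine-grained manager (and hence any AST cache) exists.
            if self.fine_grained_manager:
                self.fine_grained_manager.manager.ast_cache.clear()


    server = Server()
    server.flush_caches()                                  # no manager yet: only fscache
    server.fine_grained_manager = FineGrainedManagerStub()
    server.flush_caches()                                  # now also clears the AST cache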
From 0a0f1ce367a0ee62d390d336304f35072d668126 Mon Sep 17 00:00:00 2001
From: Jukka Lehtosalo
Date: Mon, 22 Feb 2021 16:32:50 +0000
Subject: [PATCH 06/10] Add comments

---
 mypy/build.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mypy/build.py b/mypy/build.py
index e83cbd4d98d6..9ad6c5dc5f85 100644
--- a/mypy/build.py
+++ b/mypy/build.py
@@ -2004,6 +2004,7 @@ def parse_file(self) -> None:

         manager = self.manager

+        # Can we reuse a previously parsed AST? This avoids redundant work in daemon.
         cached = self.id in manager.ast_cache and True
         modules = manager.modules
         if not cached:
@@ -2046,6 +2047,7 @@ def parse_file(self) -> None:
                                                self.options)

             else:
+                # Reuse a cached AST
                 self.tree = manager.ast_cache[self.id][0]
                 manager.errors.set_file_ignored_lines(self.xpath, self.tree.ignored_lines,
                                                self.ignore_all or self.options.ignore_errors)

From 7b6e70c0196a9cc8e4c0123c061e02920df36210 Mon Sep 17 00:00:00 2001
From: Jukka Lehtosalo
Date: Mon, 22 Feb 2021 16:34:01 +0000
Subject: [PATCH 07/10] Fix lint

---
 mypy/build.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mypy/build.py b/mypy/build.py
index 9ad6c5dc5f85..14642aeadffc 100644
--- a/mypy/build.py
+++ b/mypy/build.py
@@ -2049,8 +2049,10 @@ def parse_file(self) -> None:
             else:
                 # Reuse a cached AST
                 self.tree = manager.ast_cache[self.id][0]
-                manager.errors.set_file_ignored_lines(self.xpath, self.tree.ignored_lines,
-                                               self.ignore_all or self.options.ignore_errors)
+                manager.errors.set_file_ignored_lines(
+                    self.xpath,
+                    self.tree.ignored_lines,
+                    self.ignore_all or self.options.ignore_errors)

         if not cached:
             # Make a copy of any errors produced during parse time so that

From 905f274bdea74a7e182409133e87f57e37fe194c Mon Sep 17 00:00:00 2001
From: Jukka Lehtosalo
Date: Mon, 22 Feb 2021 16:41:43 +0000
Subject: [PATCH 08/10] Fix ast merge tests

---
 mypy/test/testmerge.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mypy/test/testmerge.py b/mypy/test/testmerge.py
index c9f04c2abef6..2b3444225485 100644
--- a/mypy/test/testmerge.py
+++ b/mypy/test/testmerge.py
@@ -124,6 +124,7 @@ def build(self, source: str, testcase: DataDrivenTestCase) -> Optional[BuildResu
     def build_increment(self, manager: FineGrainedBuildManager,
                         module_id: str, path: str) -> Tuple[MypyFile,
                                                             Dict[Expression, Type]]:
+        manager.manager.ast_cache.clear()
         manager.update([(module_id, path)], [])
         module = manager.manager.modules[module_id]
         type_map = manager.graph[module_id].type_map()
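Patch 08 touches the merge test because it drives FineGrainedBuildManager.update() directly, so no daemon request loop ever flushes the AST cache between increments; the test helper has to clear it itself (patch 10 later routes this through a flush_cache() method). A hedged sketch of that pattern, using stub classes in place of mypy's real ones:

    from typing import Dict, List, Tuple


    class BuildManagerStub:
        def __init__(self):
            # type: () -> None
            self.ast_cache = {}  # type: Dict[str, Tuple[str, List[str]]]


    class FineGrainedBuildManagerStub:
        """Tiny stand-in for mypy's FineGrainedBuildManager."""

        def __init__(self):
            # type: () -> None
            self.manager = BuildManagerStub()

        def update(self, changed, removed):
            # type: (List[Tuple[str, str]], List[Tuple[str, str]]) -> List[str]
            # The real update() reparses changed modules; a stale ast_cache
            # entry would hide edits made to a file between two increments.
            return []


    def build_increment(manager, module_id, path):
        # type: (FineGrainedBuildManagerStub, str, str) -> None
        # Nothing else flushes the cache here (there is no daemon request
        # loop), so the test helper clears it before driving the update.
        manager.manager.ast_cache.clear()
        manager.update([(module_id, path)], [])


    build_increment(FineGrainedBuildManagerStub(), "target", "/tmp/target.py")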
From bff914f36f11f93f0fe3e00ee695625daeca0611 Mon Sep 17 00:00:00 2001
From: Jukka Lehtosalo
Date: Mon, 22 Feb 2021 16:58:36 +0000
Subject: [PATCH 09/10] Remove redundant code

---
 mypy/build.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mypy/build.py b/mypy/build.py
index 14642aeadffc..494dfdf3ae01 100644
--- a/mypy/build.py
+++ b/mypy/build.py
@@ -2005,7 +2005,7 @@ def parse_file(self) -> None:
         manager = self.manager

         # Can we reuse a previously parsed AST? This avoids redundant work in daemon.
-        cached = self.id in manager.ast_cache and True
+        cached = self.id in manager.ast_cache
         modules = manager.modules
         if not cached:
             manager.log("Parsing %s (%s)" % (self.xpath, self.id))

From 1b3c6b8d5cfdec72532d73438574c7c92ea7b529 Mon Sep 17 00:00:00 2001
From: Jukka Lehtosalo
Date: Mon, 22 Feb 2021 18:07:57 +0000
Subject: [PATCH 10/10] Minor refactoring + attempt to fix test

---
 mypy/dmypy_server.py   | 2 +-
 mypy/server/update.py  | 8 ++++++++
 mypy/suggestions.py    | 1 +
 mypy/test/testmerge.py | 2 +-
 4 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/mypy/dmypy_server.py b/mypy/dmypy_server.py
index c3f21a75e4ed..eb53935db297 100644
--- a/mypy/dmypy_server.py
+++ b/mypy/dmypy_server.py
@@ -399,7 +399,7 @@ def check(self, sources: List[BuildSource],
     def flush_caches(self) -> None:
         self.fscache.flush()
         if self.fine_grained_manager:
-            self.fine_grained_manager.manager.ast_cache.clear()
+            self.fine_grained_manager.flush_cache()

     def update_stats(self, res: Dict[str, Any]) -> None:
         if self.fine_grained_manager:
diff --git a/mypy/server/update.py b/mypy/server/update.py
index a9f931429a45..085c143fadd1 100644
--- a/mypy/server/update.py
+++ b/mypy/server/update.py
@@ -288,6 +288,14 @@ def trigger(self, target: str) -> List[str]:
         self.previous_messages = self.manager.errors.new_messages()[:]
         return self.update(changed_modules, [])

+    def flush_cache(self) -> None:
+        """Flush AST cache.
+
+        This needs to be called after each increment, or file changes won't
+        be detected reliably.
+        """
+        self.manager.ast_cache.clear()
+
     def update_one(self,
                    changed_modules: List[Tuple[str, str]],
                    initial_set: Set[str],
diff --git a/mypy/suggestions.py b/mypy/suggestions.py
index b66ba6d6118d..8df180d825b4 100644
--- a/mypy/suggestions.py
+++ b/mypy/suggestions.py
@@ -640,6 +640,7 @@ def reload(self, state: State, check_errors: bool = False) -> List[str]:
         If check_errors is true, raise an exception if there are errors.
         """
         assert state.path is not None
+        self.fgmanager.flush_cache()
         return self.fgmanager.update([(state.id, state.path)], [])

     def ensure_loaded(self, state: State, force: bool = False) -> MypyFile:
diff --git a/mypy/test/testmerge.py b/mypy/test/testmerge.py
index 2b3444225485..c7fcbda01c04 100644
--- a/mypy/test/testmerge.py
+++ b/mypy/test/testmerge.py
@@ -124,7 +124,7 @@ def build(self, source: str, testcase: DataDrivenTestCase) -> Optional[BuildResu
     def build_increment(self, manager: FineGrainedBuildManager,
                         module_id: str, path: str) -> Tuple[MypyFile,
                                                             Dict[Expression, Type]]:
-        manager.manager.ast_cache.clear()
+        manager.flush_cache()
        manager.update([(module_id, path)], [])
         module = manager.manager.modules[module_id]
         type_map = manager.graph[module_id].type_map()
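The docstring added in the final patch states the invariant the whole series depends on: the AST cache must be flushed after each increment, or edits to a file on disk can go unnoticed. Below is a minimal illustration of that failure mode and the fix; the plain dicts stand in for mypy's BuildManager and the file system, and the function names are invented for the sketch.

    from typing import Dict, List, Tuple

    ast_cache = {}  # type: Dict[str, Tuple[str, List[str]]]  # stands in for BuildManager.ast_cache
    disk = {"pkg.mod": "x = 1"}  # type: Dict[str, str]       # stands in for files on disk


    def parse_file(module_id):
        # type: (str) -> str
        # Same shape as the cached path in build.py: a hit never looks at the file.
        if module_id in ast_cache:
            return ast_cache[module_id][0]
        tree = "<ast of %r>" % disk[module_id]
        ast_cache[module_id] = (tree, [])
        return tree


    def flush_cache():
        # type: () -> None
        # Mirrors FineGrainedBuildManager.flush_cache() from the last patch.
        ast_cache.clear()


    print(parse_file("pkg.mod"))   # <ast of 'x = 1'>
    disk["pkg.mod"] = "x = 2"
    print(parse_file("pkg.mod"))   # still the stale AST: the edit goes unnoticed
    flush_cache()                  # what the daemon now does after every request
    print(parse_file("pkg.mod"))   # <ast of 'x = 2'>

Clearing the cache per increment keeps the speedup strictly within one daemon request, which is why flush_caches() is also called in the finally block of cmd_suggest and after check/recheck in the earlier patches.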