Skip to content

Commit 9779e18

Browse files
authored
Two optimizations for load_graph() (#4294)
Profiling load_graph() revealed some unnecessary stat() calls and, more importantly, that Options.clone_for_module() is very inefficient when the mypy.ini file has many sections. I solved the latter by introducing a cache. An alternative design would move the cache into BuildManager -- that would avoid a reference cycle, but keeping the cache in the root Options object means that it survives between dmypy check runs, which is a nice win there. (The inefficiency of clone_for_module() explains, at least partly, why load_graph() was slow even with everything loaded in memory.)
1 parent b541af9 commit 9779e18

File tree

2 files changed

+22
-8
lines changed

2 files changed

+22
-8
lines changed

mypy/build.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import os.path
2121
import re
2222
import site
23+
import stat
2324
import sys
2425
import time
2526
from os.path import dirname, basename
@@ -950,13 +951,14 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache
950951
# TODO: May need to take more build options into account
951952
meta_json, data_json = get_cache_names(id, path, manager)
952953
manager.trace('Looking for {} at {}'.format(id, meta_json))
953-
if not os.path.exists(meta_json):
954+
try:
955+
with open(meta_json, 'r') as f:
956+
meta_str = f.read()
957+
manager.trace('Meta {} {}'.format(id, meta_str.rstrip()))
958+
meta = json.loads(meta_str) # TODO: Errors
959+
except IOError:
954960
manager.log('Could not load cache for {}: could not find {}'.format(id, meta_json))
955961
return None
956-
with open(meta_json, 'r') as f:
957-
meta_str = f.read()
958-
manager.trace('Meta {} {}'.format(id, meta_str.rstrip()))
959-
meta = json.loads(meta_str) # TODO: Errors
960962
if not isinstance(meta, dict):
961963
manager.log('Could not load cache for {}: meta cache is not a dict: {}'
962964
.format(id, repr(meta)))
@@ -1056,11 +1058,10 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str],
10561058

10571059
# TODO: Share stat() outcome with find_module()
10581060
path = os.path.abspath(path)
1059-
# TODO: Don't use isfile() but check st.st_mode
1060-
if not os.path.isfile(path):
1061+
st = manager.get_stat(path) # TODO: Errors
1062+
if not stat.S_ISREG(st.st_mode):
10611063
manager.log('Metadata abandoned for {}: file {} does not exist'.format(id, path))
10621064
return None
1063-
st = manager.get_stat(path) # TODO: Errors
10641065
size = st.st_size
10651066
if size != meta.size:
10661067
manager.log('Metadata abandoned for {}: file {} has different size'.format(id, path))

mypy/options.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ class Options:
4545
- {"debug_cache"})
4646

4747
def __init__(self) -> None:
48+
# Cache for clone_for_module()
49+
self.clone_cache = {} # type: Dict[str, Options]
50+
4851
# -- build options --
4952
self.build_type = BuildType.STANDARD
5053
self.python_version = defaults.PYTHON3_VERSION
@@ -177,17 +180,27 @@ def __repr__(self) -> str:
177180
return 'Options({})'.format(pprint.pformat(self.__dict__))
178181

179182
def clone_for_module(self, module: str) -> 'Options':
183+
"""Create an Options object that incorporates per-module options.
184+
185+
NOTE: Once this method is called all Options objects should be
186+
considered read-only, else the caching might be incorrect.
187+
"""
188+
res = self.clone_cache.get(module)
189+
if res is not None:
190+
return res
180191
updates = {}
181192
for pattern in self.per_module_options:
182193
if self.module_matches_pattern(module, pattern):
183194
if pattern in self.unused_configs:
184195
del self.unused_configs[pattern]
185196
updates.update(self.per_module_options[pattern])
186197
if not updates:
198+
self.clone_cache[module] = self
187199
return self
188200
new_options = Options()
189201
new_options.__dict__.update(self.__dict__)
190202
new_options.__dict__.update(updates)
203+
self.clone_cache[module] = new_options
191204
return new_options
192205

193206
def module_matches_pattern(self, module: str, pattern: Pattern[str]) -> bool:

0 commit comments

Comments
 (0)