Skip to content

Two optimizations for load_graph() #4294

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 30, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import os.path
import re
import site
import stat
import sys
import time
from os.path import dirname, basename
Expand Down Expand Up @@ -950,13 +951,14 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache
# TODO: May need to take more build options into account
meta_json, data_json = get_cache_names(id, path, manager)
manager.trace('Looking for {} at {}'.format(id, meta_json))
if not os.path.exists(meta_json):
try:
with open(meta_json, 'r') as f:
meta_str = f.read()
manager.trace('Meta {} {}'.format(id, meta_str.rstrip()))
meta = json.loads(meta_str) # TODO: Errors
except IOError:
manager.log('Could not load cache for {}: could not find {}'.format(id, meta_json))
return None
with open(meta_json, 'r') as f:
meta_str = f.read()
manager.trace('Meta {} {}'.format(id, meta_str.rstrip()))
meta = json.loads(meta_str) # TODO: Errors
if not isinstance(meta, dict):
manager.log('Could not load cache for {}: meta cache is not a dict: {}'
.format(id, repr(meta)))
Expand Down Expand Up @@ -1056,11 +1058,10 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str],

# TODO: Share stat() outcome with find_module()
path = os.path.abspath(path)
# TODO: Don't use isfile() but check st.st_mode
if not os.path.isfile(path):
st = manager.get_stat(path) # TODO: Errors
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about adding a is_file method to BuildManager instead and using it here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a bigger effort, we'd have to give BuildManager a cache results and use that everywhere. There's also an inefficiency in find_module(), which seems to make way to many stat() and listdir() calls, which I haven't tracked down.

if not stat.S_ISREG(st.st_mode):
manager.log('Metadata abandoned for {}: file {} does not exist'.format(id, path))
return None
st = manager.get_stat(path) # TODO: Errors
size = st.st_size
if size != meta.size:
manager.log('Metadata abandoned for {}: file {} has different size'.format(id, path))
Expand Down
13 changes: 13 additions & 0 deletions mypy/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ class Options:
- {"debug_cache"})

def __init__(self) -> None:
# Cache for clone_for_module()
self.clone_cache = {} # type: Dict[str, Options]

# -- build options --
self.build_type = BuildType.STANDARD
self.python_version = defaults.PYTHON3_VERSION
Expand Down Expand Up @@ -177,17 +180,27 @@ def __repr__(self) -> str:
return 'Options({})'.format(pprint.pformat(self.__dict__))

def clone_for_module(self, module: str) -> 'Options':
"""Create an Options object that incorporates per-module options.

NOTE: Once this method is called all Options objects should be
considered read-only, else the caching might be incorrect.
"""
res = self.clone_cache.get(module)
if res is not None:
return res
updates = {}
for pattern in self.per_module_options:
if self.module_matches_pattern(module, pattern):
if pattern in self.unused_configs:
del self.unused_configs[pattern]
updates.update(self.per_module_options[pattern])
if not updates:
self.clone_cache[module] = self
return self
new_options = Options()
new_options.__dict__.update(self.__dict__)
new_options.__dict__.update(updates)
self.clone_cache[module] = new_options
return new_options

def module_matches_pattern(self, module: str, pattern: Pattern[str]) -> bool:
Expand Down