Skip to content

Optimize the computation of lib_path #4982

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 27, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 21 additions & 46 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,11 @@ def __init__(self, manager: 'BuildManager', graph: Graph) -> None:

class BuildSource:
def __init__(self, path: Optional[str], module: Optional[str],
             text: Optional[str], base_dir: Optional[str] = None) -> None:
    """Describe a single source to build.

    Args:
        path: Path of the source file, or None.
        module: Module name; '__main__' is used when this is None or empty.
        text: Optional program text, stored as given.
        base_dir: Directory that should be added to the module search path
            for this source, or None if there is none.
    """
    self.path = path
    self.module = module or '__main__'
    self.text = text
    self.base_dir = base_dir

def __repr__(self) -> str:
return '<BuildSource path=%r module=%r has_text=%s>' % (self.path,
Expand Down Expand Up @@ -197,50 +198,52 @@ def default_flush_errors(new_messages: List[str], is_serious: bool) -> None:

def compute_lib_path(sources: List[BuildSource],
                     options: Options,
                     data_dir: str,
                     alt_lib_path: Optional[str] = None) -> List[str]:
    """Compute the module search path, highest-priority entry first.

    Starting from the front, the result contains:

    1. alt_lib_path, if given;
    2. the entries returned by mypy_path(), in their original order;
    3. the entries of options.mypy_path, in their original order;
    4. the current working directory;
    5. the base directories of the sources (only if alt_lib_path is not
       given), most recently seen first and without duplicates;
    6. the test-only lib-stub directory, if options.use_builtins_fixtures;
    7. the default lib path for data_dir and options.python_version.
    """
    # Determine the default module search path. A deque is used because
    # everything below is prepended.
    lib_path = collections.deque(
        default_lib_path(data_dir,
                         options.python_version,
                         custom_typeshed_dir=options.custom_typeshed_dir))

    if options.use_builtins_fixtures:
        # Use stub builtins (to speed up test cases and to make them easier to
        # debug).  This is a test-only feature, so assume our files are laid out
        # as in the source tree.
        root_dir = dirname(dirname(__file__))
        lib_path.appendleft(os.path.join(root_dir, 'test-data', 'unit', 'lib-stub'))

    # alt_lib_path is used by some tests to bypass the normal lib_path mechanics.
    # If we don't have one, grab directories of source files.
    if not alt_lib_path:
        # Mirror of the deque contents, so the duplicate check is a set lookup.
        seen = set(lib_path)
        for source in sources:
            # Include directory of the program file in the module search path.
            base_dir = source.base_dir
            if base_dir and base_dir not in seen:
                lib_path.appendleft(base_dir)
                seen.add(base_dir)

    # Do this even if running as a file, for sanity (mainly because with
    # multiple builds, there could be a mix of files/modules, so it's easier
    # to just define the semantics that we always add the current directory
    # to the lib_path).
    # TODO: Don't do this in some cases; for motivation see
    # https://github.com/python/mypy/issues/4195#issuecomment-341915031
    lib_path.appendleft(os.getcwd())

    # deque.extendleft() prepends one item at a time and therefore reverses
    # its argument; reverse the input first so that the configured entries
    # keep their relative priority.

    # Prepend a config-defined mypy path.
    lib_path.extendleft(reversed(list(options.mypy_path)))

    # Add MYPYPATH environment variable to front of library path, if defined.
    lib_path.extendleft(reversed(list(mypy_path())))

    # If provided, insert the caller-supplied extra module path to the
    # beginning (highest priority) of the search path.
    if alt_lib_path:
        lib_path.appendleft(alt_lib_path)

    return list(lib_path)


def _build(sources: List[BuildSource],
Expand All @@ -256,7 +259,7 @@ def _build(sources: List[BuildSource],
data_dir = default_data_dir(bin_dir)
fscache = fscache or FileSystemCache()

lib_path = compute_lib_path(sources, options, alt_lib_path, data_dir, fscache)
lib_path = compute_lib_path(sources, options, data_dir, alt_lib_path)

reports = Reports(data_dir, options.report_dirs)
source_set = BuildSourceSet(sources)
Expand Down Expand Up @@ -791,34 +794,6 @@ def stats_summary(self) -> Mapping[str, object]:
return self.stats


def remove_cwd_prefix_from_path(fscache: 'FileSystemCache', p: str) -> str:
    """Remove current working directory prefix from p, if present.

    Also crawl up until a directory without __init__.py[i] is found.

    If the result would be empty (including when p is the current working
    directory itself), return '.' instead.

    Args:
        fscache: Object whose isfile(path) method is used to look for
            __init__.py / __init__.pyi files.
        p: Directory path to normalize.
    """
    cwd = os.getcwd()
    # Compare against the cwd followed by a separator, unless one is already
    # present, so that only whole path components are stripped.
    prefix = cwd if os.path.basename(cwd) == '' else cwd + os.sep

    # Compute root path: walk up while the directory is a package.
    while (p and
           (fscache.isfile(os.path.join(p, '__init__.py')) or
            fscache.isfile(os.path.join(p, '__init__.pyi')))):
        parent, base = os.path.split(p)
        if not base:
            break
        p = parent

    # Remove current directory prefix from the path, if present.  A path equal
    # to the cwd has no trailing separator and would not match the prefix, so
    # it is handled explicitly.
    if p == cwd:
        p = ''
    elif p.startswith(prefix):
        p = p[len(prefix):]

    # Avoid returning an empty path; replace that with '.'.
    return p or '.'


@functools.lru_cache(maxsize=None)
def _get_site_packages_dirs(python_executable: Optional[str]) -> List[str]:
"""Find package directories for given python.
Expand Down
11 changes: 3 additions & 8 deletions mypy/dmypy_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,10 @@ class Server:
# serve() is called in the grandchild (by daemonize()).

def __init__(self, options: Options,
timeout: Optional[int] = None,
alt_lib_path: Optional[str] = None) -> None:
timeout: Optional[int] = None) -> None:
"""Initialize the server with the desired mypy flags."""
self.options = options
self.timeout = timeout
self.alt_lib_path = alt_lib_path
self.fine_grained_manager = None # type: Optional[FineGrainedBuildManager]

if os.path.isfile(STATUS_FILE):
Expand Down Expand Up @@ -259,8 +257,7 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict
try:
result = mypy.build.build(sources=sources,
options=self.options,
fscache=self.fscache,
alt_lib_path=self.alt_lib_path)
fscache=self.fscache)
except mypy.errors.CompileError as e:
output = ''.join(s + '\n' for s in e.messages)
if e.use_stdout:
Expand Down Expand Up @@ -314,10 +311,8 @@ def fine_grained_increment(self, sources: List[mypy.build.BuildSource]) -> Dict[
t0 = time.time()
self.update_sources(sources)
changed, removed = self.find_changed(sources)
# Update the lib_path, which can change when sources do.
# TODO: This is slow.
manager.lib_path = tuple(mypy.build.compute_lib_path(
sources, manager.options, self.alt_lib_path, manager.data_dir, self.fscache))
sources, manager.options, manager.data_dir))
t1 = time.time()
messages = self.fine_grained_manager.update(changed, removed)
t2 = time.time()
Expand Down
34 changes: 18 additions & 16 deletions mypy/find_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@ def create_source_list(files: Sequence[str], options: Options,
for f in files:
if f.endswith(PY_EXTENSIONS):
# Can raise InvalidSourceList if a directory doesn't have a valid module name.
targets.append(BuildSource(f, finder.crawl_up(f), None))
name, base_dir = finder.crawl_up(os.path.normpath(f))
targets.append(BuildSource(f, name, None, base_dir))
elif fscache.isdir(f):
sub_targets = finder.expand_dir(f)
sub_targets = finder.expand_dir(os.path.normpath(f))
if not sub_targets and not allow_empty_dir:
raise InvalidSourceList("There are no .py[i] files in directory '{}'"
.format(f))
Expand All @@ -58,8 +59,8 @@ def keyfunc(name: str) -> Tuple[int, str]:
class SourceFinder:
def __init__(self, fscache: FileSystemCache) -> None:
    """Create a finder that performs its file system queries through fscache."""
    self.fscache = fscache
    # A cache for package names, mapping from directory path to a
    # (module id, base dir) pair
    self.package_cache = {}  # type: Dict[str, Tuple[str, str]]

def expand_dir(self, arg: str, mod_prefix: str = '') -> List[BuildSource]:
"""Convert a directory name to a list of sources to build."""
Expand All @@ -68,11 +69,11 @@ def expand_dir(self, arg: str, mod_prefix: str = '') -> List[BuildSource]:
return []
seen = set() # type: Set[str]
sources = []
top_mod, base_dir = self.crawl_up_dir(arg)
if f and not mod_prefix:
top_mod = self.crawl_up(f)
mod_prefix = top_mod + '.'
if mod_prefix:
sources.append(BuildSource(f, mod_prefix.rstrip('.'), None))
sources.append(BuildSource(f, mod_prefix.rstrip('.'), None, base_dir))
names = self.fscache.listdir(arg)
names.sort(key=keyfunc)
for name in names:
Expand All @@ -88,28 +89,28 @@ def expand_dir(self, arg: str, mod_prefix: str = '') -> List[BuildSource]:
continue
if base not in seen and '.' not in base and suffix in PY_EXTENSIONS:
seen.add(base)
src = BuildSource(path, mod_prefix + base, None)
src = BuildSource(path, mod_prefix + base, None, base_dir)
sources.append(src)
return sources

def crawl_up(self, arg: str) -> Tuple[str, str]:
    """Given a .py[i] filename, return module and base directory.

    We crawl up the path until we find a directory without
    __init__.py[i], or until we run out of path components.

    Returns:
        A (module name, base directory) pair; both values are derived from
        crawl_up_dir() applied to the directory part of arg.
    """
    parent_dir, mod = os.path.split(arg)
    mod = strip_py(mod) or mod
    base, base_dir = self.crawl_up_dir(parent_dir)
    if mod == '__init__' or not mod:
        # A package's __init__ file (or a bare directory) is the package itself.
        mod = base
    else:
        mod = module_join(base, mod)

    return mod, base_dir

def crawl_up_dir(self, dir: str) -> str:
"""Given a directory name, return the corresponding module name.
def crawl_up_dir(self, dir: str) -> Tuple[str, str]:
"""Given a directory name, return the corresponding module name and base directory

Use package_cache to cache results.
"""
Expand All @@ -119,15 +120,16 @@ def crawl_up_dir(self, dir: str) -> str:
parent_dir, base = os.path.split(dir)
if not dir or not self.get_init_file(dir) or not base:
res = ''
base_dir = dir or '.'
else:
# Ensure that base is a valid python module name
if not base.isidentifier():
raise InvalidSourceList('{} is not a valid Python package name'.format(base))
parent = self.crawl_up_dir(parent_dir)
parent, base_dir = self.crawl_up_dir(parent_dir)
res = module_join(parent, base)

self.package_cache[dir] = res
return res
self.package_cache[dir] = res, base_dir
return res, base_dir

def get_init_file(self, dir: str) -> Optional[str]:
"""Check whether a directory contains a file named __init__.py[i].
Expand Down