Skip to content

Commit 59947f8

Browse files
emmatyping authored and ilevkivskyi committed
Refactor and reorder search path (#5256)
As promised in #5227, here is an implementation for refactoring and making the search path compliant with PEP 561. The order is specified in https://www.python.org/dev/peps/pep-0561/#type-checker-module-resolution-order.
1 parent c5de2fd commit 59947f8

File tree

8 files changed

+98
-73
lines changed

8 files changed

+98
-73
lines changed

docs/source/command_line.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,8 @@ This is computed from the following items:
152152
(a colon-separated list of directories).
153153
- The directories containing the sources given on the command line
154154
(see below).
155+
- The installed packages marked as safe for type checking (see
156+
:ref:`PEP 561 support <installed-packages>`)
155157
- The relevant directories of the
156158
`typeshed <https://github.com/python/typeshed>`_ repo.
157159

@@ -161,7 +163,7 @@ contain an ``__init__.py`` or ``__init__.pyi`` file.
161163

162164
Second, mypy searches for stub files in addition to regular Python files
163165
and packages.
164-
The rules for searching a module ``foo`` are as follows:
166+
The rules for searching for a module ``foo`` are as follows:
165167

166168
- The search looks in each of the directories in the search path
167169
(see above) until a match is found.

mypy/build.py

Lines changed: 81 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -188,10 +188,46 @@ def default_flush_errors(new_messages: List[str], is_serious: bool) -> None:
188188
raise
189189

190190

191-
def compute_lib_path(sources: List[BuildSource],
191+
# python_path is usercode, mypy_path is set via config or envionment variable,
192+
# package_path is calculated by _get_site_packages_dirs, and typeshed_path points
193+
# to typeshed. Each is a tuple of paths to be searched in find_module()
194+
SearchPaths = NamedTuple('SearchPaths',
195+
(('python_path', Tuple[str, ...]),
196+
('mypy_path', Tuple[str, ...]),
197+
('package_path', Tuple[str, ...]),
198+
('typeshed_path', Tuple[str, ...])))
199+
200+
201+
@functools.lru_cache(maxsize=None)
202+
def _get_site_packages_dirs(python_executable: Optional[str]) -> List[str]:
203+
"""Find package directories for given python.
204+
205+
This runs a subprocess call, which generates a list of the site package directories.
206+
To avoid repeatedly calling a subprocess (which can be slow!) we lru_cache the results."""
207+
if python_executable is None:
208+
return []
209+
if python_executable == sys.executable:
210+
# Use running Python's package dirs
211+
return sitepkgs.getsitepackages()
212+
else:
213+
# Use subprocess to get the package directory of given Python
214+
# executable
215+
return ast.literal_eval(subprocess.check_output([python_executable, sitepkgs.__file__],
216+
stderr=subprocess.PIPE).decode())
217+
218+
219+
def compute_search_paths(sources: List[BuildSource],
192220
options: Options,
193221
data_dir: str,
194-
alt_lib_path: Optional[str] = None) -> List[str]:
222+
alt_lib_path: Optional[str] = None) -> SearchPaths:
223+
"""Compute the search paths as specified in PEP 561.
224+
225+
There are the following 4 members created:
226+
- User code (from `sources`)
227+
- MYPYPATH (set either via config or environment variable)
228+
- installed package directories (which will later be split into stub-only and inline)
229+
- typeshed
230+
"""
195231
# Determine the default module search path.
196232
lib_path = collections.deque(
197233
default_lib_path(data_dir,
@@ -206,15 +242,14 @@ def compute_lib_path(sources: List[BuildSource],
206242
lib_path.appendleft(os.path.join(root_dir, 'test-data', 'unit', 'lib-stub'))
207243
# alt_lib_path is used by some tests to bypass the normal lib_path mechanics.
208244
# If we don't have one, grab directories of source files.
209-
lib_path_set = set(lib_path)
245+
python_path = [] # type: List[str]
210246
if not alt_lib_path:
211247
for source in sources:
212248
# Include directory of the program file in the module search path.
213249
if source.base_dir:
214250
dir = source.base_dir
215-
if dir not in lib_path_set:
216-
lib_path.appendleft(dir)
217-
lib_path_set.add(dir)
251+
if dir not in python_path:
252+
python_path.append(dir)
218253

219254
# Do this even if running as a file, for sanity (mainly because with
220255
# multiple builds, there could be a mix of files/modules, so it's easier
@@ -227,20 +262,23 @@ def compute_lib_path(sources: List[BuildSource],
227262
else:
228263
dir = os.getcwd()
229264
if dir not in lib_path:
230-
lib_path.appendleft(dir)
265+
python_path.insert(0, dir)
231266

232-
# Prepend a config-defined mypy path.
233-
lib_path.extendleft(options.mypy_path)
267+
# Start with a MYPYPATH environment variable at the front of the mypy_path, if defined.
268+
mypypath = mypy_path()
234269

235-
# Add MYPYPATH environment variable to front of library path, if defined.
236-
lib_path.extendleft(mypy_path())
270+
# Add a config-defined mypy path.
271+
mypypath.extend(options.mypy_path)
237272

238273
# If provided, insert the caller-supplied extra module path to the
239274
# beginning (highest priority) of the search path.
240275
if alt_lib_path:
241-
lib_path.appendleft(alt_lib_path)
276+
mypypath.insert(0, alt_lib_path)
242277

243-
return list(lib_path)
278+
return SearchPaths(tuple(reversed(python_path)),
279+
tuple(mypypath),
280+
tuple(_get_site_packages_dirs(options.python_executable)),
281+
tuple(lib_path))
244282

245283

246284
def _build(sources: List[BuildSource],
@@ -256,7 +294,7 @@ def _build(sources: List[BuildSource],
256294
data_dir = default_data_dir(bin_dir)
257295
fscache = fscache or FileSystemCache()
258296

259-
lib_path = compute_lib_path(sources, options, data_dir, alt_lib_path)
297+
search_paths = compute_search_paths(sources, options, data_dir, alt_lib_path)
260298

261299
reports = Reports(data_dir, options.report_dirs)
262300
source_set = BuildSourceSet(sources)
@@ -266,7 +304,7 @@ def _build(sources: List[BuildSource],
266304
# Construct a build manager object to hold state during the build.
267305
#
268306
# Ignore current directory prefix in error messages.
269-
manager = BuildManager(data_dir, lib_path,
307+
manager = BuildManager(data_dir, search_paths,
270308
ignore_prefix=os.getcwd(),
271309
source_set=source_set,
272310
reports=reports,
@@ -613,7 +651,7 @@ class BuildManager:
613651
"""
614652

615653
def __init__(self, data_dir: str,
616-
lib_path: List[str],
654+
search_paths: SearchPaths,
617655
ignore_prefix: str,
618656
source_set: BuildSourceSet,
619657
reports: Reports,
@@ -628,7 +666,7 @@ def __init__(self, data_dir: str,
628666
self.data_dir = data_dir
629667
self.errors = errors
630668
self.errors.set_ignore_prefix(ignore_prefix)
631-
self.lib_path = tuple(lib_path)
669+
self.search_paths = search_paths
632670
self.source_set = source_set
633671
self.reports = reports
634672
self.options = options
@@ -637,7 +675,7 @@ def __init__(self, data_dir: str,
637675
self.missing_modules = set() # type: Set[str]
638676
self.plugin = plugin
639677
self.semantic_analyzer = SemanticAnalyzerPass2(self.modules, self.missing_modules,
640-
lib_path, self.errors, self.plugin)
678+
self.errors, self.plugin)
641679
self.semantic_analyzer_pass3 = SemanticAnalyzerPass3(self.modules, self.errors,
642680
self.semantic_analyzer)
643681
self.all_types = {} # type: Dict[Expression, Type] # Used by tests only
@@ -780,7 +818,7 @@ def correct_rel_imp(imp: Union[ImportFrom, ImportAll]) -> str:
780818

781819
def is_module(self, id: str) -> bool:
782820
"""Is there a file in the file system corresponding to module id?"""
783-
return self.find_module_cache.find_module(id, self.lib_path,
821+
return self.find_module_cache.find_module(id, self.search_paths,
784822
self.options.python_executable) is not None
785823

786824
def parse_file(self, id: str, path: str, source: str, ignore_errors: bool) -> MypyFile:
@@ -844,26 +882,8 @@ def stats_summary(self) -> Mapping[str, object]:
844882
return self.stats
845883

846884

847-
@functools.lru_cache(maxsize=None)
848-
def _get_site_packages_dirs(python_executable: Optional[str]) -> List[str]:
849-
"""Find package directories for given python.
850-
851-
This runs a subprocess call, which generates a list of the site package directories.
852-
To avoid repeatedly calling a subprocess (which can be slow!) we lru_cache the results."""
853-
if python_executable is None:
854-
return []
855-
if python_executable == sys.executable:
856-
# Use running Python's package dirs
857-
return sitepkgs.getsitepackages()
858-
else:
859-
# Use subprocess to get the package directory of given Python
860-
# executable
861-
return ast.literal_eval(subprocess.check_output([python_executable, sitepkgs.__file__],
862-
stderr=subprocess.PIPE).decode())
863-
864-
865-
# Search paths are a two-tuple of path and whether to verify the module
866-
SearchPaths = List[Tuple[str, bool]]
885+
# Package dirs are a two-tuple of path to search and whether to verify the module
886+
PackageDirs = List[Tuple[str, bool]]
867887

868888

869889
class FindModuleCache:
@@ -879,26 +899,27 @@ class FindModuleCache:
879899

880900
def __init__(self, fscache: Optional[FileSystemCache] = None) -> None:
881901
self.fscache = fscache or FileSystemCache()
882-
# Cache find_lib_path_dirs: (dir_chain, lib_path) -> list of (package_path, should_verify)
883-
self.dirs = {} # type: Dict[Tuple[str, Tuple[str, ...]], SearchPaths]
884-
# Cache find_module: (id, lib_path, python_version) -> result.
885-
self.results = {} # type: Dict[Tuple[str, Tuple[str, ...], Optional[str]], Optional[str]]
902+
# Cache find_lib_path_dirs: (dir_chain, lib_path) -> list of (package_dir, should_verify)
903+
self.dirs = {} # type: Dict[Tuple[str, Tuple[str, ...]], PackageDirs]
904+
# Cache find_module: (id, search_paths, python_version) -> result.
905+
self.results = {} # type: Dict[Tuple[str, SearchPaths, Optional[str]], Optional[str]]
886906

887907
def clear(self) -> None:
888908
self.results.clear()
889909
self.dirs.clear()
890910

891-
def find_lib_path_dirs(self, dir_chain: str, lib_path: Tuple[str, ...]) -> SearchPaths:
911+
def find_lib_path_dirs(self, dir_chain: str, lib_path: Tuple[str, ...]) -> PackageDirs:
892912
# Cache some repeated work within distinct find_module calls: finding which
893913
# elements of lib_path have even the subdirectory they'd need for the module
894-
# to exist. This is shared among different module ids when they differ only
914+
# to exist. This is shared among different module ids when they differ only
895915
# in the last component.
916+
# This is run for the python_path, mypy_path, and typeshed_path search paths
896917
key = (dir_chain, lib_path)
897918
if key not in self.dirs:
898919
self.dirs[key] = self._find_lib_path_dirs(dir_chain, lib_path)
899920
return self.dirs[key]
900921

901-
def _find_lib_path_dirs(self, dir_chain: str, lib_path: Tuple[str, ...]) -> SearchPaths:
922+
def _find_lib_path_dirs(self, dir_chain: str, lib_path: Tuple[str, ...]) -> PackageDirs:
902923
dirs = []
903924
for pathitem in lib_path:
904925
# e.g., '/usr/lib/python3.4/foo/bar'
@@ -907,15 +928,15 @@ def _find_lib_path_dirs(self, dir_chain: str, lib_path: Tuple[str, ...]) -> Sear
907928
dirs.append((dir, True))
908929
return dirs
909930

910-
def find_module(self, id: str, lib_path: Tuple[str, ...],
931+
def find_module(self, id: str, search_paths: SearchPaths,
911932
python_executable: Optional[str]) -> Optional[str]:
912933
"""Return the path of the module source file, or None if not found."""
913-
key = (id, lib_path, python_executable)
934+
key = (id, search_paths, python_executable)
914935
if key not in self.results:
915-
self.results[key] = self._find_module(id, lib_path, python_executable)
936+
self.results[key] = self._find_module(id, search_paths, python_executable)
916937
return self.results[key]
917938

918-
def _find_module(self, id: str, lib_path: Tuple[str, ...],
939+
def _find_module(self, id: str, search_paths: SearchPaths,
919940
python_executable: Optional[str]) -> Optional[str]:
920941
fscache = self.fscache
921942

@@ -932,7 +953,7 @@ def _find_module(self, id: str, lib_path: Tuple[str, ...],
932953
third_party_inline_dirs = []
933954
third_party_stubs_dirs = []
934955
# Third-party stub/typed packages
935-
for pkg_dir in _get_site_packages_dirs(python_executable):
956+
for pkg_dir in search_paths.package_path:
936957
stub_name = components[0] + '-stubs'
937958
typed_file = os.path.join(pkg_dir, components[0], 'py.typed')
938959
stub_dir = os.path.join(pkg_dir, stub_name)
@@ -957,8 +978,10 @@ def _find_module(self, id: str, lib_path: Tuple[str, ...],
957978
elif fscache.isfile(typed_file):
958979
path = os.path.join(pkg_dir, dir_chain)
959980
third_party_inline_dirs.append((path, True))
960-
candidate_base_dirs = self.find_lib_path_dirs(dir_chain, lib_path) + \
961-
third_party_stubs_dirs + third_party_inline_dirs
981+
python_mypy_path = search_paths.python_path + search_paths.mypy_path
982+
candidate_base_dirs = self.find_lib_path_dirs(dir_chain, python_mypy_path) + \
983+
third_party_stubs_dirs + third_party_inline_dirs + \
984+
self.find_lib_path_dirs(dir_chain, search_paths.typeshed_path)
962985

963986
# If we're looking for a module like 'foo.bar.baz', then candidate_base_dirs now
964987
# contains just the subdirectories 'foo/bar' that actually exist under the
@@ -989,9 +1012,9 @@ def _find_module(self, id: str, lib_path: Tuple[str, ...],
9891012
return path
9901013
return None
9911014

992-
def find_modules_recursive(self, module: str, lib_path: Tuple[str, ...],
1015+
def find_modules_recursive(self, module: str, search_paths: SearchPaths,
9931016
python_executable: Optional[str]) -> List[BuildSource]:
994-
module_path = self.find_module(module, lib_path, python_executable)
1017+
module_path = self.find_module(module, search_paths, python_executable)
9951018
if not module_path:
9961019
return []
9971020
result = [BuildSource(module_path, module, None)]
@@ -1011,14 +1034,14 @@ def find_modules_recursive(self, module: str, lib_path: Tuple[str, ...],
10111034
(os.path.isfile(os.path.join(abs_path, '__init__.py')) or
10121035
os.path.isfile(os.path.join(abs_path, '__init__.pyi'))):
10131036
hits.add(item)
1014-
result += self.find_modules_recursive(module + '.' + item, lib_path,
1037+
result += self.find_modules_recursive(module + '.' + item, search_paths,
10151038
python_executable)
10161039
elif item != '__init__.py' and item != '__init__.pyi' and \
10171040
item.endswith(('.py', '.pyi')):
10181041
mod = item.split('.')[0]
10191042
if mod not in hits:
10201043
hits.add(mod)
1021-
result += self.find_modules_recursive(module + '.' + mod, lib_path,
1044+
result += self.find_modules_recursive(module + '.' + mod, search_paths,
10221045
python_executable)
10231046
return result
10241047

@@ -2304,7 +2327,7 @@ def find_module_and_diagnose(manager: BuildManager,
23042327
# difference and just assume 'builtins' everywhere,
23052328
# which simplifies code.
23062329
file_id = '__builtin__'
2307-
path = manager.find_module_cache.find_module(file_id, manager.lib_path,
2330+
path = manager.find_module_cache.find_module(file_id, manager.search_paths,
23082331
manager.options.python_executable)
23092332
if path:
23102333
# For non-stubs, look at options.follow_imports:

mypy/dmypy_server.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -326,8 +326,8 @@ def fine_grained_increment(self, sources: List[mypy.build.BuildSource]) -> Dict[
326326
t0 = time.time()
327327
self.update_sources(sources)
328328
changed, removed = self.find_changed(sources)
329-
manager.lib_path = tuple(mypy.build.compute_lib_path(
330-
sources, manager.options, manager.data_dir))
329+
manager.search_paths = mypy.build.compute_search_paths(
330+
sources, manager.options, manager.data_dir)
331331
t1 = time.time()
332332
messages = self.fine_grained_manager.update(changed, removed)
333333
t2 = time.time()

mypy/main.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from mypy import defaults
1616
from mypy import experiments
1717
from mypy import util
18-
from mypy.build import BuildSource, BuildResult
18+
from mypy.build import BuildSource, BuildResult, SearchPaths
1919
from mypy.find_sources import create_source_list, InvalidSourceList
2020
from mypy.fscache import FileSystemCache
2121
from mypy.errors import CompileError
@@ -776,14 +776,14 @@ def add_invertible_flag(flag: str,
776776
# Set target.
777777
if special_opts.modules + special_opts.packages:
778778
options.build_type = BuildType.MODULE
779-
lib_path = [os.getcwd()] + build.mypy_path()
779+
search_paths = SearchPaths((os.getcwd(),), tuple(build.mypy_path()), (), ())
780780
targets = []
781781
# TODO: use the same cache that the BuildManager will
782782
cache = build.FindModuleCache(fscache)
783783
for p in special_opts.packages:
784784
if os.sep in p or os.altsep and os.altsep in p:
785785
fail("Package name '{}' cannot have a slash in it.".format(p))
786-
p_targets = cache.find_modules_recursive(p, tuple(lib_path), options.python_executable)
786+
p_targets = cache.find_modules_recursive(p, search_paths, options.python_executable)
787787
if not p_targets:
788788
fail("Can't find package '{}'".format(p))
789789
targets.extend(p_targets)

mypy/semanal.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -182,8 +182,6 @@ class SemanticAnalyzerPass2(NodeVisitor[None],
182182
This is the second phase of semantic analysis.
183183
"""
184184

185-
# Library search paths
186-
lib_path = None # type: List[str]
187185
# Module name space
188186
modules = None # type: Dict[str, MypyFile]
189187
# Global name space for current module
@@ -227,7 +225,7 @@ class SemanticAnalyzerPass2(NodeVisitor[None],
227225
def __init__(self,
228226
modules: Dict[str, MypyFile],
229227
missing_modules: Set[str],
230-
lib_path: List[str], errors: Errors,
228+
errors: Errors,
231229
plugin: Plugin) -> None:
232230
"""Construct semantic analyzer.
233231
@@ -242,7 +240,6 @@ def __init__(self,
242240
self.function_stack = []
243241
self.block_depth = [0]
244242
self.loop_depth = 0
245-
self.lib_path = lib_path
246243
self.errors = errors
247244
self.modules = modules
248245
self.msg = MessageBuilder(errors, modules)

mypy/stubgen.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,8 @@ def find_module_path_and_all(module: str, pyversion: Tuple[int, int],
160160
module_all = getattr(mod, '__all__', None)
161161
else:
162162
# Find module by going through search path.
163-
module_path = mypy.build.FindModuleCache().find_module(module, ('.',) + tuple(search_path),
163+
search_paths = mypy.build.SearchPaths(('.',) + tuple(search_path), (), (), ())
164+
module_path = mypy.build.FindModuleCache().find_module(module, search_paths,
164165
interpreter)
165166
if not module_path:
166167
raise SystemExit(

0 commit comments

Comments
 (0)