diff --git a/mypy/build.py b/mypy/build.py index 1992cecca475..27b960e35b8f 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -74,10 +74,6 @@ Graph = Dict[str, 'State'] -def getmtime(name: str) -> int: - return int(os.path.getmtime(name)) - - # TODO: Get rid of BuildResult. We might as well return a BuildManager. class BuildResult: """The result of a successful build. @@ -230,7 +226,12 @@ def compute_lib_path(sources: List[BuildSource], # to the lib_path # TODO: Don't do this in some cases; for motivation see see # https://github.com/python/mypy/issues/4195#issuecomment-341915031 - lib_path.appendleft(os.getcwd()) + if options.bazel: + dir = '.' + else: + dir = os.getcwd() + if dir not in lib_path: + lib_path.appendleft(dir) # Prepend a config-defined mypy path. lib_path.extendleft(options.mypy_path) @@ -687,6 +688,31 @@ def maybe_swap_for_shadow_path(self, path: str) -> str: def get_stat(self, path: str) -> os.stat_result: return self.fscache.stat(self.maybe_swap_for_shadow_path(path)) + def getmtime(self, path: str) -> int: + """Return a file's mtime; but 0 in bazel mode. + + (Bazel's distributed cache doesn't like filesystem metadata to + end up in output files.) + """ + if self.options.bazel: + return 0 + else: + return int(os.path.getmtime(path)) + + def normpath(self, path: str) -> str: + """Convert path to absolute; but to relative in bazel mode. + + (Bazel's distributed cache doesn't like filesystem metadata to + end up in output files.) + """ + # TODO: Could we always use relpath? (A worry in non-bazel + # mode would be that a moved file may change its full module + # name without changing its size, mtime or hash.) + if self.options.bazel: + return os.path.relpath(path) + else: + return os.path.abspath(path) + def all_imported_modules_in_file(self, file: MypyFile) -> List[Tuple[int, str, int]]: """Find all reachable import statements in a file. 
@@ -1094,7 +1120,7 @@ def get_cache_names(id: str, path: str, manager: BuildManager) -> Tuple[str, str Args: id: module ID - path: module path (used to recognize packages) + path: module path cache_dir: cache directory pyversion: Python version (major, minor) @@ -1102,6 +1128,9 @@ def get_cache_names(id: str, path: str, manager: BuildManager) -> Tuple[str, str A tuple with the file names to be used for the meta JSON, the data JSON, and the fine-grained deps JSON, respectively. """ + pair = manager.options.cache_map.get(path) + if pair is not None: + return (pair[0], pair[1], None) prefix = _cache_dir_prefix(manager, id) is_package = os.path.basename(path).startswith('__init__.py') if is_package: @@ -1232,22 +1261,23 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str], manager.log('Metadata abandoned for {}: errors were previously ignored'.format(id)) return None + bazel = manager.options.bazel assert path is not None, "Internal error: meta was provided without a path" # Check data_json; assume if its mtime matches it's good. # TODO: stat() errors - data_mtime = getmtime(meta.data_json) + data_mtime = manager.getmtime(meta.data_json) if data_mtime != meta.data_mtime: manager.log('Metadata abandoned for {}: data cache is modified'.format(id)) return None deps_mtime = None if manager.options.cache_fine_grained: assert meta.deps_json - deps_mtime = getmtime(meta.deps_json) + deps_mtime = manager.getmtime(meta.deps_json) if deps_mtime != meta.deps_mtime: manager.log('Metadata abandoned for {}: deps cache is modified'.format(id)) return None - path = os.path.abspath(path) + path = manager.normpath(path) try: st = manager.get_stat(path) except OSError: @@ -1272,12 +1302,14 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str], fine_grained_cache = manager.use_fine_grained_cache() size = st.st_size - if size != meta.size and not fine_grained_cache: + # Bazel ensures the cache is valid. 
+ if size != meta.size and not bazel and not fine_grained_cache: manager.log('Metadata abandoned for {}: file {} has different size'.format(id, path)) return None - mtime = int(st.st_mtime) - if mtime != meta.mtime or path != meta.path: + # Bazel ensures the cache is valid. + mtime = 0 if bazel else int(st.st_mtime) + if not bazel and (mtime != meta.mtime or path != meta.path): try: source_hash = manager.fscache.md5(path) except (OSError, UnicodeDecodeError, DecodeError): @@ -1317,7 +1349,7 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str], meta_str = json.dumps(meta_dict, indent=2, sort_keys=True) else: meta_str = json.dumps(meta_dict) - meta_json, _, _2 = get_cache_names(id, path, manager) + meta_json, _, _ = get_cache_names(id, path, manager) manager.log('Updating mtime for {}: file {}, meta {}, mtime {}' .format(id, path, meta_json, meta.mtime)) atomic_write(meta_json, meta_str, '\n') # Ignore errors, it's just an optimization. @@ -1373,12 +1405,20 @@ def write_cache(id: str, path: str, tree: MypyFile, corresponding to the metadata that was written (the latter may be None if the cache could not be written). """ - # Obtain file paths - path = os.path.abspath(path) + # For Bazel we use relative paths and zero mtimes. + bazel = manager.options.bazel + + # Obtain file paths. + path = manager.normpath(path) meta_json, data_json, deps_json = get_cache_names(id, path, manager) manager.log('Writing {} {} {} {} {}'.format( id, path, meta_json, data_json, deps_json)) + # Update tree.path so that in bazel mode it's made relative (since + # sometimes paths leak out). 
+ if bazel: + tree.path = path + # Make sure directory for cache files exists parent = os.path.dirname(data_json) assert os.path.dirname(meta_json) == parent @@ -1390,7 +1430,8 @@ def write_cache(id: str, path: str, tree: MypyFile, # Obtain and set up metadata try: - os.makedirs(parent, exist_ok=True) + if parent: + os.makedirs(parent, exist_ok=True) st = manager.get_stat(path) except OSError as err: manager.log("Cannot get stat for {}: {}".format(path, err)) @@ -1405,10 +1446,11 @@ def write_cache(id: str, path: str, tree: MypyFile, return interface_hash, None # Write data cache file, if applicable + # Note that for Bazel we don't record the data file's mtime. if old_interface_hash == interface_hash: # If the interface is unchanged, the cached data is guaranteed # to be equivalent, and we only need to update the metadata. - data_mtime = getmtime(data_json) + data_mtime = manager.getmtime(data_json) manager.trace("Interface for {} is unchanged".format(id)) else: manager.trace("Interface for {} has changed".format(id)) @@ -1425,7 +1467,7 @@ def write_cache(id: str, path: str, tree: MypyFile, # Both have the effect of slowing down the next run a # little bit due to an out-of-date cache file. return interface_hash, None - data_mtime = getmtime(data_json) + data_mtime = manager.getmtime(data_json) deps_mtime = None if deps_json: @@ -1433,9 +1475,9 @@ def write_cache(id: str, path: str, tree: MypyFile, if not atomic_write(deps_json, deps_str, '\n'): manager.log("Error writing deps JSON file {}".format(deps_json)) return interface_hash, None - deps_mtime = getmtime(deps_json) + deps_mtime = manager.getmtime(deps_json) - mtime = int(st.st_mtime) + mtime = 0 if bazel else int(st.st_mtime) size = st.st_size options = manager.options.clone_for_module(id) assert source_hash is not None @@ -1475,7 +1517,7 @@ def delete_cache(id: str, path: str, manager: BuildManager) -> None: This avoids inconsistent states with cache files from different mypy runs, see #4043 for an example. 
""" - path = os.path.abspath(path) + path = manager.normpath(path) cache_paths = get_cache_names(id, path, manager) manager.log('Deleting {} {} {}'.format(id, path, " ".join(x for x in cache_paths if x))) diff --git a/mypy/find_sources.py b/mypy/find_sources.py index fd489fbbeb34..c054586d3abb 100644 --- a/mypy/find_sources.py +++ b/mypy/find_sources.py @@ -143,6 +143,8 @@ def get_init_file(self, dir: str) -> Optional[str]: f = os.path.join(dir, '__init__' + ext) if self.fscache.isfile(f): return f + if ext == '.py' and self.fscache.init_under_package_root(f): + return f return None diff --git a/mypy/fscache.py b/mypy/fscache.py index 9b89144e7685..48dad8495908 100644 --- a/mypy/fscache.py +++ b/mypy/fscache.py @@ -32,13 +32,19 @@ import hashlib import os import stat -from typing import Dict, List, Tuple +from typing import Dict, List, Optional, Set, Tuple class FileSystemCache: def __init__(self) -> None: + # The package root is not flushed with the caches. + # It is set by set_package_root() below. + self.package_root = [] # type: List[str] self.flush() + def set_package_root(self, package_root: List[str]) -> None: + self.package_root = package_root + def flush(self) -> None: """Start another transaction and empty all caches.""" self.stat_cache = {} # type: Dict[str, os.stat_result] @@ -49,6 +55,7 @@ def flush(self) -> None: self.read_cache = {} # type: Dict[str, bytes] self.read_error_cache = {} # type: Dict[str, Exception] self.hash_cache = {} # type: Dict[str, str] + self.fake_package_cache = set() # type: Set[str] def stat(self, path: str) -> os.stat_result: if path in self.stat_cache: @@ -58,6 +65,11 @@ def stat(self, path: str) -> os.stat_result: try: st = os.stat(path) except OSError as err: + if self.init_under_package_root(path): + try: + return self._fake_init(path) + except OSError: + pass # Take a copy to get rid of associated traceback and frame objects. # Just assigning to __traceback__ doesn't free them. 
self.stat_error_cache[path] = copy_os_error(err) @@ -65,9 +77,88 @@ def stat(self, path: str) -> os.stat_result: self.stat_cache[path] = st return st + def init_under_package_root(self, path: str) -> bool: + """Is this path an __init__.py under a package root? + + This is used to detect packages that don't contain __init__.py + files, which is needed to support Bazel. The function should + only be called for non-existing files. + + It will return True if it refers to a __init__.py file that + Bazel would create, so that at runtime Python would think the + directory containing it is a package. For this to work you + must pass one or more package roots using the --package-root + flag. + + As an exceptional case, any directory that is a package root + itself will not be considered to contain a __init__.py file. + This is different from the rules Bazel itself applies, but is + necessary for mypy to properly distinguish packages from other + directories. + + See https://docs.bazel.build/versions/master/be/python.html, + where this behavior is described under legacy_create_init. + """ + if not self.package_root: + return False + dirname, basename = os.path.split(path) + if basename != '__init__.py': + return False + try: + st = self.stat(dirname) + except OSError: + return False + else: + if not stat.S_ISDIR(st.st_mode): + return False + ok = False + drive, path = os.path.splitdrive(path) # Ignore Windows drive name + path = os.path.normpath(path) + for root in self.package_root: + if path.startswith(root): + if path == root + basename: + # A package root itself is never a package. + ok = False + break + else: + ok = True + return ok + + def _fake_init(self, path: str) -> os.stat_result: + """Prime the cache with a fake __init__.py file. + + This makes code that looks for path believe an empty file by + that name exists. Should only be called after + init_under_package_root() returns True. 
+ """ + dirname, basename = os.path.split(path) + assert basename == '__init__.py', path + assert not os.path.exists(path), path # Not cached! + dirname = os.path.normpath(dirname) + st = self.stat(dirname) # May raise OSError + # Get stat result as a sequence so we can modify it. + # (Alas, typeshed's os.stat_result is not a sequence yet.) + tpl = tuple(st) # type: ignore + seq = list(tpl) # type: List[float] + seq[stat.ST_MODE] = stat.S_IFREG | 0o444 + seq[stat.ST_INO] = 1 + seq[stat.ST_NLINK] = 1 + seq[stat.ST_SIZE] = 0 + tpl = tuple(seq) + st = os.stat_result(tpl) + self.stat_cache[path] = st + # Make listdir() and read() also pretend this file exists. + self.fake_package_cache.add(dirname) + return st + def listdir(self, path: str) -> List[str]: + path = os.path.normpath(path) if path in self.listdir_cache: - return self.listdir_cache[path] + res = self.listdir_cache[path] + # Check the fake cache. + if path in self.fake_package_cache and '__init__.py' not in res: + res.append('__init__.py') # Updates the result as well as the cache + return res if path in self.listdir_error_cache: raise copy_os_error(self.listdir_error_cache[path]) try: @@ -77,6 +168,9 @@ def listdir(self, path: str) -> List[str]: self.listdir_error_cache[path] = copy_os_error(err) raise err self.listdir_cache[path] = results + # Check the fake cache. + if path in self.fake_package_cache and '__init__.py' not in results: + results.append('__init__.py') return results def isfile(self, path: str) -> bool: @@ -133,12 +227,19 @@ def read(self, path: str) -> bytes: # earlier instant than the mtime reported by self.stat(). self.stat(path) - try: - with open(path, 'rb') as f: - data = f.read() - except Exception as err: - self.read_error_cache[path] = err - raise + dirname, basename = os.path.split(path) + dirname = os.path.normpath(dirname) + # Check the fake cache. 
+ if basename == '__init__.py' and dirname in self.fake_package_cache: + data = b'' + else: + try: + with open(path, 'rb') as f: + data = f.read() + except OSError as err: + self.read_error_cache[path] = err + raise + md5hash = hashlib.md5(data).hexdigest() self.read_cache[path] = data self.hash_cache[path] = md5hash diff --git a/mypy/main.py b/mypy/main.py index bc6a21e1aa80..e74c791387a9 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -303,7 +303,11 @@ def process_options(args: List[str], server_options: bool = False, fscache: Optional[FileSystemCache] = None, ) -> Tuple[List[BuildSource], Options]: - """Parse command line arguments.""" + """Parse command line arguments. + + If a FileSystemCache is passed in, and package_root options are given, + call fscache.set_package_root() to set the cache's package root. + """ parser = argparse.ArgumentParser(prog='mypy', epilog=FOOTER, fromfile_prefix_chars='@', @@ -480,6 +484,19 @@ def add_invertible_flag(flag: str, # --local-partial-types disallows partial types spanning module top level and a function # (implicitly defined in fine-grained incremental mode) parser.add_argument('--local-partial-types', action='store_true', help=argparse.SUPPRESS) + # --bazel changes some behaviors for use with Bazel (https://bazel.build). + parser.add_argument('--bazel', action='store_true', help=argparse.SUPPRESS) + # --package-root adds a directory below which directories are considered + # packages even without __init__.py. May be repeated. + parser.add_argument('--package-root', metavar='ROOT', action='append', default=[], + help=argparse.SUPPRESS) + # --cache-map FILE ... gives a mapping from source files to cache files. + # Each triple of arguments is a source file, a cache meta file, and a cache data file. + # Modules not mentioned in the file will go through cache_dir. + # Must be followed by another flag or by '--' (and then only file args may follow). 
+ parser.add_argument('--cache-map', nargs='+', dest='special-opts:cache_map', + help=argparse.SUPPRESS) + # deprecated options parser.add_argument('--disallow-any', dest='special-opts:disallow_any', help=argparse.SUPPRESS) @@ -633,6 +650,14 @@ def add_invertible_flag(flag: str, report_dir = val options.report_dirs[report_type] = report_dir + # Process --package-root. + if options.package_root: + process_package_roots(fscache, parser, options) + + # Process --cache-map. + if special_opts.cache_map: + process_cache_map(parser, special_opts, options) + # Let quick_and_dirty imply incremental. if options.quick_and_dirty: options.incremental = True @@ -666,6 +691,63 @@ def add_invertible_flag(flag: str, return targets, options +def process_package_roots(fscache: Optional[FileSystemCache], + parser: argparse.ArgumentParser, + options: Options) -> None: + """Validate and normalize package_root.""" + if fscache is None: + parser.error("--package-root does not work here (no fscache)") + assert fscache is not None # Since mypy doesn't know parser.error() raises. + # Do some stuff with drive letters to make Windows happy (esp. tests). + current_drive, _ = os.path.splitdrive(os.getcwd()) + dot = os.curdir + dotslash = os.curdir + os.sep + dotdotslash = os.pardir + os.sep + trivial_paths = {dot, dotslash} + package_root = [] + for root in options.package_root: + if os.path.isabs(root): + parser.error("Package root cannot be absolute: %r" % root) + drive, root = os.path.splitdrive(root) + if drive and drive != current_drive: + parser.error("Package root must be on current drive: %r" % (drive + root)) + # Empty package root is always okay. + if root: + root = os.path.relpath(root) # Normalize the heck out of it. 
+ if root.startswith(dotdotslash): + parser.error("Package root cannot be above current directory: %r" % root) + if root in trivial_paths: + root = '' + elif not root.endswith(os.sep): + root = root + os.sep + package_root.append(root) + options.package_root = package_root + # Pass the package root on to the filesystem cache. + fscache.set_package_root(package_root) + + +def process_cache_map(parser: argparse.ArgumentParser, + special_opts: argparse.Namespace, + options: Options) -> None: + """Validate cache_map and copy into options.cache_map.""" + n = len(special_opts.cache_map) + if n % 3 != 0: + parser.error("--cache-map requires one or more triples (see source)") + for i in range(0, n, 3): + source, meta_file, data_file = special_opts.cache_map[i:i + 3] + if source in options.cache_map: + parser.error("Duplicate --cache-map source %s)" % source) + if not source.endswith('.py') and not source.endswith('.pyi'): + parser.error("Invalid --cache-map source %s (triple[0] must be *.py[i])" % source) + if not meta_file.endswith('.meta.json'): + parser.error("Invalid --cache-map meta_file %s (triple[1] must be *.meta.json)" % + meta_file) + if not data_file.endswith('.data.json'): + parser.error("Invalid --cache-map data_file %s (triple[2] must be *.data.json)" % + data_file) + options.cache_map[source] = (meta_file, data_file) + + # For most options, the type of the default value set in options.py is # sufficient, and we don't have to do anything here. 
This table # exists to specify types for values initialized to None or container @@ -683,6 +765,7 @@ def add_invertible_flag(flag: str, 'plugins': lambda s: [p.strip() for p in s.split(',')], 'always_true': lambda s: [p.strip() for p in s.split(',')], 'always_false': lambda s: [p.strip() for p in s.split(',')], + 'package_root': lambda s: [p.strip() for p in s.split(',')], } diff --git a/mypy/options.py b/mypy/options.py index bb95ff1af23f..af9237d1807d 100644 --- a/mypy/options.py +++ b/mypy/options.py @@ -47,7 +47,7 @@ class Options: } OPTIONS_AFFECTING_CACHE = ((PER_MODULE_OPTIONS | - {"quick_and_dirty", "platform"}) + {"quick_and_dirty", "platform", "bazel"}) - {"debug_cache"}) def __init__(self) -> None: @@ -193,6 +193,12 @@ def __init__(self) -> None: self.dump_deps = False # If True, partial types can't span a module top level and a function self.local_partial_types = False + # Some behaviors are changed when using Bazel (https://bazel.build). + self.bazel = False + # List of package roots -- directories under these are packages even + # if they don't have __init__.py. 
+ self.package_root = [] # type: List[str] + self.cache_map = {} # type: Dict[str, Tuple[str, str]] def snapshot(self) -> object: """Produce a comparable snapshot of this Option""" diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test index 3f537922d9e3..f7a55c93088b 100644 --- a/test-data/unit/check-incremental.test +++ b/test-data/unit/check-incremental.test @@ -4743,3 +4743,14 @@ class C: [out] [out2] main:5: error: Incompatible types in assignment (expression has type "str", variable has type "int") + +[case testBazelFlagIgnoresFileChanges] +-- Since the initial run wrote a cache file, the second run ignores the source +# flags: --bazel +from a import f +f() +[file a.py] +def f(): pass +[file a.py.2] +[out] +[out2] diff --git a/test-data/unit/cmdline.test b/test-data/unit/cmdline.test index fc12fa94584a..3e5050636f58 100644 --- a/test-data/unit/cmdline.test +++ b/test-data/unit/cmdline.test @@ -5,6 +5,9 @@ -- -- # cmd: mypy -- +-- Note that # flags: --some-flag IS NOT SUPPORTED. +-- Use # cmd: mypy --some-flag ... +-- -- '== Return code: ' is added to the output when the process return code -- is "nonobvious" -- that is, when it is something other than 0 if there are no -- messages and 1 if there are. 
@@ -1166,7 +1169,6 @@ s1.py:2: error: Incompatible return value type (got "int", expected "str") [case testConfigWarnUnusedSection1] # cmd: mypy foo.py quux.py spam/eggs.py -# flags: --follow-imports=skip [file mypy.ini] [[mypy] warn_unused_configs = True @@ -1223,3 +1225,38 @@ fail foo/lol.py:1: error: Name 'fail' is not defined emarg/foo.py:1: error: Name 'fail' is not defined emarg/hatch/villip/mankangulisk.py:1: error: Name 'fail' is not defined + +[case testPackageRootEmpty] +# cmd: mypy --package-root= a/b/c.py main.py +[file a/b/c.py] +[file main.py] +import a.b.c + +[case testPackageRootNonEmpty] +# cmd: mypy --package-root=a/ a/b/c.py main.py +[file a/b/c.py] +[file main.py] +import b.c + +[case testPackageRootMultiple1] +# cmd: mypy --package-root=. --package-root=a a/b/c.py d.py main.py +[file a/b/c.py] +[file d.py] +[file main.py] +import b.c +import d + +[case testPackageRootMultiple2] +# cmd: mypy --package-root=a/ --package-root=./ a/b/c.py d.py main.py +[file a/b/c.py] +[file d.py] +[file main.py] +import b.c +import d + +[case testCacheMap] +-- This just checks that a valid --cache-map triple is accepted. +-- (Errors are too verbose to check.) +# cmd: mypy a.py --cache-map a.py a.meta.json a.data.json +[file a.py] +[out]