From 13a52e1868194f3815defb4e4e1b6151a19d311e Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 14 Mar 2018 14:01:28 -0700 Subject: [PATCH 01/39] New --bazel flag --- mypy/main.py | 7 +++++++ mypy/options.py | 4 +++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/mypy/main.py b/mypy/main.py index 8756be3bed2b..ccf160034595 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -367,6 +367,7 @@ def add_invertible_flag(flag: str, parser.add_argument('--shadow-file', nargs=2, metavar=('SOURCE_FILE', 'SHADOW_FILE'), dest='shadow_file', help='Typecheck SHADOW_FILE in place of SOURCE_FILE.') + # hidden options # --debug-cache will disable any cache-related compressions/optimizations, # which will make the cache writing process output pretty-printed JSON (which @@ -381,6 +382,9 @@ def add_invertible_flag(flag: str, # --local-partial-types disallows partial types spanning module top level and a function # (implicitly defined in fine-grained incremental mode) parser.add_argument('--local-partial-types', action='store_true', help=argparse.SUPPRESS) + # --bazel changes some behaviors for use with Bazel (https://bazel.build). + parser.add_argument('--bazel', action='store_true', help=argparse.SUPPRESS) + # deprecated options parser.add_argument('--disallow-any', dest='special-opts:disallow_any', help=argparse.SUPPRESS) @@ -500,6 +504,9 @@ def add_invertible_flag(flag: str, parser.error("Missing target module, package, files, or command.") elif code_methods > 1: parser.error("May only specify one of: module/package, files, or command.") + if options.bazel: + if not options.incremental: + fail("--bazel requires --incremental") # Set build flags. 
if options.strict_optional_whitelist is not None: diff --git a/mypy/options.py b/mypy/options.py index 5ea251df2c9d..22e4f0793d44 100644 --- a/mypy/options.py +++ b/mypy/options.py @@ -43,7 +43,7 @@ class Options: } OPTIONS_AFFECTING_CACHE = ((PER_MODULE_OPTIONS | - {"quick_and_dirty", "platform", "cache_fine_grained"}) + {"quick_and_dirty", "platform", "cache_fine_grained", "bazel"}) - {"debug_cache"}) def __init__(self) -> None: @@ -176,6 +176,8 @@ def __init__(self) -> None: self.dump_deps = False # If True, partial types can't span a module top level and a function self.local_partial_types = False + # Some behaviors are changed when using Bazel (https://bazel.build). + self.bazel = False def __eq__(self, other: object) -> bool: return self.__class__ == other.__class__ and self.__dict__ == other.__dict__ From 46181158112020981c3441d63b0948709884111a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 14 Mar 2018 14:15:15 -0700 Subject: [PATCH 02/39] Implement behavior changes for --bazel flag. - Force relative paths. - Force mtimes to zero. - Locate cache files alongside source files. --- mypy/build.py | 69 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 11 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index a2d04aaf4a4e..cc66bbfec653 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -66,6 +66,12 @@ PYTHON_EXTENSIONS = ['.pyi', '.py'] +if os.altsep: + SEPARATORS = frozenset([os.sep, os.altsep]) +else: + SEPARATORS = frozenset([os.sep]) + + Graph = Dict[str, 'State'] @@ -73,6 +79,25 @@ def getmtime(name: str) -> int: return int(os.path.getmtime(name)) +def relpath(path: str) -> str: + if not path or path[0] not in SEPARATORS: + # Relative path, or has drive letter prefix (C: etc.) on Windows. + assert not os.path.isabs(path), "%s is neither rel nor abs" % (path,) + return path + if os.altsep and os.altsep in path: + # So the rest of this code doesn't need to handle altsep on Windows. 
+ path = path.replace(os.altsep, os.sep) + cwd = os.getcwd() + for i in range(10): + if path.startswith(cwd + os.sep): + # The .lstrip(os.sep) call strips duplicate leading slashes. + return '../'*i + path[len(cwd) + 1:].lstrip(os.sep) + cwd = os.path.dirname(cwd) + if all(c == os.sep for c in cwd): + break + return path + + # TODO: Get rid of BuildResult. We might as well return a BuildManager. class BuildResult: """The result of a successful build. @@ -228,7 +253,12 @@ def _build(sources: List[BuildSource], # to the lib_path # TODO: Don't do this in some cases; for motivation see see # https://github.com/python/mypy/issues/4195#issuecomment-341915031 - lib_path.insert(0, os.getcwd()) + if options.bazel: + dir = '.' + else: + dir = os.getcwd() + if dir not in lib_path: + lib_path.insert(0, dir) # Prepend a config-defined mypy path. lib_path[:0] = options.mypy_path @@ -939,12 +969,19 @@ def get_cache_names(id: str, path: str, manager: BuildManager) -> Tuple[str, str A tuple with the file names to be used for the meta JSON and the data JSON, respectively. """ - cache_dir = manager.options.cache_dir - pyversion = manager.options.python_version - prefix = os.path.join(cache_dir, '%d.%d' % pyversion, *id.split('.')) - is_package = os.path.basename(path).startswith('__init__.py') - if is_package: - prefix = os.path.join(prefix, '__init__') + if manager.options.bazel: + # For Bazel we put the cache files alongside the source files. + # The reason is that Bazel output files cannot live outside + # the package, and the repo root is often higher up. 
+ path = relpath(path) + prefix, _ = os.path.splitext(path) + else: + cache_dir = manager.options.cache_dir + pyversion = manager.options.python_version + prefix = os.path.join(cache_dir, '%d.%d' % pyversion, *id.split('.')) + is_package = os.path.basename(path).startswith('__init__.py') + if is_package: + prefix = os.path.join(prefix, '__init__') return (prefix + '.meta.json', prefix + '.data.json') @@ -1171,8 +1208,17 @@ def write_cache(id: str, path: str, tree: MypyFile, corresponding to the metadata that was written (the latter may be None if the cache could not be written). """ + # For Bazel we use relative paths and zero mtimes. + bazel = manager.options.bazel + # Obtain file paths - path = os.path.abspath(path) + if not bazel: + # Normally, make all paths absolute. + path = os.path.abspath(path) + else: + # For Bazel, make all paths relative (else Bazel caching is thwarted). + tree.path = path = relpath(path) + meta_json, data_json = get_cache_names(id, path, manager) manager.log('Writing {} {} {} {}'.format(id, path, meta_json, data_json)) @@ -1207,10 +1253,11 @@ def write_cache(id: str, path: str, tree: MypyFile, return interface_hash, None # Write data cache file, if applicable + # Note that for Bazel we don't record the data file's mtime. if old_interface_hash == interface_hash: # If the interface is unchanged, the cached data is guaranteed # to be equivalent, and we only need to update the metadata. - data_mtime = getmtime(data_json) + data_mtime = 0 if bazel else getmtime(data_json) manager.trace("Interface for {} is unchanged".format(id)) else: manager.trace("Interface for {} has changed".format(id)) @@ -1227,9 +1274,9 @@ def write_cache(id: str, path: str, tree: MypyFile, # Both have the effect of slowing down the next run a # little bit due to an out-of-date cache file. 
return interface_hash, None - data_mtime = getmtime(data_json) + data_mtime = 0 if bazel else getmtime(data_json) - mtime = int(st.st_mtime) + mtime = 0 if bazel else int(st.st_mtime) size = st.st_size options = manager.options.clone_for_module(id) assert source_hash is not None From d188e9fbb001292d00002a5de382dcd84eab5f0f Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 14 Mar 2018 14:42:57 -0700 Subject: [PATCH 03/39] Don't raise OSError for makedirs('') --- mypy/build.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mypy/build.py b/mypy/build.py index cc66bbfec653..fccf5d9771f1 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1238,7 +1238,8 @@ def write_cache(id: str, path: str, tree: MypyFile, # Obtain and set up metadata try: - os.makedirs(parent, exist_ok=True) + if parent: + os.makedirs(parent, exist_ok=True) st = manager.get_stat(path) except OSError as err: manager.log("Cannot get stat for {}: {}".format(path, err)) From 9d321922e8e09ab7635b9fcca3b25767be90f75c Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 14 Mar 2018 14:58:42 -0700 Subject: [PATCH 04/39] Use zero mtimes in validate_meta() if bazel --- mypy/build.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index fccf5d9771f1..5bdee5092d8c 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1092,10 +1092,11 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str], manager.log('Metadata abandoned for {}: errors were previously ignored'.format(id)) return None + bazel = manager.options.bazel assert path is not None, "Internal error: meta was provided without a path" # Check data_json; assume if its mtime matches it's good. 
# TODO: stat() errors - data_mtime = getmtime(meta.data_json) + data_mtime = 0 if bazel else getmtime(meta.data_json) if data_mtime != meta.data_mtime: manager.log('Metadata abandoned for {}: data cache is modified'.format(id)) return None @@ -1124,7 +1125,7 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str], manager.log('Metadata abandoned for {}: file {} has different size'.format(id, path)) return None - mtime = int(st.st_mtime) + mtime = 0 if bazel else int(st.st_mtime) if mtime != meta.mtime or path != meta.path: try: source_hash = manager.fscache.md5(path) From f1c7834d91cbd84182f5d3ec8687fbef5e19b9f5 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 14 Mar 2018 15:16:40 -0700 Subject: [PATCH 05/39] More zero mtime, and don't compare size -> always believe cache file if present --- mypy/build.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 5bdee5092d8c..8c1973c208ab 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1101,7 +1101,7 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str], manager.log('Metadata abandoned for {}: data cache is modified'.format(id)) return None - path = os.path.abspath(path) + path = relpath(path) if bazel else os.path.abspath(path) try: st = manager.get_stat(path) except OSError: @@ -1121,7 +1121,7 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str], return meta size = st.st_size - if size != meta.size: + if not bazel and size != meta.size: manager.log('Metadata abandoned for {}: file {} has different size'.format(id, path)) return None From aff2f72434722285a1af35a357b210ed1539f06b Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 16 Mar 2018 15:49:07 -0700 Subject: [PATCH 06/39] Different tack for cache under Bazel: use --cache-dir but omit pyversion --- mypy/build.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/mypy/build.py 
b/mypy/build.py index 8c1973c208ab..d7e5cf5dc992 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -969,19 +969,16 @@ def get_cache_names(id: str, path: str, manager: BuildManager) -> Tuple[str, str A tuple with the file names to be used for the meta JSON and the data JSON, respectively. """ - if manager.options.bazel: - # For Bazel we put the cache files alongside the source files. - # The reason is that Bazel output files cannot live outside - # the package, and the repo root is often higher up. - path = relpath(path) - prefix, _ = os.path.splitext(path) - else: - cache_dir = manager.options.cache_dir + cache_dir = manager.options.cache_dir + prefix = cache_dir + # For Bazel we omit the Python version from the cache name. + if not manager.options.bazel: pyversion = manager.options.python_version - prefix = os.path.join(cache_dir, '%d.%d' % pyversion, *id.split('.')) - is_package = os.path.basename(path).startswith('__init__.py') - if is_package: - prefix = os.path.join(prefix, '__init__') + prefix = os.path.join(prefix, '%d.%d' % pyversion) + prefix = os.path.join(prefix, *id.split('.')) + is_package = os.path.basename(path).startswith('__init__.py') + if is_package: + prefix = os.path.join(prefix, '__init__') return (prefix + '.meta.json', prefix + '.data.json') From 8048ee456ee200a6cc4c8d1f1092646d5e1591b5 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 16 Mar 2018 21:11:50 -0700 Subject: [PATCH 07/39] Add --cache-map option --- mypy/build.py | 19 ++++++++++--------- mypy/main.py | 20 ++++++++++++++++++++ mypy/options.py | 1 + 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index d7e5cf5dc992..5d61a731b917 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -969,16 +969,17 @@ def get_cache_names(id: str, path: str, manager: BuildManager) -> Tuple[str, str A tuple with the file names to be used for the meta JSON and the data JSON, respectively. 
""" - cache_dir = manager.options.cache_dir - prefix = cache_dir - # For Bazel we omit the Python version from the cache name. - if not manager.options.bazel: + if manager.options.cache_map and id in manager.options.cache_map: + meta_file = manager.options.cache_map[id] + assert meta_file.endswith('.meta.json'), (id, meta_file) + prefix = meta_file[:-len('.meta.json')] + else: + cache_dir = manager.options.cache_dir pyversion = manager.options.python_version - prefix = os.path.join(prefix, '%d.%d' % pyversion) - prefix = os.path.join(prefix, *id.split('.')) - is_package = os.path.basename(path).startswith('__init__.py') - if is_package: - prefix = os.path.join(prefix, '__init__') + prefix = os.path.join(cache_dir, '%d.%d' % pyversion, *id.split('.')) + is_package = os.path.basename(path).startswith('__init__.py') + if is_package: + prefix = os.path.join(prefix, '__init__') return (prefix + '.meta.json', prefix + '.data.json') diff --git a/mypy/main.py b/mypy/main.py index ccf160034595..44645f92a3b5 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -384,6 +384,14 @@ def add_invertible_flag(flag: str, parser.add_argument('--local-partial-types', action='store_true', help=argparse.SUPPRESS) # --bazel changes some behaviors for use with Bazel (https://bazel.build). parser.add_argument('--bazel', action='store_true', help=argparse.SUPPRESS) + # --cache-map is a file mapping module ID to cache files. + # Each non-comment, non-blank line in the file is a module ID, + # a space, and the name of the corresponding .meta.json file. + # The corresponding .data.json file is implied. + # For modules the file should be .../__init__.meta.json. + # Modules not mentioned in the file will go through cache_dir. 
+ parser.add_argument('--cache-map', action='store', dest='special-opts:cache_map', + help=argparse.SUPPRESS) # deprecated options parser.add_argument('--disallow-any', dest='special-opts:disallow_any', @@ -527,6 +535,18 @@ def add_invertible_flag(flag: str, report_dir = val options.report_dirs[report_type] = report_dir + # Parse bazel cache map. If it crashes, the file is missing or + # its contents invalid. + if special_opts.cache_map: + cache_map = {} + with open(special_opts.cache_map) as f: + for line in f: + parts = line.split() + if parts and not parts[0].startswith('#'): + assert len(parts) == 2 and parts[1].endswith('.meta.json'), parts + cache_map[parts[0]] = parts[1] + options.cache_map = cache_map + # Let quick_and_dirty imply incremental. if options.quick_and_dirty: options.incremental = True diff --git a/mypy/options.py b/mypy/options.py index 22e4f0793d44..4d77a68a66d5 100644 --- a/mypy/options.py +++ b/mypy/options.py @@ -178,6 +178,7 @@ def __init__(self) -> None: self.local_partial_types = False # Some behaviors are changed when using Bazel (https://bazel.build). 
self.bazel = False + self.cache_map = None # type: Optional[Dict[str, str]] def __eq__(self, other: object) -> bool: return self.__class__ == other.__class__ and self.__dict__ == other.__dict__ From 7bc3a0b8d7d843f858b96ea2217dd7c353a9aa79 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 17 Mar 2018 08:33:11 -0700 Subject: [PATCH 08/39] Pass cache mapping on command line --- mypy/build.py | 25 ++++++++++++------------- mypy/main.py | 27 +++++++++++++-------------- mypy/options.py | 2 +- 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 5d61a731b917..ac13edbe9ec7 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -961,7 +961,7 @@ def get_cache_names(id: str, path: str, manager: BuildManager) -> Tuple[str, str Args: id: module ID - path: module path (used to recognize packages) + path: module path cache_dir: cache directory pyversion: Python version (major, minor) @@ -969,17 +969,15 @@ def get_cache_names(id: str, path: str, manager: BuildManager) -> Tuple[str, str A tuple with the file names to be used for the meta JSON and the data JSON, respectively. 
""" - if manager.options.cache_map and id in manager.options.cache_map: - meta_file = manager.options.cache_map[id] - assert meta_file.endswith('.meta.json'), (id, meta_file) - prefix = meta_file[:-len('.meta.json')] - else: - cache_dir = manager.options.cache_dir - pyversion = manager.options.python_version - prefix = os.path.join(cache_dir, '%d.%d' % pyversion, *id.split('.')) - is_package = os.path.basename(path).startswith('__init__.py') - if is_package: - prefix = os.path.join(prefix, '__init__') + pair = manager.options.cache_map.get(path) + if pair is not None: + return pair + cache_dir = manager.options.cache_dir + pyversion = manager.options.python_version + prefix = os.path.join(cache_dir, '%d.%d' % pyversion, *id.split('.')) + is_package = os.path.basename(path).startswith('__init__.py') + if is_package: + prefix = os.path.join(prefix, '__init__') return (prefix + '.meta.json', prefix + '.data.json') @@ -1216,6 +1214,7 @@ def write_cache(id: str, path: str, tree: MypyFile, path = os.path.abspath(path) else: # For Bazel, make all paths relative (else Bazel caching is thwarted). + # And also patch tree.path, which might have been made absolute earlier. tree.path = path = relpath(path) meta_json, data_json = get_cache_names(id, path, manager) @@ -1318,7 +1317,7 @@ def delete_cache(id: str, path: str, manager: BuildManager) -> None: This avoids inconsistent states with cache files from different mypy runs, see #4043 for an example. 
""" - path = os.path.abspath(path) + path = relpath(path) if manager.options.bazel else os.path.abspath(path) meta_json, data_json = get_cache_names(id, path, manager) manager.log('Deleting {} {} {} {}'.format(id, path, meta_json, data_json)) diff --git a/mypy/main.py b/mypy/main.py index 44645f92a3b5..e89041cf844f 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -384,13 +384,11 @@ def add_invertible_flag(flag: str, parser.add_argument('--local-partial-types', action='store_true', help=argparse.SUPPRESS) # --bazel changes some behaviors for use with Bazel (https://bazel.build). parser.add_argument('--bazel', action='store_true', help=argparse.SUPPRESS) - # --cache-map is a file mapping module ID to cache files. - # Each non-comment, non-blank line in the file is a module ID, - # a space, and the name of the corresponding .meta.json file. - # The corresponding .data.json file is implied. - # For modules the file should be .../__init__.meta.json. + # --cache-map FILE ... gives a mapping from source files to cache files. + # Each triple of arguments is a source file, a cache meta file, and a cache data file. # Modules not mentioned in the file will go through cache_dir. - parser.add_argument('--cache-map', action='store', dest='special-opts:cache_map', + # Must be followed by another flag or by '--' (and then only file args may follow). + parser.add_argument('--cache-map', nargs='*', dest='special-opts:cache_map', help=argparse.SUPPRESS) # deprecated options @@ -538,14 +536,15 @@ def add_invertible_flag(flag: str, # Parse bazel cache map. If it crashes, the file is missing or # its contents invalid. 
if special_opts.cache_map: - cache_map = {} - with open(special_opts.cache_map) as f: - for line in f: - parts = line.split() - if parts and not parts[0].startswith('#'): - assert len(parts) == 2 and parts[1].endswith('.meta.json'), parts - cache_map[parts[0]] = parts[1] - options.cache_map = cache_map + n = len(special_opts.cache_map) + assert n % 3 == 0, n + for i in range(0, n, 3): + source, meta_file, data_file = special_opts.cache_map[i : i + 3] + assert source not in options.cache_map + assert source.endswith('.py') or source.endswith('.pyi'), source + assert meta_file.endswith('.meta.json'), meta_file + assert data_file.endswith('.data.json'), data_file + options.cache_map[source] = (meta_file, data_file) # Let quick_and_dirty imply incremental. if options.quick_and_dirty: diff --git a/mypy/options.py b/mypy/options.py index 4d77a68a66d5..792bc0fd2953 100644 --- a/mypy/options.py +++ b/mypy/options.py @@ -178,7 +178,7 @@ def __init__(self) -> None: self.local_partial_types = False # Some behaviors are changed when using Bazel (https://bazel.build). self.bazel = False - self.cache_map = None # type: Optional[Dict[str, str]] + self.cache_map = {} # type: Dict[str, Tuple[str, str]] def __eq__(self, other: object) -> bool: return self.__class__ == other.__class__ and self.__dict__ == other.__dict__ From 3632c55af5bd4f2c0ace5c4da188b1ddf6de24f6 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 17 Mar 2018 08:51:49 -0700 Subject: [PATCH 09/39] Tweak --cache-map arg parsing --- mypy/main.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mypy/main.py b/mypy/main.py index e89041cf844f..ae31f0b9b8c7 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -388,7 +388,7 @@ def add_invertible_flag(flag: str, # Each triple of arguments is a source file, a cache meta file, and a cache data file. # Modules not mentioned in the file will go through cache_dir. # Must be followed by another flag or by '--' (and then only file args may follow). 
- parser.add_argument('--cache-map', nargs='*', dest='special-opts:cache_map', + parser.add_argument('--cache-map', nargs='+', dest='special-opts:cache_map', help=argparse.SUPPRESS) # deprecated options @@ -533,11 +533,10 @@ def add_invertible_flag(flag: str, report_dir = val options.report_dirs[report_type] = report_dir - # Parse bazel cache map. If it crashes, the file is missing or - # its contents invalid. + # Parse cache map. Uses assertions for checking. if special_opts.cache_map: n = len(special_opts.cache_map) - assert n % 3 == 0, n + assert n % 3 == 0, "--cache-map requires one or more triples (see source)" for i in range(0, n, 3): source, meta_file, data_file = special_opts.cache_map[i : i + 3] assert source not in options.cache_map From 24de3d56aa90695efe4d039e77035c83de9cf27a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 19 Mar 2018 11:06:32 -0700 Subject: [PATCH 10/39] Drop the -i requirement for --bazel; it's not strictly needed --- mypy/main.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/mypy/main.py b/mypy/main.py index ae31f0b9b8c7..2ecf458a9eb9 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -510,9 +510,6 @@ def add_invertible_flag(flag: str, parser.error("Missing target module, package, files, or command.") elif code_methods > 1: parser.error("May only specify one of: module/package, files, or command.") - if options.bazel: - if not options.incremental: - fail("--bazel requires --incremental") # Set build flags. 
if options.strict_optional_whitelist is not None: From ec719df2001d8b15d769040ac977e4afdfe2404a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 20 Mar 2018 13:37:03 -0700 Subject: [PATCH 11/39] Fix lint --- mypy/build.py | 2 +- mypy/main.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index ac13edbe9ec7..ce225edc02c8 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -91,7 +91,7 @@ def relpath(path: str) -> str: for i in range(10): if path.startswith(cwd + os.sep): # The .lstrip(os.sep) call strips duplicate leading slashes. - return '../'*i + path[len(cwd) + 1:].lstrip(os.sep) + return '../' * i + path[len(cwd) + 1:].lstrip(os.sep) cwd = os.path.dirname(cwd) if all(c == os.sep for c in cwd): break diff --git a/mypy/main.py b/mypy/main.py index 2ecf458a9eb9..e3a8db026326 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -535,7 +535,7 @@ def add_invertible_flag(flag: str, n = len(special_opts.cache_map) assert n % 3 == 0, "--cache-map requires one or more triples (see source)" for i in range(0, n, 3): - source, meta_file, data_file = special_opts.cache_map[i : i + 3] + source, meta_file, data_file = special_opts.cache_map[i:i + 3] assert source not in options.cache_map assert source.endswith('.py') or source.endswith('.pyi'), source assert meta_file.endswith('.meta.json'), meta_file From fcc9604a5278c3b584f0b39cd1f0bfd9031e48b8 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 20 Mar 2018 12:47:41 -0700 Subject: [PATCH 12/39] Somehow mypy/api.py had DOS line endings. Fix this. (#4768) --- mypy/api.py | 124 ++++++++++++++++++++++++++-------------------------- 1 file changed, 62 insertions(+), 62 deletions(-) diff --git a/mypy/api.py b/mypy/api.py index b8bb86f8f8ce..95dc0447e95f 100644 --- a/mypy/api.py +++ b/mypy/api.py @@ -1,62 +1,62 @@ -"""This module makes it possible to use mypy as part of a Python application. - -Since mypy still changes, the API was kept utterly simple and non-intrusive. 
-It just mimics command line activation without starting a new interpreter. -So the normal docs about the mypy command line apply. -Changes in the command line version of mypy will be immediately useable. - -Just import this module and then call the 'run' function with a parameter of -type List[str], containing what normally would have been the command line -arguments to mypy. - -Function 'run' returns a Tuple[str, str, int], namely -(, , ), -in which is what mypy normally writes to sys.stdout, - is what mypy normally writes to sys.stderr and exit_status is -the exit status mypy normally returns to the operating system. - -Any pretty formatting is left to the caller. - -Trivial example of code using this module: - -import sys -from mypy import api - -result = api.run(sys.argv[1:]) - -if result[0]: - print('\nType checking report:\n') - print(result[0]) # stdout - -if result[1]: - print('\nError report:\n') - print(result[1]) # stderr - -print ('\nExit status:', result[2]) -""" - -import sys -from io import StringIO -from typing import List, Tuple -from mypy.main import main - - -def run(args: List[str]) -> Tuple[str, str, int]: - old_stdout = sys.stdout - new_stdout = StringIO() - sys.stdout = new_stdout - - old_stderr = sys.stderr - new_stderr = StringIO() - sys.stderr = new_stderr - - try: - main(None, args=args) - exit_status = 0 - except SystemExit as system_exit: - exit_status = system_exit.code - finally: - sys.stdout = old_stdout - sys.stderr = old_stderr - - return new_stdout.getvalue(), new_stderr.getvalue(), exit_status +"""This module makes it possible to use mypy as part of a Python application. + +Since mypy still changes, the API was kept utterly simple and non-intrusive. +It just mimics command line activation without starting a new interpreter. +So the normal docs about the mypy command line apply. +Changes in the command line version of mypy will be immediately useable. 
+ +Just import this module and then call the 'run' function with a parameter of +type List[str], containing what normally would have been the command line +arguments to mypy. + +Function 'run' returns a Tuple[str, str, int], namely +(, , ), +in which is what mypy normally writes to sys.stdout, + is what mypy normally writes to sys.stderr and exit_status is +the exit status mypy normally returns to the operating system. + +Any pretty formatting is left to the caller. + +Trivial example of code using this module: + +import sys +from mypy import api + +result = api.run(sys.argv[1:]) + +if result[0]: + print('\nType checking report:\n') + print(result[0]) # stdout + +if result[1]: + print('\nError report:\n') + print(result[1]) # stderr + +print ('\nExit status:', result[2]) +""" + +import sys +from io import StringIO +from typing import List, Tuple +from mypy.main import main + + +def run(args: List[str]) -> Tuple[str, str, int]: + old_stdout = sys.stdout + new_stdout = StringIO() + sys.stdout = new_stdout + + old_stderr = sys.stderr + new_stderr = StringIO() + sys.stderr = new_stderr + + try: + main(None, args=args) + exit_status = 0 + except SystemExit as system_exit: + exit_status = system_exit.code + finally: + sys.stdout = old_stdout + sys.stderr = old_stderr + + return new_stdout.getvalue(), new_stderr.getvalue(), exit_status From c62a22ee2b4d9ca984cdefc9aef742c92b0afa50 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 21 Mar 2018 20:43:00 -0700 Subject: [PATCH 13/39] Add docstring for relpath() --- mypy/build.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mypy/build.py b/mypy/build.py index ce225edc02c8..3558d7545a36 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -80,6 +80,7 @@ def getmtime(name: str) -> int: def relpath(path: str) -> str: + """Returns the path relative to the current working directory.""" if not path or path[0] not in SEPARATORS: # Relative path, or has drive letter prefix (C: etc.) on Windows. 
assert not os.path.isabs(path), "%s is neither rel nor abs" % (path,) From 4dd90b89ec1714fe6012853248ed911df450c06b Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 26 Mar 2018 15:12:14 -0700 Subject: [PATCH 14/39] Monumental effort to support packages without __init__.py for Bazel (only) --- mypy/build.py | 2 +- mypy/fscache.py | 51 ++++++++++++++++++++++++++++++++++++++----------- mypy/main.py | 21 +++++++++++--------- 3 files changed, 53 insertions(+), 21 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 3558d7545a36..3a2f285cb8d3 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -227,7 +227,7 @@ def _build(sources: List[BuildSource], gc.set_threshold(50000) data_dir = default_data_dir(bin_dir) - fscache = fscache or FileSystemCache(options.python_version) + fscache = fscache or FileSystemCache(options.python_version, package_root=options.bazel) # Determine the default module search path. lib_path = default_lib_path(data_dir, diff --git a/mypy/fscache.py b/mypy/fscache.py index 75600dba2951..eb496c12a09b 100644 --- a/mypy/fscache.py +++ b/mypy/fscache.py @@ -28,6 +28,7 @@ advantage of the benefits. 
""" +import hashlib import os import stat from typing import Tuple, Dict, List, Optional @@ -35,7 +36,8 @@ class FileSystemMetaCache: - def __init__(self) -> None: + def __init__(self, package_root: bool = False) -> None: + self.package_root = package_root self.flush() def flush(self) -> None: @@ -45,6 +47,7 @@ def flush(self) -> None: self.listdir_cache = {} # type: Dict[str, List[str]] self.listdir_error_cache = {} # type: Dict[str, Exception] self.isfile_case_cache = {} # type: Dict[str, bool] + self.fake_package_cache = set() # type: Set[str] def stat(self, path: str) -> os.stat_result: if path in self.stat_cache: @@ -54,14 +57,35 @@ def stat(self, path: str) -> os.stat_result: try: st = os.stat(path) except Exception as err: + if self.package_root and not os.path.isabs(path) and os.path.basename(path) == '__init__.py': + try: + return self._fake_init(path) + except OSError: + pass self.stat_error_cache[path] = err - raise + raise err self.stat_cache[path] = st return st + def _fake_init(self, path: str) -> os.stat_result: + dirname = os.path.dirname(path) or os.curdir + st = self.stat(dirname) # May raise OSError + seq = list(st) # Stat result as a sequence so we can modify it + seq[stat.ST_MODE] = stat.S_IFREG | 0o444 + seq[stat.ST_INO] = 1 + seq[stat.ST_NLINK] = 1 + seq[stat.ST_SIZE] = 0 + st = os.stat_result(seq) + self.stat_cache[path] = st + self.fake_package_cache.add(dirname) + return st + def listdir(self, path: str) -> List[str]: if path in self.listdir_cache: - return self.listdir_cache[path] + res = self.listdir_cache[path] + if os.path.normpath(path) in self.fake_package_cache and '__init__.py' not in res: + res.append('__init__.py') # Updates the result as well as the cache + return res if path in self.listdir_error_cache: raise self.listdir_error_cache[path] try: @@ -70,6 +94,8 @@ def listdir(self, path: str) -> List[str]: self.listdir_error_cache[path] = err raise err self.listdir_cache[path] = results + if path in self.fake_package_cache and 
'__init__.py' not in results: + results.append('__init__.py') return results def isfile(self, path: str) -> bool: @@ -118,9 +144,9 @@ def exists(self, path: str) -> bool: class FileSystemCache(FileSystemMetaCache): - def __init__(self, pyversion: Tuple[int, int]) -> None: + def __init__(self, pyversion: Tuple[int, int], package_root: bool = False) -> None: self.pyversion = pyversion - self.flush() + super().__init__(package_root=package_root) def flush(self) -> None: """Start another transaction and empty all caches.""" @@ -138,12 +164,15 @@ def read_with_python_encoding(self, path: str) -> str: # Need to stat first so that the contents of file are from no # earlier instant than the mtime reported by self.stat(). self.stat(path) - - try: - data, md5hash = read_with_python_encoding(path, self.pyversion) - except Exception as err: - self.read_error_cache[path] = err - raise + if os.path.basename(path) == '__init__.py' and os.path.dirname(path) in self.fake_package_cache: + data = '' + md5hash = hashlib.md5(b'').hexdigest() + else: + try: + data, md5hash = read_with_python_encoding(path, self.pyversion) + except Exception as err: + self.read_error_cache[path] = err + raise self.read_cache[path] = data self.hash_cache[path] = md5hash return data diff --git a/mypy/main.py b/mypy/main.py index dda82a838cb3..f2f3ef48a314 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -581,13 +581,14 @@ def create_source_list(files: Sequence[str], options: Options) -> List[BuildSour Raises InvalidSourceList on errors. """ + package_root = options.bazel targets = [] for f in files: if f.endswith(PY_EXTENSIONS): # Can raise InvalidSourceList if a directory doesn't have a valid module name. 
- targets.append(BuildSource(f, crawl_up(f)[1], None)) + targets.append(BuildSource(f, crawl_up(f, package_root)[1], None)) elif os.path.isdir(f): - sub_targets = expand_dir(f) + sub_targets = expand_dir(f, '', package_root) if not sub_targets: raise InvalidSourceList("There are no .py[i] files in directory '{}'" .format(f)) @@ -610,15 +611,15 @@ def keyfunc(name: str) -> Tuple[int, str]: return (-1, name) -def expand_dir(arg: str, mod_prefix: str = '') -> List[BuildSource]: +def expand_dir(arg: str, mod_prefix: str, package_root: bool) -> List[BuildSource]: """Convert a directory name to a list of sources to build.""" - f = get_init_file(arg) + f = get_init_file(arg, package_root=package_root) if mod_prefix and not f: return [] seen = set() # type: Set[str] sources = [] if f and not mod_prefix: - top_dir, top_mod = crawl_up(f) + top_dir, top_mod = crawl_up(f, package_root) mod_prefix = top_mod + '.' if mod_prefix: sources.append(BuildSource(f, mod_prefix.rstrip('.'), None)) @@ -627,7 +628,7 @@ def expand_dir(arg: str, mod_prefix: str = '') -> List[BuildSource]: for name in names: path = os.path.join(arg, name) if os.path.isdir(path): - sub_sources = expand_dir(path, mod_prefix + name + '.') + sub_sources = expand_dir(path, mod_prefix + name + '.', package_root) if sub_sources: seen.add(name) sources.extend(sub_sources) @@ -642,7 +643,7 @@ def expand_dir(arg: str, mod_prefix: str = '') -> List[BuildSource]: return sources -def crawl_up(arg: str) -> Tuple[str, str]: +def crawl_up(arg: str, package_root: bool) -> Tuple[str, str]: """Given a .py[i] filename, return (root directory, module). 
We crawl up the path until we find a directory without @@ -650,7 +651,7 @@ def crawl_up(arg: str) -> Tuple[str, str]: """ dir, mod = os.path.split(arg) mod = strip_py(mod) or mod - while dir and get_init_file(dir): + while dir and get_init_file(dir, package_root=package_root): dir, base = os.path.split(dir) if not base: break @@ -676,7 +677,7 @@ def strip_py(arg: str) -> Optional[str]: return None -def get_init_file(dir: str) -> Optional[str]: +def get_init_file(dir: str, package_root: bool) -> Optional[str]: """Check whether a directory contains a file named __init__.py[i]. If so, return the file's name (with dir prefixed). If not, return @@ -688,6 +689,8 @@ def get_init_file(dir: str) -> Optional[str]: f = os.path.join(dir, '__init__' + ext) if os.path.isfile(f): return f + if package_root and ext == '.py' and not os.path.isabs(dir): + return f return None From c667dfc79cb93c6f20d644a3ffd224fc24497515 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 26 Mar 2018 17:25:18 -0700 Subject: [PATCH 15/39] Fix mypy errors --- mypy/fscache.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mypy/fscache.py b/mypy/fscache.py index eb496c12a09b..033e60a310d1 100644 --- a/mypy/fscache.py +++ b/mypy/fscache.py @@ -31,7 +31,7 @@ import hashlib import os import stat -from typing import Tuple, Dict, List, Optional +from typing import Dict, List, Optional, Set, Tuple from mypy.util import read_with_python_encoding @@ -70,12 +70,16 @@ def stat(self, path: str) -> os.stat_result: def _fake_init(self, path: str) -> os.stat_result: dirname = os.path.dirname(path) or os.curdir st = self.stat(dirname) # May raise OSError - seq = list(st) # Stat result as a sequence so we can modify it + # Get stat result as a sequence so we can modify it. + # (Alas, typeshed's os.stat_result is not a sequence yet.) 
+ tpl = tuple(st) # type: ignore + seq = list(tpl) # type: List[float] seq[stat.ST_MODE] = stat.S_IFREG | 0o444 seq[stat.ST_INO] = 1 seq[stat.ST_NLINK] = 1 seq[stat.ST_SIZE] = 0 - st = os.stat_result(seq) + tpl = tuple(seq) + st = os.stat_result(tpl) self.stat_cache[path] = st self.fake_package_cache.add(dirname) return st From a43ecdc41ef8a0ffabb4287112b0227e292ceb21 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 26 Mar 2018 17:35:52 -0700 Subject: [PATCH 16/39] Fix tests --- mypy/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypy/main.py b/mypy/main.py index f2f3ef48a314..048094818471 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -611,7 +611,7 @@ def keyfunc(name: str) -> Tuple[int, str]: return (-1, name) -def expand_dir(arg: str, mod_prefix: str, package_root: bool) -> List[BuildSource]: +def expand_dir(arg: str, mod_prefix: str = '', package_root: bool = False) -> List[BuildSource]: """Convert a directory name to a list of sources to build.""" f = get_init_file(arg, package_root=package_root) if mod_prefix and not f: From e17a702788b8231016139bd8ae7ee6c1cd4731e8 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 26 Mar 2018 20:43:36 -0700 Subject: [PATCH 17/39] Fix flake8 --- mypy/fscache.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mypy/fscache.py b/mypy/fscache.py index 033e60a310d1..0ed6da2a7878 100644 --- a/mypy/fscache.py +++ b/mypy/fscache.py @@ -57,7 +57,8 @@ def stat(self, path: str) -> os.stat_result: try: st = os.stat(path) except Exception as err: - if self.package_root and not os.path.isabs(path) and os.path.basename(path) == '__init__.py': + if (self.package_root and not os.path.isabs(path) + and os.path.basename(path) == '__init__.py'): try: return self._fake_init(path) except OSError: @@ -168,7 +169,8 @@ def read_with_python_encoding(self, path: str) -> str: # Need to stat first so that the contents of file are from no # earlier instant than the mtime reported by 
self.stat(). self.stat(path) - if os.path.basename(path) == '__init__.py' and os.path.dirname(path) in self.fake_package_cache: + if (os.path.basename(path) == '__init__.py' + and os.path.dirname(path) in self.fake_package_cache): data = '' md5hash = hashlib.md5(b'').hexdigest() else: From eade1167fa29d4a6de92c52f69436d9e5917d9fe Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 4 Apr 2018 18:57:36 -0700 Subject: [PATCH 18/39] Make --package-root a separate flag --- mypy/build.py | 2 +- mypy/find_sources.py | 4 +++- mypy/fscache.py | 29 ++++++++++++++++++++++++----- mypy/main.py | 5 +++++ mypy/options.py | 3 +++ 5 files changed, 36 insertions(+), 7 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 7bcabc0ac1a0..7cd5c09767d4 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -227,7 +227,7 @@ def _build(sources: List[BuildSource], gc.set_threshold(50000) data_dir = default_data_dir(bin_dir) - fscache = fscache or FileSystemCache(options.python_version, package_root=options.bazel) + fscache = fscache or FileSystemCache(options.python_version, package_root=options.package_root) # Determine the default module search path. lib_path = default_lib_path(data_dir, diff --git a/mypy/find_sources.py b/mypy/find_sources.py index 7ec1e2a5cf01..8c469f557347 100644 --- a/mypy/find_sources.py +++ b/mypy/find_sources.py @@ -23,7 +23,7 @@ def create_source_list(files: Sequence[str], options: Options, Raises InvalidSourceList on errors. 
""" - fscache = fscache or FileSystemMetaCache() + fscache = fscache or FileSystemMetaCache(package_root=options.package_root) finder = SourceFinder(fscache) targets = [] @@ -141,6 +141,8 @@ def get_init_file(self, dir: str) -> Optional[str]: f = os.path.join(dir, '__init__' + ext) if self.fscache.isfile(f): return f + if ext == '.py' and self.fscache.under_package_root(f): + return f return None diff --git a/mypy/fscache.py b/mypy/fscache.py index 0ed6da2a7878..824d9fcfe57e 100644 --- a/mypy/fscache.py +++ b/mypy/fscache.py @@ -36,8 +36,8 @@ class FileSystemMetaCache: - def __init__(self, package_root: bool = False) -> None: - self.package_root = package_root + def __init__(self, package_root: Optional[List[str]] = None) -> None: + self.package_root = package_root if package_root is not None else [] self.flush() def flush(self) -> None: @@ -48,6 +48,7 @@ def flush(self) -> None: self.listdir_error_cache = {} # type: Dict[str, Exception] self.isfile_case_cache = {} # type: Dict[str, bool] self.fake_package_cache = set() # type: Set[str] + self.cwd = os.getcwd() def stat(self, path: str) -> os.stat_result: if path in self.stat_cache: @@ -57,8 +58,7 @@ def stat(self, path: str) -> os.stat_result: try: st = os.stat(path) except Exception as err: - if (self.package_root and not os.path.isabs(path) - and os.path.basename(path) == '__init__.py'): + if isinstance(err, OSError) and self.under_package_root(path): try: return self._fake_init(path) except OSError: @@ -68,6 +68,25 @@ def stat(self, path: str) -> os.stat_result: self.stat_cache[path] = st return st + def under_package_root(self, path: str) -> bool: + if not self.package_root: + return False + dirname, basename = os.path.split(path) + if basename not in ('__init__.py', '__init__.pyi'): + return False + try: + st = self.stat(dirname) + except OSError: + return False + else: + if not stat.S_ISDIR(st.st_mode): + return False + for root in self.package_root: + if (path.startswith(root + os.sep) or + root == '' or 
root == '.' or root == self.cwd and not os.path.isabs(path)): + return True + return False + def _fake_init(self, path: str) -> os.stat_result: dirname = os.path.dirname(path) or os.curdir st = self.stat(dirname) # May raise OSError @@ -149,7 +168,7 @@ def exists(self, path: str) -> bool: class FileSystemCache(FileSystemMetaCache): - def __init__(self, pyversion: Tuple[int, int], package_root: bool = False) -> None: + def __init__(self, pyversion: Tuple[int, int], package_root: Optional[List[str]] = None) -> None: self.pyversion = pyversion super().__init__(package_root=package_root) diff --git a/mypy/main.py b/mypy/main.py index f1057d36d8c7..2facfcd93b0c 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -383,6 +383,10 @@ def add_invertible_flag(flag: str, parser.add_argument('--local-partial-types', action='store_true', help=argparse.SUPPRESS) # --bazel changes some behaviors for use with Bazel (https://bazel.build). parser.add_argument('--bazel', action='store_true', help=argparse.SUPPRESS) + # --package-root adds a directory below which directories are considered + # packages even without __init__.py. May be repeated. + parser.add_argument('--package-root', metavar='ROOT', action='append', default=[], + help=argparse.SUPPRESS) # --cache-map FILE ... gives a mapping from source files to cache files. # Each triple of arguments is a source file, a cache meta file, and a cache data file. # Modules not mentioned in the file will go through cache_dir. 
@@ -598,6 +602,7 @@ def add_invertible_flag(flag: str, 'plugins': lambda s: [p.strip() for p in s.split(',')], 'always_true': lambda s: [p.strip() for p in s.split(',')], 'always_false': lambda s: [p.strip() for p in s.split(',')], + 'package_root': lambda s: [p.strip() for p in s.split(',')], } SHARED_CONFIG_FILES = ('setup.cfg',) diff --git a/mypy/options.py b/mypy/options.py index 3b58350e8e8d..fb183999f77e 100644 --- a/mypy/options.py +++ b/mypy/options.py @@ -192,6 +192,9 @@ def __init__(self) -> None: self.local_partial_types = False # Some behaviors are changed when using Bazel (https://bazel.build). self.bazel = False + # List of package roots -- directories under these are packages even + # if they don't have __init__.py. + self.package_root = [] # type: List[str] self.cache_map = {} # type: Dict[str, Tuple[str, str]] def __eq__(self, other: object) -> bool: From f19892abc02d773a7aafc8cb86e73376fe7f97df Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 17 Apr 2018 17:37:10 -0700 Subject: [PATCH 19/39] Sync typeshed (same as master) --- typeshed | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/typeshed b/typeshed index 6cf97b8db203..d63866bd3646 160000 --- a/typeshed +++ b/typeshed @@ -1 +1 @@ -Subproject commit 6cf97b8db203871568e7e66492fc2499191495f8 +Subproject commit d63866bd36467315a0415080b82923abb42b478f From a0072ab87db549a25275e64f9d252b8631d01bac Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 18 Apr 2018 15:59:57 -0700 Subject: [PATCH 20/39] Add tests for --package-root and fix behavior --- mypy/find_sources.py | 2 +- mypy/fscache.py | 29 ++++++++++++++++++++++------- test-data/unit/cmdline.test | 32 +++++++++++++++++++++++++++++++- 3 files changed, 54 insertions(+), 9 deletions(-) diff --git a/mypy/find_sources.py b/mypy/find_sources.py index 8c469f557347..fba249d3a5db 100644 --- a/mypy/find_sources.py +++ b/mypy/find_sources.py @@ -141,7 +141,7 @@ def get_init_file(self, dir: str) -> Optional[str]: f = 
os.path.join(dir, '__init__' + ext) if self.fscache.isfile(f): return f - if ext == '.py' and self.fscache.under_package_root(f): + if ext == '.py' and self.fscache.init_under_package_root(f): return f return None diff --git a/mypy/fscache.py b/mypy/fscache.py index 940c5498897c..00f6c71d8918 100644 --- a/mypy/fscache.py +++ b/mypy/fscache.py @@ -37,7 +37,17 @@ class FileSystemMetaCache: def __init__(self, package_root: Optional[List[str]] = None) -> None: - self.package_root = package_root if package_root is not None else [] + self.package_root = [] + if package_root: + for root in package_root: + assert not os.path.isabs(root), "Package root cannot be absolute" + if os.altsep: + root = root.replace(root, os.altsep, os.sep) + if root in ('.', '.' + os.sep): + root = '' + if root and not root.endswith(os.sep): + root = root + os.sep + self.package_root.append(root) self.flush() def flush(self) -> None: @@ -58,7 +68,7 @@ def stat(self, path: str) -> os.stat_result: try: st = os.stat(path) except OSError as err: - if isinstance(err, OSError) and self.under_package_root(path): + if isinstance(err, OSError) and self.init_under_package_root(path): try: return self._fake_init(path) except OSError: @@ -70,7 +80,7 @@ def stat(self, path: str) -> os.stat_result: self.stat_cache[path] = st return st - def under_package_root(self, path: str) -> bool: + def init_under_package_root(self, path: str) -> bool: if not self.package_root: return False dirname, basename = os.path.split(path) @@ -83,11 +93,16 @@ def under_package_root(self, path: str) -> bool: else: if not stat.S_ISDIR(st.st_mode): return False + ok = False for root in self.package_root: - if (path.startswith(root + os.sep) or - root == '' or root == '.' or root == self.cwd and not os.path.isabs(path)): - return True - return False + if path.startswith(root): + if path == root + basename: + # A package root itself is never a package. 
+ ok = False + break + else: + ok = True + return ok def _fake_init(self, path: str) -> os.stat_result: dirname = os.path.dirname(path) or os.curdir diff --git a/test-data/unit/cmdline.test b/test-data/unit/cmdline.test index 84da97c5bec8..04c279651f96 100644 --- a/test-data/unit/cmdline.test +++ b/test-data/unit/cmdline.test @@ -5,6 +5,9 @@ -- -- # cmd: mypy -- +-- Note that # flags: --some-flag IS NOT SUPPORTED. +-- Use # cmd: mypy --some-flag ... +-- -- '== Return code: ' is added to the output when the process return code -- is "nonobvious" -- that is, when it is something other than 0 if there are no -- messages and 1 if there are. @@ -1128,7 +1131,6 @@ math.frobnicate() [case testConfigWarnUnusedSection1] # cmd: mypy foo.py quux.py spam/eggs.py -# flags: --follow-imports=skip [file mypy.ini] [[mypy] warn_unused_configs = True @@ -1147,3 +1149,31 @@ warn_unused_configs = True [out] Warning: unused section(s) in mypy.ini: [mypy-bar], [mypy-baz.*], [mypy-emarg.*], [mypy-emarg.hatch] == Return code: 0 + +[case testPackageRootEmpty] +# cmd: mypy --package-root= a/b/c.py main.py +[file a/b/c.py] +[file main.py] +import a.b.c + +[case testPackageRootNonEmpty] +# cmd: mypy --package-root=a/ a/b/c.py main.py +[file a/b/c.py] +[file main.py] +import b.c + +[case testPackageRootMultiple1] +# cmd: mypy --package-root=. --package-root=a a/b/c.py d.py main.py +[file a/b/c.py] +[file d.py] +[file main.py] +import b.c +import d + +[case testPackageRootMultiple2] +# cmd: mypy --package-root=a/ --package-root=./ a/b/c.py d.py main.py +[file a/b/c.py] +[file d.py] +[file main.py] +import b.c +import d From 0096ee0a62c7c8b49ad981d63fc1de6b988b9a2f Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 18 Apr 2018 16:40:49 -0700 Subject: [PATCH 21/39] Add rudimentary tests for --cache-map and --bazel. Also print usage messages for errors instead of assertions. 
--- mypy/fscache.py | 14 +++-------- mypy/main.py | 34 ++++++++++++++++++++++----- test-data/unit/check-incremental.test | 11 +++++++++ test-data/unit/cmdline.test | 7 ++++++ 4 files changed, 49 insertions(+), 17 deletions(-) diff --git a/mypy/fscache.py b/mypy/fscache.py index 00f6c71d8918..9f092700e067 100644 --- a/mypy/fscache.py +++ b/mypy/fscache.py @@ -37,17 +37,9 @@ class FileSystemMetaCache: def __init__(self, package_root: Optional[List[str]] = None) -> None: - self.package_root = [] - if package_root: - for root in package_root: - assert not os.path.isabs(root), "Package root cannot be absolute" - if os.altsep: - root = root.replace(root, os.altsep, os.sep) - if root in ('.', '.' + os.sep): - root = '' - if root and not root.endswith(os.sep): - root = root + os.sep - self.package_root.append(root) + if package_root is None: + package_root = [] + self.package_root = package_root self.flush() def flush(self) -> None: diff --git a/mypy/main.py b/mypy/main.py index c7b5abd27f5b..00242729a4a5 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -638,16 +638,38 @@ def add_invertible_flag(flag: str, report_dir = val options.report_dirs[report_type] = report_dir - # Parse cache map. Uses assertions for checking. + # Validate and normalize --package-root. + if options.package_root: + package_root = [] + for root in options.package_root: + if os.path.isabs(root): + parse.error("Package root cannot be absolute: %r" % root) + if os.altsep: + root = root.replace(root, os.altsep, os.sep) + if root in ('.', '.' + os.sep): + root = '' + if root and not root.endswith(os.sep): + root = root + os.sep + package_root.append(root) + options.package_root = package_root + + # Parse cache map. 
if special_opts.cache_map: n = len(special_opts.cache_map) - assert n % 3 == 0, "--cache-map requires one or more triples (see source)" + if n % 3 != 0: + parser.error("--cache-map requires one or more triples (see source)") for i in range(0, n, 3): source, meta_file, data_file = special_opts.cache_map[i:i + 3] - assert source not in options.cache_map - assert source.endswith('.py') or source.endswith('.pyi'), source - assert meta_file.endswith('.meta.json'), meta_file - assert data_file.endswith('.data.json'), data_file + if source in options.cache_map: + parser.error("Duplicate --cache-map source %s)" % source) + if not source.endswith('.py') and not source.endswith('.pyi'): + parser.error("Invalid --cache-map source %s (triple[0] must be *.py[i])" % source) + if not meta_file.endswith('.meta.json'): + parser.error("Invalid --cache-map meta_file %s (triple[1] must be *.meta.json)" % + meta_file) + if not data_file.endswith('.data.json'): + parser.error("Invalid --cache-map data_file %s (triple[2] must be *.data.json)" % + data_file) options.cache_map[source] = (meta_file, data_file) # Let quick_and_dirty imply incremental. 
diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test index 37c63bce7406..4cbde3c6e593 100644 --- a/test-data/unit/check-incremental.test +++ b/test-data/unit/check-incremental.test @@ -4235,3 +4235,14 @@ pass [out2] [out3] tmp/a.py:1: note: unused 'type: ignore' comment + +[case testBazelFlagIgnoresFileChanges] +-- Since the initial run wrote a cache file, the second run ignores the source +# flags: --bazel +from a import f +f() +[file a.py] +def f(): pass +[file a.py.2] +[out] +[out2] diff --git a/test-data/unit/cmdline.test b/test-data/unit/cmdline.test index 04c279651f96..1a98623fe3c8 100644 --- a/test-data/unit/cmdline.test +++ b/test-data/unit/cmdline.test @@ -1177,3 +1177,10 @@ import d [file main.py] import b.c import d + +[case testCacheMap] +-- This just checks that a valid --cache-map triple is accepted. +-- (Errors are too verbose to check.) +# cmd: mypy a.py --cache-map a.py a.meta.json a.data.json +[file a.py] +[out] From 268b74c9bff4aa0f5ad30a9078ca273d84ce0a07 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 23 Apr 2018 11:58:41 -0700 Subject: [PATCH 22/39] Fix two type errors found by --- mypy/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mypy/main.py b/mypy/main.py index 40f148376354..91ba272adc2d 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -643,9 +643,9 @@ def add_invertible_flag(flag: str, package_root = [] for root in options.package_root: if os.path.isabs(root): - parse.error("Package root cannot be absolute: %r" % root) + parser.error("Package root cannot be absolute: %r" % root) if os.altsep: - root = root.replace(root, os.altsep, os.sep) + root = root.replace(os.altsep, os.sep) if root in ('.', '.' 
+ os.sep): root = '' if root and not root.endswith(os.sep): From dc1f262eed56c1095ff83d0f4637e81d35542c04 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 23 Apr 2018 14:56:03 -0700 Subject: [PATCH 23/39] Replace relpath() with os.path.relpath() (D'oh) --- mypy/build.py | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 28dd83793e0d..1dab339e228d 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -83,26 +83,6 @@ def getmtime(name: str) -> int: return int(os.path.getmtime(name)) -def relpath(path: str) -> str: - """Returns the path relative to the current working directory.""" - if not path or path[0] not in SEPARATORS: - # Relative path, or has drive letter prefix (C: etc.) on Windows. - assert not os.path.isabs(path), "%s is neither rel nor abs" % (path,) - return path - if os.altsep and os.altsep in path: - # So the rest of this code doesn't need to handle altsep on Windows. - path = path.replace(os.altsep, os.sep) - cwd = os.getcwd() - for i in range(10): - if path.startswith(cwd + os.sep): - # The .lstrip(os.sep) call strips duplicate leading slashes. - return '../' * i + path[len(cwd) + 1:].lstrip(os.sep) - cwd = os.path.dirname(cwd) - if all(c == os.sep for c in cwd): - break - return path - - # TODO: Get rid of BuildResult. We might as well return a BuildManager. class BuildResult: """The result of a successful build. @@ -1161,7 +1141,7 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str], manager.log('Metadata abandoned for {}: deps cache is modified'.format(id)) return None - path = relpath(path) if bazel else os.path.abspath(path) + path = os.path.relpath(path) if bazel else os.path.abspath(path) try: st = manager.get_stat(path) except OSError: @@ -1287,7 +1267,7 @@ def write_cache(id: str, path: str, tree: MypyFile, else: # For Bazel, make all paths relative (else Bazel caching is thwarted). 
# And also patch tree.path, which might have been made absolute earlier. - tree.path = path = relpath(path) + tree.path = path = os.path.relpath(path) meta_json, data_json, deps_json = get_cache_names(id, path, manager) manager.log('Writing {} {} {} {} {}'.format( id, path, meta_json, data_json, deps_json)) @@ -1390,7 +1370,7 @@ def delete_cache(id: str, path: str, manager: BuildManager) -> None: This avoids inconsistent states with cache files from different mypy runs, see #4043 for an example. """ - path = relpath(path) if manager.options.bazel else os.path.abspath(path) + path = os.path.relpath(path) if manager.options.bazel else os.path.abspath(path) cache_paths = get_cache_names(id, path, manager) manager.log('Deleting {} {} {}'.format(id, path, " ".join(x for x in cache_paths if x))) From e103d6316bf9eb20a58b3e5eb9beb314e71e0523 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 23 Apr 2018 15:15:11 -0700 Subject: [PATCH 24/39] Use os.path.relpath() in --package-root normalization --- mypy/main.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mypy/main.py b/mypy/main.py index 91ba272adc2d..3c8d166d1abb 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -644,8 +644,7 @@ def add_invertible_flag(flag: str, for root in options.package_root: if os.path.isabs(root): parser.error("Package root cannot be absolute: %r" % root) - if os.altsep: - root = root.replace(os.altsep, os.sep) + root = root and os.path.relpath(root) # Normalizes and removes drive on Windows if root in ('.', '.' 
+ os.sep): root = '' if root and not root.endswith(os.sep): From 80daeaeda2b45fcebb73125feb67aa903ad469e7 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 23 Apr 2018 15:29:29 -0700 Subject: [PATCH 25/39] Break long line --- mypy/fscache.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mypy/fscache.py b/mypy/fscache.py index 9f092700e067..8ac1c8b24229 100644 --- a/mypy/fscache.py +++ b/mypy/fscache.py @@ -178,7 +178,8 @@ def exists(self, path: str) -> bool: class FileSystemCache(FileSystemMetaCache): - def __init__(self, pyversion: Tuple[int, int], package_root: Optional[List[str]] = None) -> None: + def __init__(self, pyversion: Tuple[int, int], + package_root: Optional[List[str]] = None) -> None: super().__init__(package_root=package_root) self.pyversion = pyversion From b7fcc9e7c2868da64031a828de358429b16dfa73 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 23 Apr 2018 16:01:34 -0700 Subject: [PATCH 26/39] Another attempt at fixing the Windows test failures --- mypy/fscache.py | 1 + mypy/main.py | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/mypy/fscache.py b/mypy/fscache.py index 8ac1c8b24229..51abc53edeba 100644 --- a/mypy/fscache.py +++ b/mypy/fscache.py @@ -86,6 +86,7 @@ def init_under_package_root(self, path: str) -> bool: if not stat.S_ISDIR(st.st_mode): return False ok = False + drive, path = os.path.splitdrive(path) # Ignore Windows drive name for root in self.package_root: if path.startswith(root): if path == root + basename: diff --git a/mypy/main.py b/mypy/main.py index 3c8d166d1abb..83bc11b1918e 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -644,11 +644,12 @@ def add_invertible_flag(flag: str, for root in options.package_root: if os.path.isabs(root): parser.error("Package root cannot be absolute: %r" % root) - root = root and os.path.relpath(root) # Normalizes and removes drive on Windows - if root in ('.', '.' 
+ os.sep): - root = '' - if root and not root.endswith(os.sep): - root = root + os.sep + drive, root = os.path.splitdrive(root) # Ignore Windows drive name + if root: + if root in (os.curdir, os.curdir + os.sep): + root = '' + elif not root.endswith(os.sep): + root = root + os.sep package_root.append(root) options.package_root = package_root From 363f4b69b4cc7b7f86b05e6cf32323115ef3fe42 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 24 Apr 2018 09:00:32 -0700 Subject: [PATCH 27/39] Merge master --- mypy/build.py | 87 +++++----- mypy/fswatcher.py | 3 +- mypy/server/update.py | 192 ++++++++++++++--------- test-data/unit/check-incremental.test | 17 ++ test-data/unit/fine-grained-modules.test | 29 ++++ test-data/unit/fine-grained.test | 2 +- typeshed | 2 +- 7 files changed, 213 insertions(+), 119 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 1dab339e228d..f8a76bdc493c 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -703,23 +703,17 @@ def correct_rel_imp(imp: Union[ImportFrom, ImportAll]) -> str: elif isinstance(imp, ImportFrom): cur_id = correct_rel_imp(imp) pos = len(res) - all_are_submodules = True # Also add any imported names that are submodules. pri = import_priority(imp, PRI_MED) for name, __ in imp.names: sub_id = cur_id + '.' + name if self.is_module(sub_id): res.append((pri, sub_id, imp.line)) - else: - all_are_submodules = False - # If all imported names are submodules, don't add - # cur_id as a dependency. Otherwise (i.e., if at - # least one imported name isn't a submodule) - # cur_id is also a dependency, and we should - # insert it *before* any submodules. - if not all_are_submodules: - pri = import_priority(imp, PRI_HIGH) - res.insert(pos, ((pri, cur_id, imp.line))) + # Add cur_id as a dependency, even if all of the + # imports are submodules. Processing import from will try + # to look through cur_id, so we should depend on it. 
+ pri = import_priority(imp, PRI_HIGH) + res.insert(pos, ((pri, cur_id, imp.line))) elif isinstance(imp, ImportAll): pri = import_priority(imp, PRI_HIGH) res.append((pri, correct_rel_imp(imp), imp.line)) @@ -1156,12 +1150,17 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str], # changed since the cache was generated. We *don't* want to do a # coarse-grained incremental rebuild, so we accept the cache # metadata even if it doesn't match the source file. - if manager.use_fine_grained_cache(): - manager.log('Using potentially stale metadata for {}'.format(id)) - return meta + # + # We still *do* the mtime/md5 checks, however, to enable + # fine-grained mode to take advantage of the mtime-updating + # optimization when mtimes differ but md5s match. There is + # essentially no extra time cost to computing the hash here, since + # it will be cached and will be needed for finding changed files + # later anyways. + fine_grained_cache = manager.use_fine_grained_cache() size = st.st_size - if not bazel and size != meta.size: + if size != meta.size and not (bazel or fine_grained_cache): manager.log('Metadata abandoned for {}: file {} has different size'.format(id, path)) return None @@ -1172,8 +1171,13 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str], except (OSError, UnicodeDecodeError, DecodeError): return None if source_hash != meta.hash: - manager.log('Metadata abandoned for {}: file {} has different hash'.format(id, path)) - return None + if fine_grained_cache: + manager.log('Using stale metadata for {}: file {}'.format(id, path)) + return meta + else: + manager.log('Metadata abandoned for {}: file {} has different hash'.format( + id, path)) + return None else: # Optimization: update mtime and path (otherwise, this mismatch will reappear). 
meta = meta._replace(mtime=mtime, path=path) @@ -1208,7 +1212,7 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str], return meta # It's a match on (id, path, size, hash, mtime). - manager.trace('Metadata fresh for {}: file {}'.format(id, path)) + manager.log('Metadata fresh for {}: file {}'.format(id, path)) return meta @@ -1747,7 +1751,7 @@ def fix_cross_refs(self) -> None: # cache load because we need to gracefully handle missing modules. fixup_module(self.tree, self.manager.modules, self.manager.options.quick_and_dirty or - self.manager.use_fine_grained_cache()) + self.options.use_fine_grained_cache) def patch_dependency_parents(self) -> None: """ @@ -2338,12 +2342,17 @@ def dump_graph(graph: Graph) -> None: def load_graph(sources: List[BuildSource], manager: BuildManager, - old_graph: Optional[Graph] = None) -> Graph: + old_graph: Optional[Graph] = None, + new_modules: Optional[List[State]] = None) -> Graph: """Given some source files, load the full dependency graph. If an old_graph is passed in, it is used as the starting point and modified during graph loading. + If a new_modules is passed in, any modules that are loaded are + added to the list. This is an argument and not a return value + so that the caller can access it even if load_graph fails. + As this may need to parse files, this can raise CompileError in case there are syntax errors. """ @@ -2353,7 +2362,7 @@ def load_graph(sources: List[BuildSource], manager: BuildManager, # The deque is used to implement breadth-first traversal. # TODO: Consider whether to go depth-first instead. This may # affect the order in which we process files within import cycles. - new = collections.deque() # type: Deque[State] + new = new_modules if new_modules is not None else [] entry_points = set() # type: Set[str] # Seed the graph with the initial root sources. 
for bs in sources: @@ -2370,8 +2379,8 @@ def load_graph(sources: List[BuildSource], manager: BuildManager, new.append(st) entry_points.add(bs.module) # Collect dependencies. We go breadth-first. - while new: - st = new.popleft() + # More nodes might get added to new as we go, but that's fine. + for st in new: assert st.ancestors is not None # Strip out indirect dependencies. These will be dealt with # when they show up as direct dependencies, and there's a @@ -2550,7 +2559,7 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: # TODO: see if it's possible to determine if we need to process only a # _subset_ of the past SCCs instead of having to process them all. for prev_scc in fresh_scc_queue: - process_fresh_scc(graph, prev_scc, manager) + process_fresh_modules(graph, prev_scc, manager) fresh_scc_queue = [] size = len(scc) if size == 1: @@ -2574,21 +2583,10 @@ def process_fine_grained_cache_graph(graph: Graph, manager: BuildManager) -> Non """Finish loading everything for use in the fine-grained incremental cache""" # If we are running in fine-grained incremental mode with caching, - # we process all SCCs as fresh SCCs so that we have all of the symbol - # tables and fine-grained dependencies available. - # We fail the loading of any SCC that we can't load a meta for, so we - # don't have anything *but* fresh SCCs. - sccs = sorted_components(graph) - manager.log("Found %d SCCs; largest has %d nodes" % - (len(sccs), max(len(scc) for scc in sccs))) - - for ascc in sccs: - # Order the SCC's nodes using a heuristic. - # Note that ascc is a set, and scc is a list. - scc = order_ascc(graph, ascc) - process_fresh_scc(graph, scc, manager) - for id in scc: - graph[id].load_fine_grained_deps() + # we don't actually have much to do: just load the fine-grained + # deps. 
+ for id, state in graph.items(): + state.load_fine_grained_deps() def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_ALL) -> List[str]: @@ -2638,16 +2636,17 @@ def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_ALL) -> return [s for ss in sccs for s in order_ascc(graph, ss, pri_max)] -def process_fresh_scc(graph: Graph, scc: List[str], manager: BuildManager) -> None: - """Process the modules in one SCC from their cached data. +def process_fresh_modules(graph: Graph, modules: List[str], manager: BuildManager) -> None: + """Process the modules in one group of modules from their cached data. + This can be used to process an SCC of modules This involves loading the tree from JSON and then doing various cleanups. """ - for id in scc: + for id in modules: graph[id].load_tree() - for id in scc: + for id in modules: graph[id].fix_cross_refs() - for id in scc: + for id in modules: graph[id].patch_dependency_parents() diff --git a/mypy/fswatcher.py b/mypy/fswatcher.py index 8afb5e2d1f11..e3df12d7a2a5 100644 --- a/mypy/fswatcher.py +++ b/mypy/fswatcher.py @@ -75,7 +75,8 @@ def find_changed(self) -> Set[str]: # File is new. changed.add(path) self._update(path) - elif st.st_size != old.st_size or st.st_mtime != old.st_mtime: + # Round mtimes down, to match the mtimes we write to meta files + elif st.st_size != old.st_size or int(st.st_mtime) != int(old.st_mtime): # Only look for changes if size or mtime has changed as an # optimization, since calculating md5 is expensive. 
new_md5 = self.fs.md5(path) diff --git a/mypy/server/update.py b/mypy/server/update.py index c8c2bad57e06..e74e01738a22 100644 --- a/mypy/server/update.py +++ b/mypy/server/update.py @@ -116,11 +116,13 @@ import time import os.path from typing import ( - Dict, List, Set, Tuple, Iterable, Union, Optional, Mapping, NamedTuple, Callable + Dict, List, Set, Tuple, Iterable, Union, Optional, Mapping, NamedTuple, Callable, + Sequence, ) from mypy.build import ( BuildManager, State, BuildSource, BuildResult, Graph, load_graph, module_not_found, + process_fresh_modules, PRI_INDIRECT, DEBUG_FINE_GRAINED, ) from mypy.checker import DeferredNode @@ -154,12 +156,12 @@ def __init__(self, result: BuildResult) -> None: result: Result from the initialized build. The manager and graph will be taken over by this class. manager: State of the build (mutated by this class) - graph: Additional state of the build (only read to initialize state) + graph: Additional state of the build (mutated by this class) """ manager = result.manager self.manager = manager self.graph = result.graph - self.previous_modules = get_module_to_path_map(manager) + self.previous_modules = get_module_to_path_map(self.graph) self.deps = get_all_dependencies(manager, self.graph) self.previous_targets_with_errors = manager.errors.targets() self.previous_messages = result.errors[:] @@ -188,7 +190,7 @@ def update(self, those parts of other modules that are affected by the changes. Retain the existing ASTs and symbol tables of unaffected modules. - Create new graph with new State objects, but reuse original BuildManager. + Reuses original BuildManager and Graph. Args: changed_modules: Modules changed since the previous update/build; each is @@ -324,6 +326,10 @@ def update_module(self, previous_modules = self.previous_modules graph = self.graph + # If this is an already existing module, make sure that we have + # its tree loaded so that we can snapshot it for comparison. 
+ ensure_trees_loaded(manager, graph, [module]) + # Record symbol table snaphot of old version the changed module. old_snapshots = {} # type: Dict[str, Dict[str, SnapshotItem]] if module in manager.modules: @@ -336,7 +342,7 @@ def update_module(self, if isinstance(result, BlockedUpdate): # Blocking error -- just give up module, path, remaining, errors = result - self.previous_modules = get_module_to_path_map(manager) + self.previous_modules = get_module_to_path_map(graph) return remaining, (module, path), errors assert isinstance(result, NormalUpdate) # Work around #4124 module, path, remaining, tree = result @@ -348,7 +354,7 @@ def update_module(self, if not trigger.endswith('__>')] self.manager.log_fine_grained('triggered: %r' % sorted(filtered)) self.triggered.extend(triggered | self.previous_targets_with_errors) - collect_dependencies({module: tree}, self.deps, graph) + collect_dependencies([module], self.deps, graph) remaining += propagate_changes_using_dependencies( manager, graph, self.deps, triggered, {module}, @@ -356,16 +362,53 @@ def update_module(self, # Preserve state needed for the next update. self.previous_targets_with_errors.update(manager.errors.targets()) - self.previous_modules = get_module_to_path_map(manager) + self.previous_modules = get_module_to_path_map(graph) return remaining, (module, path), None +def find_unloaded_deps(manager: BuildManager, graph: Dict[str, State], + initial: Sequence[str]) -> List[str]: + """Find all the deps of the nodes in initial that haven't had their tree loaded. + + The key invariant here is that if a module is loaded, so are all + of their dependencies. This means that when we encounter a loaded + module, we don't need to explore its dependencies. (This + invariant is slightly violated when dependencies are added, which + can be handled by calling find_unloaded_deps directly on the new + dependencies.) 
+ """ + worklist = list(initial) + seen = set() # type: Set[str] + unloaded = [] + while worklist: + node = worklist.pop() + if node in seen or node not in graph: + continue + seen.add(node) + if node not in manager.modules: + ancestors = graph[node].ancestors or [] + worklist.extend(graph[node].dependencies + ancestors) + unloaded.append(node) + + return unloaded + + +def ensure_trees_loaded(manager: BuildManager, graph: Dict[str, State], + initial: Sequence[str]) -> None: + """Ensure that the modules in initial and their deps have loaded trees.""" + to_process = find_unloaded_deps(manager, graph, initial) + if to_process: + manager.log_fine_grained("Calling process_fresh_modules on set of size {} ({})".format( + len(to_process), to_process)) + process_fresh_modules(graph, to_process, manager) + + def get_all_dependencies(manager: BuildManager, graph: Dict[str, State]) -> Dict[str, Set[str]]: """Return the fine-grained dependency map for an entire build.""" # Deps for each module were computed during build() or loaded from the cache. deps = {} # type: Dict[str, Set[str]] - collect_dependencies(manager.modules, deps, graph) + collect_dependencies(graph, deps, graph) return deps @@ -413,40 +456,56 @@ def update_module_isolated(module: str, Returns a named tuple describing the result (see above for details). 
""" - if module not in manager.modules: + if module not in graph: manager.log_fine_grained('new module %r' % module) if not manager.fscache.isfile(path) or force_removed: delete_module(module, graph, manager) return NormalUpdate(module, path, [], None) - old_modules = dict(manager.modules) sources = get_sources(manager.fscache, previous_modules, [(module, path)]) if module in manager.missing_modules: manager.missing_modules.remove(module) + orig_module = module + orig_state = graph.get(module) + orig_tree = manager.modules.get(module) + + def restore(ids: List[str]) -> None: + # For each of the modules in ids, restore that id's old + # manager.modules and graphs entries. (Except for the original + # module, this means deleting them.) + for id in ids: + if id == orig_module and orig_tree: + manager.modules[id] = orig_tree + elif id in manager.modules: + del manager.modules[id] + if id == orig_module and orig_state: + graph[id] = orig_state + elif id in graph: + del graph[id] + + new_modules = [] # type: List[State] try: if module in graph: del graph[module] - load_graph(sources, manager, graph) + load_graph(sources, manager, graph, new_modules) except CompileError as err: # Parse error somewhere in the program -- a blocker assert err.module_with_blocker - if err.module_with_blocker != module: - # Blocker is in a fresh module. Delete the state of the original target module - # since it will be stale. - # - # TODO: It would be more efficient to store the original target module - path = manager.modules[module].path - del manager.modules[module] - remaining_modules = [(module, path)] - else: - remaining_modules = [] - return BlockedUpdate(err.module_with_blocker, path, remaining_modules, err.messages) + restore([module] + [st.id for st in new_modules]) + return BlockedUpdate(err.module_with_blocker, path, [], err.messages) + + # Reparsing the file may have brought in dependencies that we + # didn't have before. 
Make sure that they are loaded to restore + # the invariant that a module having a loaded tree implies that + # its dependencies do as well. + ensure_trees_loaded(manager, graph, graph[module].dependencies) # Find any other modules brought in by imports. - changed_modules = get_all_changed_modules(module, path, previous_modules, graph) + changed_modules = [(st.id, st.xpath) for st in new_modules] + # If there are multiple modules to process, only process one of them and return # the remaining ones to the caller. if len(changed_modules) > 1: @@ -455,12 +514,7 @@ def update_module_isolated(module: str, changed_modules.remove((module, path)) remaining_modules = changed_modules # The remaining modules haven't been processed yet so drop them. - for id, _ in remaining_modules: - if id in old_modules: - manager.modules[id] = old_modules[id] - else: - del manager.modules[id] - del graph[id] + restore([id for id, _ in remaining_modules]) manager.log_fine_grained('--> %r (newly imported)' % module) else: remaining_modules = [] @@ -474,17 +528,15 @@ def update_module_isolated(module: str, state.semantic_analysis() except CompileError as err: # There was a blocking error, so module AST is incomplete. Restore old modules. - manager.modules.clear() - manager.modules.update(old_modules) - del graph[module] + restore([module]) return BlockedUpdate(module, path, remaining_modules, err.messages) state.semantic_analysis_pass_three() state.semantic_analysis_apply_patches() # Merge old and new ASTs. assert state.tree is not None, "file must be at least parsed" - new_modules = {module: state.tree} # type: Dict[str, Optional[MypyFile]] - replace_modules_with_new_variants(manager, graph, old_modules, new_modules) + new_modules_dict = {module: state.tree} # type: Dict[str, Optional[MypyFile]] + replace_modules_with_new_variants(manager, graph, {orig_module: orig_tree}, new_modules_dict) # Perform type checking. 
state.type_checker().reset() @@ -555,9 +607,9 @@ def dedupe_modules(modules: List[Tuple[str, str]]) -> List[Tuple[str, str]]: return result -def get_module_to_path_map(manager: BuildManager) -> Dict[str, str]: - return {module: node.path - for module, node in manager.modules.items()} +def get_module_to_path_map(graph: Graph) -> Dict[str, str]: + return {module: node.xpath + for module, node in graph.items()} def get_sources(fscache: FileSystemCache, @@ -570,25 +622,11 @@ def get_sources(fscache: FileSystemCache, return sources -def get_all_changed_modules(root_module: str, - root_path: str, - old_modules: Dict[str, str], - new_graph: Dict[str, State]) -> List[Tuple[str, str]]: - changed_set = {root_module} - changed_modules = [(root_module, root_path)] - for st in new_graph.values(): - if st.id not in old_modules and st.id not in changed_set: - assert st.path - changed_set.add(st.id) - changed_modules.append((st.id, st.path)) - return changed_modules - - -def collect_dependencies(new_modules: Mapping[str, Optional[MypyFile]], +def collect_dependencies(new_modules: Iterable[str], deps: Dict[str, Set[str]], graph: Dict[str, State]) -> None: - for id, node in new_modules.items(): - if node is None: + for id in new_modules: + if id not in graph: continue for trigger, targets in graph[id].fine_grained_deps.items(): deps.setdefault(trigger, set()).update(targets) @@ -639,7 +677,7 @@ def calculate_active_triggers(manager: BuildManager, def replace_modules_with_new_variants( manager: BuildManager, graph: Dict[str, State], - old_modules: Dict[str, MypyFile], + old_modules: Dict[str, Optional[MypyFile]], new_modules: Dict[str, Optional[MypyFile]]) -> None: """Replace modules with newly builds versions. @@ -651,10 +689,10 @@ def replace_modules_with_new_variants( propagate_changes_using_dependencies). 
""" for id in new_modules: + preserved_module = old_modules.get(id) new_module = new_modules[id] - if id in old_modules and new_module is not None: - preserved_module = old_modules[id] - merge_asts(preserved_module, old_modules[id].names, + if preserved_module and new_module is not None: + merge_asts(preserved_module, preserved_module.names, new_module, new_module.names) manager.modules[id] = preserved_module graph[id].tree = preserved_module @@ -673,7 +711,7 @@ def propagate_changes_using_dependencies( a target that needs to be reprocessed but that has not been parsed yet.""" num_iter = 0 - remaining_modules = [] + remaining_modules = [] # type: List[Tuple[str, str]] # Propagate changes until nothing visible has changed during the last # iteration. @@ -682,11 +720,14 @@ def propagate_changes_using_dependencies( if num_iter > MAX_ITER: raise RuntimeError('Max number of iterations (%d) reached (endless loop?)' % MAX_ITER) - todo = find_targets_recursive(manager, triggered, deps, up_to_date_modules) + todo, unloaded = find_targets_recursive(manager, graph, + triggered, deps, up_to_date_modules) + # TODO: we sort to make it deterministic, but this is *incredibly* ad hoc + remaining_modules.extend((id, graph[id].xpath) for id in sorted(unloaded)) # Also process targets that used to have errors, as otherwise some # errors might be lost. for target in targets_with_errors: - id = module_prefix(manager.modules, target) + id = module_prefix(graph, target) if id is not None and id not in up_to_date_modules: if id not in todo: todo[id] = set() @@ -696,13 +737,7 @@ def propagate_changes_using_dependencies( # TODO: Preserve order (set is not optimal) for id, nodes in sorted(todo.items(), key=lambda x: x[0]): assert id not in up_to_date_modules - if manager.modules[id].is_cache_skeleton: - # We have only loaded the cache for this file, not the actual file, - # so we can't access the nodes to reprocess. - # Add it to the queue of files that need to be processed fully. 
- remaining_modules.append((id, manager.modules[id].path)) - else: - triggered |= reprocess_nodes(manager, graph, id, nodes, deps) + triggered |= reprocess_nodes(manager, graph, id, nodes, deps) # Changes elsewhere may require us to reprocess modules that were # previously considered up to date. For example, there may be a # dependency loop that loops back to an originally processed module. @@ -716,16 +751,21 @@ def propagate_changes_using_dependencies( def find_targets_recursive( manager: BuildManager, + graph: Graph, triggers: Set[str], deps: Dict[str, Set[str]], - up_to_date_modules: Set[str]) -> Dict[str, Set[DeferredNode]]: + up_to_date_modules: Set[str]) -> Tuple[Dict[str, Set[DeferredNode]], + Set[str]]: """Find names of all targets that need to reprocessed, given some triggers. - Returns: Dictionary from module id to a set of stale targets. + Returns: A tuple containing a: + * Dictionary from module id to a set of stale targets. + * A set of module ids for unparsed modules with stale targets. """ result = {} # type: Dict[str, Set[DeferredNode]] worklist = triggers processed = set() # type: Set[str] + unloaded_files = set() # type: Set[str] # Find AST nodes corresponding to each target. # @@ -738,20 +778,28 @@ def find_targets_recursive( if target.startswith('<'): worklist |= deps.get(target, set()) - processed else: - module_id = module_prefix(manager.modules, target) + module_id = module_prefix(graph, target) if module_id is None: # Deleted module. continue if module_id in up_to_date_modules: # Already processed. continue + if (module_id not in manager.modules + or manager.modules[module_id].is_cache_skeleton): + # We haven't actually parsed and checked the module, so we don't have + # access to the actual nodes. + # Add it to the queue of files that need to be processed fully. 
+ unloaded_files.add(module_id) + continue + if module_id not in result: result[module_id] = set() manager.log_fine_grained('process: %s' % target) deferred = lookup_target(manager, target) result[module_id].update(deferred) - return result + return (result, unloaded_files) def reprocess_nodes(manager: BuildManager, diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test index 0c4a720a7e9f..3fd141dd3b47 100644 --- a/test-data/unit/check-incremental.test +++ b/test-data/unit/check-incremental.test @@ -4266,6 +4266,23 @@ cache_fine_grained = True [rechecked a, builtins] [stale a, builtins] +[case testIncrementalPackageNameOverload] +# cmd: mypy -m main a +# flags: --follow-imports=skip +[file main.py] +from a import x +x.foo() +[file a/__init__.py] +pass +[file a/__init__.py.2] +x = 10 +[file a/x.py] +def foo() -> None: + pass +[out] +[out2] +tmp/main.py:2: error: "int" has no attribute "foo" + [case testBazelFlagIgnoresFileChanges] -- Since the initial run wrote a cache file, the second run ignores the source # flags: --bazel diff --git a/test-data/unit/fine-grained-modules.test b/test-data/unit/fine-grained-modules.test index 46ab441bae78..21777daf656e 100644 --- a/test-data/unit/fine-grained-modules.test +++ b/test-data/unit/fine-grained-modules.test @@ -1782,3 +1782,32 @@ reveal_type(b.x) == == a.py:2: error: Revealed type is 'builtins.int' + +[case testQualifiedSubpackage1] +[file c/__init__.py] +[file c/a.py] +from lurr import x +from c.d import f + +[file c/d.py] +def f() -> None: pass +def g(x: int) -> None: pass +[file lurr.py] +x = 10 +[file lurr.py.2] +x = '10' +[out] +== + +[case testImportedMissingSubpackage] +# flags: --follow-imports=skip --ignore-missing-imports +# cmd: mypy a.py b/__init__.py +[file a.py] +from b.foo import bar +x = 10 +[file b/__init__.py] +[file a.py.2] +from b.foo import bar +x = '10' +[out] +== diff --git a/test-data/unit/fine-grained.test b/test-data/unit/fine-grained.test index 
80c62df511c5..f05ab4be56d2 100644 --- a/test-data/unit/fine-grained.test +++ b/test-data/unit/fine-grained.test @@ -5504,7 +5504,7 @@ from typing import overload == main:3: error: Module has no attribute "f" -[case testOverloadsUpdatedTypeRecheckImplementation] +[case testOverloadsUpdatedTypeRecheckImplementation-skip] from typing import overload import mod class Outer: diff --git a/typeshed b/typeshed index d63866bd3646..2dc7d39af598 160000 --- a/typeshed +++ b/typeshed @@ -1 +1 @@ -Subproject commit d63866bd36467315a0415080b82923abb42b478f +Subproject commit 2dc7d39af598ea594a978bca81ba743cf20f96b2 From 2314b5408b5184626f1e9ec7c871ce97c753d75a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 24 Apr 2018 09:24:22 -0700 Subject: [PATCH 28/39] Another attempt at fixing --package-root on Windows --- mypy/main.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/mypy/main.py b/mypy/main.py index 83bc11b1918e..3a072e72463a 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -640,13 +640,25 @@ def add_invertible_flag(flag: str, # Validate and normalize --package-root. if options.package_root: + # Do some stuff with drive letters to make Windows happy (esp. tests). + current_drive, _ = os.path.splitdrive(os.getcwd()) + dot = os.curdir + dotslash = os.curdir + os.sep + dotdotslash = os.pardir + os.sep + trivial_paths = {dot, dotslash} package_root = [] for root in options.package_root: if os.path.isabs(root): parser.error("Package root cannot be absolute: %r" % root) - drive, root = os.path.splitdrive(root) # Ignore Windows drive name + drive, root = os.path.splitdrive(root) + if drive != current_drive: + parser.error("Package root must be on current drive: %r" % (drive + root)) + # Empty package root is always okay. if root: - if root in (os.curdir, os.curdir + os.sep): + root = os.path.relpath(root) # Normalize the heck out of it. 
+ if root.startswith(dotdotslash): + parser.error("Package root cannot be above current directory: %r" % root) + if root in trivial_paths: root = '' elif not root.endswith(os.sep): root = root + os.sep From a89e7a46419f28d258c09b81076644d2942fab0e Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 24 Apr 2018 14:27:00 -0700 Subject: [PATCH 29/39] Make Windows tests pass by judicious use of os.path.normpath() Also cleaned up a fe things around catching OSError. --- mypy/fscache.py | 15 +++++++++------ mypy/main.py | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/mypy/fscache.py b/mypy/fscache.py index d4fa537c36bd..39dcf5ca6471 100644 --- a/mypy/fscache.py +++ b/mypy/fscache.py @@ -60,7 +60,7 @@ def stat(self, path: str) -> os.stat_result: try: st = os.stat(path) except OSError as err: - if isinstance(err, OSError) and self.init_under_package_root(path): + if self.init_under_package_root(path): try: return self._fake_init(path) except OSError: @@ -87,6 +87,7 @@ def init_under_package_root(self, path: str) -> bool: return False ok = False drive, path = os.path.splitdrive(path) # Ignore Windows drive name + path = os.path.normpath(path) for root in self.package_root: if path.startswith(root): if path == root + basename: @@ -98,7 +99,7 @@ def init_under_package_root(self, path: str) -> bool: return ok def _fake_init(self, path: str) -> os.stat_result: - dirname = os.path.dirname(path) or os.curdir + dirname = os.path.normpath(os.path.dirname(path)) st = self.stat(dirname) # May raise OSError # Get stat result as a sequence so we can modify it. # (Alas, typeshed's os.stat_result is not a sequence yet.) 
@@ -115,9 +116,10 @@ def _fake_init(self, path: str) -> os.stat_result: return st def listdir(self, path: str) -> List[str]: + path = os.path.normpath(path) if path in self.listdir_cache: res = self.listdir_cache[path] - if os.path.normpath(path) in self.fake_package_cache and '__init__.py' not in res: + if path in self.fake_package_cache and '__init__.py' not in res: res.append('__init__.py') # Updates the result as well as the cache return res if path in self.listdir_error_cache: @@ -200,14 +202,15 @@ def read(self, path: str) -> bytes: # earlier instant than the mtime reported by self.stat(). self.stat(path) - if (os.path.basename(path) == '__init__.py' - and os.path.dirname(path) in self.fake_package_cache): + dirname, basename = os.path.split(path) + dirname = os.path.normpath(dirname) + if basename == '__init__.py' and dirname in self.fake_package_cache: data = b'' else: try: with open(path, 'rb') as f: data = f.read() - except Exception as err: + except OSError as err: self.read_error_cache[path] = err raise diff --git a/mypy/main.py b/mypy/main.py index 3a072e72463a..7683b6f7d3a4 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -651,7 +651,7 @@ def add_invertible_flag(flag: str, if os.path.isabs(root): parser.error("Package root cannot be absolute: %r" % root) drive, root = os.path.splitdrive(root) - if drive != current_drive: + if drive and drive != current_drive: parser.error("Package root must be on current drive: %r" % (drive + root)) # Empty package root is always okay. 
if root: From 052fa08a4c74eb4e4f171007f3641013c820249c Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 25 Apr 2018 11:22:07 -0700 Subject: [PATCH 30/39] Change the way FileSystemCache gets the package_root --- mypy/build.py | 2 +- mypy/find_sources.py | 2 +- mypy/fscache.py | 12 +++++++----- mypy/main.py | 10 +++++++++- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 27682f6edc8b..1432c96823fd 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -212,7 +212,7 @@ def _build(sources: List[BuildSource], gc.set_threshold(50000) data_dir = default_data_dir(bin_dir) - fscache = fscache or FileSystemCache(package_root=options.package_root) + fscache = fscache or FileSystemCache() # Determine the default module search path. lib_path = default_lib_path(data_dir, diff --git a/mypy/find_sources.py b/mypy/find_sources.py index b4da542d8024..d9fac7dd5767 100644 --- a/mypy/find_sources.py +++ b/mypy/find_sources.py @@ -23,7 +23,7 @@ def create_source_list(files: Sequence[str], options: Options, Raises InvalidSourceList on errors. """ - fscache = fscache or FileSystemCache(package_root=options.package_root) + fscache = fscache or FileSystemCache() finder = SourceFinder(fscache) targets = [] diff --git a/mypy/fscache.py b/mypy/fscache.py index 54ab9e1863d4..c8a9129912ce 100644 --- a/mypy/fscache.py +++ b/mypy/fscache.py @@ -36,12 +36,15 @@ class FileSystemCache: - def __init__(self, package_root: Optional[List[str]] = None) -> None: - if package_root is None: - package_root = [] - self.package_root = package_root + def __init__(self) -> None: + # The package root is not flushed with the caches. + # It is set by set_package_root() below. 
+        self.package_root = []  # type: List[str]
         self.flush()
 
+    def set_package_root(self, package_root: List[str]) -> None:
+        self.package_root = package_root
+
     def flush(self) -> None:
         """Start another transaction and empty all caches."""
         self.stat_cache = {}  # type: Dict[str, os.stat_result]
@@ -53,7 +56,6 @@ def flush(self) -> None:
         self.read_error_cache = {}  # type: Dict[str, Exception]
         self.hash_cache = {}  # type: Dict[str, str]
         self.fake_package_cache = set()  # type: Set[str]
-        self.cwd = os.getcwd()
 
     def stat(self, path: str) -> os.stat_result:
         if path in self.stat_cache:
diff --git a/mypy/main.py b/mypy/main.py
index 17a318efc84f..67be83f268db 100644
--- a/mypy/main.py
+++ b/mypy/main.py
@@ -303,7 +303,11 @@ def process_options(args: List[str],
                     server_options: bool = False,
                     fscache: Optional[FileSystemCache] = None,
                     ) -> Tuple[List[BuildSource], Options]:
-    """Parse command line arguments."""
+    """Parse command line arguments.
+
+    If a FileSystemCache is passed in, and package_root options are given,
+    call fscache.set_package_root() to set the cache's package root.
+    """
     parser = argparse.ArgumentParser(prog='mypy',
                                      epilog=FOOTER,
                                      fromfile_prefix_chars='@',
@@ -646,6 +650,8 @@ def add_invertible_flag(flag: str,
 
     # Validate and normalize --package-root.
     if options.package_root:
+        if fscache is None:
+            parser.error("--package-root does not work here (no fscache)")
         # Do some stuff with drive letters to make Windows happy (esp. tests).
         current_drive, _ = os.path.splitdrive(os.getcwd())
         dot = os.curdir
@@ -670,6 +676,8 @@ def add_invertible_flag(flag: str,
                 root = root + os.sep
             package_root.append(root)
         options.package_root = package_root
+        # Pass the package root on to the filesystem cache.
+        fscache.set_package_root(package_root)
 
     # Parse cache map.
if special_opts.cache_map: From 05a448da4183f0eadaa8afe1a722399cd02c5d2b Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 25 Apr 2018 13:11:23 -0700 Subject: [PATCH 31/39] Avoid mypy error (see https://github.com/python/typeshed/issues/2081) --- mypy/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mypy/main.py b/mypy/main.py index 67be83f268db..fac063e6240f 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -652,6 +652,7 @@ def add_invertible_flag(flag: str, if options.package_root: if fscache is None: parser.error("--package-root does not work here (no fscache)") + assert fscache is not None # Since mypy doesn't know parser.error() raises. # Do some stuff with drive letters to make Windows happy (esp. tests). current_drive, _ = os.path.splitdrive(os.getcwd()) dot = os.curdir From aa5facd2e8d6baa3b98f4c77acf18068ea7868c7 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 2 May 2018 14:50:45 -0700 Subject: [PATCH 32/39] Fix fscache.py bug: only generate fake __init__.py, not .pyi --- mypy/fscache.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mypy/fscache.py b/mypy/fscache.py index c8a9129912ce..650baf08f943 100644 --- a/mypy/fscache.py +++ b/mypy/fscache.py @@ -81,7 +81,7 @@ def init_under_package_root(self, path: str) -> bool: if not self.package_root: return False dirname, basename = os.path.split(path) - if basename not in ('__init__.py', '__init__.pyi'): + if basename != '__init__.py': return False try: st = self.stat(dirname) @@ -104,7 +104,10 @@ def init_under_package_root(self, path: str) -> bool: return ok def _fake_init(self, path: str) -> os.stat_result: - dirname = os.path.normpath(os.path.dirname(path)) + dirname, basename = os.path.split(path) + assert basename == '__init__.py', path + assert not os.path.exists(path), path # Not cached! + dirname = os.path.normpath(dirname) st = self.stat(dirname) # May raise OSError # Get stat result as a sequence so we can modify it. 
# (Alas, typeshed's os.stat_result is not a sequence yet.) From 7884ed48fc893d36a396a0842df9a8dcf2030062 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 7 May 2018 16:41:32 -0700 Subject: [PATCH 33/39] Avoid updating hash in cache meta --- mypy/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypy/build.py b/mypy/build.py index 393444ceb6d3..b5e9f27fd495 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1257,7 +1257,7 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str], return None mtime = 0 if bazel else int(st.st_mtime) - if mtime != meta.mtime or path != meta.path: + if not bazel and (mtime != meta.mtime or path != meta.path): try: source_hash = manager.fscache.md5(path) except (OSError, UnicodeDecodeError, DecodeError): From 3d7f63390ca61581afa1a50018e6ff919d50b3f1 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 29 May 2018 14:53:02 -0700 Subject: [PATCH 34/39] Respond to code review - Remove dead code - Refactor getmtime() and abspath()/relpath() into BuildManager methods - Explain why we set tree.path (always setting it, I think that's fine) - Add docstrings to init_under_package_root() and _fake_init() - Add comments where fake_package_cache is checked - Refactor processing of package_root and cache_map options --- mypy/build.py | 63 +++++++++++++++++----------- mypy/fscache.py | 31 ++++++++++++++ mypy/main.py | 108 +++++++++++++++++++++++++++--------------------- 3 files changed, 129 insertions(+), 73 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index a6f485b59806..723c0c9d1170 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -71,19 +71,9 @@ PYTHON_EXTENSIONS = ['.pyi', '.py'] -if os.altsep: - SEPARATORS = frozenset([os.sep, os.altsep]) -else: - SEPARATORS = frozenset([os.sep]) - - Graph = Dict[str, 'State'] -def getmtime(name: str) -> int: - return int(os.path.getmtime(name)) - - # TODO: Get rid of BuildResult. We might as well return a BuildManager. 
class BuildResult: """The result of a successful build. @@ -698,6 +688,32 @@ def maybe_swap_for_shadow_path(self, path: str) -> str: def get_stat(self, path: str) -> os.stat_result: return self.fscache.stat(self.maybe_swap_for_shadow_path(path)) + def getmtime(self, path: str) -> int: + """Return a file's mtime; but 0 in bazel mode. + + (Bazel's distributed cache doesn't like filesystem metadata to + end up in output files.) + """ + if self.options.bazel: + return 0 + else: + st = self.get_stat(path) + return int(st.st_mtime) + + def normpath(self, path: str) -> str: + """Convert path to absolute; but to relative in bazel mode. + + (Bazel's distributed cache doesn't like filesystem metadata to + end up in output files.) + """ + # TODO: Could we always use relpath? (A worry in non-bazel + # mode would be that a moved file may change its full module + # name without changing its size, mtime or hash.) + if self.options.bazel: + return os.path.relpath(path) + else: + return os.path.abspath(path) + def all_imported_modules_in_file(self, file: MypyFile) -> List[Tuple[int, str, int]]: """Find all reachable import statements in a file. @@ -1250,19 +1266,19 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str], assert path is not None, "Internal error: meta was provided without a path" # Check data_json; assume if its mtime matches it's good. 
# TODO: stat() errors - data_mtime = 0 if bazel else getmtime(meta.data_json) + data_mtime = manager.getmtime(meta.data_json) if data_mtime != meta.data_mtime: manager.log('Metadata abandoned for {}: data cache is modified'.format(id)) return None deps_mtime = None if manager.options.cache_fine_grained: assert meta.deps_json - deps_mtime = getmtime(meta.deps_json) + deps_mtime = manager.getmtime(meta.deps_json) if deps_mtime != meta.deps_mtime: manager.log('Metadata abandoned for {}: deps cache is modified'.format(id)) return None - path = os.path.relpath(path) if bazel else os.path.abspath(path) + path = manager.normpath(path) try: st = manager.get_stat(path) except OSError: @@ -1287,10 +1303,12 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str], fine_grained_cache = manager.use_fine_grained_cache() size = st.st_size + # Bazel ensures the cache is valid. if size != meta.size and not bazel and not fine_grained_cache: manager.log('Metadata abandoned for {}: file {} has different size'.format(id, path)) return None + # Bazel ensures the cache is valid. mtime = 0 if bazel else int(st.st_mtime) if not bazel and (mtime != meta.mtime or path != meta.path): try: @@ -1391,14 +1409,9 @@ def write_cache(id: str, path: str, tree: MypyFile, # For Bazel we use relative paths and zero mtimes. bazel = manager.options.bazel - # Obtain file paths - if not bazel: - # Normally, make all paths absolute. - path = os.path.abspath(path) - else: - # For Bazel, make all paths relative (else Bazel caching is thwarted). - # And also patch tree.path, which might have been made absolute earlier. - tree.path = path = os.path.relpath(path) + # Obtain file paths. Update tree.path so that in bazel mode it's + # made relative (since sometimes paths leak out). 
+ tree.path = path = manager.normpath(path) meta_json, data_json, deps_json = get_cache_names(id, path, manager) manager.log('Writing {} {} {} {} {}'.format( id, path, meta_json, data_json, deps_json)) @@ -1434,7 +1447,7 @@ def write_cache(id: str, path: str, tree: MypyFile, if old_interface_hash == interface_hash: # If the interface is unchanged, the cached data is guaranteed # to be equivalent, and we only need to update the metadata. - data_mtime = 0 if bazel else getmtime(data_json) + data_mtime = manager.getmtime(data_json) manager.trace("Interface for {} is unchanged".format(id)) else: manager.trace("Interface for {} has changed".format(id)) @@ -1451,7 +1464,7 @@ def write_cache(id: str, path: str, tree: MypyFile, # Both have the effect of slowing down the next run a # little bit due to an out-of-date cache file. return interface_hash, None - data_mtime = 0 if bazel else getmtime(data_json) + data_mtime = manager.getmtime(data_json) deps_mtime = None if deps_json: @@ -1459,7 +1472,7 @@ def write_cache(id: str, path: str, tree: MypyFile, if not atomic_write(deps_json, deps_str, '\n'): manager.log("Error writing deps JSON file {}".format(deps_json)) return interface_hash, None - deps_mtime = getmtime(deps_json) + deps_mtime = manager.getmtime(deps_json) mtime = 0 if bazel else int(st.st_mtime) size = st.st_size @@ -1501,7 +1514,7 @@ def delete_cache(id: str, path: str, manager: BuildManager) -> None: This avoids inconsistent states with cache files from different mypy runs, see #4043 for an example. 
""" - path = os.path.relpath(path) if manager.options.bazel else os.path.abspath(path) + path = manager.normpath(path) cache_paths = get_cache_names(id, path, manager) manager.log('Deleting {} {} {}'.format(id, path, " ".join(x for x in cache_paths if x))) diff --git a/mypy/fscache.py b/mypy/fscache.py index f73d74048880..48dad8495908 100644 --- a/mypy/fscache.py +++ b/mypy/fscache.py @@ -78,6 +78,27 @@ def stat(self, path: str) -> os.stat_result: return st def init_under_package_root(self, path: str) -> bool: + """Is this path an __init__.py under a package root? + + This is used to detect packages that don't contain __init__.py + files, which is needed to support Bazel. The function should + only be called for non-existing files. + + It will return True if it refers to a __init__.py file that + Bazel would create, so that at runtime Python would think the + directory containing it is a package. For this to work you + must pass one or more package roots using the --package-root + flag. + + As an exceptional case, any directory that is a package root + itself will not be considered to contain a __init__.py file. + This is different from the rules Bazel itself applies, but is + necessary for mypy to properly distinguish packages from other + directories. + + See https://docs.bazel.build/versions/master/be/python.html, + where this behavior is described under legacy_create_init. + """ if not self.package_root: return False dirname, basename = os.path.split(path) @@ -104,6 +125,12 @@ def init_under_package_root(self, path: str) -> bool: return ok def _fake_init(self, path: str) -> os.stat_result: + """Prime the cache with a fake __init__.py file. + + This makes code that looks for path believe an empty file by + that name exists. Should only be called after + init_under_package_root() returns True. + """ dirname, basename = os.path.split(path) assert basename == '__init__.py', path assert not os.path.exists(path), path # Not cached! 
@@ -120,6 +147,7 @@ def _fake_init(self, path: str) -> os.stat_result: tpl = tuple(seq) st = os.stat_result(tpl) self.stat_cache[path] = st + # Make listdir() and read() also pretend this file exists. self.fake_package_cache.add(dirname) return st @@ -127,6 +155,7 @@ def listdir(self, path: str) -> List[str]: path = os.path.normpath(path) if path in self.listdir_cache: res = self.listdir_cache[path] + # Check the fake cache. if path in self.fake_package_cache and '__init__.py' not in res: res.append('__init__.py') # Updates the result as well as the cache return res @@ -139,6 +168,7 @@ def listdir(self, path: str) -> List[str]: self.listdir_error_cache[path] = copy_os_error(err) raise err self.listdir_cache[path] = results + # Check the fake cache. if path in self.fake_package_cache and '__init__.py' not in results: results.append('__init__.py') return results @@ -199,6 +229,7 @@ def read(self, path: str) -> bytes: dirname, basename = os.path.split(path) dirname = os.path.normpath(dirname) + # Check the fake cache. if basename == '__init__.py' and dirname in self.fake_package_cache: data = b'' else: diff --git a/mypy/main.py b/mypy/main.py index 1069c2393ad3..9ceb9743ecf4 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -650,56 +650,13 @@ def add_invertible_flag(flag: str, report_dir = val options.report_dirs[report_type] = report_dir - # Validate and normalize --package-root. + # Process --package-root. if options.package_root: - if fscache is None: - parser.error("--package-root does not work here (no fscache)") - assert fscache is not None # Since mypy doesn't know parser.error() raises. - # Do some stuff with drive letters to make Windows happy (esp. tests). 
- current_drive, _ = os.path.splitdrive(os.getcwd()) - dot = os.curdir - dotslash = os.curdir + os.sep - dotdotslash = os.pardir + os.sep - trivial_paths = {dot, dotslash} - package_root = [] - for root in options.package_root: - if os.path.isabs(root): - parser.error("Package root cannot be absolute: %r" % root) - drive, root = os.path.splitdrive(root) - if drive and drive != current_drive: - parser.error("Package root must be on current drive: %r" % (drive + root)) - # Empty package root is always okay. - if root: - root = os.path.relpath(root) # Normalize the heck out of it. - if root.startswith(dotdotslash): - parser.error("Package root cannot be above current directory: %r" % root) - if root in trivial_paths: - root = '' - elif not root.endswith(os.sep): - root = root + os.sep - package_root.append(root) - options.package_root = package_root - # Pass the package root on the the filesystem cache. - fscache.set_package_root(package_root) - - # Parse cache map. + process_package_roots(fscache, parser, options) + + # Process --cache-map. if special_opts.cache_map: - n = len(special_opts.cache_map) - if n % 3 != 0: - parser.error("--cache-map requires one or more triples (see source)") - for i in range(0, n, 3): - source, meta_file, data_file = special_opts.cache_map[i:i + 3] - if source in options.cache_map: - parser.error("Duplicate --cache-map source %s)" % source) - if not source.endswith('.py') and not source.endswith('.pyi'): - parser.error("Invalid --cache-map source %s (triple[0] must be *.py[i])" % source) - if not meta_file.endswith('.meta.json'): - parser.error("Invalid --cache-map meta_file %s (triple[1] must be *.meta.json)" % - meta_file) - if not data_file.endswith('.data.json'): - parser.error("Invalid --cache-map data_file %s (triple[2] must be *.data.json)" % - data_file) - options.cache_map[source] = (meta_file, data_file) + process_cache_map(parser, special_opts, options) # Let quick_and_dirty imply incremental. 
if options.quick_and_dirty: @@ -734,6 +691,61 @@ def add_invertible_flag(flag: str, return targets, options +def process_package_roots(fscache: Optional[FileSystemCache], + parser: argparse.ArgumentParser, + options: Options) -> None: + """Validate and normalize package_root.""" + if fscache is None: + parser.error("--package-root does not work here (no fscache)") + assert fscache is not None # Since mypy doesn't know parser.error() raises. + # Do some stuff with drive letters to make Windows happy (esp. tests). + current_drive, _ = os.path.splitdrive(os.getcwd()) + dot = os.curdir + dotslash = os.curdir + os.sep + dotdotslash = os.pardir + os.sep + trivial_paths = {dot, dotslash} + package_root = [] + for root in options.package_root: + if os.path.isabs(root): + parser.error("Package root cannot be absolute: %r" % root) + drive, root = os.path.splitdrive(root) + if drive and drive != current_drive: + parser.error("Package root must be on current drive: %r" % (drive + root)) + # Empty package root is always okay. + if root: + root = os.path.relpath(root) # Normalize the heck out of it. + if root.startswith(dotdotslash): + parser.error("Package root cannot be above current directory: %r" % root) + if root in trivial_paths: + root = '' + elif not root.endswith(os.sep): + root = root + os.sep + package_root.append(root) + options.package_root = package_root + # Pass the package root on the the filesystem cache. 
+ fscache.set_package_root(package_root) + + +def process_cache_map(parser, special_opts, options): + """Validate cache_map and copy into options.cache_map.""" + n = len(special_opts.cache_map) + if n % 3 != 0: + parser.error("--cache-map requires one or more triples (see source)") + for i in range(0, n, 3): + source, meta_file, data_file = special_opts.cache_map[i:i + 3] + if source in options.cache_map: + parser.error("Duplicate --cache-map source %s)" % source) + if not source.endswith('.py') and not source.endswith('.pyi'): + parser.error("Invalid --cache-map source %s (triple[0] must be *.py[i])" % source) + if not meta_file.endswith('.meta.json'): + parser.error("Invalid --cache-map meta_file %s (triple[1] must be *.meta.json)" % + meta_file) + if not data_file.endswith('.data.json'): + parser.error("Invalid --cache-map data_file %s (triple[2] must be *.data.json)" % + data_file) + options.cache_map[source] = (meta_file, data_file) + + # For most options, the type of the default value set in options.py is # sufficient, and we don't have to do anything here. This table # exists to specify types for values initialized to None or container From 210de9c9246706f156f8d4cd4650a05e63bcde65 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 29 May 2018 15:19:09 -0700 Subject: [PATCH 35/39] Don't force tree.path absolute in non-bazel -- it breaks Windows --- mypy/build.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 723c0c9d1170..23f72874ecde 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1409,13 +1409,17 @@ def write_cache(id: str, path: str, tree: MypyFile, # For Bazel we use relative paths and zero mtimes. bazel = manager.options.bazel - # Obtain file paths. Update tree.path so that in bazel mode it's - # made relative (since sometimes paths leak out). - tree.path = path = manager.normpath(path) + # Obtain file paths. 
+ path = manager.normpath(path) meta_json, data_json, deps_json = get_cache_names(id, path, manager) manager.log('Writing {} {} {} {} {}'.format( id, path, meta_json, data_json, deps_json)) + # Update tree.path so that in bazel mode it's made relative (since + # sometimes paths leak out). + if bazel: + tree.path = path + # Make sure directory for cache files exists parent = os.path.dirname(data_json) assert os.path.dirname(meta_json) == parent From 15b7d877ab4a89eb05be06cf841a3f45e9de4212 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 29 May 2018 15:51:00 -0700 Subject: [PATCH 36/39] Add annotations to new functions, to make self-test pass --- mypy/main.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mypy/main.py b/mypy/main.py index 9ceb9743ecf4..9dec5cda0ce0 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -726,7 +726,9 @@ def process_package_roots(fscache: Optional[FileSystemCache], fscache.set_package_root(package_root) -def process_cache_map(parser, special_opts, options): +def process_cache_map(parser: argparse.ArgumentParser, + special_opts: argparse.Namespace, + options: Options): """Validate cache_map and copy into options.cache_map.""" n = len(special_opts.cache_map) if n % 3 != 0: From 4135e63d1e60f0c2c0d9f0f2e426fb7e511df14c Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 29 May 2018 15:59:48 -0700 Subject: [PATCH 37/39] Hrmpf --- mypy/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypy/main.py b/mypy/main.py index 9dec5cda0ce0..0980ec3186d3 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -728,7 +728,7 @@ def process_package_roots(fscache: Optional[FileSystemCache], def process_cache_map(parser: argparse.ArgumentParser, special_opts: argparse.Namespace, - options: Options): + options: Options) -> None: """Validate cache_map and copy into options.cache_map.""" n = len(special_opts.cache_map) if n % 3 != 0: From 3bd329efee24775f29f6ed1a692d72a46cf1f2d9 Mon Sep 17 00:00:00 2001 From: 
Guido van Rossum Date: Fri, 1 Jun 2018 18:53:13 -0700 Subject: [PATCH 38/39] Fix bad merge --- test-data/unit/cmdline.test | 1 + 1 file changed, 1 insertion(+) diff --git a/test-data/unit/cmdline.test b/test-data/unit/cmdline.test index bbf67c1f9624..3e5050636f58 100644 --- a/test-data/unit/cmdline.test +++ b/test-data/unit/cmdline.test @@ -1192,6 +1192,7 @@ warn_unused_configs = True [file spam/eggs.py] [out] Warning: unused section(s) in mypy.ini: [mypy-bar], [mypy-baz.*], [mypy-emarg.*], [mypy-emarg.hatch], [mypy-a.*.c], [mypy-a.x.b] +== Return code: 0 [case testConfigUnstructuredGlob] # cmd: mypy emarg foo From 389c9081eee3512c842eac359711b56b58c9441c Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 2 Jun 2018 22:02:37 -0700 Subject: [PATCH 39/39] Hopefully fix flaky tests (don't use the fscache in getmtime()) --- mypy/build.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index a562f4f01e83..27b960e35b8f 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -697,8 +697,7 @@ def getmtime(self, path: str) -> int: if self.options.bazel: return 0 else: - st = self.get_stat(path) - return int(st.st_mtime) + return int(os.path.getmtime(path)) def normpath(self, path: str) -> str: """Convert path to absolute; but to relative in bazel mode.