From 0495cedca5680e2f678c396b2c30953bd6f43cec Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Mon, 8 Aug 2016 21:00:03 -0700 Subject: [PATCH 1/7] Add tests for interface-based incremental checks This commit modifies the incremental tests so that changes that don't change the "public interface" of the file will not trigger a recheck in any dependees. --- test-data/unit/check-incremental.test | 366 +++++++++++++++++++++++++- test-data/unit/fixtures/dict.py | 3 +- 2 files changed, 367 insertions(+), 2 deletions(-) diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test index c45c31812f7c..506e10c8bb79 100644 --- a/test-data/unit/check-incremental.test +++ b/test-data/unit/check-incremental.test @@ -57,7 +57,67 @@ def func3() -> None: pass [out] -[case testIncrementalSimpleImportCascade] +[case testIncrementalInternalChangeOnly] +import mod1 +mod1.func1() + +[file mod1.py] +import mod2 +def func1() -> None: mod2.func2() + +[file mod2.py] +import mod3 +def func2() -> None: mod3.func3() + +[file mod3.py] +def func3() -> None: pass + +[file mod3.py.next] +def func3() -> None: 3 + 2 + +[stale mod3] +[out] + + +[case testIncrementalImportGone] +import mod1 + +[file mod1.py] +from mod2 import A +def func1() -> A: pass + +[file mod2.py] +class A: pass + +[file mod1.py.next] +def func1() -> A: pass + +[stale mod1] +[out] +main:1: note: In module imported here: +tmp/mod1.py: note: In function "func1": +tmp/mod1.py:1: error: Name 'A' is not defined + + +[case testIncrementalSameNameChange] +import mod1 + +[file mod1.py] +from mod2 import A +def func1() -> A: pass + +[file mod2.py] +class A: pass + +[file mod2.py.next] +class Parent: pass +class A(Parent): pass + +[stale mod1, mod2] +[out] + + +[case testIncrementalPartialInterfaceChange] import mod1 mod1.func1() @@ -75,9 +135,313 @@ def func3() -> None: pass [file mod3.py.next] def func3() -> int: return 2 +# TODO: ideally, only mod2 and mod3 would be stale, but we treat any +# changes in our 
dependencies as an interface change, so mod1 is +# rechecked as well. [stale mod1, mod2, mod3] [out] +[case testIncrementalInternalScramble] +import mod1 + +[file mod1.py] +import mod2 +mod2.foo() + +[file mod2.py] +def baz() -> int: + return 3 + +def bar() -> int: + return baz() + +def foo() -> int: + return bar() + +[file mod2.py.next] +def foo() -> int: + return baz() + +def bar() -> int: + return bar() + +def baz() -> int: + return 42 +[stale mod2] +[out] + +[case testIncrementalMethodInterfaceChange] +import mod1 + +[file mod1.py] +import mod2 + +[file mod2.py] +class Foo: + def bar(self, a: str) -> str: + return "a" + +[file mod2.py.next] +class Foo: + def bar(self, a: float) -> str: + return "a" + +[stale mod1, mod2] +[out] + +[case testIncrementalBaseClassChange] +import mod1 + +[file mod1.py] +from mod2 import Child +Child().good_method() + +[file mod2.py] +class Good: + def good_method(self) -> int: return 1 +class Bad: pass +class Child(Good): pass + +[file mod2.py.next] +class Good: + def good_method(self) -> int: return 1 +class Bad: pass +class Child(Bad): pass + +[stale mod1, mod2] +[out] +main:1: note: In module imported here: +tmp/mod1.py:2: error: "Child" has no attribute "good_method" + +[case testIncrementalCascadingChange] +import mod1 + +[file mod1.py] +from mod2 import A + +[file mod2.py] +from mod3 import B +A = B + +[file mod3.py] +from mod4 import C +B = C + +[file mod4.py] +C = 3 + +[file mod4.py.next] +C = "A" + +[stale mod1, mod2, mod3, mod4] +[out] + +[case testIncrementalRemoteChange] +import mod1 + +[file mod1.py] +import mod2 +def accepts_int(a: int) -> None: pass +accepts_int(mod2.mod3.mod4.const) + +[file mod2.py] +import mod3 + +[file mod3.py] +import mod4 + +[file mod4.py] +const = 3 + +[file mod4.py.next] +const = "foo" + +[stale mod1, mod2, mod3, mod4] +[out] +main:1: note: In module imported here: +tmp/mod1.py:3: error: Argument 1 to "accepts_int" has incompatible type "str"; expected "int" + +[case testIncrementalBadChange] 
+import mod1 + +[file mod1.py] +from mod2 import func2 + +def func1() -> int: + return func2() + +[file mod2.py] +def func2() -> int: + return 1 + +[file mod2.py.next] +def func2() -> str: + return "foo" + +[stale mod1, mod2] +[out] +main:1: note: In module imported here: +tmp/mod1.py: note: In function "func1": +tmp/mod1.py:4: error: Incompatible return value type (got "str", expected "int") + +[case testIncrementalWithComplexDictExpression] +import mod1 + +[file mod1.py] +import mod1_private + +[file mod1_private.py] +my_dict = { + 'a': [1, 2, 3], + 'b': [4, 5, 6] +} + +[file mod1_private.py.next] +my_dict = { + 'a': [1, 2, 3], + 'b': [4, 5, 'a'] +} + +[stale mod1, mod1_private] +[builtins fixtures/dict.py] +[out] + +[case testIncrementalWithComplexConstantExpressionNoAnnotation] +import mod1 + +[file mod1.py] +import mod1_private + +[file mod1_private.py] +def foo() -> int: return 1 +def bar() -> int: return 2 +baz = 1 + foo() + +[file mod1_private.py.next] +def foo() -> int: return 1 +def bar() -> int: return 2 +baz = 1 + bar() +# Ideally, this wouldn't count as an interface change, +# but the interface checker isn't easily able to deduce +# the type of this constant. +[stale mod1, mod1_private] +[out] + +[case testIncrementalWithComplexConstantExpressionWithAnnotation] +import mod1 + +[file mod1.py] +import mod1_private + +[file mod1_private.py] +def foo() -> int: return 1 +def bar() -> int: return 2 +baz = 1 + foo() # type: int + +[file mod1_private.py.next] +def foo() -> int: return 1 +def bar() -> int: return 2 +baz = 1 + bar() # type: int +# However, if the constant expressions are annotated, we +# _can_ currently deduce the type. 
+[stale mod1_private] +[out] + +[case testIncrementalWithDecorators] +import mod1 + +[file mod1.py] +import mod1_private +def accepts_int(a: int) -> None: pass +accepts_int(mod1_private.some_func(12)) + +[file mod1_private.py] +from typing import Callable +def multiply(f: Callable[[int], int]) -> Callable[[int], int]: + return lambda a: f(a) * 10 + +def stringify(f: Callable[[int], int]) -> Callable[[int], str]: + return lambda a: str(f(a)) + +@multiply +def some_func(a: int) -> int: + return a + 2 + +[file mod1_private.py.next] +from typing import Callable +def multiply(f: Callable[[int], int]) -> Callable[[int], int]: + return lambda a: f(a) * 10 + +def stringify(f: Callable[[int], int]) -> Callable[[int], str]: + return lambda a: str(f(a)) + +@stringify +def some_func(a: int) -> int: + return a + 2 +[stale mod1, mod1_private] +[builtins fixtures/ops.py] +[out] + +[case testIncrementalChangingClassAttributes] +import mod1 + +[file mod1.py] +import mod2 +mod2.Foo.A + +[file mod2.py] +class Foo: + A = 3 + +[file mod2.py.next] +class Foo: + A = "hello" + +[stale mod1, mod2] +[out] + +[case testIncrementalChangingFields] +import mod1 + +[file mod1.py] +import mod2 +f = mod2.Foo() +f.A + +[file mod2.py] +class Foo: + def __init__(self) -> None: + self.A = 3 + +[file mod2.py.next] +class Foo: + def __init__(self) -> None: + self.A = "hello" + +[stale mod1, mod2] +[out] + +[case testIncrementalNestedClassDefinition] +import mod1 + +[file mod1.py] +import mod2 +b = mod2.Foo.Bar() +b.attr + +[file mod2.py] +class Foo: + class Bar: + attr = 3 + +[file mod2.py.next] +class Foo: + class Bar: + attr = "foo" + +[stale mod1, mod2] +[out] + [case testIncrementalSimpleBranchingModules] import mod1 import mod2 diff --git a/test-data/unit/fixtures/dict.py b/test-data/unit/fixtures/dict.py index 709def8c86c9..5a7886439692 100644 --- a/test-data/unit/fixtures/dict.py +++ b/test-data/unit/fixtures/dict.py @@ -20,7 +20,8 @@ def __setitem__(self, k: KT, v: VT) -> None: pass def 
__iter__(self) -> Iterator[KT]: pass def update(self, a: Mapping[KT, VT]) -> None: pass -class int: pass # for convenience +class int: # for convenience + def __add__(self, x: int) -> int: pass class str: pass # for keyword argument key type From d0c6fc76963afdc3a06a24653eee2440c050b161 Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Mon, 8 Aug 2016 22:16:19 -0700 Subject: [PATCH 2/7] Add a best-effort public interface extractor "modinterface.py" is a new stage of the parsing process that ideally happens relatively early, right after each file is parsed. It performs a lightweight, best-effort-basis crawl of the parse tree to try and obtain approximate type signatures of a given module to determine what its "public interface" is. If it cannot deduce the type signature of a given symbol, it resorts to using a serialized form of the entire expression so that absolutely any changes to how that symbol is derived is reflected. --- mypy/modinterface.py | 306 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 306 insertions(+) create mode 100644 mypy/modinterface.py diff --git a/mypy/modinterface.py b/mypy/modinterface.py new file mode 100644 index 000000000000..b6852af54031 --- /dev/null +++ b/mypy/modinterface.py @@ -0,0 +1,306 @@ +"""Code to extract the public interface of a given module on a best-effort basis. + +The public interface of a module consists of anything that somebody importing +a module could conceivably access. 
The public interface consists of: + +- Top-level imports +- Top-level constants +- Top-level classes, their attributes, internal class definitions, and methods +- Top-level functions + +Things which are currently not handled very well or are not verified to work +correctly in all cases: + +- Respecting the '__all__' variable +- More accurate import handling/nested imports +- Decorators +- Complex assignment statements +- Constant-level expressions +- Repeated functions/redefining thing in tricky ways +- Python version checks +""" + +from typing import Dict, List, Optional, Union +from abc import ABCMeta, abstractmethod +import base64 +from contextlib import contextmanager +import hashlib + +from mypy.visitor import NodeVisitor +from mypy.nodes import (MypyFile, Node, Import, ImportFrom, ImportAll, ImportBase, + FuncDef, OverloadedFuncDef, Decorator, ClassDef, Block, + AssignmentStmt, WhileStmt, ForStmt, IfStmt, TryStmt, WithStmt, + Expression, NameExpr, ListExpr, TupleExpr, StarExpr, + ) +from mypy.types import Type + + +class SymbolSignature: + """Represents the "signature" of any kind of publicly accessible symbol. + + Any change to a signature means that the the types for that symbol + have changed in some way.""" + + def __init__(self, fullname: str) -> None: + self._fullname = fullname + + def fullname(self) -> str: + """The fully qualified name of this symbol.""" + return self._fullname + + @abstractmethod + def signature(self) -> str: + """The 'signature' of this symbol. + + The signature should ideally correspond to the type signature of this symbol, + but any distinctly unique string will work. 
For example, if it's not possible + to compute the type of a certain expression, you could set the signature to the + serialized form the the expression itself, if necessary.""" + raise NotImplementedError() + + def kind(self) -> str: + """Represents whether this symbol is a constant, function, class, etc.""" + return self.__class__.__name__.replace('Signature', '') + + def __str__(self) -> str: + return '{} ({}): {}'.format(self.fullname(), self.kind(), self.signature()) + + +class FunctionSignature(SymbolSignature): + def __init__(self, fullname: str, type: Type) -> None: + super().__init__(fullname) + self.type = type + self._signature = str(self.type) + + def signature(self) -> str: + return self._signature + + +class OverloadedFunctionSignature(SymbolSignature): + def __init__(self, fullname: str, types: List[Type]) -> None: + super().__init__(fullname) + self.types = types + self._signature = '; '.join(map(str, self.types)) + + def signature(self) -> str: + return self._signature + + +class ClassSignature(SymbolSignature): + def __init__(self, fullname: str, classname: str, base_classes: List[str]) -> None: + super().__init__(fullname) + self.classname = classname + self.base_classes = base_classes + if len(base_classes) == 0: + self._signature = classname + else: + self._signature = '{} (extends {})'.format(classname, ', '.join(base_classes)) + + def signature(self) -> str: + return self._signature + + +class ExpressionSignature(SymbolSignature): + def __init__(self, fullname: str, signature: Union[Type, str]) -> None: + super().__init__(fullname) + self.type = None # type: Optional[Type] + + if isinstance(signature, Type): + self.type = signature + self._signature = str(signature) + elif isinstance(signature, str): + self._signature = signature + else: + raise AssertionError("Encountered unknown signature type: {}".format(type(signature))) + + def signature(self) -> str: + return self._signature + + +class ModuleInterface: + """Represents the public 
'interface' of a given module.""" + def __init__(self) -> None: + # TODO: Represent the imports in a cleaner way. + self.fullname = '' + self.imports = [] # type: List[ImportBase] + self.symbols = {} # type: Dict[str, SymbolSignature] + + def add_symbol(self, symbol: SymbolSignature) -> None: + self.symbols[symbol.fullname()] = symbol + + def __str__(self) -> str: + output = ['Interface for ' + self.fullname + ':'] + output.extend(' ' + str(imp) for imp in self.imports) + output.extend(' {} ({})\n {}'.format( + sym.fullname(), sym.kind(), sym.signature()) for sym in self.symbols.values()) + return '\n'.join(output) + + def compute_hash(self) -> str: + """Computes the hash of all stored imports and symbols. + + The order in which the imports are added is significant; + the order in which symbols are added is ignored.""" + hash_obj = hashlib.md5(self.fullname.encode('utf-8')) + for imp in self.imports: + hash_obj.update(str(imp).encode('utf-8')) + for key in sorted(self.symbols.keys()): + hash_obj.update(str(self.symbols[key]).encode('utf-8')) + return base64.b64encode(hash_obj.digest()).decode('utf-8') + + +def extract_interface(module: MypyFile) -> ModuleInterface: + """Returns a best-effort approximation of the public interface of the given MypyFile.""" + interface = ModuleInterface() + extractor = InterfaceExtractor(interface) + extractor.visit_mypy_file(module) + return interface + + +class InterfaceExtractor(NodeVisitor[None]): + def __init__(self, interface: ModuleInterface) -> None: + self.interface = interface + self._scope_stack = [] # type: List[str] + self._qualname = '' + + # Helpers + + def _visit(self, *nodes: Node, scope: Optional[str] = None) -> None: + """Visits the given node(s). + + If `scope` is a string, assumes all visited nodes live within that + particular scope.""" + if scope is not None: + self._scope_stack.append(scope) + self._qualname = '.'.join(self._scope_stack) + '.' 
+ + for node in nodes: + if node is not None: + node.accept(self) + + if scope is not None: + self._scope_stack.pop() + self._qualname = '.'.join(self._scope_stack) + '.' + + def _qualified_name(self, name: str) -> str: + """Returns the fully qualified name of the given symbol based on the current sccope.""" + assert name is not None + return self._qualname + name + + def _flatten_node(self, node: Node) -> str: + """Returns the string representation of a node as a one-line string.""" + return ' '.join(line.strip() for line in str(node).split('\n')) + + # Module structure + + def visit_mypy_file(self, module: MypyFile) -> None: + self.interface.fullname = module.fullname() + self._visit(*module.defs, scope=module.fullname()) + + # TODO: Store these imports in a more useful way. + + def visit_import(self, imp: Import) -> None: + self.interface.imports.append(imp) + + def visit_import_from(self, imp: ImportFrom) -> None: + self.interface.imports.append(imp) + + def visit_import_all(self, imp: ImportAll) -> None: + self.interface.imports.append(imp) + + # Definitions + + def visit_func_def(self, func: FuncDef) -> None: + if func.fullname() is not None: + name = func.fullname() + else: + name = self._qualified_name(func.name()) + + self.interface.add_symbol(FunctionSignature(name, func.type)) + + def visit_overloaded_func_def(self, + func: OverloadedFuncDef) -> None: + if func.fullname() is not None: + name = func.fullname() + else: + name = self._qualified_name(func.name()) + + types = [defn.func.type for defn in func.items] + self.interface.add_symbol(OverloadedFunctionSignature(name, types)) + + def visit_class_def(self, cls: ClassDef) -> None: + info = cls.info + bases = [] + for base in cls.base_type_exprs: + if isinstance(base, NameExpr): + bases.append(base.name) + else: + bases.append(self._flatten_node(base)) + self.interface.add_symbol(ClassSignature(info.fullname(), info.name(), bases)) + self._visit(cls.defs, scope=info.name()) + + def visit_decorator(self, 
decorator: Decorator) -> None: + # TODO: Do I need decorator.var and decorator.is_overload? + self._visit(decorator.func) + + # Statements + + def visit_block(self, block: Block) -> None: + if not block.is_unreachable: + self._visit(*block.body) + + def _handle_seq_assignment(self, lvalue: Expression, signature: Union[Type, str]) -> None: + if isinstance(lvalue, NameExpr): + name = self._qualified_name(lvalue.name) + self.interface.add_symbol(ExpressionSignature(name, signature)) + elif isinstance(lvalue, (ListExpr, TupleExpr)): + # TODO: This is a cop-out. Rather then trying to extract the type + # signatures of each individual item in the tuple or list, I'm just + # going to give them all the same signature as the overall expression. + # After all, the goal of this visitor is to obtain relatively unique + # signatures, not to obtain accurate type information. + for item in lvalue.items: + self._handle_seq_assignment(item, signature) + elif isinstance(lvalue, StarExpr): + self._handle_seq_assignment(lvalue.expr, signature) + else: + # Assume all other kinds of expressions are either valid things like + # MemberExpr or IndexExpr, or invalid things that'll be caught later + # by the syntax checker. + pass + + def visit_assignment_stmt(self, stmt: AssignmentStmt) -> None: + # If we cannot deduce the type signature of this variable, resort + # to using the serialized form of the entire expression. + signature = None # type: Union[Type, str] + if stmt.type is not None: + signature = stmt.type + else: + signature = self._flatten_node(stmt.rvalue) + + for lvalue in stmt.lvalues: + self._handle_seq_assignment(lvalue, signature) + + def visit_while_stmt(self, stmt: WhileStmt) -> None: + # Hopefully nobody will try defining something within a top-level or + # class-level while statement, but I suppose it could happen. + self._visit(stmt.body) + self._visit(stmt.else_body) + + def visit_for_stmt(self, stmt: ForStmt) -> None: + # Similar thing here. 
+ self._visit(stmt.body) + self._visit(stmt.else_body) + + def visit_if_stmt(self, stmt: IfStmt) -> None: + # TODO: Make sure we respect Python version checks + self._visit(*stmt.body) + self._visit(stmt.else_body) + + def visit_try_stmt(self, stmt: TryStmt) -> None: + self._visit(stmt.body) + self._visit(*stmt.handlers) # "except" clauses + self._visit(stmt.else_body) + self._visit(stmt.finally_body) + + def visit_with_stmt(self, stmt: WithStmt) -> None: + self._visit(stmt.body) From 990636b5748a13cfe21f970a2a1fe24f3c896432 Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Mon, 8 Aug 2016 22:28:29 -0700 Subject: [PATCH 3/7] Make cache loader flag if a file is possibly stale Previously, if the file timestamp or size differed from the data stored in the meta files, we treated it as immediately stale. In preparation for the interface changes, this commit extracts that logic into a separate flag. --- mypy/build.py | 49 +++++++++++++++++++++++++----------------- mypy/test/testcheck.py | 4 ++-- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 7e4aba1fca19..65f28e609d0f 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -686,7 +686,7 @@ def get_cache_names(id: str, path: str, cache_dir: str, return (prefix + '.meta.json', prefix + '.data.json') -def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[CacheMeta]: +def find_cache_meta(id: str, path: str, manager: BuildManager) -> Tuple[bool, Optional[CacheMeta]]: """Find cache data for a module. Args: @@ -695,21 +695,25 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache manager: the build manager (for pyversion, log/trace, and build options) Returns: - A CacheMeta instance if the cache data was found and appears - valid; otherwise None. + A tuple containing a bool and a CacheMeta instance. 
+ The bool is True when a CacheMeta instance was both successfully + loaded and the timestamps indicate that there may be a content + or interface change. + The CacheMeta instance is non-null if the cache data was found + and appears valid; otherwise None. """ # TODO: May need to take more build options into account meta_json, data_json = get_cache_names( id, path, manager.options.cache_dir, manager.options.python_version) manager.trace('Looking for {} {}'.format(id, data_json)) if not os.path.exists(meta_json): - return None + return False, None with open(meta_json, 'r') as f: meta_str = f.read() manager.trace('Meta {} {}'.format(id, meta_str.rstrip())) meta = json.loads(meta_str) # TODO: Errors if not isinstance(meta, dict): - return None + return False, None path = os.path.abspath(path) m = CacheMeta( meta.get('id'), @@ -723,38 +727,39 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache meta.get('child_modules', []), meta.get('options'), meta.get('dep_prios', []), + meta.get('interface_hash'), meta.get('version_id'), ) if (m.id != id or m.path != path or m.mtime is None or m.size is None or m.dependencies is None or m.data_mtime is None): - return None + return False, None # Ignore cache if generated by an older mypy version. if (m.version_id != manager.version_id or m.options is None + or m.interface_hash is None or len(m.dependencies) != len(m.dep_prios)): - return None + return False, None # Ignore cache if (relevant) options aren't the same. cached_options = m.options current_options = select_options_affecting_cache(manager.options) if cached_options != current_options: - return None + return False, None # TODO: Share stat() outcome with find_module() - st = os.stat(path) # TODO: Errors - if st.st_mtime != m.mtime or st.st_size != m.size: - manager.log('Metadata abandoned because of modified file {}'.format(path)) - return None - + # TODO: stat() errors + # TODO: revise below comment # It's a match on (id, path, mtime, size). 
# Check data_json; assume if its mtime matches it's good. - # TODO: stat() errors - if os.path.getmtime(data_json) != m.data_mtime: - return None + st = os.stat(path) # TODO: Errors + maybe_stale = (st.st_mtime != m.mtime + or st.st_size != m.size + or os.path.getmtime(data_json) != m.data_mtime) + manager.log('Found {} {}'.format(id, meta_json)) - return m + return maybe_stale, m def select_options_affecting_cache(options: Options) -> Mapping[str, bool]: @@ -1012,6 +1017,10 @@ class State: # If caller_state is set, the line number in the caller where the import occurred caller_line = 0 + # If True, indicates that module must be checked later to see if it is + # stale and if the public interface has been changed. + maybe_stale = False + def __init__(self, id: Optional[str], path: Optional[str], @@ -1089,10 +1098,10 @@ def __init__(self, self.xpath = path or '' self.source = source if path and source is None and manager.options.incremental: - self.meta = find_cache_meta(self.id, self.path, manager) - # TODO: Get mtime if not cached. + self.maybe_stale, self.meta = find_cache_meta(self.id, self.path, manager) + # TODO: Get mtime if not cached self.add_ancestors() - if self.meta: + if self.meta and not self.maybe_stale: # Make copies, since we may modify these and want to # compare them to the originals later. 
self.dependencies = list(self.meta.dependencies) diff --git a/mypy/test/testcheck.py b/mypy/test/testcheck.py index b2f164d60933..f7961b3d023a 100644 --- a/mypy/test/testcheck.py +++ b/mypy/test/testcheck.py @@ -207,8 +207,8 @@ def find_missing_cache_files(self, modules: Dict[str, str], manager: build.BuildManager) -> Set[str]: missing = {} for id, path in modules.items(): - meta = build.find_cache_meta(id, path, manager) - if meta is None: + maybe_stale, meta = build.find_cache_meta(id, path, manager) + if meta is None or maybe_stale: missing[id] = path return set(missing.values()) From 4af183bb2f5b9eb58507f8471fcfdbd3d0f3cfde Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Mon, 8 Aug 2016 22:32:18 -0700 Subject: [PATCH 4/7] Store interface hash in meta files This commit modifies the file parsing logic to also compute and store the public interface for a module. The public interface is not yet used anywhere. --- mypy/build.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 65f28e609d0f..3acc5a83a977 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -35,6 +35,7 @@ from mypy import moduleinfo from mypy import util from mypy.fixup import fixup_module_pass_one, fixup_module_pass_two +from mypy.modinterface import ModuleInterface, extract_interface from mypy.options import Options from mypy.parse import parse from mypy.stats import dump_type_stats @@ -287,6 +288,7 @@ def default_lib_path(data_dir: str, pyversion: Tuple[int, int]) -> List[str]: ('child_modules', List[str]), # all submodules of the given module ('options', Optional[Dict[str, bool]]), # build options ('dep_prios', List[int]), + ('interface_hash', str), ('version_id', str), # mypy version for cache invalidation ]) # NOTE: dependencies + suppressed == all reachable imports; @@ -782,7 +784,7 @@ def random_string() -> str: def write_cache(id: str, path: str, tree: MypyFile, dependencies: List[str], suppressed: List[str], child_modules: 
List[str], dep_prios: List[int], - manager: BuildManager) -> None: + interface_hash: str, manager: BuildManager) -> None: """Write cache files for a module. Args: @@ -824,6 +826,7 @@ def write_cache(id: str, path: str, tree: MypyFile, 'child_modules': child_modules, 'options': select_options_affecting_cache(manager.options), 'dep_prios': dep_prios, + 'interface_hash': interface_hash, 'version_id': manager.version_id, } with open(meta_json_tmp, 'w') as f: @@ -998,6 +1001,8 @@ class State: dependencies = None # type: List[str] suppressed = None # type: List[str] # Suppressed/missing dependencies priorities = None # type: Dict[str, int] + interface = None # type: ModuleInterface + interface_hash = None # type: str # Map each dependency to the line number where it is first imported dep_line_map = None # type: Dict[str, int] @@ -1289,6 +1294,8 @@ def parse_file(self) -> None: self.suppressed = suppressed self.priorities = priorities self.dep_line_map = dep_line_map + self.interface = extract_interface(self.tree) + self.interface_hash = self.interface.compute_hash() self.check_blockers() def patch_parent(self) -> None: @@ -1330,7 +1337,7 @@ def write_cache(self) -> None: dep_prios = [self.priorities.get(dep, PRI_HIGH) for dep in self.dependencies] write_cache(self.id, self.path, self.tree, list(self.dependencies), list(self.suppressed), list(self.child_modules), - dep_prios, + dep_prios, self.interface_hash, self.manager) From 4b768236a154c3306c2dd654dbf7dc45f5a82154 Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Mon, 8 Aug 2016 22:34:42 -0700 Subject: [PATCH 5/7] Modify incremental to check public interface This commit finally enables the build process to throttle back incremental mode when the public interface of a method is not changed. 
--- mypy/build.py | 72 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 10 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 3acc5a83a977..3c893b2e60c1 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1022,6 +1022,9 @@ class State: # If caller_state is set, the line number in the caller where the import occurred caller_line = 0 + # If True, indicates that the public interface is unchanged. + externally_same = False + # If True, indicates that module must be checked later to see if it is # stale and if the public interface has been changed. maybe_stale = False @@ -1122,6 +1125,24 @@ def __init__(self, self.suppressed = [] self.child_modules = set() + def check_interface(self) -> None: + if self.has_new_submodules(): + self.manager.trace("Module {} has new submodules".format(self.id)) + self.parse_file() + self.externally_same = False + elif self.maybe_stale: + self.parse_file() + if self.interface_hash == self.meta.interface_hash: + self.manager.trace("Module {} was changed but has same interface".format(self.id)) + self.externally_same = True + self.meta = None + else: + self.manager.trace("Module {} has different interface".format(self.id)) + self.externally_same = False + elif self.meta is not None: + self.manager.trace("Module {} is unchanged".format(self.id)) + self.externally_same = True + def skipping_ancestor(self, id: str, path: str, ancestor_for: 'State') -> None: # TODO: Read the path (the __init__.py file) and return # immediately if it's empty or only contains comments. @@ -1168,16 +1189,28 @@ def is_fresh(self) -> bool: # suppression by --silent-imports. However when a suppressed # dependency is added back we find out later in the process. 
return (self.meta is not None + and self.externally_same and self.dependencies == self.meta.dependencies and self.child_modules == set(self.meta.child_modules)) + def is_interface_fresh(self) -> bool: + return self.externally_same + def has_new_submodules(self) -> bool: """Return if this module has new submodules after being loaded from a warm cache.""" return self.meta is not None and self.child_modules != set(self.meta.child_modules) - def mark_stale(self) -> None: - """Throw away the cache data for this file, marking it as stale.""" + def mark_stale(self, interface_is_same=False) -> None: + """Throw away the cache data for this file, marking it as stale. + + If interface_is_same is True, treat the module's interface as being + fresh -- only the module's content should be considered stale.""" + if interface_is_same: + self.manager.trace("Marking {} as stale (but interface is fresh)".format(self.id)) + else: + self.manager.trace("Marking {} as stale (interface is also stale)".format(self.id)) self.meta = None + self.externally_same = interface_is_same self.manager.stale_modules.add(self.id) def check_blockers(self) -> None: @@ -1397,8 +1430,7 @@ def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph: if dep in st.ancestors and dep in graph: graph[dep].child_modules.add(st.id) for id, g in graph.items(): - if g.has_new_submodules(): - g.parse_file() + g.check_interface() return graph @@ -1437,8 +1469,24 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: for id in scc: deps.update(graph[id].dependencies) deps -= ascc - stale_deps = {id for id in deps if not graph[id].is_fresh()} + stale_deps = {id for id in deps if not graph[id].is_interface_fresh()} fresh = fresh and not stale_deps + + # TODO: Currently, the interface-based approach to incremental mode will regard + # absolutely any interface changes in our dependencies as a sign that our + # interface has also changed. 
This is often over-conservative, but is + # an easy way of making sure we preserve correctness. + # + # This unfortunately does mean that an interface change will often result back + # to the worst-case behavior for incremental mode -- changing an interface causes + # a cascade of changes through a large subset of the import graph. + # + # The ideal behavior would be for an interface change to propagate only only one + # or two levels through the import DAG, but this requires us to track dependencies + # on a more finer-grained level then we currently do. + interface_stale_scc = {id for id in scc if not graph[id].is_interface_fresh()} + interface_fresh = len(interface_stale_scc) == 0 and len(stale_deps) == 0 + undeps = set() if fresh: # Check if any dependencies that were suppressed according @@ -1453,9 +1501,10 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: # All cache files are fresh. Check that no dependency's # cache file is newer than any scc node's cache file. oldest_in_scc = min(graph[id].meta.data_mtime for id in scc) - newest_in_deps = 0 if not deps else max(graph[dep].meta.data_mtime for dep in deps) + viable = {id for id in deps if not graph[id].is_interface_fresh()} + newest_in_deps = max(graph[dep].meta.data_mtime for dep in viable) if viable else 0 if manager.options.verbosity >= 3: # Dump all mtimes for extreme debugging. 
- all_ids = sorted(ascc | deps, key=lambda id: graph[id].meta.data_mtime) + all_ids = sorted(ascc | viable, key=lambda id: graph[id].meta.data_mtime) for id in all_ids: if id in scc: if graph[id].meta.data_mtime < newest_in_deps: @@ -1481,6 +1530,8 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: fresh_msg = "inherently stale (%s)" % " ".join(sorted(stale_scc)) if stale_deps: fresh_msg += " with stale deps (%s)" % " ".join(sorted(stale_deps)) + if interface_fresh: + fresh_msg += ", but interface is fresh" else: fresh_msg = "stale due to deps (%s)" % " ".join(sorted(stale_deps)) if len(scc) == 1: @@ -1488,10 +1539,11 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: else: manager.log("Processing SCC of size %d (%s) as %s" % (len(scc), " ".join(scc), fresh_msg)) + if fresh: process_fresh_scc(graph, scc) else: - process_stale_scc(graph, scc) + process_stale_scc(graph, scc, interface_fresh) def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_ALL) -> List[str]: @@ -1553,10 +1605,10 @@ def process_fresh_scc(graph: Graph, scc: List[str]) -> None: graph[id].calculate_mros() -def process_stale_scc(graph: Graph, scc: List[str]) -> None: +def process_stale_scc(graph: Graph, scc: List[str], is_interface_fresh: bool) -> None: """Process the modules in one SCC from source code.""" for id in scc: - graph[id].mark_stale() + graph[id].mark_stale(is_interface_fresh) for id in scc: # We may already have parsed the module, or not. # If the former, parse_file() is a no-op. 
From 9391470a013b567ef84882d235095f3b6ef9ae4d Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Mon, 8 Aug 2016 22:38:09 -0700 Subject: [PATCH 6/7] Fix bug in string output for callable types --- mypy/types.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mypy/types.py b/mypy/types.py index 59ed4f9b09e8..1adc57a9a994 100644 --- a/mypy/types.py +++ b/mypy/types.py @@ -1188,7 +1188,12 @@ def visit_type_var(self, t: TypeVarType) -> str: def visit_callable_type(self, t: CallableType) -> str: s = '' bare_asterisk = False - for i in range(len(t.arg_types)): + # The lengths of t.arg_types and t.arg_kinds must be equal in + # order to be syntactically correct, but the interface extractor + # will visit this node before it's checked. To prevent the interface + # extractor from crashing, we just take the minimum of the two and + # deal with the error later. + for i in range(min(len(t.arg_types), len(t.arg_kinds))): if s != '': s += ', ' if t.arg_kinds[i] == mypy.nodes.ARG_NAMED and not bare_asterisk: From 71dd658100affba711fa4f30e428e58ceb346d7f Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Tue, 9 Aug 2016 11:17:00 -0700 Subject: [PATCH 7/7] Disable interface extraction if not in incremental Previously, we always extracted the public interface, even though there's really no need to do so when not running incremental mode. 
--- mypy/build.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 3c893b2e60c1..170b6f444e7e 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1327,8 +1327,11 @@ def parse_file(self) -> None: self.suppressed = suppressed self.priorities = priorities self.dep_line_map = dep_line_map - self.interface = extract_interface(self.tree) - self.interface_hash = self.interface.compute_hash() + if self.manager.options.incremental: + self.interface = extract_interface(self.tree) + self.interface_hash = self.interface.compute_hash() + else: + self.interface_hash = "" self.check_blockers() def patch_parent(self) -> None: