From 81ab7695fac0553accff5a3dbf07ac773c2e8b71 Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Mon, 8 Aug 2016 21:00:03 -0700 Subject: [PATCH 01/17] Add tests for interface-based incremental checks This commit modifies the incremental tests so that changes that don't change the "public interface" of the file will not trigger a recheck in any dependees. --- test-data/unit/check-incremental.test | 366 +++++++++++++++++++++++++- test-data/unit/fixtures/dict.pyi | 3 +- 2 files changed, 367 insertions(+), 2 deletions(-) diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test index 035c624e0555..903bbc78770b 100644 --- a/test-data/unit/check-incremental.test +++ b/test-data/unit/check-incremental.test @@ -57,7 +57,67 @@ def func3() -> None: pass [out] -[case testIncrementalSimpleImportCascade] +[case testIncrementalInternalChangeOnly] +import mod1 +mod1.func1() + +[file mod1.py] +import mod2 +def func1() -> None: mod2.func2() + +[file mod2.py] +import mod3 +def func2() -> None: mod3.func3() + +[file mod3.py] +def func3() -> None: pass + +[file mod3.py.next] +def func3() -> None: 3 + 2 + +[stale mod3] +[out] + + +[case testIncrementalImportGone] +import mod1 + +[file mod1.py] +from mod2 import A +def func1() -> A: pass + +[file mod2.py] +class A: pass + +[file mod1.py.next] +def func1() -> A: pass + +[stale mod1] +[out] +main:1: note: In module imported here: +tmp/mod1.py: note: In function "func1": +tmp/mod1.py:1: error: Name 'A' is not defined + + +[case testIncrementalSameNameChange] +import mod1 + +[file mod1.py] +from mod2 import A +def func1() -> A: pass + +[file mod2.py] +class A: pass + +[file mod2.py.next] +class Parent: pass +class A(Parent): pass + +[stale mod1, mod2] +[out] + + +[case testIncrementalPartialInterfaceChange] import mod1 mod1.func1() @@ -75,9 +135,313 @@ def func3() -> None: pass [file mod3.py.next] def func3() -> int: return 2 +# TODO: ideally, only mod2 and mod3 would be stale, but we treat any +# changes in our dependencies as an interface change, so mod1 is +# rechecked as well. [stale mod1, mod2, mod3] [out] +[case testIncrementalInternalScramble] +import mod1 + +[file mod1.py] +import mod2 +mod2.foo() + +[file mod2.py] +def baz() -> int: + return 3 + +def bar() -> int: + return baz() + +def foo() -> int: + return bar() + +[file mod2.py.next] +def foo() -> int: + return baz() + +def bar() -> int: + return bar() + +def baz() -> int: + return 42 +[stale mod2] +[out] + +[case testIncrementalMethodInterfaceChange] +import mod1 + +[file mod1.py] +import mod2 + +[file mod2.py] +class Foo: + def bar(self, a: str) -> str: + return "a" + +[file mod2.py.next] +class Foo: + def bar(self, a: float) -> str: + return "a" + +[stale mod1, mod2] +[out] + +[case testIncrementalBaseClassChange] +import mod1 + +[file mod1.py] +from mod2 import Child +Child().good_method() + +[file mod2.py] +class Good: + def good_method(self) -> int: return 1 +class Bad: pass +class Child(Good): pass + +[file mod2.py.next] +class Good: + def good_method(self) -> int: return 1 +class Bad: pass +class Child(Bad): pass + +[stale mod1, mod2] +[out] +main:1: note: In module imported here: +tmp/mod1.py:2: error: "Child" has no attribute "good_method" + +[case testIncrementalCascadingChange] +import mod1 + +[file mod1.py] +from mod2 import A + +[file mod2.py] +from mod3 import B +A = B + +[file mod3.py] +from mod4 import C +B = C + +[file mod4.py] +C = 3 + +[file mod4.py.next] +C = "A" + +[stale mod1, mod2, mod3, mod4] +[out] + +[case testIncrementalRemoteChange] +import mod1 + +[file mod1.py] +import mod2 +def accepts_int(a: int) -> None: pass +accepts_int(mod2.mod3.mod4.const) + +[file mod2.py] +import mod3 + +[file mod3.py] +import mod4 + +[file mod4.py] +const = 3 + +[file mod4.py.next] +const = "foo" + +[stale mod1, mod2, mod3, mod4] +[out] +main:1: note: In module imported here: +tmp/mod1.py:3: error: Argument 1 to "accepts_int" has incompatible type "str"; expected "int" + +[case testIncrementalBadChange] +import mod1 + +[file mod1.py] +from mod2 import func2 + +def func1() -> int: + return func2() + +[file mod2.py] +def func2() -> int: + return 1 + +[file mod2.py.next] +def func2() -> str: + return "foo" + +[stale mod1, mod2] +[out] +main:1: note: In module imported here: +tmp/mod1.py: note: In function "func1": +tmp/mod1.py:4: error: Incompatible return value type (got "str", expected "int") + +[case testIncrementalWithComplexDictExpression] +import mod1 + +[file mod1.py] +import mod1_private + +[file mod1_private.py] +my_dict = { + 'a': [1, 2, 3], + 'b': [4, 5, 6] +} + +[file mod1_private.py.next] +my_dict = { + 'a': [1, 2, 3], + 'b': [4, 5, 'a'] +} + +[stale mod1, mod1_private] +[builtins fixtures/dict.py] +[out] + +[case testIncrementalWithComplexConstantExpressionNoAnnotation] +import mod1 + +[file mod1.py] +import mod1_private + +[file mod1_private.py] +def foo() -> int: return 1 +def bar() -> int: return 2 +baz = 1 + foo() + +[file mod1_private.py.next] +def foo() -> int: return 1 +def bar() -> int: return 2 +baz = 1 + bar() +# Ideally, this wouldn't count as an interface change, +# but the interface checker isn't easily able to deduce +# the type of this constant. +[stale mod1, mod1_private] +[out] + +[case testIncrementalWithComplexConstantExpressionWithAnnotation] +import mod1 + +[file mod1.py] +import mod1_private + +[file mod1_private.py] +def foo() -> int: return 1 +def bar() -> int: return 2 +baz = 1 + foo() # type: int + +[file mod1_private.py.next] +def foo() -> int: return 1 +def bar() -> int: return 2 +baz = 1 + bar() # type: int +# However, if the constant expressions are annotated, we +# _can_ currently deduce the type. +[stale mod1_private] +[out] + +[case testIncrementalWithDecorators] +import mod1 + +[file mod1.py] +import mod1_private +def accepts_int(a: int) -> None: pass +accepts_int(mod1_private.some_func(12)) + +[file mod1_private.py] +from typing import Callable +def multiply(f: Callable[[int], int]) -> Callable[[int], int]: + return lambda a: f(a) * 10 + +def stringify(f: Callable[[int], int]) -> Callable[[int], str]: + return lambda a: str(f(a)) + +@multiply +def some_func(a: int) -> int: + return a + 2 + +[file mod1_private.py.next] +from typing import Callable +def multiply(f: Callable[[int], int]) -> Callable[[int], int]: + return lambda a: f(a) * 10 + +def stringify(f: Callable[[int], int]) -> Callable[[int], str]: + return lambda a: str(f(a)) + +@stringify +def some_func(a: int) -> int: + return a + 2 +[stale mod1, mod1_private] +[builtins fixtures/ops.py] +[out] + +[case testIncrementalChangingClassAttributes] +import mod1 + +[file mod1.py] +import mod2 +mod2.Foo.A + +[file mod2.py] +class Foo: + A = 3 + +[file mod2.py.next] +class Foo: + A = "hello" + +[stale mod1, mod2] +[out] + +[case testIncrementalChangingFields] +import mod1 + +[file mod1.py] +import mod2 +f = mod2.Foo() +f.A + +[file mod2.py] +class Foo: + def __init__(self) -> None: + self.A = 3 + +[file mod2.py.next] +class Foo: + def __init__(self) -> None: + self.A = "hello" + +[stale mod1, mod2] +[out] + +[case testIncrementalNestedClassDefinition] +import mod1 + +[file mod1.py] +import mod2 +b = mod2.Foo.Bar() +b.attr + +[file mod2.py] +class Foo: + class Bar: + attr = 3 + +[file mod2.py.next] +class Foo: + class Bar: + attr = "foo" + +[stale mod1, mod2] +[out] + [case testIncrementalSimpleBranchingModules] import mod1 import mod2 diff --git a/test-data/unit/fixtures/dict.pyi b/test-data/unit/fixtures/dict.pyi index 709def8c86c9..5a7886439692 100644 --- a/test-data/unit/fixtures/dict.pyi +++ b/test-data/unit/fixtures/dict.pyi @@ -20,7 +20,8 @@ class dict(Iterable[KT], Mapping[KT, VT], Generic[KT, VT]): def __iter__(self) -> Iterator[KT]: pass def update(self, a: Mapping[KT, VT]) -> None: pass -class int: pass # for convenience +class int: # for convenience + def __add__(self, x: int) -> int: pass class str: pass # for keyword argument key type From 926e30c14b249209052cefd6b892de11a807c957 Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Thu, 11 Aug 2016 16:53:34 -0700 Subject: [PATCH 02/17] Make incremental tests use 'rechecked' keyword This commit updates the incremental tests to use the 'rechecked' keywords and recalibrates a few tests to work against a more precise interface change detector. --- mypy/test/testcheck.py | 21 +++++++--- test-data/unit/check-incremental.test | 57 ++++++++++++++++++--------- test-data/unit/check-newtype.test | 3 +- 3 files changed, 57 insertions(+), 24 deletions(-) diff --git a/mypy/test/testcheck.py b/mypy/test/testcheck.py index 7f9c345c85aa..2f4701958e5c 100644 --- a/mypy/test/testcheck.py +++ b/mypy/test/testcheck.py @@ -166,11 +166,22 @@ def run_case_once(self, testcase: DataDrivenTestCase, incremental=0) -> None: if incremental and res: if not options.silent_imports: self.verify_cache(module_data, a, res.manager) - if testcase.expected_stale_modules is not None and incremental == 2: - assert_string_arrays_equal( - list(sorted(testcase.expected_stale_modules)), - list(sorted(res.manager.stale_modules.difference({"__main__"}))), - 'Set of stale modules does not match expected set') + if incremental == 2: + self.check_module_equivalence( + 'rechecked', + testcase.expected_rechecked_modules, + res.manager.rechecked_modules) + self.check_module_equivalence( + 'stale', + testcase.expected_stale_modules, + res.manager.stale_modules) + + def check_module_equivalence(self, name: str, expected: Set[str], actual: Set[str]) -> None: + if expected is not None: + assert_string_arrays_equal( + list(sorted(expected)), + list(sorted(actual.difference({"__main__"}))), + 'Set of {} modules does not match expected set'.format(name)) def verify_cache(self, module_data: List[Tuple[str, str, str]], a: List[str], manager: build.BuildManager) -> None: diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test index 903bbc78770b..cf058215fb38 100644 --- a/test-data/unit/check-incremental.test +++ b/test-data/unit/check-incremental.test @@ -11,6 +11,8 @@ -- if no files should be stale. [case testIncrementalEmpty] +[rechecked] +[stale] [out] [case testIncrementalBasics] @@ -21,6 +23,7 @@ def foo(): [file m.py.next] def foo() -> None: pass +[rechecked m] [stale m] [out] @@ -32,7 +35,8 @@ def foo() -> None: [file m.py.next] def foo() -> None: bar() -[stale m] +[rechecked m] +[stale] [out] main:1: note: In module imported here: tmp/m.py: note: In function "foo": @@ -53,6 +57,7 @@ def func2() -> None: mod3.func3() [file mod3.py] def func3() -> None: pass +[rechecked] [stale] [out] @@ -75,7 +80,8 @@ def func3() -> None: pass [file mod3.py.next] def func3() -> None: 3 + 2 -[stale mod3] +[rechecked mod3] +[stale] [out] @@ -92,13 +98,13 @@ class A: pass [file mod1.py.next] def func1() -> A: pass -[stale mod1] +[rechecked mod1] +[stale] [out] main:1: note: In module imported here: tmp/mod1.py: note: In function "func1": tmp/mod1.py:1: error: Name 'A' is not defined - [case testIncrementalSameNameChange] import mod1 @@ -113,6 +119,7 @@ class A: pass class Parent: pass class A(Parent): pass +[rechecked mod1, mod2] [stale mod1, mod2] [out] @@ -135,9 +142,7 @@ def func3() -> None: pass [file mod3.py.next] def func3() -> int: return 2 -# TODO: ideally, only mod2 and mod3 would be stale, but we treat any -# changes in our dependencies as an interface change, so mod1 is -# rechecked as well. +[rechecked mod1, mod2, mod3] [stale mod1, mod2, mod3] [out] @@ -167,7 +172,8 @@ def bar() -> int: def baz() -> int: return 42 -[stale mod2] +[rechecked mod2] +[stale] [out] [case testIncrementalMethodInterfaceChange] @@ -186,6 +192,7 @@ class Foo: def bar(self, a: float) -> str: return "a" +[rechecked mod1, mod2] [stale mod1, mod2] [out] @@ -208,6 +215,7 @@ class Good: class Bad: pass class Child(Bad): pass +[rechecked mod1, mod2] [stale mod1, mod2] [out] main:1: note: In module imported here: @@ -233,6 +241,7 @@ C = 3 [file mod4.py.next] C = "A" +[rechecked mod1, mod2, mod3, mod4] [stale mod1, mod2, mod3, mod4] [out] @@ -278,6 +287,7 @@ def func2() -> int: def func2() -> str: return "foo" +[rechecked mod1, mod2] [stale mod1, mod2] [out] main:1: note: In module imported here: @@ -302,8 +312,9 @@ my_dict = { 'b': [4, 5, 'a'] } +[rechecked mod1, mod1_private] [stale mod1, mod1_private] -[builtins fixtures/dict.py] +[builtins fixtures/dict.pyi] [out] [case testIncrementalWithComplexConstantExpressionNoAnnotation] @@ -321,10 +332,10 @@ baz = 1 + foo() def foo() -> int: return 1 def bar() -> int: return 2 baz = 1 + bar() -# Ideally, this wouldn't count as an interface change, -# but the interface checker isn't easily able to deduce -# the type of this constant. -[stale mod1, mod1_private] +# Foo + +[rechecked mod1_private] +[stale] [out] [case testIncrementalWithComplexConstantExpressionWithAnnotation] @@ -342,9 +353,10 @@ baz = 1 + foo() # type: int def foo() -> int: return 1 def bar() -> int: return 2 baz = 1 + bar() # type: int -# However, if the constant expressions are annotated, we -# _can_ currently deduce the type. -[stale mod1_private] +# Type annotations shouldn't make a difference, but just in case... + +[rechecked mod1_private] +[stale] [out] [case testIncrementalWithDecorators] @@ -378,9 +390,12 @@ def stringify(f: Callable[[int], int]) -> Callable[[int], str]: @stringify def some_func(a: int) -> int: return a + 2 +[rechecked mod1, mod1_private] [stale mod1, mod1_private] -[builtins fixtures/ops.py] +[builtins fixtures/ops.pyi] [out] +main:1: note: In module imported here: +tmp/mod1.py:3: error: Argument 1 to "accepts_int" has incompatible type "str"; expected "int" [case testIncrementalChangingClassAttributes] import mod1 @@ -397,6 +412,7 @@ class Foo: class Foo: A = "hello" +[rechecked mod1, mod2] [stale mod1, mod2] [out] @@ -418,6 +434,7 @@ class Foo: def __init__(self) -> None: self.A = "hello" +[rechecked mod1, mod2] [stale mod1, mod2] [out] @@ -439,6 +456,7 @@ class Foo: class Bar: attr = "foo" +[rechecked mod1, mod2] [stale mod1, mod2] [out] @@ -455,6 +473,7 @@ def func() -> None: pass [file mod1.py.next] def func() -> int: return 1 +[rechecked mod1] [stale mod1] [out] @@ -482,6 +501,7 @@ class Bar: def test(self) -> int: return 3 [builtins fixtures/module_all.pyi] +[rechecked] [stale] [out] @@ -607,7 +627,8 @@ import a.b [file a/b.py] -[stale b] +[rechecked b] +[stale] [out] main:1: note: In module imported here: tmp/b.py:4: error: Name 'a' already defined diff --git a/test-data/unit/check-newtype.test b/test-data/unit/check-newtype.test index 17775995a4c4..4455f91b9a7c 100644 --- a/test-data/unit/check-newtype.test +++ b/test-data/unit/check-newtype.test @@ -230,7 +230,8 @@ num = id + 1 reveal_type(id) reveal_type(num) -[stale m] +[rechecked m] +[stale] [out] main:1: note: In module imported here: tmp/m.py:13: error: Revealed type is 'm.UserId' From 67031735adfbb88c899f04693fb08137a2b50f6e Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Thu, 11 Aug 2016 16:49:43 -0700 Subject: [PATCH 03/17] Add "rechecked" keyword to incremental tests This commit makes the incremental tests distinguish between "stale" and "rechecked" files. Stale files are ones where the "public interface" of that method is changed; rechecked files are ones that must be re-analyzed (perhaps because one of their dependencies changed?) but did not necessarily have a changed public interface. This functionality is currently unused, but paves a path for upcoming incremental-mode changes. --- mypy/test/data.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/mypy/test/data.py b/mypy/test/data.py index 548872f201ee..c098d81cbf71 100644 --- a/mypy/test/data.py +++ b/mypy/test/data.py @@ -47,6 +47,7 @@ def parse_test_cases( files = [] # type: List[Tuple[str, str]] # path and contents stale_modules = None # type: Optional[Set[str]] # module names + rechecked_modules = None # type: Optional[Set[str]] # module names while i < len(p) and p[i].id not in ['out', 'case']: if p[i].id == 'file': # Record an extra file needed for the test case. @@ -68,12 +69,25 @@ def parse_test_cases( stale_modules = set() else: stale_modules = {item.strip() for item in p[i].arg.split(',')} + elif p[i].id == 'rechecked': + if p[i].arg is None: + rechecked_modules = set() + else: + rechecked_modules = {item.strip() for item in p[i].arg.split(',')} else: raise ValueError( 'Invalid section header {} in {} at line {}'.format( p[i].id, path, p[i].line)) i += 1 + if rechecked_modules is None: + # If the set of rechecked modules isn't specified, make it the same as the set of + # modules with a stale public interface. + rechecked_modules = stale_modules + if stale_modules is not None and not stale_modules.issubset(rechecked_modules): + raise ValueError( + 'Stale modules must be a subset of rechecked modules ({})'.format(path)) + tcout = [] # type: List[str] if i < len(p) and p[i].id == 'out': tcout = p[i].data @@ -90,7 +104,7 @@ def parse_test_cases( lastline = p[i].line if i < len(p) else p[i - 1].line + 9999 tc = DataDrivenTestCase(p[i0].arg, input, tcout, path, p[i0].line, lastline, perform, - files, stale_modules) + files, stale_modules, rechecked_modules) out.append(tc) if not ok: raise ValueError( @@ -116,7 +130,7 @@ class DataDrivenTestCase(TestCase): clean_up = None # type: List[Tuple[bool, str]] def __init__(self, name, input, output, file, line, lastline, - perform, files, expected_stale_modules): + perform, files, expected_stale_modules, expected_rechecked_modules): super().__init__(name) self.input = input self.output = output @@ -126,6 +140,7 @@ def __init__(self, name, input, output, file, line, lastline, self.perform = perform self.files = files self.expected_stale_modules = expected_stale_modules + self.expected_rechecked_modules = expected_rechecked_modules def set_up(self) -> None: super().set_up() From 73c22c362cdc54af3c9ac3ac4d47fc90e9f16340 Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Thu, 11 Aug 2016 17:00:38 -0700 Subject: [PATCH 04/17] Add more tests for incremental --- test-data/unit/check-incremental.test | 133 +++++++++++++++++++++++--- 1 file changed, 119 insertions(+), 14 deletions(-) diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test index cf058215fb38..447f18a35c14 100644 --- a/test-data/unit/check-incremental.test +++ b/test-data/unit/check-incremental.test @@ -146,6 +146,34 @@ def func3() -> int: return 2 [stale mod1, mod2, mod3] [out] +[case testIncrementalInternalFunctionDefinitionChange] +import mod1 + +[file mod1.py] +import mod2 +def accepts_int(a: int) -> int: return a +accepts_int(mod2.foo()) + +[file mod2.py] +def foo() -> int: + def inner() -> int: + return 42 + return inner() + +[file mod2.py.next] +def foo() -> int: + def inner2() -> str: + return "foo" + return inner2() + +[rechecked mod2] +[stale] +[out] +tmp/mod1.py:1: note: In module imported here, +main:1: note: ... from here: +tmp/mod2.py: note: In function "foo": +tmp/mod2.py:4: error: Incompatible return value type (got "str", expected "int") + [case testIncrementalInternalScramble] import mod1 @@ -226,6 +254,8 @@ import mod1 [file mod1.py] from mod2 import A +def accepts_int(a: int) -> None: pass +accepts_int(A) [file mod2.py] from mod3 import B @@ -244,6 +274,35 @@ C = "A" [rechecked mod1, mod2, mod3, mod4] [stale mod1, mod2, mod3, mod4] [out] +main:1: note: In module imported here: +tmp/mod1.py:3: error: Argument 1 to "accepts_int" has incompatible type "str"; expected "int" + +[case testIncrementalBrokenCascade] +import mod1 + +[file mod1.py] +import mod2 +def accept_int(a: int) -> int: return a +accept_int(mod2.mod3.mod4.const) + +[file mod2.py] +import mod3 + +[file mod3.py] +import mod4 + +[file mod4.py] +const = 3 + +[file mod3.py.next] +# Import to mod4 is gone! + +[rechecked mod1, mod2, mod3] +[stale mod1, mod2, mod3] +[builtins fixtures/module.py] +[out] +main:1: note: In module imported here: +tmp/mod1.py:3: error: "module" has no attribute "mod4" [case testIncrementalRemoteChange] import mod1 @@ -324,15 +383,14 @@ import mod1 import mod1_private [file mod1_private.py] -def foo() -> int: return 1 -def bar() -> int: return 2 -baz = 1 + foo() +def foobar() -> int: return 1 +def baz() -> int: return 2 +const = 1 + foobar() [file mod1_private.py.next] -def foo() -> int: return 1 -def bar() -> int: return 2 -baz = 1 + bar() -# Foo +def foobar() -> int: return 1 +def baz() -> int: return 2 +const = 1 + baz() [rechecked mod1_private] [stale] @@ -345,20 +403,42 @@ import mod1 import mod1_private [file mod1_private.py] -def foo() -> int: return 1 -def bar() -> int: return 2 -baz = 1 + foo() # type: int +def foobar() -> int: return 1 +def baz() -> int: return 2 +const = 1 + foobar() # type: int [file mod1_private.py.next] -def foo() -> int: return 1 -def bar() -> int: return 2 -baz = 1 + bar() # type: int -# Type annotations shouldn't make a difference, but just in case... +def foobar() -> int: return 1 +def baz() -> int: return 2 +const = 1 + baz() # type: int [rechecked mod1_private] [stale] [out] +[case testIncrementalSmall] +import mod1 + +[file mod1.py] +import mod1_private +def accepts_int(a: int) -> None: pass +accepts_int(mod1_private.some_func(12)) + +[file mod1_private.py] +def some_func(a: int) -> int: + return 1 + +[file mod1_private.py.next] +def some_func(a: int) -> str: + return "a" + +[rechecked mod1, mod1_private] +[stale mod1, mod1_private] +[builtins fixtures/ops.py] +[out] +main:1: note: In module imported here: +tmp/mod1.py:3: error: Argument 1 to "accepts_int" has incompatible type "str"; expected "int" + [case testIncrementalWithDecorators] import mod1 @@ -438,6 +518,31 @@ class Foo: [stale mod1, mod2] [out] +[case testIncrementalCheckingChangingFields] +import mod1 + +[file mod1.py] +import mod2 +def accept_int(a: int) -> int: return a +f = mod2.Foo() +accept_int(f.A) + +[file mod2.py] +class Foo: + def __init__(self) -> None: + self.A = 3 + +[file mod2.py.next] +class Foo: + def __init__(self) -> None: + self.A = "hello" + +[rechecked mod1, mod2] +[stale mod1, mod2] +[out] +main:1: note: In module imported here: +tmp/mod1.py:4: error: Argument 1 to "accept_int" has incompatible type "str"; expected "int" + [case testIncrementalNestedClassDefinition] import mod1 From 961e19beab834994ea430ff2c30f6f490375b7a0 Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Thu, 11 Aug 2016 17:17:47 -0700 Subject: [PATCH 05/17] Make incremental checks handle remote error Previously, the incremental checks did not correctly handle the case where... 1. Module "A" imports and uses module "B" 2. Incremental mode is successfully run 3. Module "B" is changed such that module "B" typechecks, but module "A" now throws an error 4. Incremental mode is run again What happens in this case is that the cache files for "A" and "B" are produced in the first run, and only the cache files for "B" is updated in the second run. Previously, the checks made the assumption that if a file has an error, it was due to a change within that file. This is not actually always the case. --- mypy/test/testcheck.py | 8 +++++++- test-data/unit/check-incremental.test | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/mypy/test/testcheck.py b/mypy/test/testcheck.py index 2f4701958e5c..c444835d5da1 100644 --- a/mypy/test/testcheck.py +++ b/mypy/test/testcheck.py @@ -191,11 +191,17 @@ def verify_cache(self, module_data: List[Tuple[str, str, str]], a: List[str], # NOTE: When A imports B and there's an error in B, the cache # data for B is invalidated, but the cache data for A remains. # However build.process_graphs() will ignore A's cache data. + # + # Also note that when A imports B, and there's an error in A + # _due to a valid change in B_, the cache data for B will be + # invalidated and updated, but the old cache data for A will + # remain unchanged. As before, build.process_graphs() will + # ignore A's (old) cache data. error_paths = self.find_error_paths(a) modules = self.find_module_files() modules.update({module_name: path for module_name, path, text in module_data}) missing_paths = self.find_missing_cache_files(modules, manager) - if missing_paths != error_paths: + if not missing_paths.issubset(error_paths): raise AssertionFailure("cache data discrepancy %s != %s" % (missing_paths, error_paths)) diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test index 447f18a35c14..84968b3f2b6c 100644 --- a/test-data/unit/check-incremental.test +++ b/test-data/unit/check-incremental.test @@ -760,6 +760,7 @@ def bar(a: str) -> None: pass bar(3) +[rechecked m] [stale m] [out] main:1: note: In module imported here: @@ -834,3 +835,20 @@ val = "foo" [out] tmp/c/submodule.py:2: error: Incompatible types in assignment (expression has type "str", variable has type "int") tmp/main.py:7: error: "C" has no attribute "foo" + +[case testIncrementalRemoteError] +import m +m.C().foo().bar() # E: "A" has no attribute "bar" +[file m.py] +import n +class C: + def foo(self) -> n.A: pass +[file n.py] +class A: + def bar(self): pass +[file n.py.next] +class A: + pass +[rechecked m, n] +[stale m, n] +[out] From d64a4953d4a6f207d7e92255ee3dcae141725e8a Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Thu, 11 Aug 2016 17:36:46 -0700 Subject: [PATCH 06/17] Modify write_cache to safe interface hash --- mypy/build.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 54268ad98626..b8f0a1fcb46b 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -13,6 +13,7 @@ import binascii import collections import contextlib +import hashlib import json import os import os.path @@ -783,10 +784,14 @@ def random_string() -> str: return binascii.hexlify(os.urandom(8)).decode('ascii') +def compute_md5_hash(text: str) -> str: + return hashlib.md5(text.encode('utf-8')).hexdigest() + + def write_cache(id: str, path: str, tree: MypyFile, dependencies: List[str], suppressed: List[str], child_modules: List[str], dep_prios: List[int], - manager: BuildManager) -> None: + manager: BuildManager) -> str: """Write cache files for a module. Args: @@ -815,7 +820,16 @@ def write_cache(id: str, path: str, tree: MypyFile, data_json_tmp = data_json + nonce meta_json_tmp = meta_json + nonce with open(data_json_tmp, 'w') as f: - json.dump(data, f, indent=2, sort_keys=True) + # TODO: The cache writing code currently assumes the data and meta + # file are always written together. However, with the new changes to + # incremental mode, we could instead write only an updated meta file + # and leave the data file alone when the public interface is unchanged. + # + # It might be cleaner to do this as part of a larger effort to speed up + # the serialization logic, however. + data_str = json.dumps(data, indent=2, sort_keys=True) + interface_hash = compute_md5_hash(data_str) + f.write(data_str) f.write('\n') data_mtime = os.path.getmtime(data_json_tmp) meta = {'id': id, @@ -828,6 +842,7 @@ def write_cache(id: str, path: str, tree: MypyFile, 'child_modules': child_modules, 'options': select_options_affecting_cache(manager.options), 'dep_prios': dep_prios, + 'interface_hash': interface_hash, 'version_id': manager.version_id, } with open(meta_json_tmp, 'w') as f: @@ -835,6 +850,7 @@ def write_cache(id: str, path: str, tree: MypyFile, f.write('\n') os.replace(data_json_tmp, data_json) os.replace(meta_json_tmp, meta_json) + return interface_hash """Dependency manager. From bc93aca25bd79c016f536deb0a1a3fe1b62d8418 Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Thu, 11 Aug 2016 17:38:00 -0700 Subject: [PATCH 07/17] Load incremental hash when finding cache data --- mypy/build.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index b8f0a1fcb46b..36a75552a6b4 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -291,6 +291,7 @@ def default_lib_path(data_dir: str, pyversion: Tuple[int, int]) -> List[str]: ('child_modules', List[str]), # all submodules of the given module ('options', Optional[Dict[str, bool]]), # build options ('dep_prios', List[int]), + ('interface_hash', str), # hash representing the public interface ('version_id', str), # mypy version for cache invalidation ]) # NOTE: dependencies + suppressed == all reachable imports; @@ -729,6 +730,7 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache meta.get('child_modules', []), meta.get('options'), meta.get('dep_prios', []), + meta.get('interface_hash', 'missing'), meta.get('version_id'), ) if (m.id != id or m.path != path or @@ -1037,6 +1039,9 @@ class State: # If caller_state is set, the line number in the caller where the import occurred caller_line = 0 + # Contains a hash of the public interface in incremental mode + interface_hash = "never_set" # type: str + def __init__(self, id: Optional[str], path: Optional[str], @@ -1378,10 +1383,17 @@ def type_check(self) -> None: def write_cache(self) -> None: if self.path and self.manager.options.incremental and not self.manager.errors.is_errors(): dep_prios = [self.priorities.get(dep, PRI_HIGH) for dep in self.dependencies] - write_cache(self.id, self.path, self.tree, - list(self.dependencies), list(self.suppressed), list(self.child_modules), - dep_prios, - self.manager) + new_interface_hash = write_cache( + self.id, self.path, self.tree, + list(self.dependencies), list(self.suppressed), list(self.child_modules), + dep_prios, + self.manager) + if new_interface_hash == self.interface_hash: + self.manager.log("Cached module {} has same interface".format(self.id)) + else: + self.manager.log("Cached module {} has changed interface".format(self.id)) + self.mark_interface_stale() + self.interface_hash = new_interface_hash def dispatch(sources: List[BuildSource], manager: BuildManager) -> None: From 9ff0d407851f227f3d9bfae87614343179860bb6 Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Thu, 11 Aug 2016 17:39:29 -0700 Subject: [PATCH 08/17] Refactor cache load so we can see the hash earlier --- mypy/build.py | 22 ++++++++++++++++------ mypy/test/testcheck.py | 2 +- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 36a75552a6b4..a997dfef48ea 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -753,20 +753,27 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache manager.trace('Metadata abandoned for {}: options differ'.format(id)) return None + return m + + +def is_meta_fresh(meta: CacheMeta, path: str, manager: BuildManager) -> bool: + if meta is None: + return False + # TODO: Share stat() outcome with find_module() st = os.stat(path) # TODO: Errors - if st.st_mtime != m.mtime or st.st_size != m.size: + if st.st_mtime != meta.mtime or st.st_size != meta.size: manager.log('Metadata abandoned for {}: file {} is modified'.format(id, path)) return None # It's a match on (id, path, mtime, size). # Check data_json; assume if its mtime matches it's good. # TODO: stat() errors - if os.path.getmtime(data_json) != m.data_mtime: + if os.path.getmtime(meta.data_json) != meta.data_mtime: manager.log('Metadata abandoned for {}: data cache is modified'.format(id)) - return None - manager.log('Found {} {} (metadata is fresh)'.format(id, meta_json)) - return m + return False + manager.log('Found {} {} (metadata is fresh)'.format(id, meta.data_json)) + return True def select_options_affecting_cache(options: Options) -> Mapping[str, bool]: @@ -1121,8 +1128,10 @@ def __init__(self, if path and source is None and manager.options.incremental: self.meta = find_cache_meta(self.id, self.path, manager) # TODO: Get mtime if not cached. + if self.meta is not None: + self.interface_hash = self.meta.interface_hash self.add_ancestors() - if self.meta: + if is_meta_fresh(self.meta, self.path, manager): # Make copies, since we may modify these and want to # compare them to the originals later. self.dependencies = list(self.meta.dependencies) @@ -1134,6 +1143,7 @@ def __init__(self, self.dep_line_map = {} else: # Parse the file (and then some) to get the dependencies. + self.meta = None self.parse_file() self.suppressed = [] self.child_modules = set() diff --git a/mypy/test/testcheck.py b/mypy/test/testcheck.py index c444835d5da1..bc7ee27461fd 100644 --- a/mypy/test/testcheck.py +++ b/mypy/test/testcheck.py @@ -237,7 +237,7 @@ def find_missing_cache_files(self, modules: Dict[str, str], missing = {} for id, path in modules.items(): meta = build.find_cache_meta(id, path, manager) - if meta is None: + if not build.is_meta_fresh(meta, path, manager): missing[id] = path return set(missing.values()) From 0defbb836006987fa672605f63aa52959de063a3 Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Thu, 11 Aug 2016 17:45:28 -0700 Subject: [PATCH 09/17] Modify incremental to use interface checks --- mypy/build.py | 47 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index a997dfef48ea..32c0d5b073ce 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -353,6 +353,7 @@ def __init__(self, data_dir: str, self.type_checker = TypeChecker(self.errors, self.modules, options=options) self.missing_modules = set() # type: Set[str] self.stale_modules = set() # type: Set[str] + self.rechecked_modules = set() # type: Set[str] def all_imported_modules_in_file(self, file: MypyFile) -> List[Tuple[int, str, int]]: @@ -1046,6 +1047,9 @@ class State: # If caller_state is set, the line number in the caller where the import occurred caller_line = 0 + # If True, indicate that the public interface of this module is unchanged + externally_same = True + # Contains a hash of the public interface in incremental mode interface_hash = "never_set" # type: str @@ -1194,16 +1198,25 @@ def is_fresh(self) -> bool: # suppression by --silent-imports. However when a suppressed # dependency is added back we find out later in the process. return (self.meta is not None + and self.is_interface_fresh() and self.dependencies == self.meta.dependencies and self.child_modules == set(self.meta.child_modules)) + def is_interface_fresh(self) -> bool: + return self.externally_same + def has_new_submodules(self) -> bool: """Return if this module has new submodules after being loaded from a warm cache.""" return self.meta is not None and self.child_modules != set(self.meta.child_modules) - def mark_stale(self) -> None: - """Throw away the cache data for this file, marking it as stale.""" + def mark_as_rechecked(self) -> None: + """Marks this module as having been fully re-analyzed by the type-checker.""" + self.manager.rechecked_modules.add(self.id) + + def mark_interface_stale(self) -> None: + """Marks this module as having a stale public interface, and discards the cache data.""" self.meta = None + self.externally_same = False self.manager.stale_modules.add(self.id) def check_blockers(self) -> None: @@ -1472,6 +1485,7 @@ def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph: for id, g in graph.items(): if g.has_new_submodules(): g.parse_file() + g.mark_interface_stale() return graph @@ -1510,7 +1524,7 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: for id in scc: deps.update(graph[id].dependencies) deps -= ascc - stale_deps = {id for id in deps if not graph[id].is_fresh()} + stale_deps = {id for id in deps if not graph[id].is_interface_fresh()} fresh = fresh and not stale_deps undeps = set() if fresh: @@ -1526,9 +1540,10 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: # All cache files are fresh. Check that no dependency's # cache file is newer than any scc node's cache file. oldest_in_scc = min(graph[id].meta.data_mtime for id in scc) - newest_in_deps = 0 if not deps else max(graph[dep].meta.data_mtime for dep in deps) + viable = {id for id in deps if not graph[id].is_interface_fresh()} + newest_in_deps = 0 if not viable else max(graph[dep].meta.data_mtime for dep in viable) if manager.options.verbosity >= 3: # Dump all mtimes for extreme debugging. - all_ids = sorted(ascc | deps, key=lambda id: graph[id].meta.data_mtime) + all_ids = sorted(ascc | viable, key=lambda id: graph[id].meta.data_mtime) for id in all_ids: if id in scc: if graph[id].meta.data_mtime < newest_in_deps: @@ -1566,6 +1581,25 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: else: process_stale_scc(graph, scc) + # TODO: This is a workaround to get around the "chaining imports" problem + # with the interface checks. + # + # That is, if we have a file named `module_a.py` which does: + # + # import module_b + # module_b.module_c.foo(3) + # + # ...and if the type signature of `module_c.foo(...)` were to change, + # module_a_ would not be rechecked since the interface of `module_b` + # would not be considered changed. + # + # As a workaround, this check will force a module's interface to be + # considered stale if anything it imports also has a stale interface + # to make sure these changes are caught and propagated. + if len(stale_deps) > 0: + for id in scc: + graph[id].mark_interface_stale() + def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_ALL) -> List[str]: """Come up with the ideal processing order within an SCC. @@ -1628,8 +1662,6 @@ def process_fresh_scc(graph: Graph, scc: List[str]) -> None: def process_stale_scc(graph: Graph, scc: List[str]) -> None: """Process the modules in one SCC from source code.""" - for id in scc: - graph[id].mark_stale() for id in scc: # We may already have parsed the module, or not. # If the former, parse_file() is a no-op. @@ -1644,6 +1676,7 @@ def process_stale_scc(graph: Graph, scc: List[str]) -> None: for id in scc: graph[id].type_check() graph[id].write_cache() + graph[id].mark_as_rechecked() def sorted_components(graph: Graph, From 559ad00dbfde9d870d56b7d973047cd608c92ad2 Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Fri, 12 Aug 2016 13:00:56 -0700 Subject: [PATCH 10/17] Fix minor bug with verbose mode --- mypy/build.py | 4 ++-- mypy/test/testcheck.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 32c0d5b073ce..f75d1e884ceb 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -757,7 +757,7 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache return m -def is_meta_fresh(meta: CacheMeta, path: str, manager: BuildManager) -> bool: +def is_meta_fresh(meta: CacheMeta, id: str, path: str, manager: BuildManager) -> bool: if meta is None: return False @@ -1135,7 +1135,7 @@ def __init__(self, if self.meta is not None: self.interface_hash = self.meta.interface_hash self.add_ancestors() - if is_meta_fresh(self.meta, self.path, manager): + if is_meta_fresh(self.meta, self.id, self.path, manager): # Make copies, since we may modify these and want to # compare them to the originals later. self.dependencies = list(self.meta.dependencies) diff --git a/mypy/test/testcheck.py b/mypy/test/testcheck.py index bc7ee27461fd..8f24897879de 100644 --- a/mypy/test/testcheck.py +++ b/mypy/test/testcheck.py @@ -237,7 +237,7 @@ def find_missing_cache_files(self, modules: Dict[str, str], missing = {} for id, path in modules.items(): meta = build.find_cache_meta(id, path, manager) - if not build.is_meta_fresh(meta, path, manager): + if not build.is_meta_fresh(meta, id, path, manager): missing[id] = path return set(missing.values()) From 6acd57b82b4570995deaf07b60a0f8c14e8f5c22 Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Fri, 12 Aug 2016 13:01:32 -0700 Subject: [PATCH 11/17] Add extra test case for sneaky imports --- test-data/unit/check-incremental.test | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test index 84968b3f2b6c..b0b2196e258a 100644 --- a/test-data/unit/check-incremental.test +++ b/test-data/unit/check-incremental.test @@ -852,3 +852,30 @@ class A: [rechecked m, n] [stale m, n] [out] + +[case testIncrementalReplacingImports] +import good, bad, client + +[file good.py] +def foo(a: int) -> None: pass + +[file bad.py] +def foo(a: str) -> None: pass + +[file client.py] +import good +import bad +from good import foo +foo(3) + +[file client.py.next] +import good +import bad +from bad import foo +foo(3) + +[rechecked client] +[stale] +[out] +main:1: note: In module imported here: +tmp/client.py:4: error: Argument 1 to "foo" has incompatible type "int"; expected "str" From 644c785cc5b9bfa19ae611800260e930c50e44ca Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Mon, 15 Aug 2016 17:03:04 -0700 Subject: [PATCH 12/17] Fix tests to work with rebase --- test-data/unit/check-incremental.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test index b0b2196e258a..e8c47bd5ea08 100644 --- a/test-data/unit/check-incremental.test +++ b/test-data/unit/check-incremental.test @@ -299,7 +299,7 @@ const = 3 [rechecked mod1, mod2, mod3] [stale mod1, mod2, mod3] -[builtins fixtures/module.py] +[builtins fixtures/module.pyi] [out] main:1: note: In module imported here: tmp/mod1.py:3: error: "module" has no attribute "mod4" @@ -434,7 +434,7 @@ def some_func(a: int) -> str: [rechecked mod1, mod1_private] [stale mod1, mod1_private] -[builtins fixtures/ops.py] +[builtins fixtures/ops.pyi] [out] main:1: note: In module imported here: tmp/mod1.py:3: error: Argument 1 to "accepts_int" has incompatible type "str"; expected "int" From 1bdbd1ff6e5483ca3570867db4bd4e9b98237793 Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Mon, 15 Aug 2016 17:03:47 -0700 Subject: [PATCH 13/17] Remove debugging code and make minor tweaks --- mypy/build.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index f75d1e884ceb..fde5677a5ef2 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -731,7 +731,7 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache meta.get('child_modules', []), meta.get('options'), meta.get('dep_prios', []), - meta.get('interface_hash', 'missing'), + meta.get('interface_hash', ''), meta.get('version_id'), ) if (m.id != id or m.path != path or @@ -1051,7 +1051,7 @@ class State: externally_same = True # Contains a hash of the public interface in incremental mode - interface_hash = "never_set" # type: str + interface_hash = "" # type: str def __init__(self, id: Optional[str], @@ -1594,8 +1594,8 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: # would not be considered changed. # # As a workaround, this check will force a module's interface to be - # considered stale if anything it imports also has a stale interface - # to make sure these changes are caught and propagated. + # considered stale if anything it imports has a stale interface, + # which ensures these changes are caught and propagated. if len(stale_deps) > 0: for id in scc: graph[id].mark_interface_stale() From dc7ff9573c6adde74a6779e979d049bd284e016a Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Mon, 15 Aug 2016 17:03:55 -0700 Subject: [PATCH 14/17] Refactor and optimize cache writing code --- mypy/build.py | 61 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 22 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index fde5677a5ef2..f4770ddb8d8f 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -801,7 +801,7 @@ def compute_md5_hash(text: str) -> str: def write_cache(id: str, path: str, tree: MypyFile, dependencies: List[str], suppressed: List[str], child_modules: List[str], dep_prios: List[int], - manager: BuildManager) -> str: + old_interface_hash: str, manager: BuildManager) -> str: """Write cache files for a module. Args: @@ -811,37 +811,52 @@ def write_cache(id: str, path: str, tree: MypyFile, dependencies: module IDs on which this module depends suppressed: module IDs which were suppressed as dependencies dep_prios: priorities (parallel array to dependencies) + old_interface_hash: the hash from the previous version of the data cache file manager: the build manager (for pyversion, log/trace) + + Return: + The new interface hash based on the serialized tree """ + # Obtain file paths path = os.path.abspath(path) - manager.trace('Dumping {} {}'.format(id, path)) - st = os.stat(path) # TODO: Errors - mtime = st.st_mtime - size = st.st_size meta_json, data_json = get_cache_names( id, path, manager.options.cache_dir, manager.options.python_version) - manager.log('Writing {} {} {}'.format(id, meta_json, data_json)) - data = tree.serialize() + manager.log('Writing {} {} {} {}'.format(id, path, meta_json, data_json)) + + # Make sure directory for cache files exists parent = os.path.dirname(data_json) if not os.path.isdir(parent): os.makedirs(parent) assert os.path.dirname(meta_json) == parent + + # Construct temp file names nonce = '.' + random_string() data_json_tmp = data_json + nonce meta_json_tmp = meta_json + nonce - with open(data_json_tmp, 'w') as f: - # TODO: The cache writing code currently assumes the data and meta - # file are always written together. However, with the new changes to - # incremental mode, we could instead write only an updated meta file - # and leave the data file alone when the public interface is unchanged. - # - # It might be cleaner to do this as part of a larger effort to speed up - # the serialization logic, however. - data_str = json.dumps(data, indent=2, sort_keys=True) - interface_hash = compute_md5_hash(data_str) - f.write(data_str) - f.write('\n') - data_mtime = os.path.getmtime(data_json_tmp) + + # Serialize data and analyze interface + data = tree.serialize() + data_str = json.dumps(data, indent=2, sort_keys=True) + interface_hash = compute_md5_hash(data_str) + + # Write data cache file, if applicable + if old_interface_hash == interface_hash: + # If the interface is unchanged, the cached data is guaranteed + # to be equivalent, and we only need to update the metadata. + data_mtime = os.path.getmtime(data_json) + manager.trace("Interface for {} is unchanged".format(id)) + else: + with open(data_json_tmp, 'w') as f: + f.write(data_str) + f.write('\n') + data_mtime = os.path.getmtime(data_json_tmp) + os.replace(data_json_tmp, data_json) + manager.trace("Interface for {} has changed".format(id)) + + # Obtain and set up metadata + st = os.stat(path) # TODO: Handle errors + mtime = st.st_mtime + size = st.st_size meta = {'id': id, 'path': path, 'mtime': mtime, @@ -855,11 +870,13 @@ def write_cache(id: str, path: str, tree: MypyFile, 'interface_hash': interface_hash, 'version_id': manager.version_id, } + + # Write meta cache file with open(meta_json_tmp, 'w') as f: json.dump(meta, f, sort_keys=True) f.write('\n') - os.replace(data_json_tmp, data_json) os.replace(meta_json_tmp, meta_json) + return interface_hash @@ -1409,7 +1426,7 @@ def write_cache(self) -> None: new_interface_hash = write_cache( self.id, self.path, self.tree, list(self.dependencies), list(self.suppressed), list(self.child_modules), - dep_prios, + dep_prios, self.interface_hash, self.manager) if new_interface_hash == self.interface_hash: self.manager.log("Cached module {} has same interface".format(self.id)) From 8754d7ec2c143cf93745e51d07fa46d2680daf2e Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Mon, 15 Aug 2016 17:17:54 -0700 Subject: [PATCH 15/17] Update test case to work with new change --- test-data/unit/check-incremental.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test index e8c47bd5ea08..222284cdacf7 100644 --- a/test-data/unit/check-incremental.test +++ b/test-data/unit/check-incremental.test @@ -761,7 +761,7 @@ def bar(a: str) -> None: pass bar(3) [rechecked m] -[stale m] +[stale] [out] main:1: note: In module imported here: tmp/m.py:4: error: Argument 1 to "bar" has incompatible type "int"; expected "str" From cdd615089565b07093c25aad67009ec8645b4dbd Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Tue, 16 Aug 2016 11:00:23 -0700 Subject: [PATCH 16/17] Fix failing test --- test-data/unit/check-incremental.test | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test index 222284cdacf7..ab34d88b10f6 100644 --- a/test-data/unit/check-incremental.test +++ b/test-data/unit/check-incremental.test @@ -792,6 +792,7 @@ class Class: pass # empty [builtins fixtures/args.pyi] +[rechecked collections, main, package.subpackage.mod1] [stale collections, main, package.subpackage.mod1] [out] tmp/main.py: note: In function "handle": @@ -831,7 +832,8 @@ val = 3 # type: int val = "foo" [builtins fixtures/module_all.pyi] -[stale main, c, c.submodule] +[rechecked main, c, c.submodule] +[stale] [out] tmp/c/submodule.py:2: error: Incompatible types in assignment (expression has type "str", variable has type "int") tmp/main.py:7: error: "C" has no attribute "foo" From aaf0362432c14abf3aadade7d3c8cf619f35936f Mon Sep 17 00:00:00 2001 From: Michael Lee Date: Tue, 16 Aug 2016 12:07:22 -0700 Subject: [PATCH 17/17] Rename compute_md5_hash, explain why we don't use hash(...) --- mypy/build.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index f4770ddb8d8f..855bd552c608 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -794,7 +794,10 @@ def random_string() -> str: return binascii.hexlify(os.urandom(8)).decode('ascii') -def compute_md5_hash(text: str) -> str: +def compute_hash(text: str) -> str: + # We use md5 instead of the builtin hash(...) function because the output of hash(...) + # can differ between runs due to hash randomization (enabled by default in Python 3.3). + # See the note in https://docs.python.org/3/reference/datamodel.html#object.__hash__. return hashlib.md5(text.encode('utf-8')).hexdigest() @@ -837,7 +840,7 @@ def write_cache(id: str, path: str, tree: MypyFile, # Serialize data and analyze interface data = tree.serialize() data_str = json.dumps(data, indent=2, sort_keys=True) - interface_hash = compute_md5_hash(data_str) + interface_hash = compute_hash(data_str) # Write data cache file, if applicable if old_interface_hash == interface_hash: