13
13
import binascii
14
14
import collections
15
15
import contextlib
16
+ import hashlib
16
17
import json
17
18
import os
18
19
import os .path
@@ -290,6 +291,7 @@ def default_lib_path(data_dir: str, pyversion: Tuple[int, int]) -> List[str]:
290
291
('child_modules' , List [str ]), # all submodules of the given module
291
292
('options' , Optional [Dict [str , bool ]]), # build options
292
293
('dep_prios' , List [int ]),
294
+ ('interface_hash' , str ), # hash representing the public interface
293
295
('version_id' , str ), # mypy version for cache invalidation
294
296
])
295
297
# NOTE: dependencies + suppressed == all reachable imports;
@@ -351,6 +353,7 @@ def __init__(self, data_dir: str,
351
353
self .type_checker = TypeChecker (self .errors , self .modules , options = options )
352
354
self .missing_modules = set () # type: Set[str]
353
355
self .stale_modules = set () # type: Set[str]
356
+ self .rechecked_modules = set () # type: Set[str]
354
357
355
358
def all_imported_modules_in_file (self ,
356
359
file : MypyFile ) -> List [Tuple [int , str , int ]]:
@@ -728,6 +731,7 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache
728
731
meta .get ('child_modules' , []),
729
732
meta .get ('options' ),
730
733
meta .get ('dep_prios' , []),
734
+ meta .get ('interface_hash' , '' ),
731
735
meta .get ('version_id' ),
732
736
)
733
737
if (m .id != id or m .path != path or
@@ -750,20 +754,27 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache
750
754
manager .trace ('Metadata abandoned for {}: options differ' .format (id ))
751
755
return None
752
756
757
+ return m
758
+
759
+
760
def is_meta_fresh(meta: Optional[CacheMeta], id: str, path: str, manager: BuildManager) -> bool:
    """Report whether cached metadata still matches the files on disk.

    Args:
      meta: cache metadata loaded earlier, or None when there is none
      id: module ID (only used in log messages)
      path: path of the module's source file
      manager: the build manager (for logging)

    Returns:
      True only when meta is not None, the source file's mtime and size
      equal meta.mtime and meta.size, and the mtime of meta.data_json
      equals meta.data_mtime.  False in every other case.

    Errors raised by os.stat() / os.path.getmtime() are not handled here
    and propagate to the caller (see the TODOs below).
    """
    if meta is None:
        return False

    # TODO: Share stat() outcome with find_module()
    st = os.stat(path)  # TODO: Errors
    if st.st_mtime != meta.mtime or st.st_size != meta.size:
        manager.log('Metadata abandoned for {}: file {} is modified'.format(id, path))
        # This exit used to return None; the function is declared to
        # return bool and every other exit already does.
        return False

    # It's a match on (id, path, mtime, size).
    # Check data_json; assume if its mtime matches it's good.
    # TODO: stat() errors
    if os.path.getmtime(meta.data_json) != meta.data_mtime:
        manager.log('Metadata abandoned for {}: data cache is modified'.format(id))
        return False
    manager.log('Found {} {} (metadata is fresh)'.format(id, meta.data_json))
    return True
767
778
768
779
769
780
def select_options_affecting_cache (options : Options ) -> Mapping [str , bool ]:
@@ -783,10 +794,17 @@ def random_string() -> str:
783
794
return binascii .hexlify (os .urandom (8 )).decode ('ascii' )
784
795
785
796
797
def compute_hash(text: str) -> str:
    """Return the hexadecimal MD5 digest of the UTF-8 encoding of text."""
    # md5 is used rather than the builtin hash(...) because the output of
    # hash(...) can differ between runs due to hash randomization (enabled
    # by default in Python 3.3).  See the note in
    # https://docs.python.org/3/reference/datamodel.html#object.__hash__.
    digest = hashlib.md5()
    digest.update(text.encode('utf-8'))
    return digest.hexdigest()
802
+
803
+
786
804
def write_cache (id : str , path : str , tree : MypyFile ,
787
805
dependencies : List [str ], suppressed : List [str ],
788
806
child_modules : List [str ], dep_prios : List [int ],
789
- manager : BuildManager ) -> None :
807
+ old_interface_hash : str , manager : BuildManager ) -> str :
790
808
"""Write cache files for a module.
791
809
792
810
Args:
@@ -796,28 +814,52 @@ def write_cache(id: str, path: str, tree: MypyFile,
796
814
dependencies: module IDs on which this module depends
797
815
suppressed: module IDs which were suppressed as dependencies
798
816
dep_prios: priorities (parallel array to dependencies)
817
+ old_interface_hash: the hash from the previous version of the data cache file
799
818
manager: the build manager (for pyversion, log/trace)
819
+
820
+ Return:
821
+ The new interface hash based on the serialized tree
800
822
"""
823
+ # Obtain file paths
801
824
path = os .path .abspath (path )
802
- manager .trace ('Dumping {} {}' .format (id , path ))
803
- st = os .stat (path ) # TODO: Errors
804
- mtime = st .st_mtime
805
- size = st .st_size
806
825
meta_json , data_json = get_cache_names (
807
826
id , path , manager .options .cache_dir , manager .options .python_version )
808
- manager .log ('Writing {} {} {}' .format (id , meta_json , data_json ))
809
- data = tree .serialize ()
827
+ manager .log ('Writing {} {} {} {}' .format (id , path , meta_json , data_json ))
828
+
829
+ # Make sure directory for cache files exists
810
830
parent = os .path .dirname (data_json )
811
831
if not os .path .isdir (parent ):
812
832
os .makedirs (parent )
813
833
assert os .path .dirname (meta_json ) == parent
834
+
835
+ # Construct temp file names
814
836
nonce = '.' + random_string ()
815
837
data_json_tmp = data_json + nonce
816
838
meta_json_tmp = meta_json + nonce
817
- with open (data_json_tmp , 'w' ) as f :
818
- json .dump (data , f , indent = 2 , sort_keys = True )
819
- f .write ('\n ' )
820
- data_mtime = os .path .getmtime (data_json_tmp )
839
+
840
+ # Serialize data and analyze interface
841
+ data = tree .serialize ()
842
+ data_str = json .dumps (data , indent = 2 , sort_keys = True )
843
+ interface_hash = compute_hash (data_str )
844
+
845
+ # Write data cache file, if applicable
846
+ if old_interface_hash == interface_hash :
847
+ # If the interface is unchanged, the cached data is guaranteed
848
+ # to be equivalent, and we only need to update the metadata.
849
+ data_mtime = os .path .getmtime (data_json )
850
+ manager .trace ("Interface for {} is unchanged" .format (id ))
851
+ else :
852
+ with open (data_json_tmp , 'w' ) as f :
853
+ f .write (data_str )
854
+ f .write ('\n ' )
855
+ data_mtime = os .path .getmtime (data_json_tmp )
856
+ os .replace (data_json_tmp , data_json )
857
+ manager .trace ("Interface for {} has changed" .format (id ))
858
+
859
+ # Obtain and set up metadata
860
+ st = os .stat (path ) # TODO: Handle errors
861
+ mtime = st .st_mtime
862
+ size = st .st_size
821
863
meta = {'id' : id ,
822
864
'path' : path ,
823
865
'mtime' : mtime ,
@@ -828,14 +870,18 @@ def write_cache(id: str, path: str, tree: MypyFile,
828
870
'child_modules' : child_modules ,
829
871
'options' : select_options_affecting_cache (manager .options ),
830
872
'dep_prios' : dep_prios ,
873
+ 'interface_hash' : interface_hash ,
831
874
'version_id' : manager .version_id ,
832
875
}
876
+
877
+ # Write meta cache file
833
878
with open (meta_json_tmp , 'w' ) as f :
834
879
json .dump (meta , f , sort_keys = True )
835
880
f .write ('\n ' )
836
- os .replace (data_json_tmp , data_json )
837
881
os .replace (meta_json_tmp , meta_json )
838
882
883
+ return interface_hash
884
+
839
885
840
886
"""Dependency manager.
841
887
@@ -1021,6 +1067,12 @@ class State:
1021
1067
# If caller_state is set, the line number in the caller where the import occurred
1022
1068
caller_line = 0
1023
1069
1070
+ # If True, indicates that the public interface of this module is unchanged
1071
+ externally_same = True
1072
+
1073
+ # Contains a hash of the public interface in incremental mode
1074
+ interface_hash = "" # type: str
1075
+
1024
1076
def __init__ (self ,
1025
1077
id : Optional [str ],
1026
1078
path : Optional [str ],
@@ -1100,8 +1152,10 @@ def __init__(self,
1100
1152
if path and source is None and manager .options .incremental :
1101
1153
self .meta = find_cache_meta (self .id , self .path , manager )
1102
1154
# TODO: Get mtime if not cached.
1155
+ if self .meta is not None :
1156
+ self .interface_hash = self .meta .interface_hash
1103
1157
self .add_ancestors ()
1104
- if self .meta :
1158
+ if is_meta_fresh ( self .meta , self . id , self . path , manager ) :
1105
1159
# Make copies, since we may modify these and want to
1106
1160
# compare them to the originals later.
1107
1161
self .dependencies = list (self .meta .dependencies )
@@ -1113,6 +1167,7 @@ def __init__(self,
1113
1167
self .dep_line_map = {}
1114
1168
else :
1115
1169
# Parse the file (and then some) to get the dependencies.
1170
+ self .meta = None
1116
1171
self .parse_file ()
1117
1172
self .suppressed = []
1118
1173
self .child_modules = set ()
@@ -1163,16 +1218,25 @@ def is_fresh(self) -> bool:
1163
1218
# suppression by --silent-imports. However when a suppressed
1164
1219
# dependency is added back we find out later in the process.
1165
1220
return (self .meta is not None
1221
+ and self .is_interface_fresh ()
1166
1222
and self .dependencies == self .meta .dependencies
1167
1223
and self .child_modules == set (self .meta .child_modules ))
1168
1224
1225
def is_interface_fresh(self) -> bool:
    """Return the current value of this state's externally_same flag."""
    unchanged = self.externally_same
    return unchanged
1227
+
1169
1228
def has_new_submodules(self) -> bool:
    """Tell whether child_modules differs from what the cached metadata recorded.

    Always False when no metadata is attached to this state.
    """
    if self.meta is None:
        return False
    recorded = set(self.meta.child_modules)
    return self.child_modules != recorded
1172
1231
1173
- def mark_stale (self ) -> None :
1174
- """Throw away the cache data for this file, marking it as stale."""
1232
def mark_as_rechecked(self) -> None:
    """Add this module's id to the manager's set of rechecked modules."""
    rechecked = self.manager.rechecked_modules
    rechecked.add(self.id)
1235
+
1236
def mark_interface_stale(self) -> None:
    """Flag this module's public interface as stale and drop its cache metadata.

    Clears self.meta, sets externally_same to False, and adds the module
    id to the manager's stale_modules set.
    """
    self.meta = None
    self.externally_same = False
    stale = self.manager.stale_modules
    stale.add(self.id)
1177
1241
1178
1242
def check_blockers (self ) -> None :
@@ -1362,10 +1426,17 @@ def type_check(self) -> None:
1362
1426
def write_cache(self) -> None:
    """Write this module's cache files and note whether its interface changed.

    Nothing happens unless the module has a path, incremental mode is on,
    and the error collector reports no errors.  Otherwise the module-level
    write_cache() is called with the previously known interface hash; when
    the hash it returns differs, the module is marked interface-stale and
    the new hash is stored on this state.
    """
    if (not self.path
            or not self.manager.options.incremental
            or self.manager.errors.is_errors()):
        return

    priorities = [self.priorities.get(dep, PRI_HIGH) for dep in self.dependencies]
    previous_hash = self.interface_hash
    current_hash = write_cache(
        self.id, self.path, self.tree,
        list(self.dependencies), list(self.suppressed), list(self.child_modules),
        priorities, previous_hash,
        self.manager)

    if current_hash != previous_hash:
        self.manager.log("Cached module {} has changed interface".format(self.id))
        self.mark_interface_stale()
        self.interface_hash = current_hash
    else:
        self.manager.log("Cached module {} has same interface".format(self.id))
1369
1440
1370
1441
1371
1442
def dispatch (sources : List [BuildSource ], manager : BuildManager ) -> None :
@@ -1434,6 +1505,7 @@ def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph:
1434
1505
for id , g in graph .items ():
1435
1506
if g .has_new_submodules ():
1436
1507
g .parse_file ()
1508
+ g .mark_interface_stale ()
1437
1509
return graph
1438
1510
1439
1511
@@ -1472,7 +1544,7 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
1472
1544
for id in scc :
1473
1545
deps .update (graph [id ].dependencies )
1474
1546
deps -= ascc
1475
- stale_deps = {id for id in deps if not graph [id ].is_fresh ()}
1547
+ stale_deps = {id for id in deps if not graph [id ].is_interface_fresh ()}
1476
1548
fresh = fresh and not stale_deps
1477
1549
undeps = set ()
1478
1550
if fresh :
@@ -1488,9 +1560,10 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
1488
1560
# All cache files are fresh. Check that no dependency's
1489
1561
# cache file is newer than any scc node's cache file.
1490
1562
oldest_in_scc = min (graph [id ].meta .data_mtime for id in scc )
1491
- newest_in_deps = 0 if not deps else max (graph [dep ].meta .data_mtime for dep in deps )
1563
+ viable = {id for id in deps if not graph [id ].is_interface_fresh ()}
1564
+ newest_in_deps = 0 if not viable else max (graph [dep ].meta .data_mtime for dep in viable )
1492
1565
if manager .options .verbosity >= 3 : # Dump all mtimes for extreme debugging.
1493
- all_ids = sorted (ascc | deps , key = lambda id : graph [id ].meta .data_mtime )
1566
+ all_ids = sorted (ascc | viable , key = lambda id : graph [id ].meta .data_mtime )
1494
1567
for id in all_ids :
1495
1568
if id in scc :
1496
1569
if graph [id ].meta .data_mtime < newest_in_deps :
@@ -1528,6 +1601,25 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
1528
1601
else :
1529
1602
process_stale_scc (graph , scc )
1530
1603
1604
+ # TODO: This is a workaround to get around the "chaining imports" problem
1605
+ # with the interface checks.
1606
+ #
1607
+ # That is, if we have a file named `module_a.py` which does:
1608
+ #
1609
+ # import module_b
1610
+ # module_b.module_c.foo(3)
1611
+ #
1612
+ # ...and if the type signature of `module_c.foo(...)` were to change,
1613
+ # `module_a` would not be rechecked since the interface of `module_b`
1614
+ # would not be considered changed.
1615
+ #
1616
+ # As a workaround, this check will force a module's interface to be
1617
+ # considered stale if anything it imports has a stale interface,
1618
+ # which ensures these changes are caught and propagated.
1619
+ if len (stale_deps ) > 0 :
1620
+ for id in scc :
1621
+ graph [id ].mark_interface_stale ()
1622
+
1531
1623
1532
1624
def order_ascc (graph : Graph , ascc : AbstractSet [str ], pri_max : int = PRI_ALL ) -> List [str ]:
1533
1625
"""Come up with the ideal processing order within an SCC.
@@ -1590,8 +1682,6 @@ def process_fresh_scc(graph: Graph, scc: List[str]) -> None:
1590
1682
1591
1683
def process_stale_scc (graph : Graph , scc : List [str ]) -> None :
1592
1684
"""Process the modules in one SCC from source code."""
1593
- for id in scc :
1594
- graph [id ].mark_stale ()
1595
1685
for id in scc :
1596
1686
# We may already have parsed the module, or not.
1597
1687
# If the former, parse_file() is a no-op.
@@ -1606,6 +1696,7 @@ def process_stale_scc(graph: Graph, scc: List[str]) -> None:
1606
1696
for id in scc :
1607
1697
graph [id ].type_check ()
1608
1698
graph [id ].write_cache ()
1699
+ graph [id ].mark_as_rechecked ()
1609
1700
1610
1701
1611
1702
def sorted_components (graph : Graph ,
0 commit comments