Skip to content

Commit 0f83375

Browse files
gvanrossumGuido van Rossum
authored and
Guido van Rossum
committed
Fix bug in cache updates and improve cache logging (#3708)
This started out as an attempt to make the logging messages around cache freshness/updates more useful, but I ended up discovering a bug in the code that updates the cache meta file when the mtime or path of the file has changes but size and source hash are the same. It wrote a list instead of a dict! This is a bug in a feature announced in 0.520 so should be included in 0.521 if/when we release it. I also decided to ignore debug_cache when comparing options (since it only affects the JSON style -- it's still a per-file option but changing it no longer invalidates the cache) And I made the --skip-version-check option strictly ignore the platform (since it's undocumented I can make it do what I want, and this is more useful for my one use case).
1 parent 1cdae38 commit 0f83375

File tree

2 files changed

+58
-25
lines changed

2 files changed

+58
-25
lines changed

mypy/build.py

Lines changed: 56 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -860,16 +860,17 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache
860860
"""
861861
# TODO: May need to take more build options into account
862862
meta_json, data_json = get_cache_names(id, path, manager)
863-
manager.trace('Looking for {} {}'.format(id, data_json))
863+
manager.trace('Looking for {} at {}'.format(id, meta_json))
864864
if not os.path.exists(meta_json):
865-
manager.trace('Could not load cache for {}: could not find {}'.format(id, meta_json))
865+
manager.log('Could not load cache for {}: could not find {}'.format(id, meta_json))
866866
return None
867867
with open(meta_json, 'r') as f:
868868
meta_str = f.read()
869869
manager.trace('Meta {} {}'.format(id, meta_str.rstrip()))
870870
meta = json.loads(meta_str) # TODO: Errors
871871
if not isinstance(meta, dict):
872-
manager.trace('Could not load cache for {}: meta cache is not a dict'.format(id))
872+
manager.log('Could not load cache for {}: meta cache is not a dict: {}'
873+
.format(id, repr(meta)))
873874
return None
874875
m = CacheMeta(
875876
meta.get('id'),
@@ -891,27 +892,36 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache
891892
if (m.id != id or
892893
m.mtime is None or m.size is None or
893894
m.dependencies is None or m.data_mtime is None):
894-
manager.trace('Metadata abandoned for {}: attributes are missing'.format(id))
895+
manager.log('Metadata abandoned for {}: attributes are missing'.format(id))
895896
return None
896897

897898
# Ignore cache if generated by an older mypy version.
898899
if ((m.version_id != manager.version_id and not manager.options.skip_version_check)
899900
or m.options is None
900901
or len(m.dependencies) != len(m.dep_prios)):
901-
manager.trace('Metadata abandoned for {}: new attributes are missing'.format(id))
902+
manager.log('Metadata abandoned for {}: new attributes are missing'.format(id))
902903
return None
903904

904905
# Ignore cache if (relevant) options aren't the same.
906+
# Note that it's fine to mutilate cached_options since it's only used here.
905907
cached_options = m.options
906908
current_options = manager.options.clone_for_module(id).select_options_affecting_cache()
907909
if manager.options.quick_and_dirty:
908910
# In quick_and_dirty mode allow non-quick_and_dirty cache files.
909911
cached_options['quick_and_dirty'] = True
910-
if not cached_options.get('platform') and manager.options.skip_version_check:
911-
# Older versions didn't write platform.
912-
cached_options['platform'] = manager.options.platform
912+
if manager.options.skip_version_check:
913+
# When we're lax about version we're also lax about platform.
914+
cached_options['platform'] = current_options['platform']
915+
if 'debug_cache' in cached_options:
916+
# Older versions included debug_cache, but it's silly to compare it.
917+
del cached_options['debug_cache']
913918
if cached_options != current_options:
914-
manager.trace('Metadata abandoned for {}: options differ'.format(id))
919+
manager.log('Metadata abandoned for {}: options differ'.format(id))
920+
if manager.options.verbosity >= 2:
921+
for key in sorted(set(cached_options) | set(current_options)):
922+
if cached_options.get(key) != current_options.get(key):
923+
manager.trace(' {}: {} != {}'
924+
.format(key, cached_options.get(key), current_options.get(key)))
915925
return None
916926

917927
return m
@@ -948,41 +958,63 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: str,
948958
# we use cache data file mtime to propagate information about changes in the dependencies.
949959

950960
if meta is None:
961+
manager.log('Metadata not found for {}'.format(id))
962+
return None
963+
964+
# Check data_json; assume if its mtime matches it's good.
965+
# TODO: stat() errors
966+
data_mtime = getmtime(meta.data_json)
967+
if data_mtime != meta.data_mtime:
968+
manager.log('Metadata abandoned for {}: data cache is modified'.format(id))
951969
return None
952970

953971
# TODO: Share stat() outcome with find_module()
954972
path = os.path.abspath(path)
955973
st = manager.get_stat(path) # TODO: Errors
956-
if st.st_size != meta.size:
974+
size = st.st_size
975+
if size != meta.size:
957976
manager.log('Metadata abandoned for {}: file {} has different size'.format(id, path))
958977
return None
959978

960-
if int(st.st_mtime) != meta.mtime or path != meta.path:
979+
mtime = int(st.st_mtime)
980+
if mtime != meta.mtime or path != meta.path:
961981
with open(path, 'rb') as f:
962982
source_hash = hashlib.md5(f.read()).hexdigest()
963983
if source_hash != meta.hash:
964984
manager.log('Metadata abandoned for {}: file {} has different hash'.format(id, path))
965985
return None
966986
else:
967-
manager.log('Metadata ok for {}: file {} (match on path, size, hash)'.format(id, path))
968987
# Optimization: update mtime and path (otherwise, this mismatch will reappear).
969-
meta = meta._replace(mtime=int(st.st_mtime), path=path)
988+
meta = meta._replace(mtime=mtime, path=path)
989+
# Construct a dict we can pass to json.dumps() (compare to write_cache()).
990+
meta_dict = {
991+
'id': id,
992+
'path': path,
993+
'mtime': mtime,
994+
'size': size,
995+
'hash': source_hash,
996+
'data_mtime': data_mtime,
997+
'dependencies': meta.dependencies,
998+
'suppressed': meta.suppressed,
999+
'child_modules': meta.child_modules,
1000+
'options': (manager.options.clone_for_module(id)
1001+
.select_options_affecting_cache()),
1002+
'dep_prios': meta.dep_prios,
1003+
'interface_hash': meta.interface_hash,
1004+
'version_id': manager.version_id,
1005+
}
9701006
if manager.options.debug_cache:
971-
meta_str = json.dumps(meta, indent=2, sort_keys=True)
1007+
meta_str = json.dumps(meta_dict, indent=2, sort_keys=True)
9721008
else:
973-
meta_str = json.dumps(meta)
1009+
meta_str = json.dumps(meta_dict)
9741010
meta_json, _ = get_cache_names(id, path, manager)
9751011
manager.log('Updating mtime for {}: file {}, meta {}, mtime {}'
9761012
.format(id, path, meta_json, meta.mtime))
977-
atomic_write(meta_json, meta_str) # Ignore errors, since this is just an optimization.
1013+
atomic_write(meta_json, meta_str, '\n') # Ignore errors, it's just an optimization.
1014+
return meta
9781015

979-
# It's a match on (id, path, mtime/hash, size).
980-
# Check data_json; assume if its mtime matches it's good.
981-
# TODO: stat() errors
982-
if getmtime(meta.data_json) != meta.data_mtime:
983-
manager.log('Metadata abandoned for {}: data cache is modified'.format(id))
984-
return None
985-
manager.log('Found {} {} (metadata is fresh)'.format(id, meta.data_json))
1016+
# It's a match on (id, path, size, hash, mtime).
1017+
manager.log('Metadata fresh for {}: file {}'.format(id, path))
9861018
return meta
9871019

9881020

@@ -1096,7 +1128,7 @@ def write_cache(id: str, path: str, tree: MypyFile,
10961128
meta_str = json.dumps(meta, indent=2, sort_keys=True)
10971129
else:
10981130
meta_str = json.dumps(meta)
1099-
if not atomic_write(meta_json, meta_str):
1131+
if not atomic_write(meta_json, meta_str, '\n'):
11001132
# Most likely the error is the replace() call
11011133
# (see https://github.com/python/mypy/issues/3215).
11021134
# The next run will simply find the cache entry out of date.

mypy/options.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ class Options:
3434
"strict_optional",
3535
}
3636

37-
OPTIONS_AFFECTING_CACHE = (PER_MODULE_OPTIONS | {"quick_and_dirty", "platform"})
37+
OPTIONS_AFFECTING_CACHE = ((PER_MODULE_OPTIONS | {"quick_and_dirty", "platform"})
38+
- {"debug_cache"})
3839

3940
def __init__(self) -> None:
4041
# -- build options --

0 commit comments

Comments
 (0)