Skip to content

Commit 66bf773

Browse files
authored
Split fine_grained_deps out into its own .deps.json cache file (#4906)
The goal here is to decouple loading of the AST trees from the data cache and the fine-grained dependencies. This means that a regular incremental run can use cache files produced with --cache-fine-grained without needing to load the fine-grained dependencies. (To enable this, we drop "cache_fine_grained" from the OPTIONS_AFFECTING_CACHE that are checked when validating metadata. If we require a fine-grained cache but don't have one, that will be caught by the lack of deps_mtime.) More importantly, it will enable loading dependencies without needing to parse the data json, which is a good optimization for a planned patch to load data caches on demand in fine-grained mode.
1 parent 567dde9 commit 66bf773

File tree

4 files changed

+110
-31
lines changed

4 files changed

+110
-31
lines changed

mypy/build.py

Lines changed: 76 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,11 @@ def default_lib_path(data_dir: str,
396396
('hash', str),
397397
('dependencies', List[str]), # names of imported modules
398398
('data_mtime', int), # mtime of data_json
399+
('deps_mtime', Optional[int]), # mtime of deps_json
399400
('data_json', str), # path of <id>.data.json
401+
# path of <id>.deps.json, which we use to store fine-grained
402+
# dependency information for fine-grained mode
403+
('deps_json', Optional[str]),
400404
('suppressed', List[str]), # dependencies that weren't imported
401405
('child_modules', List[str]), # all submodules of the given module
402406
('options', Optional[Dict[str, object]]), # build options
@@ -413,7 +417,16 @@ def default_lib_path(data_dir: str,
413417
# silent mode or simply not found.
414418

415419

416-
def cache_meta_from_dict(meta: Dict[str, Any], data_json: str) -> CacheMeta:
420+
def cache_meta_from_dict(meta: Dict[str, Any],
421+
data_json: str, deps_json: Optional[str]) -> CacheMeta:
422+
"""Build a CacheMeta object from a json metadata dictionary
423+
424+
Args:
425+
meta: JSON metadata read from the metadata cache file
426+
data_json: Path to the .data.json file containing the AST trees
427+
deps_json: Optionally, path to the .deps.json file containing
428+
fine-grained dependency information.
429+
"""
417430
sentinel = None # type: Any # Values to be validated by the caller
418431
return CacheMeta(
419432
meta.get('id', sentinel),
@@ -423,7 +436,9 @@ def cache_meta_from_dict(meta: Dict[str, Any], data_json: str) -> CacheMeta:
423436
meta.get('hash', sentinel),
424437
meta.get('dependencies', []),
425438
int(meta['data_mtime']) if 'data_mtime' in meta else sentinel,
439+
int(meta['deps_mtime']) if meta.get('deps_mtime') is not None else None,
426440
data_json,
441+
deps_json,
427442
meta.get('suppressed', []),
428443
meta.get('child_modules', []),
429444
meta.get('options'),
@@ -962,7 +977,7 @@ def verify_module(fscache: FileSystemMetaCache, id: str, path: str) -> bool:
962977
return True
963978

964979

965-
def get_cache_names(id: str, path: str, manager: BuildManager) -> Tuple[str, str]:
980+
def get_cache_names(id: str, path: str, manager: BuildManager) -> Tuple[str, str, Optional[str]]:
966981
"""Return the file names for the cache files.
967982
968983
Args:
@@ -972,16 +987,20 @@ def get_cache_names(id: str, path: str, manager: BuildManager) -> Tuple[str, str
972987
pyversion: Python version (major, minor)
973988
974989
Returns:
975-
A tuple with the file names to be used for the meta JSON and the
976-
data JSON, respectively.
990+
A tuple with the file names to be used for the meta JSON, the
991+
data JSON, and the fine-grained deps JSON, respectively.
977992
"""
978993
cache_dir = manager.options.cache_dir
979994
pyversion = manager.options.python_version
980995
prefix = os.path.join(cache_dir, '%d.%d' % pyversion, *id.split('.'))
981996
is_package = os.path.basename(path).startswith('__init__.py')
982997
if is_package:
983998
prefix = os.path.join(prefix, '__init__')
984-
return (prefix + '.meta.json', prefix + '.data.json')
999+
1000+
deps_json = None
1001+
if manager.options.cache_fine_grained:
1002+
deps_json = prefix + '.deps.json'
1003+
return (prefix + '.meta.json', prefix + '.data.json', deps_json)
9851004

9861005

9871006
def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[CacheMeta]:
@@ -997,7 +1016,7 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache
9971016
valid; otherwise None.
9981017
"""
9991018
# TODO: May need to take more build options into account
1000-
meta_json, data_json = get_cache_names(id, path, manager)
1019+
meta_json, data_json, deps_json = get_cache_names(id, path, manager)
10011020
manager.trace('Looking for {} at {}'.format(id, meta_json))
10021021
try:
10031022
with open(meta_json, 'r') as f:
@@ -1011,11 +1030,12 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache
10111030
manager.log('Could not load cache for {}: meta cache is not a dict: {}'
10121031
.format(id, repr(meta)))
10131032
return None
1014-
m = cache_meta_from_dict(meta, data_json)
1033+
m = cache_meta_from_dict(meta, data_json, deps_json)
10151034
# Don't check for path match, that is dealt with in validate_meta().
10161035
if (m.id != id or
10171036
m.mtime is None or m.size is None or
1018-
m.dependencies is None or m.data_mtime is None):
1037+
m.dependencies is None or m.data_mtime is None or
1038+
(manager.options.cache_fine_grained and m.deps_mtime is None)):
10191039
manager.log('Metadata abandoned for {}: attributes are missing'.format(id))
10201040
return None
10211041

@@ -1098,6 +1118,13 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str],
10981118
if data_mtime != meta.data_mtime:
10991119
manager.log('Metadata abandoned for {}: data cache is modified'.format(id))
11001120
return None
1121+
deps_mtime = None
1122+
if manager.options.cache_fine_grained:
1123+
assert meta.deps_json
1124+
deps_mtime = getmtime(meta.deps_json)
1125+
if deps_mtime != meta.deps_mtime:
1126+
manager.log('Metadata abandoned for {}: deps cache is modified'.format(id))
1127+
return None
11011128

11021129
path = os.path.abspath(path)
11031130
try:
@@ -1143,6 +1170,7 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str],
11431170
'size': size,
11441171
'hash': source_hash,
11451172
'data_mtime': data_mtime,
1173+
'deps_mtime': deps_mtime,
11461174
'dependencies': meta.dependencies,
11471175
'suppressed': meta.suppressed,
11481176
'child_modules': meta.child_modules,
@@ -1158,7 +1186,7 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str],
11581186
meta_str = json.dumps(meta_dict, indent=2, sort_keys=True)
11591187
else:
11601188
meta_str = json.dumps(meta_dict)
1161-
meta_json, _ = get_cache_names(id, path, manager)
1189+
meta_json, _, _2 = get_cache_names(id, path, manager)
11621190
manager.log('Updating mtime for {}: file {}, meta {}, mtime {}'
11631191
.format(id, path, meta_json, meta.mtime))
11641192
atomic_write(meta_json, meta_str, '\n') # Ignore errors, it's just an optimization.
@@ -1176,6 +1204,13 @@ def compute_hash(text: str) -> str:
11761204
return hashlib.md5(text.encode('utf-8')).hexdigest()
11771205

11781206

1207+
def json_dumps(obj: Any, debug_cache: bool) -> str:
1208+
if debug_cache:
1209+
return json.dumps(obj, indent=2, sort_keys=True)
1210+
else:
1211+
return json.dumps(obj, sort_keys=True)
1212+
1213+
11791214
def write_cache(id: str, path: str, tree: MypyFile,
11801215
serialized_fine_grained_deps: Dict[str, List[str]],
11811216
dependencies: List[str], suppressed: List[str],
@@ -1209,21 +1244,17 @@ def write_cache(id: str, path: str, tree: MypyFile,
12091244
"""
12101245
# Obtain file paths
12111246
path = os.path.abspath(path)
1212-
meta_json, data_json = get_cache_names(id, path, manager)
1213-
manager.log('Writing {} {} {} {}'.format(id, path, meta_json, data_json))
1247+
meta_json, data_json, deps_json = get_cache_names(id, path, manager)
1248+
manager.log('Writing {} {} {} {} {}'.format(
1249+
id, path, meta_json, data_json, deps_json))
12141250

12151251
# Make sure directory for cache files exists
12161252
parent = os.path.dirname(data_json)
12171253
assert os.path.dirname(meta_json) == parent
12181254

12191255
# Serialize data and analyze interface
1220-
data = {'tree': tree.serialize(),
1221-
'fine_grained_deps': serialized_fine_grained_deps,
1222-
}
1223-
if manager.options.debug_cache:
1224-
data_str = json.dumps(data, indent=2, sort_keys=True)
1225-
else:
1226-
data_str = json.dumps(data, sort_keys=True)
1256+
data = tree.serialize()
1257+
data_str = json_dumps(data, manager.options.debug_cache)
12271258
interface_hash = compute_hash(data_str)
12281259

12291260
# Obtain and set up metadata
@@ -1265,6 +1296,14 @@ def write_cache(id: str, path: str, tree: MypyFile,
12651296
return interface_hash, None
12661297
data_mtime = getmtime(data_json)
12671298

1299+
deps_mtime = None
1300+
if deps_json:
1301+
deps_str = json_dumps(serialized_fine_grained_deps, manager.options.debug_cache)
1302+
if not atomic_write(deps_json, deps_str, '\n'):
1303+
manager.log("Error writing deps JSON file {}".format(deps_json))
1304+
return interface_hash, None
1305+
deps_mtime = getmtime(deps_json)
1306+
12681307
mtime = int(st.st_mtime)
12691308
size = st.st_size
12701309
options = manager.options.clone_for_module(id)
@@ -1275,6 +1314,7 @@ def write_cache(id: str, path: str, tree: MypyFile,
12751314
'size': size,
12761315
'hash': source_hash,
12771316
'data_mtime': data_mtime,
1317+
'deps_mtime': deps_mtime,
12781318
'dependencies': dependencies,
12791319
'suppressed': suppressed,
12801320
'child_modules': child_modules,
@@ -1287,17 +1327,14 @@ def write_cache(id: str, path: str, tree: MypyFile,
12871327
}
12881328

12891329
# Write meta cache file
1290-
if manager.options.debug_cache:
1291-
meta_str = json.dumps(meta, indent=2, sort_keys=True)
1292-
else:
1293-
meta_str = json.dumps(meta)
1330+
meta_str = json_dumps(meta, manager.options.debug_cache)
12941331
if not atomic_write(meta_json, meta_str, '\n'):
12951332
# Most likely the error is the replace() call
12961333
# (see https://github.com/python/mypy/issues/3215).
12971334
# The next run will simply find the cache entry out of date.
12981335
manager.log("Error writing meta JSON file {}".format(meta_json))
12991336

1300-
return interface_hash, cache_meta_from_dict(meta, data_json)
1337+
return interface_hash, cache_meta_from_dict(meta, data_json, deps_json)
13011338

13021339

13031340
def delete_cache(id: str, path: str, manager: BuildManager) -> None:
@@ -1308,12 +1345,13 @@ def delete_cache(id: str, path: str, manager: BuildManager) -> None:
13081345
see #4043 for an example.
13091346
"""
13101347
path = os.path.abspath(path)
1311-
meta_json, data_json = get_cache_names(id, path, manager)
1312-
manager.log('Deleting {} {} {} {}'.format(id, path, meta_json, data_json))
1348+
cache_paths = get_cache_names(id, path, manager)
1349+
manager.log('Deleting {} {} {}'.format(id, path, " ".join(x for x in cache_paths if x)))
13131350

1314-
for filename in [data_json, meta_json]:
1351+
for filename in cache_paths:
13151352
try:
1316-
os.remove(filename)
1353+
if filename:
1354+
os.remove(filename)
13171355
except OSError as e:
13181356
if e.errno != errno.ENOENT:
13191357
manager.log("Error deleting cache file {}: {}".format(filename, e.strerror))
@@ -1657,15 +1695,22 @@ def wrap_context(self) -> Iterator[None]:
16571695
self.check_blockers()
16581696

16591697
# Methods for processing cached modules.
1698+
def load_fine_grained_deps(self) -> None:
1699+
assert self.meta is not None, "Internal error: this method must be called only" \
1700+
" for cached modules"
1701+
assert self.meta.deps_json
1702+
with open(self.meta.deps_json) as f:
1703+
deps = json.load(f)
1704+
# TODO: Assert deps file wasn't changed.
1705+
self.fine_grained_deps = {k: set(v) for k, v in deps.items()}
16601706

16611707
def load_tree(self) -> None:
16621708
assert self.meta is not None, "Internal error: this method must be called only" \
16631709
" for cached modules"
16641710
with open(self.meta.data_json) as f:
16651711
data = json.load(f)
16661712
# TODO: Assert data file wasn't changed.
1667-
self.tree = MypyFile.deserialize(data['tree'])
1668-
self.fine_grained_deps = {k: set(v) for k, v in data['fine_grained_deps'].items()}
1713+
self.tree = MypyFile.deserialize(data)
16691714

16701715
self.manager.modules[self.id] = self.tree
16711716
self.manager.add_stats(fresh_trees=1)
@@ -2520,6 +2565,8 @@ def process_fine_grained_cache_graph(graph: Graph, manager: BuildManager) -> Non
25202565
# Note that ascc is a set, and scc is a list.
25212566
scc = order_ascc(graph, ascc)
25222567
process_fresh_scc(graph, scc, manager)
2568+
for id in scc:
2569+
graph[id].load_fine_grained_deps()
25232570

25242571

25252572
def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_ALL) -> List[str]:

mypy/dmypy_server.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,9 @@ def __init__(self, options: Options,
128128
options.fine_grained_incremental = True
129129
options.show_traceback = True
130130
if options.use_fine_grained_cache:
131-
options.cache_fine_grained = True # set this so that cache options match
131+
# Using fine_grained_cache implies generating and caring
132+
# about the fine grained cache
133+
options.cache_fine_grained = True
132134
else:
133135
options.cache_dir = os.devnull
134136
# Fine-grained incremental doesn't support general partial types

mypy/options.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ class Options:
4545
}
4646

4747
OPTIONS_AFFECTING_CACHE = ((PER_MODULE_OPTIONS |
48-
{"quick_and_dirty", "platform", "cache_fine_grained"})
48+
{"quick_and_dirty", "platform"})
4949
- {"debug_cache"})
5050

5151
def __init__(self) -> None:

test-data/unit/check-incremental.test

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4235,3 +4235,33 @@ pass
42354235
[out2]
42364236
[out3]
42374237
tmp/a.py:1: note: unused 'type: ignore' comment
4238+
4239+
-- Test that a non cache_fine_grained run can use a fine-grained cache
4240+
[case testRegularUsesFgCache]
4241+
# flags: --config-file tmp/mypy.ini
4242+
import a
4243+
[file a.py]
4244+
x = 0
4245+
[file mypy.ini]
4246+
[[mypy]
4247+
cache_fine_grained = True
4248+
[file mypy.ini.2]
4249+
[[mypy]
4250+
cache_fine_grained = False
4251+
-- Nothing should get rechecked
4252+
[rechecked]
4253+
[stale]
4254+
4255+
[case testFgCacheNeedsFgCache]
4256+
# flags: --config-file tmp/mypy.ini
4257+
import a
4258+
[file a.py]
4259+
x = 0
4260+
[file mypy.ini]
4261+
[[mypy]
4262+
cache_fine_grained = False
4263+
[file mypy.ini.2]
4264+
[[mypy]
4265+
cache_fine_grained = True
4266+
[rechecked a, builtins]
4267+
[stale a, builtins]

0 commit comments

Comments
 (0)