Skip to content

[WIP] Make incremental not propagate staleness if the public interface of a module is unchanged #2002

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 103 additions & 32 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from mypy import moduleinfo
from mypy import util
from mypy.fixup import fixup_module_pass_one, fixup_module_pass_two
from mypy.modinterface import ModuleInterface, extract_interface
from mypy.options import Options
from mypy.parse import parse
from mypy.stats import dump_type_stats
Expand Down Expand Up @@ -287,6 +288,7 @@ def default_lib_path(data_dir: str, pyversion: Tuple[int, int]) -> List[str]:
('child_modules', List[str]), # all submodules of the given module
('options', Optional[Dict[str, bool]]), # build options
('dep_prios', List[int]),
('interface_hash', str),
('version_id', str), # mypy version for cache invalidation
])
# NOTE: dependencies + suppressed == all reachable imports;
Expand Down Expand Up @@ -686,7 +688,7 @@ def get_cache_names(id: str, path: str, cache_dir: str,
return (prefix + '.meta.json', prefix + '.data.json')


def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[CacheMeta]:
def find_cache_meta(id: str, path: str, manager: BuildManager) -> Tuple[bool, Optional[CacheMeta]]:
"""Find cache data for a module.

Args:
Expand All @@ -695,21 +697,25 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache
manager: the build manager (for pyversion, log/trace, and build options)

Returns:
A CacheMeta instance if the cache data was found and appears
valid; otherwise None.
A tuple containing a bool and a CacheMeta instance.
The bool is True when a CacheMeta instance was both successfully
loaded and the timestamps indicate that there may be a content
or interface change.
The CacheMeta instance is non-null if the cache data was found
and appears valid; otherwise None.
"""
# TODO: May need to take more build options into account
meta_json, data_json = get_cache_names(
id, path, manager.options.cache_dir, manager.options.python_version)
manager.trace('Looking for {} {}'.format(id, data_json))
if not os.path.exists(meta_json):
return None
return False, None
with open(meta_json, 'r') as f:
meta_str = f.read()
manager.trace('Meta {} {}'.format(id, meta_str.rstrip()))
meta = json.loads(meta_str) # TODO: Errors
if not isinstance(meta, dict):
return None
return False, None
path = os.path.abspath(path)
m = CacheMeta(
meta.get('id'),
Expand All @@ -723,38 +729,39 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache
meta.get('child_modules', []),
meta.get('options'),
meta.get('dep_prios', []),
meta.get('interface_hash'),
meta.get('version_id'),
)
if (m.id != id or m.path != path or
m.mtime is None or m.size is None or
m.dependencies is None or m.data_mtime is None):
return None
return False, None

# Ignore cache if generated by an older mypy version.
if (m.version_id != manager.version_id
or m.options is None
or m.interface_hash is None
or len(m.dependencies) != len(m.dep_prios)):
return None
return False, None

# Ignore cache if (relevant) options aren't the same.
cached_options = m.options
current_options = select_options_affecting_cache(manager.options)
if cached_options != current_options:
return None
return False, None

# TODO: Share stat() outcome with find_module()
st = os.stat(path) # TODO: Errors
if st.st_mtime != m.mtime or st.st_size != m.size:
manager.log('Metadata abandoned because of modified file {}'.format(path))
return None

# TODO: stat() errors
# TODO: revise below comment
# It's a match on (id, path, mtime, size).
# Check data_json; assume if its mtime matches it's good.
# TODO: stat() errors
if os.path.getmtime(data_json) != m.data_mtime:
return None
st = os.stat(path) # TODO: Errors
maybe_stale = (st.st_mtime != m.mtime
or st.st_size != m.size
or os.path.getmtime(data_json) != m.data_mtime)

manager.log('Found {} {}'.format(id, meta_json))
return m
return maybe_stale, m


def select_options_affecting_cache(options: Options) -> Mapping[str, bool]:
Expand All @@ -777,7 +784,7 @@ def random_string() -> str:
def write_cache(id: str, path: str, tree: MypyFile,
dependencies: List[str], suppressed: List[str],
child_modules: List[str], dep_prios: List[int],
manager: BuildManager) -> None:
interface_hash: str, manager: BuildManager) -> None:
"""Write cache files for a module.

Args:
Expand Down Expand Up @@ -819,6 +826,7 @@ def write_cache(id: str, path: str, tree: MypyFile,
'child_modules': child_modules,
'options': select_options_affecting_cache(manager.options),
'dep_prios': dep_prios,
'interface_hash': interface_hash,
'version_id': manager.version_id,
}
with open(meta_json_tmp, 'w') as f:
Expand Down Expand Up @@ -993,6 +1001,8 @@ class State:
dependencies = None # type: List[str]
suppressed = None # type: List[str] # Suppressed/missing dependencies
priorities = None # type: Dict[str, int]
interface = None # type: ModuleInterface
interface_hash = None # type: str

# Map each dependency to the line number where it is first imported
dep_line_map = None # type: Dict[str, int]
Expand All @@ -1012,6 +1022,13 @@ class State:
# If caller_state is set, the line number in the caller where the import occurred
caller_line = 0

# If True, indicates that the public interface is unchanged.
externally_same = False

# If True, indicates that module must be checked later to see if it is
# stale and if the public interface has been changed.
maybe_stale = False

def __init__(self,
id: Optional[str],
path: Optional[str],
Expand Down Expand Up @@ -1089,10 +1106,10 @@ def __init__(self,
self.xpath = path or '<string>'
self.source = source
if path and source is None and manager.options.incremental:
self.meta = find_cache_meta(self.id, self.path, manager)
# TODO: Get mtime if not cached.
self.maybe_stale, self.meta = find_cache_meta(self.id, self.path, manager)
# TODO: Get mtime if not cached
self.add_ancestors()
if self.meta:
if self.meta and not self.maybe_stale:
# Make copies, since we may modify these and want to
# compare them to the originals later.
self.dependencies = list(self.meta.dependencies)
Expand All @@ -1108,6 +1125,24 @@ def __init__(self,
self.suppressed = []
self.child_modules = set()

def check_interface(self) -> None:
    """Determine whether this module's public interface is still fresh.

    Re-parses the file when new submodules appeared or when the cached
    timestamps indicate a possible content change, then compares the
    freshly computed interface hash against the cached one to decide
    whether dependents need to be re-checked.
    """
    trace = self.manager.trace
    if self.has_new_submodules():
        # New submodules invalidate the interface unconditionally.
        trace("Module {} has new submodules".format(self.id))
        self.parse_file()
        self.externally_same = False
        return
    if self.maybe_stale:
        # Content may have changed; re-parse and compare interface hashes.
        self.parse_file()
        if self.interface_hash != self.meta.interface_hash:
            trace("Module {} has different interface".format(self.id))
            self.externally_same = False
        else:
            trace("Module {} was changed but has same interface".format(self.id))
            self.externally_same = True
            # Content changed, so the cached data can't be reused for
            # this module itself -- only its dependents benefit.
            self.meta = None
        return
    if self.meta is not None:
        trace("Module {} is unchanged".format(self.id))
        self.externally_same = True

def skipping_ancestor(self, id: str, path: str, ancestor_for: 'State') -> None:
# TODO: Read the path (the __init__.py file) and return
# immediately if it's empty or only contains comments.
Expand Down Expand Up @@ -1154,16 +1189,28 @@ def is_fresh(self) -> bool:
# suppression by --silent-imports. However when a suppressed
# dependency is added back we find out later in the process.
return (self.meta is not None
and self.externally_same
and self.dependencies == self.meta.dependencies
and self.child_modules == set(self.meta.child_modules))

def is_interface_fresh(self) -> bool:
    """Return whether this module's public interface is unchanged."""
    return self.externally_same

def has_new_submodules(self) -> bool:
    """Return whether this module gained submodules since the cache was written."""
    if self.meta is None:
        # Without cached metadata there is nothing to compare against.
        return False
    return self.child_modules != set(self.meta.child_modules)

def mark_stale(self) -> None:
"""Throw away the cache data for this file, marking it as stale."""
def mark_stale(self, interface_is_same=False) -> None:
"""Throw away the cache data for this file, marking it as stale.

If interface_is_same is True, treat the module's interface as being
fresh -- only the module's content should be considered stale."""
if interface_is_same:
self.manager.trace("Marking {} as stale (but interface is fresh)".format(self.id))
else:
self.manager.trace("Marking {} as stale (interface is also stale)".format(self.id))
self.meta = None
self.externally_same = interface_is_same
self.manager.stale_modules.add(self.id)

def check_blockers(self) -> None:
Expand Down Expand Up @@ -1280,6 +1327,11 @@ def parse_file(self) -> None:
self.suppressed = suppressed
self.priorities = priorities
self.dep_line_map = dep_line_map
if self.manager.options.incremental:
self.interface = extract_interface(self.tree)
self.interface_hash = self.interface.compute_hash()
else:
self.interface_hash = ""
self.check_blockers()

def patch_parent(self) -> None:
Expand Down Expand Up @@ -1321,7 +1373,7 @@ def write_cache(self) -> None:
dep_prios = [self.priorities.get(dep, PRI_HIGH) for dep in self.dependencies]
write_cache(self.id, self.path, self.tree,
list(self.dependencies), list(self.suppressed), list(self.child_modules),
dep_prios,
dep_prios, self.interface_hash,
self.manager)


Expand Down Expand Up @@ -1381,8 +1433,7 @@ def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph:
if dep in st.ancestors and dep in graph:
graph[dep].child_modules.add(st.id)
for id, g in graph.items():
if g.has_new_submodules():
g.parse_file()
g.check_interface()
return graph


Expand Down Expand Up @@ -1421,8 +1472,24 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
for id in scc:
deps.update(graph[id].dependencies)
deps -= ascc
stale_deps = {id for id in deps if not graph[id].is_fresh()}
stale_deps = {id for id in deps if not graph[id].is_interface_fresh()}
fresh = fresh and not stale_deps

# TODO: Currently, the interface-based approach to incremental mode will regard
# absolutely any interface changes in our dependencies as a sign that our
# interface has also changed. This is often over-conservative, but is
# an easy way of making sure we preserve correctness.
#
# This unfortunately does mean that an interface change will often fall back
# to the worst-case behavior for incremental mode -- changing an interface causes
# a cascade of changes through a large subset of the import graph.
#
# The ideal behavior would be for an interface change to propagate only one
# or two levels through the import DAG, but this requires us to track dependencies
# at a finer-grained level than we currently do.
interface_stale_scc = {id for id in scc if not graph[id].is_interface_fresh()}
interface_fresh = len(interface_stale_scc) == 0 and len(stale_deps) == 0

undeps = set()
if fresh:
# Check if any dependencies that were suppressed according
Expand All @@ -1437,9 +1504,10 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
# All cache files are fresh. Check that no dependency's
# cache file is newer than any scc node's cache file.
oldest_in_scc = min(graph[id].meta.data_mtime for id in scc)
newest_in_deps = 0 if not deps else max(graph[dep].meta.data_mtime for dep in deps)
viable = {id for id in deps if not graph[id].is_interface_fresh()}
newest_in_deps = max(graph[dep].meta.data_mtime for dep in viable) if viable else 0
if manager.options.verbosity >= 3: # Dump all mtimes for extreme debugging.
all_ids = sorted(ascc | deps, key=lambda id: graph[id].meta.data_mtime)
all_ids = sorted(ascc | viable, key=lambda id: graph[id].meta.data_mtime)
for id in all_ids:
if id in scc:
if graph[id].meta.data_mtime < newest_in_deps:
Expand All @@ -1465,17 +1533,20 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
fresh_msg = "inherently stale (%s)" % " ".join(sorted(stale_scc))
if stale_deps:
fresh_msg += " with stale deps (%s)" % " ".join(sorted(stale_deps))
if interface_fresh:
fresh_msg += ", but interface is fresh"
else:
fresh_msg = "stale due to deps (%s)" % " ".join(sorted(stale_deps))
if len(scc) == 1:
manager.log("Processing SCC singleton (%s) as %s" % (" ".join(scc), fresh_msg))
else:
manager.log("Processing SCC of size %d (%s) as %s" %
(len(scc), " ".join(scc), fresh_msg))

if fresh:
process_fresh_scc(graph, scc)
else:
process_stale_scc(graph, scc)
process_stale_scc(graph, scc, interface_fresh)


def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_ALL) -> List[str]:
Expand Down Expand Up @@ -1537,10 +1608,10 @@ def process_fresh_scc(graph: Graph, scc: List[str]) -> None:
graph[id].calculate_mros()


def process_stale_scc(graph: Graph, scc: List[str]) -> None:
def process_stale_scc(graph: Graph, scc: List[str], is_interface_fresh: bool) -> None:
"""Process the modules in one SCC from source code."""
for id in scc:
graph[id].mark_stale()
graph[id].mark_stale(is_interface_fresh)
for id in scc:
# We may already have parsed the module, or not.
# If the former, parse_file() is a no-op.
Expand Down
Loading