Skip to content

Commit 8a4f85e

Browse files
authored
Optionally check that we don't have duplicate nodes after AST merge (#4647)
It's hard to manually ensure that all relevant nodes are merged in AST merge and that no references to stale objects remain. This automated opt-in check looks for these duplicate nodes. It can be enabled in tests by switching `mypy.test.testfinegrained.CHECK_CONSISTENCY` to True. This was originally implemented by @msullivan. I did various minor changes and some refactoring.
1 parent 064c8d3 commit 8a4f85e

File tree

4 files changed

+246
-0
lines changed

4 files changed

+246
-0
lines changed

mypy/server/mergecheck.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
"""Check for duplicate AST nodes after merge."""
2+
3+
from typing import Dict, List, Tuple
4+
5+
from mypy.nodes import SymbolNode, Var, Decorator, OverloadedFuncDef, FuncDef
6+
from mypy.server.objgraph import get_reachable_graph, get_path
7+
8+
9+
# If True, print more verbose output on failure.
10+
DUMP_MISMATCH_NODES = False
11+
12+
13+
def check_consistency(o: object) -> None:
14+
"""Fail if there are two AST nodes with the same fullname reachable from 'o'.
15+
16+
Raise AssertionError on failure and print some debugging output.
17+
"""
18+
seen, parents = get_reachable_graph(o)
19+
reachable = list(seen.values())
20+
syms = [x for x in reachable if isinstance(x, SymbolNode)]
21+
22+
m = {} # type: Dict[str, SymbolNode]
23+
for sym in syms:
24+
fn = sym.fullname()
25+
# Skip None names, since they are ambiguous.
26+
# TODO: Everything should have a proper full name?
27+
if fn is None:
28+
continue
29+
# Skip stuff that should be expected to have duplicate names
30+
if isinstance(sym, (Var, Decorator)):
31+
continue
32+
if isinstance(sym, FuncDef) and sym.is_overload:
33+
continue
34+
35+
if fn not in m:
36+
m[sym.fullname()] = sym
37+
continue
38+
39+
# We have trouble and need to decide what to do about it.
40+
sym1, sym2 = sym, m[fn]
41+
42+
# If the type changed, then it shouldn't have been merged.
43+
if type(sym1) is not type(sym2):
44+
continue
45+
46+
path1 = get_path(sym1, seen, parents)
47+
path2 = get_path(sym2, seen, parents)
48+
49+
if fn in m:
50+
print('\nDuplicate %r nodes with fullname %r found:' % (type(sym).__name__, fn))
51+
print('[1] %d: %s' % (id(sym1), path_to_str(path1)))
52+
print('[2] %d: %s' % (id(sym2), path_to_str(path2)))
53+
54+
if DUMP_MISMATCH_NODES and fn in m:
55+
# Add verbose output with full AST node contents.
56+
print('---')
57+
print(id(sym1), sym1)
58+
print('---')
59+
print(id(sym2), sym2)
60+
61+
assert sym.fullname() not in m
62+
63+
64+
def path_to_str(path: List[Tuple[object, object]]) -> str:
65+
result = '<root>'
66+
for attr, obj in path:
67+
t = type(obj).__name__
68+
if t in ('dict', 'tuple', 'SymbolTable', 'list'):
69+
result += '[%s]' % repr(attr)
70+
else:
71+
if isinstance(obj, Var):
72+
result += '.%s(%s:%s)' % (attr, t, obj.name())
73+
elif t in ('BuildManager', 'FineGrainedBuildManager'):
74+
# Omit class name for some classes that aren't part of a class
75+
# hierarchy since there isn't much ambiguity.
76+
result += '.%s' % attr
77+
else:
78+
result += '.%s(%s)' % (attr, t)
79+
return result

mypy/server/objgraph.py

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
"""Find all objects reachable from a root object."""
2+
3+
from collections import deque
4+
from collections.abc import Iterable
5+
from typing import List, Dict, Iterator, Optional, Tuple, Mapping
6+
import weakref
7+
import types
8+
9+
10+
method_descriptor_type = type(object.__dir__)
11+
method_wrapper_type = type(object().__ne__)
12+
wrapper_descriptor_type = type(object.__ne__)
13+
14+
FUNCTION_TYPES = (types.BuiltinFunctionType,
15+
types.FunctionType,
16+
types.MethodType,
17+
method_descriptor_type,
18+
wrapper_descriptor_type,
19+
method_wrapper_type)
20+
21+
ATTR_BLACKLIST = {
22+
'__doc__',
23+
'__name__',
24+
'__class__',
25+
'__dict__',
26+
27+
# Mypy specific attribute blacklists
28+
'indirection_detector',
29+
'all_types',
30+
'type_maps',
31+
'semantic_analyzer', # Semantic analyzer has stale caches
32+
'semantic_analyzer_pass3', # Semantic analyzer has stale caches
33+
}
34+
35+
# Instances of these types can't have references to other objects
36+
ATOMIC_TYPE_BLACKLIST = {
37+
bool,
38+
int,
39+
float,
40+
str,
41+
type(None),
42+
object,
43+
}
44+
45+
# Don't look at most attributes of these types
46+
COLLECTION_TYPE_BLACKLIST = {
47+
list,
48+
set,
49+
dict,
50+
tuple,
51+
}
52+
53+
# Don't return these objects
54+
TYPE_BLACKLIST = {
55+
weakref.ReferenceType,
56+
}
57+
58+
59+
def isproperty(o: object, attr: str) -> bool:
60+
return isinstance(getattr(type(o), attr, None), property)
61+
62+
63+
def get_edge_candidates(o: object) -> Iterator[Tuple[object, object]]:
64+
if type(o) not in COLLECTION_TYPE_BLACKLIST:
65+
for attr in dir(o):
66+
if attr not in ATTR_BLACKLIST and hasattr(o, attr) and not isproperty(o, attr):
67+
e = getattr(o, attr)
68+
if not type(e) in ATOMIC_TYPE_BLACKLIST:
69+
yield attr, e
70+
if isinstance(o, Mapping):
71+
for k, v in o.items():
72+
yield k, v
73+
elif isinstance(o, Iterable) and not isinstance(o, str):
74+
for i, e in enumerate(o):
75+
yield i, e
76+
77+
78+
def get_edges(o: object) -> Iterator[Tuple[object, object]]:
79+
for s, e in get_edge_candidates(o):
80+
if (isinstance(e, FUNCTION_TYPES)):
81+
# We don't want to collect methods, but do want to collect values
82+
# in closures and self pointers to other objects
83+
84+
if hasattr(e, '__closure__'):
85+
yield (s, '__closure__'), getattr(e, '__closure__')
86+
if hasattr(e, '__self__'):
87+
se = getattr(e, '__self__')
88+
if se is not o and se is not type(o):
89+
yield (s, '__self__'), se
90+
else:
91+
if not type(e) in TYPE_BLACKLIST:
92+
yield s, e
93+
94+
95+
def get_reachable_graph(root: object) -> Tuple[Dict[int, object],
96+
Dict[int, Tuple[int, object]]]:
97+
parents = {}
98+
seen = {id(root): root}
99+
worklist = [root]
100+
while worklist:
101+
o = worklist.pop()
102+
for s, e in get_edges(o):
103+
if id(e) in seen:
104+
continue
105+
parents[id(e)] = (id(o), s)
106+
seen[id(e)] = e
107+
worklist.append(e)
108+
109+
return seen, parents
110+
111+
112+
def find_all_reachable(root: object) -> List[object]:
113+
return list(get_reachable_graph(root)[0].values())
114+
115+
116+
def aggregate_by_type(objs: List[object]) -> Dict[type, List[object]]:
117+
m = {} # type: Dict[type, List[object]]
118+
for o in objs:
119+
m.setdefault(type(o), []).append(o)
120+
return m
121+
122+
123+
def get_path(o: object,
124+
seen: Dict[int, object],
125+
parents: Dict[int, Tuple[int, object]]) -> List[Tuple[object, object]]:
126+
path = []
127+
while id(o) in parents:
128+
pid, attr = parents[id(o)]
129+
o = seen[pid]
130+
path.append((attr, o))
131+
path.reverse()
132+
return path

mypy/test/testfinegrained.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,15 @@
3030
from mypy.test.testtypegen import ignore_node
3131
from mypy.types import TypeStrVisitor, Type
3232
from mypy.util import short_type
33+
from mypy.server.mergecheck import check_consistency
34+
3335
import pytest # type: ignore # no pytest in typeshed
3436

3537

38+
# Set to True to perform (somewhat expensive) checks for duplicate AST nodes after merge
39+
CHECK_CONSISTENCY = False
40+
41+
3642
class FineGrainedSuite(DataSuite):
3743
files = [
3844
'fine-grained.test',
@@ -80,6 +86,8 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
8086
fine_grained_manager = None
8187
if not self.use_cache:
8288
fine_grained_manager = FineGrainedBuildManager(manager, graph)
89+
if CHECK_CONSISTENCY:
90+
check_consistency(fine_grained_manager)
8391

8492
steps = testcase.find_steps()
8593
all_triggered = []
@@ -107,6 +115,8 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
107115
fine_grained_manager = FineGrainedBuildManager(manager, graph)
108116

109117
new_messages = fine_grained_manager.update(modules)
118+
if CHECK_CONSISTENCY:
119+
check_consistency(fine_grained_manager)
110120
all_triggered.append(fine_grained_manager.triggered)
111121
new_messages = normalize_messages(new_messages)
112122

test-data/unit/merge.test

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1333,3 +1333,28 @@ target:
13331333
C: TypeInfo<3>
13341334
D: TypeInfo<5>
13351335
NewType: Var<4>
1336+
1337+
[case testCallable_symtable-skip]
1338+
# The TypeInfo is currently not being merged correctly
1339+
import target
1340+
[file target.py]
1341+
def g(o: object) -> None:
1342+
if callable(o):
1343+
pass
1344+
[file target.py.next]
1345+
def g(o: object) -> None:
1346+
if callable(o):
1347+
o()
1348+
[builtins fixtures/callable.pyi]
1349+
[out]
1350+
__main__:
1351+
target: MypyFile<0>
1352+
target:
1353+
<callable subtype of object>: TypeInfo<1>
1354+
g: FuncDef<2>
1355+
==>
1356+
__main__:
1357+
target: MypyFile<0>
1358+
target:
1359+
<callable subtype of object>: TypeInfo<1>
1360+
g: FuncDef<2>

0 commit comments

Comments
 (0)