16
16
17
17
from mypy .defaults import PYTHON3_VERSION_MIN
18
18
from mypy .fscache import FileSystemCache
19
+ from mypy .nodes import MypyFile
19
20
from mypy .options import Options
20
21
from mypy import sitepkgs
21
22
@@ -92,6 +93,33 @@ def __repr__(self) -> str:
92
93
self .base_dir )
93
94
94
95
96
class BuildSourceSet:
    """Index over the build sources for quick "is this file a source?" checks.

    Attributes, all populated once by ``__init__``:
      source_text_present: True if at least one source has ``text`` set
        (i.e. ``text is not None``).
      source_modules: maps each source's module name to its path, or to ''
        when that source has no path.
      source_paths: every non-empty source path.
    """

    def __init__(self, sources: List[BuildSource]) -> None:
        """Scan ``sources`` once and record their paths, modules and text flag."""
        has_text = False
        modules = {}  # type: Dict[str, str]
        paths = set()  # type: Set[str]

        for src in sources:
            has_text = has_text or src.text is not None
            if src.path:
                paths.add(src.path)
            if src.module:
                # Sources without a path are still registered, under ''.
                modules[src.module] = src.path if src.path else ''

        self.source_text_present = has_text
        self.source_modules = modules
        self.source_paths = paths

    def is_source(self, file: MypyFile) -> bool:
        """Return True if ``file`` should be treated as one of the build sources.

        A file matches by path, then by fully qualified module name. If neither
        matches, the answer is True whenever any source was given as text.
        """
        if file.path and file.path in self.source_paths:
            return True
        if file._fullname in self.source_modules:
            return True
        return self.source_text_present
121
+
122
+
95
123
class FindModuleCache :
96
124
"""Module finder with integrated cache.
97
125
@@ -107,8 +135,10 @@ def __init__(self,
107
135
search_paths : SearchPaths ,
108
136
fscache : Optional [FileSystemCache ] = None ,
109
137
options : Optional [Options ] = None ,
110
- ns_packages : Optional [List [str ]] = None ) -> None :
138
+ ns_packages : Optional [List [str ]] = None ,
139
+ source_set : Optional [BuildSourceSet ] = None ) -> None :
111
140
self .search_paths = search_paths
141
+ self .source_set = source_set
112
142
self .fscache = fscache or FileSystemCache ()
113
143
# Cache for get_toplevel_possibilities:
114
144
# search_paths -> (toplevel_id -> list(package_dirs))
@@ -124,6 +154,39 @@ def clear(self) -> None:
124
154
self .initial_components .clear ()
125
155
self .ns_ancestors .clear ()
126
156
157
def find_module_via_source_set(self, id: str) -> Optional[ModuleSearchResult]:
    """Try to resolve module ``id`` from the build source set alone (fast path).

    Returns:
      * the source path, when ``id`` is registered in the source set, its
        file exists, and every directory walked on the way up contains an
        ``__init__.py`` or ``__init__.pyi``;
      * ``ModuleNotFoundReason.NOT_FOUND``, when the parent of ``id`` resolves
        (through this same method) to a plain module file rather than a
        package, and no directory with the parent's name exists next to it;
      * ``None``, when there is no source set or it cannot decide; the caller
        is expected to fall back to the regular search.
    """
    if not self.source_set:
        return None

    p = self.source_set.source_modules.get(id)
    if p and self.fscache.isfile(p):
        # NB: need to make sure we still have a package marker all the way up,
        # otherwise we might have false positives compared to slow path.
        # Stub-only packages are marked by __init__.pyi, so either file counts.
        # NOTE(review): when p is itself a package __init__ file, the first
        # directory checked is the package's own directory -- confirm that
        # leaving the topmost ancestor unchecked is intended.
        d = os.path.dirname(p)
        for _ in range(id.count('.')):
            if not any(self.fscache.isfile(os.path.join(d, init))
                       for init in ('__init__.py', '__init__.pyi')):
                return None
            d = os.path.dirname(d)
        return p

    idx = id.rfind('.')
    if idx != -1:
        parent = self.find_module_via_source_set(id[:idx])
        if (
            parent and isinstance(parent, str)
            # a parent that resolved to a package (regular or stub-only) may
            # still contain submodules that are not in the source set
            and not parent.endswith(('__init__.py', '__init__.pyi'))
            and not self.fscache.isdir(os.path.splitext(parent)[0])
        ):
            # if
            # 1. we're looking for foo.bar.baz
            # 2. foo.bar.py[i] is in the source set
            # 3. foo.bar is not a directory
            # then we don't want to go spelunking in other search paths to find
            # another 'bar' module, because it's a waste of time and even in the
            # unlikely event that we did find one that matched, it probably would
            # be completely unrelated and undesirable
            return ModuleNotFoundReason.NOT_FOUND
    return None
189
+
127
190
def find_lib_path_dirs (self , id : str , lib_path : Tuple [str , ...]) -> PackageDirs :
128
191
"""Find which elements of a lib_path have the directory a module needs to exist.
129
192
@@ -209,8 +272,8 @@ def _can_find_module_in_parent_dir(self, id: str) -> bool:
209
272
"""
210
273
working_dir = os .getcwd ()
211
274
parent_search = FindModuleCache (SearchPaths ((), (), (), ()))
212
- while any (file . endswith (( "__init__.py" , "__init__.pyi" ))
213
- for file in os . listdir ( working_dir ) ):
275
+ while any (os . path . exists ( os . path . join ( working_dir , f ))
276
+ for f in [ "__init__.py" , "__init__.pyi" ] ):
214
277
working_dir = os .path .dirname (working_dir )
215
278
parent_search .search_paths = SearchPaths ((working_dir ,), (), (), ())
216
279
if not isinstance (parent_search ._find_module (id ), ModuleNotFoundReason ):
@@ -220,6 +283,37 @@ def _can_find_module_in_parent_dir(self, id: str) -> bool:
220
283
def _find_module (self , id : str ) -> ModuleSearchResult :
221
284
fscache = self .fscache
222
285
286
+ # fast path for any modules in the current source set
287
+ # this is particularly important when there are a large number of search
288
+ # paths which share the first (few) component(s) due to the use of namespace
289
+ # packages, for instance
290
+ # foo/
291
+ # company/
292
+ # __init__.py
293
+ # foo/
294
+ # bar/
295
+ # company/
296
+ # __init__.py
297
+ # bar/
298
+ # baz/
299
+ # company/
300
+ # __init__.py
301
+ # baz/
302
+ #
303
+ # mypy gets [foo/company/foo, foo/company/bar, foo/company/baz, ...] as input
304
+ # and computes [foo, bar, baz, ...] as the module search path
305
+ #
306
+ # This would result in O(n) search for every import of company.*,
307
+ # leading to O(n**2) behavior in load_graph as such imports are unsurprisingly
308
+ # present at least once, and usually many more times than that, in each and
309
+ # every file being parsed
310
+ #
311
+ # Thankfully, such cases are efficiently handled by looking up the module path
312
+ # via BuildSourceSet
313
+ p = self .find_module_via_source_set (id )
314
+ if p :
315
+ return p
316
+
223
317
# If we're looking for a module like 'foo.bar.baz', it's likely that most of the
224
318
# many elements of lib_path don't even have a subdirectory 'foo/bar'. Discover
225
319
# that only once and cache it for when we look for modules like 'foo.bar.blah'
0 commit comments