From b40347144c6e47d92072f214e861f164bc14674a Mon Sep 17 00:00:00 2001 From: Michael Sullivan Date: Tue, 10 Apr 2018 15:23:25 -0700 Subject: [PATCH 1/4] Rework (and heavily optimize!) mypy.ini per-module configuration Currently, we compute the options for each module by scanning through the full list of per-module configuration options and seeing if the pattern matches against the module name. This is incredibly slow for large configuration files. On the internal S repo, mypy spends 23 seconds (!!) just computing per-module options. To fix this, we instead precompute an Options object for each config section, and in `clone_for_module` do a search for the most specific configured Options (so for foo.bar, we try foo.bar, foo.bar.*, foo.*, in that order). This cuts down the processing time from 23s to about 80ms. The catch is that this is actually a backwards-incompatible semantics change, in two ways: * Patterns can be of the form `foo.bar` and `foo.bar.*`, but can no longer be general purpose globs. We produce an error message to detect this misusage. * Patterns are now always applied based on specificity and not on the order they appear in the file. This means that some poorly-formed configuration files that contained options that were always overridden may have their meaning changed. This is probably not too common, so we don't do anything to deal with it. We could emit a warning when specificity-order and file-order disagree, but it seems wrong to lock people into the old behavior, and it seems like a silly thing to make configurable. --- docs/source/config_file.rst | 21 ++++++--- mypy/main.py | 14 +++--- mypy/options.py | 83 ++++++++++++++++++++++------------- test-data/unit/cmdline.test | 86 ++++++++++++++++++++++++++----------- 4 files changed, 137 insertions(+), 67 deletions(-) diff --git a/docs/source/config_file.rst b/docs/source/config_file.rst index 0b6899045a7a..82fb755cfd03 100644 --- a/docs/source/config_file.rst +++ b/docs/source/config_file.rst @@ -29,10 +29,15 @@ characters. the global flags. The ``setup.cfg`` file is an exception to this. - Additional sections named ``[mypy-PATTERN1,PATTERN2,...]`` may be - present, where ``PATTERN1``, ``PATTERN2`` etc. are `fnmatch patterns - `_ - separated by commas. These sections specify additional flags that - only apply to *modules* whose name matches at least one of the patterns. + present, where ``PATTERN1``, ``PATTERN2`` etc. are comma-separated + patterns of the form ``dotted_module_name`` or ``dotted_module_name.*``. + These sections specify additional flags that only apply to *modules* + whose name matches at least one of the patterns. + + A pattern of the form ``dotted_module_name`` matches only the named module, + while ``dotted_module_name.*`` matches ``dotted_module_name`` and any + submodules (so ``foo.bar.*`` would match all of ``foo.bar``, + ``foo.bar.baz``, and ``foo.bar.baz.quux``). .. note:: @@ -137,8 +142,12 @@ overridden by the pattern sections matching the module name. .. note:: - If multiple pattern sections match a module they are processed in - order of their occurrence in the config file. + If multiple pattern sections match a module, the options from the + most specific section are used where they disagree. This means + that ``foo.bar`` will take values from sections with the patterns + ``foo.bar``, ``foo.bar.*``, and ``foo.*``, but when they specify + different values, it will use values from ``foo.bar`` before + ``foo.bar.*`` before ``foo.*``. - ``follow_imports`` (string, default ``normal``) directs what to do with imports when the imported module is found as a ``.py`` file and diff --git a/mypy/main.py b/mypy/main.py index eccf6ba67432..eb5da612746d 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -3,7 +3,6 @@ import argparse import ast import configparser -import fnmatch import os import re import subprocess @@ -90,7 +89,8 @@ def flush_errors(new_messages: List[str], serious: bool) -> None: if options.warn_unused_configs and options.unused_configs: print("Warning: unused section(s) in %s: %s" % (options.config_file, - ", ".join("[mypy-%s]" % glob for glob in options.unused_configs.values())), + ", ".join("[mypy-%s]" % glob for glob in options.per_module_options.keys() + if glob in options.unused_configs)), file=sys.stderr) if options.junit_xml: t1 = time.time() @@ -730,10 +730,14 @@ def parse_config_file(options: Options, filename: Optional[str]) -> None: glob = glob.replace(os.sep, '.') if os.altsep: glob = glob.replace(os.altsep, '.') - pattern = re.compile(fnmatch.translate(glob)) - options.per_module_options[pattern] = updates - options.unused_configs[pattern] = glob + if (any(c in glob for c in '?[]!') or + ('*' in glob and (not glob.endswith('.*') or '*' in glob[:-2]))): + print("%s: Invalid pattern. Patterns must be 'module_name' or 'module_name.*'" % + prefix, + file=sys.stderr) + else: + options.per_module_options[glob] = updates def parse_section(prefix: str, template: Options, section: Mapping[str, str]) -> Tuple[Dict[str, object], Dict[str, str]]: diff --git a/mypy/options.py b/mypy/options.py index bde93ca42e52..e36bb96d89b8 100644 --- a/mypy/options.py +++ b/mypy/options.py @@ -1,9 +1,8 @@ from collections import OrderedDict -import fnmatch import pprint import sys -from typing import Dict, List, Mapping, MutableMapping, Optional, Pattern, Set, Tuple +from typing import Dict, List, Mapping, MutableMapping, Optional, Set, Tuple from mypy import defaults @@ -51,7 +50,7 @@ class Options: def __init__(self) -> None: # Cache for clone_for_module() - self.clone_cache = {} # type: Dict[str, Options] + self.per_module_cache = None # type: Optional[Dict[str, Options]] # -- build options -- self.build_type = BuildType.STANDARD @@ -167,10 +166,9 @@ def __init__(self) -> None: self.plugins = [] # type: List[str] # Per-module options (raw) - pm_opts = OrderedDict() # type: OrderedDict[Pattern[str], Dict[str, object]] + pm_opts = OrderedDict() # type: OrderedDict[str, Dict[str, object]] self.per_module_options = pm_opts - # Map pattern back to glob - self.unused_configs = OrderedDict() # type: OrderedDict[Pattern[str], str] + self.unused_configs = set() # type: Set[str] # -- development options -- self.verbosity = 0 # More verbose messages (for troubleshooting) @@ -202,38 +200,63 @@ def __ne__(self, other: object) -> bool: def __repr__(self) -> str: d = dict(self.__dict__) - del d['clone_cache'] + del d['per_module_cache'] return 'Options({})'.format(pprint.pformat(d)) + def build_per_module_cache(self) -> None: + self.per_module_cache = {} + # Since configs inherit from glob configs above them in the hierarchy, + # we need to process per-module configs in a careful order. + # We have to process foo.* before foo.bar.* before foo.bar. + # To do this, process all glob configs before non-glob configs and + # exploit the fact that foo.* sorts earlier ASCIIbetically (unicodebetically?) + # than foo.bar.*. + keys = (sorted(k for k in self.per_module_options.keys() if k.endswith('.*')) + + [k for k in self.per_module_options.keys() if not k.endswith('.*')]) + for key in keys: + # Find what the options for this key would be, just based + # on inheriting from parent configs. + options = self.clone_for_module(key) + # And then update it with its per-module options. + new_options = Options() + new_options.__dict__.update(options.__dict__) + new_options.__dict__.update(self.per_module_options[key]) + self.per_module_cache[key] = new_options + + self.unused_configs = set(keys) + def clone_for_module(self, module: str) -> 'Options': """Create an Options object that incorporates per-module options. NOTE: Once this method is called all Options objects should be considered read-only, else the caching might be incorrect. """ - res = self.clone_cache.get(module) - if res is not None: - return res - updates = {} - for pattern in self.per_module_options: - if self.module_matches_pattern(module, pattern): - if pattern in self.unused_configs: - del self.unused_configs[pattern] - updates.update(self.per_module_options[pattern]) - if not updates: - self.clone_cache[module] = self - return self - new_options = Options() - new_options.__dict__.update(self.__dict__) - new_options.__dict__.update(updates) - self.clone_cache[module] = new_options - return new_options - - def module_matches_pattern(self, module: str, pattern: Pattern[str]) -> bool: - # If the pattern is 'mod.*', we want 'mod' to match that too. - # (That's so that a pattern specifying a package also matches - # that package's __init__.) - return pattern.match(module) is not None or pattern.match(module + '.') is not None + if self.per_module_cache is None: + self.build_per_module_cache() + assert self.per_module_cache is not None + + # If the module just directly has a config entry, use it. + if module in self.per_module_cache: + self.unused_configs.discard(module) + return self.per_module_cache[module] + + # If not, search for glob paths at all the parents. So if we are looking for + # options for foo.bar.baz, we search foo.bar.baz.*, foo.bar.*, foo.*, + # in that order, looking for an entry. + # This is technically quadratic in the length of the path, but module paths + # don't actually get all that long. + path = module.split('.') + for i in range(len(path), 0, -1): + key = '.'.join(path[:i] + ['*']) + if key in self.per_module_cache: + self.unused_configs.discard(key) + return self.per_module_cache[key] + + # We could update the cache to directly point to modules once + # they have been looked up, but in testing this made things + # slower and not faster, so we don't bother. + + return self def select_options_affecting_cache(self) -> Mapping[str, object]: return {opt: getattr(self, opt) for opt in self.OPTIONS_AFFECTING_CACHE} diff --git a/test-data/unit/cmdline.test b/test-data/unit/cmdline.test index 311be9326a89..a3a1b262aa0f 100644 --- a/test-data/unit/cmdline.test +++ b/test-data/unit/cmdline.test @@ -94,6 +94,7 @@ sub.pkg is not a valid Python package name mypy: can't decode file 'a.py': unknown encoding: uft-8 == Return code: 2 +-- ' [case testCannotIgnoreDuplicateModule] # cmd: mypy one/mod/__init__.py two/mod/__init__.py [file one/mod/__init__.py] @@ -157,9 +158,9 @@ def f(): [file mypy.ini] [[mypy] disallow_untyped_defs = True -[[mypy-y*] +[[mypy-y] disallow_untyped_defs = False -[[mypy-z*] +[[mypy-z] disallow_untyped_calls = True [file x.py] def f(a): @@ -181,7 +182,7 @@ z.py:1: error: Function is missing a type annotation z.py:4: error: Call to untyped function "f" in typed context x.py:1: error: Function is missing a type annotation -[case testPerFileConfigSectionMultipleMatches] +[case testPerFileConfigSectionMultipleMatchesDisallowed] # cmd: mypy xx.py xy.py yx.py yy.py [file mypy.ini] [[mypy] @@ -202,18 +203,15 @@ def g(a: int) -> int: return f(a) def f(a): pass def g(a: int) -> int: return f(a) [out] -yy.py:2: error: Call to untyped function "f" in typed context -yx.py:1: error: Function is missing a type annotation -yx.py:2: error: Call to untyped function "f" in typed context -xy.py:1: error: Function is missing a type annotation -xy.py:2: error: Call to untyped function "f" in typed context -xx.py:1: error: Function is missing a type annotation +mypy.ini: [mypy-*x*]: Invalid pattern. Patterns must be 'module_name' or 'module_name.*' +mypy.ini: [mypy-*y*]: Invalid pattern. Patterns must be 'module_name' or 'module_name.*' +== Return code: 0 [case testMultipleGlobConfigSection] # cmd: mypy x.py y.py z.py [file mypy.ini] [[mypy] -[[mypy-x*,z*] +[[mypy-x.*,z.*] disallow_untyped_defs = True [file x.py] def f(a): pass @@ -266,10 +264,11 @@ mypy.ini: [mypy]: ignore_missing_imports: Not a boolean: nah # cmd: mypy -c pass [file mypy.ini] [[mypy] -[[mypy-*] +[[mypy-foo.*] python_version = 3.4 [out] mypy.ini: [mypy-*]: Per-module sections should only specify per-module flags (python_version) +mypy.ini: [mypy-*]: Invalid pattern. Patterns must be 'module_name' or 'module_name.*' == Return code: 0 [case testConfigMypyPath] @@ -572,7 +571,7 @@ main.py:3: error: Argument 1 to "f" becomes "Any" due to an unfollowed import # cmd: mypy m.py [file mypy.ini] [[mypy] -[[mypy-m*] +[[mypy-m] disallow_any_explicit = True [file m.py] @@ -597,7 +596,7 @@ m.py:9: error: Explicit "Any" is not allowed [file mypy.ini] [[mypy] -[[mypy-m*] +[[mypy-m] disallow_any_explicit = True [file m.py] @@ -617,7 +616,7 @@ m.py:5: error: Explicit "Any" is not allowed [file mypy.ini] [[mypy] -[[mypy-m*] +[[mypy-m] disallow_any_explicit = True [file m.py] @@ -631,7 +630,7 @@ m.py:2: error: Explicit "Any" is not allowed [file mypy.ini] [[mypy] -[[mypy-m*] +[[mypy-m] disallow_any_explicit = True [file m.py] @@ -651,7 +650,7 @@ m.py:6: error: Explicit "Any" is not allowed [file mypy.ini] [[mypy] -[[mypy-m*] +[[mypy-m] disallow_any_explicit = True [file m.py] @@ -673,7 +672,7 @@ m.py:4: error: Explicit "Any" is not allowed [file mypy.ini] [[mypy] -[[mypy-m*] +[[mypy-m] disallow_any_explicit = True [file m.py] @@ -698,7 +697,7 @@ m.py:10: error: Explicit "Any" is not allowed [file mypy.ini] [[mypy] -[[mypy-m*] +[[mypy-m] disallow_any_explicit = True [file m.py] @@ -716,7 +715,7 @@ m.py:5: error: Explicit "Any" is not allowed [file mypy.ini] [[mypy] -[[mypy-m*] +[[mypy-m] disallow_any_explicit = True [file m.py] @@ -733,7 +732,7 @@ m.py:3: error: Explicit "Any" is not allowed [file mypy.ini] [[mypy] -[[mypy-m*] +[[mypy-m] disallow_any_explicit = True [file m.py] @@ -748,7 +747,7 @@ m.py:3: error: Explicit "Any" is not allowed [file mypy.ini] [[mypy] -[[mypy-m*] +[[mypy-m] disallow_any_explicit = True [file m.py] @@ -766,7 +765,7 @@ m.py:4: error: Explicit "Any" is not allowed [file mypy.ini] [[mypy] -[[mypy-m*] +[[mypy-m] disallow_any_explicit = True [file m.py] @@ -784,7 +783,7 @@ m.py:4: error: Explicit "Any" is not allowed [file mypy.ini] [[mypy] -[[mypy-m*] +[[mypy-m] disallow_any_explicit = True [file m.py] @@ -1008,28 +1007,40 @@ class ShouldNotBeFine(x): ... [out] y.py:5: error: Class cannot subclass 'x' (has type 'Any') -[case testDeterministicSectionOrdering] +[case testSectionInheritance] # cmd: mypy a [file a/__init__.py] +0() +[file a/foo.py] +0() [file a/b/__init__.py] [file a/b/c/__init__.py] +0() [file a/b/c/d/__init__.py] [file a/b/c/d/e/__init__.py] -0() +from typing import List +def g(x: List) -> None: pass +g(None) [file mypy.ini] [[mypy] +disallow_any_generics = False [[mypy-a.*] ignore_errors = True [[mypy-a.b.*] +disallow_any_generics = True ignore_errors = True [[mypy-a.b.c.*] ignore_errors = True [[mypy-a.b.c.d.*] ignore_errors = True +[[mypy-a.b.c.d.e.*] +ignore_errors = True +strict_optional = True [[mypy-a.b.c.d.e] ignore_errors = False [out] -a/b/c/d/e/__init__.py:1: error: "int" not callable +a/b/c/d/e/__init__.py:2: error: Missing type parameters for generic type +a/b/c/d/e/__init__.py:3: error: Argument 1 to "g" has incompatible type "None"; expected "List[Any]" [case testDisallowUntypedDefsAndGenerics] # cmd: mypy a.py @@ -1048,6 +1059,7 @@ a.py:1: error: Function is missing a type annotation [out] mypy: can't read file 'nope.py': No such file or directory == Return code: 2 +--' [case testParseError] # cmd: mypy a.py @@ -1113,3 +1125,25 @@ follow_imports_for_stubs = True [file main.py] import math math.frobnicate() + +[case testConfigWarnUnusedSection1] +# cmd: mypy foo.py quux.py spam/eggs.py +# flags: --follow-imports=skip +[file mypy.ini] +[[mypy] +warn_unused_configs = True +[[mypy-bar] +[[mypy-foo] +[[mypy-baz.*] +[[mypy-quux.*] +[[mypy-spam.*] +[[mypy-spam.eggs] +[[mypy-emarg.*] +[[mypy-emarg.hatch] +[file foo.py] +[file quux.py] +[file spam/__init__.py] +[file spam/eggs.py] +[out] +Warning: unused section(s) in mypy.ini: [mypy-bar], [mypy-baz.*], [mypy-emarg.*], [mypy-emarg.hatch] +== Return code: 0 From 78e1738a4813a09aa40e837f9f6956ef65457398 Mon Sep 17 00:00:00 2001 From: Michael Sullivan Date: Thu, 12 Apr 2018 13:22:10 -0700 Subject: [PATCH 2/4] fix a test messup --- test-data/unit/cmdline.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test-data/unit/cmdline.test b/test-data/unit/cmdline.test index a3a1b262aa0f..84da97c5bec8 100644 --- a/test-data/unit/cmdline.test +++ b/test-data/unit/cmdline.test @@ -264,7 +264,7 @@ mypy.ini: [mypy]: ignore_missing_imports: Not a boolean: nah # cmd: mypy -c pass [file mypy.ini] [[mypy] -[[mypy-foo.*] +[[mypy-*] python_version = 3.4 [out] mypy.ini: [mypy-*]: Per-module sections should only specify per-module flags (python_version) From 3500618ea4511dcd90eaa141b120c899fd541bf0 Mon Sep 17 00:00:00 2001 From: Michael Sullivan Date: Thu, 12 Apr 2018 13:49:00 -0700 Subject: [PATCH 3/4] fix lint --- mypy/main.py | 5 +++-- typeshed | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/mypy/main.py b/mypy/main.py index eb5da612746d..846d60f0d686 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -733,12 +733,13 @@ def parse_config_file(options: Options, filename: Optional[str]) -> None: if (any(c in glob for c in '?[]!') or ('*' in glob and (not glob.endswith('.*') or '*' in glob[:-2]))): - print("%s: Invalid pattern. Patterns must be 'module_name' or 'module_name.*'" % - prefix, + print("%s: Invalid pattern. Patterns must be 'module_name' or 'module_name.*'" + % prefix, file=sys.stderr) else: options.per_module_options[glob] = updates + def parse_section(prefix: str, template: Options, section: Mapping[str, str]) -> Tuple[Dict[str, object], Dict[str, str]]: """Parse one section of a config file. diff --git a/typeshed b/typeshed index 81a6ea43a580..dca53fb2bd90 160000 --- a/typeshed +++ b/typeshed @@ -1 +1 @@ -Subproject commit 81a6ea43a580c68a6a78154a10258dd0b98ebbd3 +Subproject commit dca53fb2bd900648dd146e2b0efb317f48bd1707 From 7691a2c633e6452e85a6d9794a2f6eaa0f8b693a Mon Sep 17 00:00:00 2001 From: Michael Sullivan Date: Fri, 13 Apr 2018 10:02:38 -0700 Subject: [PATCH 4/4] add some commas --- docs/source/config_file.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/config_file.rst b/docs/source/config_file.rst index 82fb755cfd03..2d80e7811cd3 100644 --- a/docs/source/config_file.rst +++ b/docs/source/config_file.rst @@ -29,7 +29,7 @@ characters. the global flags. The ``setup.cfg`` file is an exception to this. - Additional sections named ``[mypy-PATTERN1,PATTERN2,...]`` may be - present, where ``PATTERN1``, ``PATTERN2`` etc. are comma-separated + present, where ``PATTERN1``, ``PATTERN2``, etc., are comma-separated patterns of the form ``dotted_module_name`` or ``dotted_module_name.*``. These sections specify additional flags that only apply to *modules* whose name matches at least one of the patterns.