From 78ef768d5928b087e084af92c0d90ef67094ef0a Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Tue, 2 Mar 2021 17:43:09 +0000 Subject: [PATCH 1/5] replace bool with bool_t in pandas/core/generic --- .pre-commit-config.yaml | 5 ++ LICENSES/PYUPGRADE_LICENSE | 19 +++++ scripts/no_bool_in_generic.py | 89 ++++++++++++++++++++++++ scripts/tests/test_no_bool_in_generic.py | 18 +++++ 4 files changed, 131 insertions(+) create mode 100644 LICENSES/PYUPGRADE_LICENSE create mode 100644 scripts/no_bool_in_generic.py create mode 100644 scripts/tests/test_no_bool_in_generic.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c63f50b3c1421..2088ca3cad8b6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -219,3 +219,8 @@ repos: files: ^pandas/core/ exclude: ^pandas/core/api\.py$ types: [python] + - id: no-bool-in-core-generic + name: Use bool_t instead of bool in pandas/core/generic.py + entry: python scripts/no_bool_in_generic.py + language: python + files: ^pandas/core/generic\.py$ diff --git a/LICENSES/PYUPGRADE_LICENSE b/LICENSES/PYUPGRADE_LICENSE new file mode 100644 index 0000000000000..522fbe20b8991 --- /dev/null +++ b/LICENSES/PYUPGRADE_LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2017 Anthony Sottile + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/scripts/no_bool_in_generic.py b/scripts/no_bool_in_generic.py new file mode 100644 index 0000000000000..270d5dfce0954 --- /dev/null +++ b/scripts/no_bool_in_generic.py @@ -0,0 +1,89 @@ +""" +Check that pandas/core/generic.py doesn't use bool as a type annotation. + +There is already the method `bool`, so the alias `bool_t` should be used instead. + +This is meant to be run as a pre-commit hook - to run it manually, you can do: + + pre-commit run no-bool-in-core-generic --all-files + +The function `visit` is adapted from a function by the same name in pyupgrade: +https://github.com/asottile/pyupgrade/blob/5495a248f2165941c5d3b82ac3226ba7ad1fa59d/pyupgrade/_data.py#L70-L113 +""" + +import argparse +import ast +import collections +from typing import ( + Dict, + List, + Optional, + Sequence, + Tuple, +) + + +def visit( + tree: ast.Module, +) -> Dict[int, List[int]]: + in_annotation = False + nodes: List[Tuple[bool, ast.AST]] = [(in_annotation, tree)] + to_replace = collections.defaultdict(list) + + while nodes: + in_annotation, node = nodes.pop() + + if isinstance(node, ast.Name) and in_annotation and node.id == "bool": + to_replace[node.lineno].append(node.col_offset) + + for name in reversed(node._fields): + value = getattr(node, name) + if name in {"annotation", "returns"}: + next_in_annotation = True + else: + next_in_annotation = in_annotation + if isinstance(value, ast.AST): + nodes.append((next_in_annotation, value)) + elif isinstance(value, list): + for value in reversed(value): + if isinstance(value, ast.AST): + nodes.append((next_in_annotation, value)) + + return to_replace + + +def replace_bool_with_bool_t(to_replace, content: str) -> str: + new_lines = [] + + for n, line in enumerate(content.splitlines(), start=1): + if n in to_replace: + for col_offset in reversed(to_replace[n]): + line = line[:col_offset] + "bool_t" + line[col_offset + 4 :] + new_lines.append(line) + return "\n".join(new_lines) + + +def check_for_bool_in_generic(content: str) -> Tuple[bool, str]: + tree = ast.parse(content) + to_replace = visit(tree) + if not to_replace: + return False, content + return True, replace_bool_with_bool_t(to_replace, content) + + +def main(argv: Optional[Sequence[str]] = None) -> None: + parser = argparse.ArgumentParser() + parser.add_argument("paths", nargs="*") + args = parser.parse_args(argv) + + for path in args.paths: + with open(path, encoding="utf-8") as fd: + content = fd.read() + replace, new_content = check_for_bool_in_generic(content) + if replace: + with open(path, "w", encoding="utf-8") as fd: + fd.write(new_content) + + +if __name__ == "__main__": + main() diff --git a/scripts/tests/test_no_bool_in_generic.py b/scripts/tests/test_no_bool_in_generic.py new file mode 100644 index 0000000000000..2f16db3e8ce5c --- /dev/null +++ b/scripts/tests/test_no_bool_in_generic.py @@ -0,0 +1,18 @@ +from scripts.no_bool_in_generic import check_for_bool_in_generic + +BAD_FILE = "def foo(a: bool) -> bool:\n return bool(0)" +GOOD_FILE = "def foo(a: bool_t) -> bool_t:\n return bool(0)" + + +def test_bad_file_with_replace(): + content = BAD_FILE + _, result = check_for_bool_in_generic(content) + expected = GOOD_FILE + assert result == expected + + +def test_good_file_with_replace(): + content = GOOD_FILE + _, result = check_for_bool_in_generic(content) + expected = content + assert result == expected From 7f503d20b7a17d5beaa6d569c329b76b2bcfe44c Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 3 Apr 2021 21:31:01 +0100 Subject: [PATCH 2/5] run hook on generic --- pandas/core/generic.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6b4e3c7caef50..dd7548da6ba3a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -240,7 +240,7 @@ class NDFrame(PandasObject, SelectionMixin, indexing.IndexingMixin): def __init__( self, data: Manager, - copy: bool = False, + copy: bool_t = False, attrs: Optional[Mapping[Hashable, Any]] = None, ): # copy kwarg is retained for mypy compat, is not used @@ -257,7 +257,7 @@ def __init__( @classmethod def _init_mgr( - cls, mgr, axes, dtype: Optional[Dtype] = None, copy: bool = False + cls, mgr, axes, dtype: Optional[Dtype] = None, copy: bool_t = False ) -> Manager: """ passed a manager and a axes dict """ for a, axe in axes.items(): @@ -385,8 +385,8 @@ def flags(self) -> Flags: def set_flags( self: FrameOrSeries, *, - copy: bool = False, - allows_duplicate_labels: Optional[bool] = None, + copy: bool_t = False, + allows_duplicate_labels: Optional[bool_t] = None, ) -> FrameOrSeries: """ Return a new object with updated flags. @@ -475,7 +475,7 @@ def _data(self): _stat_axis_name = "index" _AXIS_ORDERS: List[str] _AXIS_TO_AXIS_NUMBER: Dict[Axis, int] = {0: 0, "index": 0, "rows": 0} - _AXIS_REVERSED: bool + _AXIS_REVERSED: bool_t _info_axis_number: int _info_axis_name: str _AXIS_LEN: int @@ -502,7 +502,7 @@ def _construct_axes_dict(self, axes=None, **kwargs): @final @classmethod def _construct_axes_from_arguments( - cls, args, kwargs, require_all: bool = False, sentinel=None + cls, args, kwargs, require_all: bool_t = False, sentinel=None ): """ Construct and returns axes if supplied in args/kwargs. @@ -722,11 +722,11 @@ def set_axis(self: FrameOrSeries, labels, *, inplace: Literal[True]) -> None: @overload def set_axis( - self: FrameOrSeries, labels, axis: Axis = ..., inplace: bool = ... + self: FrameOrSeries, labels, axis: Axis = ..., inplace: bool_t = ... ) -> Optional[FrameOrSeries]: ... - def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): + def set_axis(self, labels, axis: Axis = 0, inplace: bool_t = False): """ Assign desired index to given axis. @@ -757,7 +757,7 @@ def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): return self._set_axis_nocheck(labels, axis, inplace) @final - def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool): + def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool_t): # NDFrame.rename with inplace=False calls set_axis(inplace=True) on a copy. if inplace: setattr(self, self._get_axis_name(axis), labels) @@ -1003,8 +1003,8 @@ def rename( index: Optional[Renamer] = None, columns: Optional[Renamer] = None, axis: Optional[Axis] = None, - copy: bool = True, - inplace: bool = False, + copy: bool_t = True, + inplace: bool_t = False, level: Optional[Level] = None, errors: str = "ignore", ) -> Optional[FrameOrSeries]: @@ -1410,13 +1410,13 @@ def _set_axis_name(self, name, axis=0, inplace=False): # Comparison Methods @final - def _indexed_same(self, other) -> bool: + def _indexed_same(self, other) -> bool_t: return all( self._get_axis(a).equals(other._get_axis(a)) for a in self._AXIS_ORDERS ) @final - def equals(self, other: object) -> bool: + def equals(self, other: object) -> bool_t: """ Test whether two objects contain the same elements. @@ -5078,7 +5078,7 @@ def filter( return self.reindex(**{name: [r for r in items if r in labels]}) elif like: - def f(x) -> bool: + def f(x) -> bool_t: assert like is not None # needed for mypy return like in ensure_str(x) @@ -5086,7 +5086,7 @@ def f(x) -> bool: return self.loc(axis=axis)[values] elif regex: - def f(x) -> bool: + def f(x) -> bool_t: return matcher.search(ensure_str(x)) is not None matcher = re.compile(regex) From 4219293ece0d69890abb53b4c13afe6d6d868227 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 3 Apr 2021 21:34:12 +0100 Subject: [PATCH 3/5] :art: --- scripts/no_bool_in_generic.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/no_bool_in_generic.py b/scripts/no_bool_in_generic.py index 270d5dfce0954..0b3f2bff33bdc 100644 --- a/scripts/no_bool_in_generic.py +++ b/scripts/no_bool_in_generic.py @@ -23,9 +23,8 @@ ) -def visit( - tree: ast.Module, -) -> Dict[int, List[int]]: +def visit(tree: ast.Module) -> Dict[int, List[int]]: + "Step through tree, recording when nodes are in annotations." in_annotation = False nodes: List[Tuple[bool, ast.AST]] = [(in_annotation, tree)] to_replace = collections.defaultdict(list) From 0e100929cc4c75856220cb7d26a7edb50702b67b Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 3 Apr 2021 21:37:14 +0100 Subject: [PATCH 4/5] rename --- scripts/no_bool_in_generic.py | 12 ++++++++---- scripts/tests/test_no_bool_in_generic.py | 6 ++++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/scripts/no_bool_in_generic.py b/scripts/no_bool_in_generic.py index 0b3f2bff33bdc..f80eff56b2729 100644 --- a/scripts/no_bool_in_generic.py +++ b/scripts/no_bool_in_generic.py @@ -65,9 +65,13 @@ def replace_bool_with_bool_t(to_replace, content: str) -> str: def check_for_bool_in_generic(content: str) -> Tuple[bool, str]: tree = ast.parse(content) to_replace = visit(tree) + if not to_replace: - return False, content - return True, replace_bool_with_bool_t(to_replace, content) + mutated = False + return mutated, content + + mutated = True + return mutated, replace_bool_with_bool_t(to_replace, content) def main(argv: Optional[Sequence[str]] = None) -> None: @@ -78,8 +82,8 @@ def main(argv: Optional[Sequence[str]] = None) -> None: for path in args.paths: with open(path, encoding="utf-8") as fd: content = fd.read() - replace, new_content = check_for_bool_in_generic(content) - if replace: + mutated, new_content = check_for_bool_in_generic(content) + if mutated: with open(path, "w", encoding="utf-8") as fd: fd.write(new_content) diff --git a/scripts/tests/test_no_bool_in_generic.py b/scripts/tests/test_no_bool_in_generic.py index 2f16db3e8ce5c..0bc91c5d1cf1e 100644 --- a/scripts/tests/test_no_bool_in_generic.py +++ b/scripts/tests/test_no_bool_in_generic.py @@ -6,13 +6,15 @@ def test_bad_file_with_replace(): content = BAD_FILE - _, result = check_for_bool_in_generic(content) + mutated, result = check_for_bool_in_generic(content) expected = GOOD_FILE assert result == expected + assert mutated def test_good_file_with_replace(): content = GOOD_FILE - _, result = check_for_bool_in_generic(content) + mutated, result = check_for_bool_in_generic(content) expected = content assert result == expected + assert not mutated From 2329179ac2effa08c753dddb2df0cebcb0ac55c4 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Wed, 7 Apr 2021 20:44:31 +0100 Subject: [PATCH 5/5] fixup pre-commit failure :flushed: --- pandas/core/generic.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b3689af5125c9..8e20eeb16c7a8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -232,7 +232,7 @@ class NDFrame(PandasObject, SelectionMixin, indexing.IndexingMixin): def __init__( self, data: Manager, - copy: bool = False, + copy: bool_t = False, attrs: Mapping[Hashable, Any] | None = None, ): # copy kwarg is retained for mypy compat, is not used @@ -249,7 +249,7 @@ def __init__( @classmethod def _init_mgr( - cls, mgr, axes, dtype: Dtype | None = None, copy: bool = False + cls, mgr, axes, dtype: Dtype | None = None, copy: bool_t = False ) -> Manager: """ passed a manager and a axes dict """ for a, axe in axes.items(): @@ -377,8 +377,8 @@ def flags(self) -> Flags: def set_flags( self: FrameOrSeries, *, - copy: bool = False, - allows_duplicate_labels: bool | None = None, + copy: bool_t = False, + allows_duplicate_labels: bool_t | None = None, ) -> FrameOrSeries: """ Return a new object with updated flags. @@ -467,7 +467,7 @@ def _data(self): _stat_axis_name = "index" _AXIS_ORDERS: list[str] _AXIS_TO_AXIS_NUMBER: dict[Axis, int] = {0: 0, "index": 0, "rows": 0} - _AXIS_REVERSED: bool + _AXIS_REVERSED: bool_t _info_axis_number: int _info_axis_name: str _AXIS_LEN: int @@ -714,7 +714,7 @@ def set_axis(self: FrameOrSeries, labels, *, inplace: Literal[True]) -> None: @overload def set_axis( - self: FrameOrSeries, labels, axis: Axis = ..., inplace: bool = ... + self: FrameOrSeries, labels, axis: Axis = ..., inplace: bool_t = ... ) -> FrameOrSeries | None: ... @@ -995,8 +995,8 @@ def rename( index: Renamer | None = None, columns: Renamer | None = None, axis: Axis | None = None, - copy: bool = True, - inplace: bool = False, + copy: bool_t = True, + inplace: bool_t = False, level: Level | None = None, errors: str = "ignore", ) -> FrameOrSeries | None: