Skip to content

STYLE pre-commit autoupdate, and other cleanups #49896

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/code-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ jobs:

- name: Run pre-commit
uses: pre-commit/action@v2.0.3
with:
extra_args: --verbose --all-files

docstring_typing_pylint:
name: Docstring validation, typing, and pylint
Expand Down Expand Up @@ -89,7 +91,7 @@ jobs:
- name: Typing + pylint
uses: pre-commit/action@v2.0.3
with:
extra_args: --hook-stage manual --all-files
extra_args: --verbose --hook-stage manual --all-files
if: ${{ steps.build.outcome == 'success' && always() }}

- name: Run docstring validation script tests
Expand Down
30 changes: 19 additions & 11 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,18 @@ repos:
entry: python scripts/run_vulture.py
pass_filenames: true
require_serial: false
- repo: https://github.com/python/black
rev: 22.10.0
hooks:
- id: black
- repo: https://github.com/codespell-project/codespell
rev: v2.2.2
hooks:
- id: codespell
types_or: [python, rst, markdown]
- repo: https://github.com/MarcoGorelli/cython-lint
rev: v0.2.1
rev: v0.9.1
hooks:
- id: cython-lint
- id: double-quote-cython-strings
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
rev: v4.4.0
hooks:
- id: debug-statements
- id: end-of-file-fixer
Expand All @@ -50,22 +47,22 @@ repos:
exclude: ^pandas/_libs/src/(klib|headers)/
args: [--quiet, '--extensions=c,h', '--headers=h', --recursive, '--filter=-readability/casting,-runtime/int,-build/include_subdir']
- repo: https://github.com/PyCQA/flake8
rev: 5.0.4
rev: 6.0.0
hooks:
- id: flake8
# Need to patch os.remove rule in pandas-dev-flaker
exclude: ^ci/fix_wheels.py
additional_dependencies: &flake8_dependencies
- flake8==5.0.4
- flake8==6.0.0
- flake8-bugbear==22.7.1
- pandas-dev-flaker==0.5.0
- repo: https://github.com/pycqa/pylint
rev: v2.15.5
rev: v2.15.6
hooks:
- id: pylint
stages: [manual]
- repo: https://github.com/pycqa/pylint
rev: v2.15.5
rev: v2.15.6
hooks:
- id: pylint
alias: redefined-outer-name
Expand All @@ -88,7 +85,7 @@ repos:
hooks:
- id: isort
- repo: https://github.com/asottile/pyupgrade
rev: v3.2.0
rev: v3.2.2
hooks:
- id: pyupgrade
args: [--py38-plus]
Expand All @@ -111,8 +108,19 @@ repos:
hooks:
- id: yesqa
additional_dependencies: *flake8_dependencies
stages: [manual]
- repo: local
hooks:
# NOTE: we make `black` a local hook because if it's installed from
# PyPI (rather than from source) then it'll run twice as fast thanks to mypyc
- id: black
name: black
description: "Black: The uncompromising Python code formatter"
entry: black
language: python
require_serial: true
types_or: [python, pyi]
additional_dependencies: [black==22.10.0]
- id: pyright
# note: assumes python env is setup and activated
name: pyright
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ dependencies:
# code checks
- black=22.3.0
- cpplint
- flake8=5.0.4
- flake8=6.0.0
- flake8-bugbear=22.7.1 # used by flake8, find likely bugs
- isort>=5.2.1 # check that imports are in the right order
- mypy=0.990
Expand Down
20 changes: 10 additions & 10 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def is_lexsorted(list_of_arrays: list) -> bint:
cdef int64_t **vecs = <int64_t**>malloc(nlevels * sizeof(int64_t*))
for i in range(nlevels):
arr = list_of_arrays[i]
assert arr.dtype.name == 'int64'
assert arr.dtype.name == "int64"
vecs[i] = <int64_t*>cnp.PyArray_DATA(arr)

# Assume uniqueness??
Expand Down Expand Up @@ -514,9 +514,9 @@ def validate_limit(nobs: int | None, limit=None) -> int:
lim = nobs
else:
if not util.is_integer_object(limit):
raise ValueError('Limit must be an integer')
raise ValueError("Limit must be an integer")
if limit < 1:
raise ValueError('Limit must be greater than 0')
raise ValueError("Limit must be greater than 0")
lim = limit

return lim
Expand Down Expand Up @@ -958,7 +958,7 @@ def rank_1d(
if not ascending:
tiebreak = TIEBREAK_FIRST_DESCENDING

keep_na = na_option == 'keep'
keep_na = na_option == "keep"

N = len(values)
if labels is not None:
Expand All @@ -984,7 +984,7 @@ def rank_1d(
# with mask, without obfuscating location of missing data
# in values array
if numeric_object_t is object and values.dtype != np.object_:
masked_vals = values.astype('O')
masked_vals = values.astype("O")
else:
masked_vals = values.copy()

Expand All @@ -1005,7 +1005,7 @@ def rank_1d(
# If descending, fill with highest value since descending
# will flip the ordering to still end up with lowest rank.
# Symmetric logic applies to `na_option == 'bottom'`
nans_rank_highest = ascending ^ (na_option == 'top')
nans_rank_highest = ascending ^ (na_option == "top")
nan_fill_val = get_rank_nan_fill_val(nans_rank_highest, <numeric_object_t>0)
if nans_rank_highest:
order = [masked_vals, mask]
Expand Down Expand Up @@ -1345,7 +1345,7 @@ def rank_2d(
if not ascending:
tiebreak = TIEBREAK_FIRST_DESCENDING

keep_na = na_option == 'keep'
keep_na = na_option == "keep"

# For cases where a mask is not possible, we can avoid mask checks
check_mask = (
Expand All @@ -1362,9 +1362,9 @@ def rank_2d(

if numeric_object_t is object:
if values.dtype != np.object_:
values = values.astype('O')
values = values.astype("O")

nans_rank_highest = ascending ^ (na_option == 'top')
nans_rank_highest = ascending ^ (na_option == "top")
if check_mask:
nan_fill_val = get_rank_nan_fill_val(nans_rank_highest, <numeric_object_t>0)

Expand All @@ -1385,7 +1385,7 @@ def rank_2d(
order = (values, ~np.asarray(mask))

n, k = (<object>values).shape
out = np.empty((n, k), dtype='f8', order='F')
out = np.empty((n, k), dtype="f8", order="F")
grp_sizes = np.ones(n, dtype=np.int64)

# lexsort is slower, so only use if we need to worry about the mask
Expand Down
16 changes: 8 additions & 8 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -604,12 +604,12 @@ def group_any_all(
intp_t lab
int8_t flag_val, val

if val_test == 'all':
if val_test == "all":
# Because the 'all' value of an empty iterable in Python is True we can
# start with an array full of ones and set to zero when a False value
# is encountered
flag_val = 0
elif val_test == 'any':
elif val_test == "any":
# Because the 'any' value of an empty iterable in Python is False we
# can start with an array full of zeros and set to one only if any
# value encountered is True
Expand Down Expand Up @@ -1061,7 +1061,7 @@ def group_ohlc(
N, K = (<object>values).shape

if out.shape[1] != 4:
raise ValueError('Output array must have 4 columns')
raise ValueError("Output array must have 4 columns")

if K > 1:
raise NotImplementedError("Argument 'values' must have only one dimension")
Expand Down Expand Up @@ -1157,11 +1157,11 @@ def group_quantile(
)

inter_methods = {
'linear': INTERPOLATION_LINEAR,
'lower': INTERPOLATION_LOWER,
'higher': INTERPOLATION_HIGHER,
'nearest': INTERPOLATION_NEAREST,
'midpoint': INTERPOLATION_MIDPOINT,
"linear": INTERPOLATION_LINEAR,
"lower": INTERPOLATION_LOWER,
"higher": INTERPOLATION_HIGHER,
"nearest": INTERPOLATION_NEAREST,
"midpoint": INTERPOLATION_MIDPOINT,
}
interp = inter_methods[interpolation]

Expand Down
28 changes: 14 additions & 14 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,8 @@ cdef class IndexEngine:
if self.is_monotonic_increasing:
values = self.values
try:
left = values.searchsorted(val, side='left')
right = values.searchsorted(val, side='right')
left = values.searchsorted(val, side="left")
right = values.searchsorted(val, side="right")
except TypeError:
# e.g. GH#29189 get_loc(None) with a Float64Index
# 2021-09-29 Now only reached for object-dtype
Expand Down Expand Up @@ -353,8 +353,8 @@ cdef class IndexEngine:
remaining_stargets = set()
for starget in stargets:
try:
start = values.searchsorted(starget, side='left')
end = values.searchsorted(starget, side='right')
start = values.searchsorted(starget, side="left")
end = values.searchsorted(starget, side="right")
except TypeError: # e.g. if we tried to search for string in int array
remaining_stargets.add(starget)
else:
Expand Down Expand Up @@ -551,7 +551,7 @@ cdef class DatetimeEngine(Int64Engine):
return self._get_loc_duplicates(conv)
values = self.values

loc = values.searchsorted(conv, side='left')
loc = values.searchsorted(conv, side="left")

if loc == len(values) or values[loc] != conv:
raise KeyError(val)
Expand Down Expand Up @@ -655,8 +655,8 @@ cdef class BaseMultiIndexCodesEngine:
# with positive integers (-1 for NaN becomes 1). This enables us to
# differentiate between values that are missing in other and matching
# NaNs. We will set values that are not found to 0 later:
labels_arr = np.array(labels, dtype='int64').T + multiindex_nulls_shift
codes = labels_arr.astype('uint64', copy=False)
labels_arr = np.array(labels, dtype="int64").T + multiindex_nulls_shift
codes = labels_arr.astype("uint64", copy=False)
self.level_has_nans = [-1 in lab for lab in labels]

# Map each codes combination in the index to an integer unambiguously
Expand Down Expand Up @@ -693,7 +693,7 @@ cdef class BaseMultiIndexCodesEngine:
if self.level_has_nans[i] and codes.hasnans:
result[codes.isna()] += 1
level_codes.append(result)
return self._codes_to_ints(np.array(level_codes, dtype='uint64').T)
return self._codes_to_ints(np.array(level_codes, dtype="uint64").T)

def get_indexer(self, target: np.ndarray) -> np.ndarray:
"""
Expand Down Expand Up @@ -754,12 +754,12 @@ cdef class BaseMultiIndexCodesEngine:
ndarray[int64_t, ndim=1] new_codes, new_target_codes
ndarray[intp_t, ndim=1] sorted_indexer

target_order = np.argsort(target).astype('int64')
target_order = np.argsort(target).astype("int64")
target_values = target[target_order]
num_values, num_target_values = len(values), len(target_values)
new_codes, new_target_codes = (
np.empty((num_values,)).astype('int64'),
np.empty((num_target_values,)).astype('int64'),
np.empty((num_values,)).astype("int64"),
np.empty((num_target_values,)).astype("int64"),
)

# `values` and `target_values` are both sorted, so we walk through them
Expand Down Expand Up @@ -809,7 +809,7 @@ cdef class BaseMultiIndexCodesEngine:
raise KeyError(key)

# Transform indices into single integer:
lab_int = self._codes_to_ints(np.array(indices, dtype='uint64'))
lab_int = self._codes_to_ints(np.array(indices, dtype="uint64"))

return self._base.get_loc(self, lab_int)

Expand Down Expand Up @@ -940,8 +940,8 @@ cdef class SharedEngine:
if self.is_monotonic_increasing:
values = self.values
try:
left = values.searchsorted(val, side='left')
right = values.searchsorted(val, side='right')
left = values.searchsorted(val, side="left")
right = values.searchsorted(val, side="right")
except TypeError:
# e.g. GH#29189 get_loc(None) with a Float64Index
raise KeyError(val)
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/internals.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ cdef class BlockPlacement:
or not cnp.PyArray_ISWRITEABLE(val)
or (<ndarray>val).descr.type_num != cnp.NPY_INTP
):
arr = np.require(val, dtype=np.intp, requirements='W')
arr = np.require(val, dtype=np.intp, requirements="W")
else:
arr = val
# Caller is responsible for ensuring arr.ndim == 1
Expand Down
Loading