diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 6b03e76a1d691..2bf2be082f639 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -8,6 +8,7 @@ from pandas._config import get_option from pandas._libs import NaT, Timedelta, Timestamp, iNaT, lib +from pandas._typing import Dtype, Scalar from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask @@ -37,7 +38,7 @@ _USE_BOTTLENECK = False -def set_use_bottleneck(v=True): +def set_use_bottleneck(v: bool = True) -> None: # set/unset to use bottleneck global _USE_BOTTLENECK if _BOTTLENECK_INSTALLED: @@ -93,7 +94,9 @@ def __call__(self, alt): bn_func = None @functools.wraps(alt) - def f(values, axis=None, skipna=True, **kwds): + def f( + values: np.ndarray, axis: Optional[int] = None, skipna: bool = True, **kwds + ): if len(self.kwargs) > 0: for k, v in self.kwargs.items(): if k not in kwds: @@ -129,10 +132,10 @@ def f(values, axis=None, skipna=True, **kwds): return f -def _bn_ok_dtype(dt, name: str) -> bool: +def _bn_ok_dtype(dtype: Dtype, name: str) -> bool: # Bottleneck chokes on datetime64 - if not is_object_dtype(dt) and not ( - is_datetime_or_timedelta_dtype(dt) or is_datetime64tz_dtype(dt) + if not is_object_dtype(dtype) and not ( + is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype) ): # GH 15507 @@ -163,7 +166,9 @@ def _has_infs(result) -> bool: return False -def _get_fill_value(dtype, fill_value=None, fill_value_typ=None): +def _get_fill_value( + dtype: Dtype, fill_value: Optional[Scalar] = None, fill_value_typ=None +): """ return the correct fill value for the dtype of the values """ if fill_value is not None: return fill_value @@ -326,12 +331,12 @@ def _get_values( return values, mask, dtype, dtype_max, fill_value -def _na_ok_dtype(dtype): +def _na_ok_dtype(dtype) -> bool: # TODO: what about datetime64tz? PeriodDtype? return not issubclass(dtype.type, (np.integer, np.timedelta64, np.datetime64)) -def _wrap_results(result, dtype, fill_value=None): +def _wrap_results(result, dtype: Dtype, fill_value=None): """ wrap our results if needed """ if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): @@ -362,7 +367,9 @@ def _wrap_results(result, dtype, fill_value=None): return result -def _na_for_min_count(values, axis: Optional[int]): +def _na_for_min_count( + values: np.ndarray, axis: Optional[int] +) -> Union[Scalar, np.ndarray]: """ Return the missing value for `values`. @@ -393,7 +400,12 @@ def _na_for_min_count(values, axis: Optional[int]): return result -def nanany(values, axis=None, skipna: bool = True, mask=None): +def nanany( + values: np.ndarray, + axis: Optional[int] = None, + skipna: bool = True, + mask: Optional[np.ndarray] = None, +) -> bool: """ Check if any elements along an axis evaluate to True. @@ -425,7 +437,12 @@ def nanany(values, axis=None, skipna: bool = True, mask=None): return values.any(axis) -def nanall(values, axis=None, skipna: bool = True, mask=None): +def nanall( + values: np.ndarray, + axis: Optional[int] = None, + skipna: bool = True, + mask: Optional[np.ndarray] = None, +) -> bool: """ Check if all elements along an axis evaluate to True. @@ -458,7 +475,13 @@ def nanall(values, axis=None, skipna: bool = True, mask=None): @disallow("M8") -def nansum(values, axis=None, skipna=True, min_count=0, mask=None): +def nansum( + values: np.ndarray, + axis: Optional[int] = None, + skipna: bool = True, + min_count: int = 0, + mask: Optional[np.ndarray] = None, +) -> float: """ Sum the elements along an axis ignoring NaNs @@ -629,7 +652,7 @@ def _get_counts_nanvar( mask: Optional[np.ndarray], axis: Optional[int], ddof: int, - dtype=float, + dtype: Dtype = float, ) -> Tuple[Union[int, np.ndarray], Union[int, np.ndarray]]: """ Get the count of non-null values along an axis, accounting for degrees of freedom. @@ -776,7 +799,13 @@ def nanvar(values, axis=None, skipna=True, ddof=1, mask=None): @disallow("M8", "m8") -def nansem(values, axis=None, skipna=True, ddof=1, mask=None): +def nansem( + values: np.ndarray, + axis: Optional[int] = None, + skipna: bool = True, + ddof: int = 1, + mask: Optional[np.ndarray] = None, +) -> float: """ Compute the standard error in the mean along given axis while ignoring NaNs @@ -821,7 +850,12 @@ def nansem(values, axis=None, skipna=True, ddof=1, mask=None): def _nanminmax(meth, fill_value_typ): @bottleneck_switch(name="nan" + meth) - def reduction(values, axis=None, skipna=True, mask=None): + def reduction( + values: np.ndarray, + axis: Optional[int] = None, + skipna: bool = True, + mask: Optional[np.ndarray] = None, + ) -> Dtype: values, mask, dtype, dtype_max, fill_value = _get_values( values, skipna, fill_value_typ=fill_value_typ, mask=mask @@ -847,7 +881,12 @@ def reduction(values, axis=None, skipna=True, mask=None): @disallow("O") -def nanargmax(values, axis=None, skipna=True, mask=None): +def nanargmax( + values: np.ndarray, + axis: Optional[int] = None, + skipna: bool = True, + mask: Optional[np.ndarray] = None, +) -> int: """ Parameters ---------- @@ -878,7 +917,12 @@ def nanargmax(values, axis=None, skipna=True, mask=None): @disallow("O") -def nanargmin(values, axis=None, skipna=True, mask=None): +def nanargmin( + values: np.ndarray, + axis: Optional[int] = None, + skipna: bool = True, + mask: Optional[np.ndarray] = None, +) -> int: """ Parameters ---------- @@ -909,7 +953,12 @@ def nanargmin(values, axis=None, skipna=True, mask=None): @disallow("M8", "m8") -def nanskew(values, axis=None, skipna=True, mask=None): +def nanskew( + values: np.ndarray, + axis: Optional[int] = None, + skipna: bool = True, + mask: Optional[np.ndarray] = None, +) -> float: """ Compute the sample skewness. The statistic computed here is the adjusted Fisher-Pearson standardized @@ -987,7 +1036,12 @@ def nanskew(values, axis=None, skipna=True, mask=None): @disallow("M8", "m8") -def nankurt(values, axis=None, skipna=True, mask=None): +def nankurt( + values: np.ndarray, + axis: Optional[int] = None, + skipna: bool = True, + mask: Optional[np.ndarray] = None, +) -> float: """ Compute the sample excess kurtosis @@ -1075,7 +1129,13 @@ def nankurt(values, axis=None, skipna=True, mask=None): @disallow("M8", "m8") -def nanprod(values, axis=None, skipna=True, min_count=0, mask=None): +def nanprod( + values: np.ndarray, + axis: Optional[int] = None, + skipna: bool = True, + min_count: int = 0, + mask: Optional[np.ndarray] = None, +) -> float: """ Parameters ---------- @@ -1088,7 +1148,8 @@ def nanprod(values, axis=None, skipna=True, min_count=0, mask=None): Returns ------- - result : dtype + Dtype + The product of all elements on a given axis. ( NaNs are treated as 1) Examples -------- @@ -1096,10 +1157,6 @@ def nanprod(values, axis=None, skipna=True, min_count=0, mask=None): >>> s = pd.Series([1, 2, 3, np.nan]) >>> nanops.nanprod(s) 6.0 - - Returns - ------- - The product of all elements on a given axis. ( NaNs are treated as 1) """ mask = _maybe_get_mask(values, skipna, mask) @@ -1138,7 +1195,7 @@ def _get_counts( values_shape: Tuple[int], mask: Optional[np.ndarray], axis: Optional[int], - dtype=float, + dtype: Dtype = float, ) -> Union[int, np.ndarray]: """ Get the count of non-null values along an axis @@ -1184,7 +1241,13 @@ def _maybe_null_out( mask: Optional[np.ndarray], shape: Tuple, min_count: int = 1, -) -> np.ndarray: +) -> float: + """ + Returns + ------- + Dtype + The product of all elements on a given axis. ( NaNs are treated as 1) + """ if mask is not None and axis is not None and getattr(result, "ndim", False): null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0 if np.any(null_mask): @@ -1218,7 +1281,9 @@ def _zero_out_fperr(arg): @disallow("M8", "m8") -def nancorr(a, b, method="pearson", min_periods=None): +def nancorr( + a: np.ndarray, b: np.ndarray, method="pearson", min_periods: Optional[int] = None, +): """ a, b: ndarrays """ @@ -1268,7 +1333,7 @@ def _spearman(a, b): @disallow("M8", "m8") -def nancov(a, b, min_periods=None): +def nancov(a: np.ndarray, b: np.ndarray, min_periods: Optional[int] = None): if len(a) != len(b): raise AssertionError("Operands to nancov must have same size") @@ -1341,7 +1406,9 @@ def f(x, y): nanne = make_nancomp(operator.ne) -def _nanpercentile_1d(values, mask, q, na_value, interpolation): +def _nanpercentile_1d( + values: np.ndarray, mask: np.ndarray, q, na_value: Scalar, interpolation +) -> Union[Scalar, np.ndarray]: """ Wrapper for np.percentile that skips missing values, specialized to 1-dimensional case. @@ -1372,7 +1439,15 @@ def _nanpercentile_1d(values, mask, q, na_value, interpolation): return np.percentile(values, q, interpolation=interpolation) -def nanpercentile(values, q, axis, na_value, mask, ndim, interpolation): +def nanpercentile( + values: np.ndarray, + q, + axis: int, + na_value, + mask: np.ndarray, + ndim: int, + interpolation, +): """ Wrapper for np.percentile that skips missing values.