Merged
Changes from 3 commits
1 change: 1 addition & 0 deletions ci/deps/azure-36-32bit.yaml
@@ -18,6 +18,7 @@ dependencies:
- numpy=1.14.*
- python-dateutil
- pytz=2017.2
- scipy>=1.1

# see comment above
- pip
1 change: 1 addition & 0 deletions ci/deps/azure-macos-36.yaml
@@ -26,6 +26,7 @@ dependencies:
- pytables
- python-dateutil==2.6.1
- pytz
- scipy>=1.1
- xarray
- xlrd
- xlsxwriter
143 changes: 108 additions & 35 deletions pandas/core/nanops.py
@@ -1,13 +1,14 @@
import functools
import itertools
import operator
from typing import Any, Optional, Tuple, Union
from typing import Any, Callable, Optional, Tuple, Union

import numpy as np

from pandas._config import get_option

from pandas._libs import NaT, Timedelta, Timestamp, iNaT, lib
from pandas._typing import Dtype, Scalar
from pandas.compat._optional import import_optional_dependency

from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask
@@ -37,7 +38,7 @@
_USE_BOTTLENECK = False


def set_use_bottleneck(v=True):
def set_use_bottleneck(v: bool = True) -> None:
# set/unset to use bottleneck
global _USE_BOTTLENECK
if _BOTTLENECK_INSTALLED:
@@ -55,7 +56,7 @@ def __init__(self, *dtypes):
def check(self, obj) -> bool:
return hasattr(obj, "dtype") and issubclass(obj.dtype.type, self.dtypes)

def __call__(self, f):
def __call__(self, f) -> Callable:
@functools.wraps(f)
def _f(*args, **kwargs):
obj_iter = itertools.chain(args, kwargs.values())
@@ -80,11 +81,11 @@ def _f(*args, **kwargs):


class bottleneck_switch:
def __init__(self, name=None, **kwargs):
def __init__(self, name: Optional[str] = None, **kwargs):
self.name = name
self.kwargs = kwargs

def __call__(self, alt):
def __call__(self, alt: Callable) -> Callable:
bn_name = self.name or alt.__name__

try:
@@ -93,7 +94,9 @@ def __call__(self, alt):
bn_func = None

@functools.wraps(alt)
def f(values, axis=None, skipna=True, **kwds):
def f(
values: np.ndarray, axis: Optional[int] = None, skipna: bool = True, **kwds
):
if len(self.kwargs) > 0:
for k, v in self.kwargs.items():
if k not in kwds:
@@ -129,7 +132,7 @@ def f(values, axis=None, skipna=True, **kwds):
return f


def _bn_ok_dtype(dt, name: str) -> bool:
def _bn_ok_dtype(dt: Dtype, name: str) -> bool:
# Bottleneck chokes on datetime64
if not is_object_dtype(dt) and not (
is_datetime_or_timedelta_dtype(dt) or is_datetime64tz_dtype(dt)
@@ -163,7 +166,11 @@ def _has_infs(result) -> bool:
return False


def _get_fill_value(dtype, fill_value=None, fill_value_typ=None):
def _get_fill_value(
dtype: Dtype,
fill_value: Optional[Scalar] = None,
fill_value_typ: Optional[str] = None,
):
""" return the correct fill value for the dtype of the values """
if fill_value is not None:
return fill_value
@@ -326,12 +333,12 @@ def _get_values(
return values, mask, dtype, dtype_max, fill_value


def _na_ok_dtype(dtype):
def _na_ok_dtype(dtype) -> bool:
# TODO: what about datetime64tz? PeriodDtype?
return not issubclass(dtype.type, (np.integer, np.timedelta64, np.datetime64))


def _wrap_results(result, dtype, fill_value=None):
def _wrap_results(result, dtype: Dtype, fill_value=None):
""" wrap our results if needed """

if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
@@ -362,7 +369,9 @@ def _wrap_results(result, dtype, fill_value=None):
return result


def _na_for_min_count(values, axis: Optional[int]):
def _na_for_min_count(
values: np.ndarray, axis: Optional[int]
) -> Union[Scalar, np.ndarray]:
"""
Return the missing value for `values`.

@@ -393,7 +402,12 @@ def _na_for_min_count(values, axis: Optional[int]):
return result


def nanany(values, axis=None, skipna: bool = True, mask=None):
def nanany(
values: np.ndarray,
axis: Optional[int] = None,
skipna: bool = True,
mask: Optional[np.ndarray] = None,
) -> bool:
"""
Check if any elements along an axis evaluate to True.

@@ -425,7 +439,12 @@ def nanany(values, axis=None, skipna: bool = True, mask=None):
return values.any(axis)


def nanall(values, axis=None, skipna: bool = True, mask=None):
def nanall(
values: np.ndarray,
axis: Optional[int] = None,
skipna: bool = True,
mask: Optional[np.ndarray] = None,
) -> bool:
"""
Check if all elements along an axis evaluate to True.

@@ -458,7 +477,13 @@


@disallow("M8")
def nansum(values, axis=None, skipna=True, min_count=0, mask=None):
def nansum(
values: np.ndarray,
axis: Optional[int] = None,
skipna: bool = True,
min_count: int = 0,
mask: Optional[np.ndarray] = None,
) -> Dtype:
"""
Sum the elements along an axis ignoring NaNs

@@ -776,7 +801,13 @@ def nanvar(values, axis=None, skipna=True, ddof=1, mask=None):


@disallow("M8", "m8")
def nansem(values, axis=None, skipna=True, ddof=1, mask=None):
def nansem(
values: np.ndarray,
axis: Optional[int] = None,
skipna: bool = True,
ddof: int = 1,
mask: Optional[np.ndarray] = None,
) -> float:
"""
Compute the standard error in the mean along given axis while ignoring NaNs

@@ -819,9 +850,14 @@ def nansem(values, axis=None, skipna=True, ddof=1, mask=None):
return np.sqrt(var) / np.sqrt(count)
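To make the nansem docstring concrete: the visible return line divides the ddof-adjusted standard deviation of the non-missing values by the square root of their count. A small, hedged illustration (not part of the diff) of the same quantity:

```python
# Hedged illustration: nansem should agree with std(x, ddof=1) / sqrt(n)
# once the NaNs are dropped from both the deviation and the count.
import numpy as np

x = np.array([1.0, 2.0, 3.0, np.nan])
n = np.sum(~np.isnan(x))                 # 3 non-missing values
sem = np.nanstd(x, ddof=1) / np.sqrt(n)  # expected to match nanops.nansem(x)
print(round(float(sem), 5))              # 0.57735
```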


def _nanminmax(meth, fill_value_typ):
def _nanminmax(meth: str, fill_value_typ: str) -> Callable:
@bottleneck_switch(name="nan" + meth)
def reduction(values, axis=None, skipna=True, mask=None):
def reduction(
values: np.ndarray,
axis: Optional[int] = None,
skipna: bool = True,
mask: Optional[np.ndarray] = None,
) -> np.ndarray:

values, mask, dtype, dtype_max, fill_value = _get_values(
values, skipna, fill_value_typ=fill_value_typ, mask=mask
@@ -847,7 +883,12 @@ def reduction(values, axis=None, skipna=True, mask=None):


@disallow("O")
def nanargmax(values, axis=None, skipna=True, mask=None):
def nanargmax(
values: np.ndarray,
axis: Optional[int] = None,
skipna: bool = True,
mask: Optional[np.ndarray] = None,
) -> int:
"""
Parameters
----------
@@ -878,7 +919,12 @@ def nanargmax(values, axis=None, skipna=True, mask=None):


@disallow("O")
def nanargmin(values, axis=None, skipna=True, mask=None):
def nanargmin(
values: np.ndarray,
axis: Optional[int] = None,
skipna: bool = True,
mask: Optional[np.ndarray] = None,
) -> int:
"""
Parameters
----------
@@ -909,7 +955,12 @@ def nanargmin(values, axis=None, skipna=True, mask=None):


@disallow("M8", "m8")
def nanskew(values, axis=None, skipna=True, mask=None):
def nanskew(
values: np.ndarray,
axis: Optional[int] = None,
skipna: bool = True,
mask: Optional[np.ndarray] = None,
) -> float:
""" Compute the sample skewness.

The statistic computed here is the adjusted Fisher-Pearson standardized
@@ -987,7 +1038,12 @@ def nanskew(values, axis=None, skipna=True, mask=None):


@disallow("M8", "m8")
def nankurt(values, axis=None, skipna=True, mask=None):
def nankurt(
values: np.ndarray,
axis: Optional[int] = None,
skipna: bool = True,
mask: Optional[np.ndarray] = None,
) -> float:
"""
Compute the sample excess kurtosis

@@ -1075,7 +1131,13 @@ def nankurt(values, axis=None, skipna=True, mask=None):


@disallow("M8", "m8")
def nanprod(values, axis=None, skipna=True, min_count=0, mask=None):
def nanprod(
values: np.ndarray,
axis: Optional[int] = None,
skipna: bool = True,
min_count: int = 0,
mask: Optional[np.ndarray] = None,
):
"""
Parameters
----------
@@ -1088,18 +1150,14 @@ def nanprod(values, axis=None, skipna=True, min_count=0, mask=None):

Returns
-------
result : dtype
The product of all elements on a given axis. ( NaNs are treated as 1)
Member: should be type and then description. same as _maybe_null_out?
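For reference, a numpydoc Returns section with the type line first and the description underneath, as the comment suggests, would look roughly like the sketch below; the wording is illustrative and not taken from the PR.

```python
def nanprod(values, axis=None, skipna=True, min_count=0, mask=None):
    """
    Returns
    -------
    result : dtype
        The product of all elements on a given axis (NaNs are treated as 1).
    """
```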


Examples
--------
>>> import pandas.core.nanops as nanops
>>> s = pd.Series([1, 2, 3, np.nan])
>>> nanops.nanprod(s)
6.0

Returns
-------
The product of all elements on a given axis. ( NaNs are treated as 1)
"""
mask = _maybe_get_mask(values, skipna, mask)

@@ -1138,7 +1196,7 @@ def _get_counts(
values_shape: Tuple[int],
mask: Optional[np.ndarray],
axis: Optional[int],
dtype=float,
dtype: Dtype = float,
) -> Union[int, np.ndarray]:
""" Get the count of non-null values along an axis

@@ -1218,7 +1276,12 @@ def _zero_out_fperr(arg):


@disallow("M8", "m8")
def nancorr(a, b, method="pearson", min_periods=None):
def nancorr(
a: np.ndarray,
b: np.ndarray,
method: str = "pearson",
Contributor: this needs to be Union[str, Callable[...]]
Member Author: mypy raises errors
Contributor: then something else is wrong

min_periods: Optional[int] = None,
):
"""
a, b: ndarrays
"""
@@ -1240,8 +1303,8 @@ def nancorr(a, b, method="pearson", min_periods=None):
return f(a, b)


def get_corr_func(method):
if method in ["kendall", "spearman"]:
def get_corr_func(method) -> Callable:
Contributor: method: Union[str, Callable]
Member Author: In 7442310 I removed all annotations of str and Callable that were added in this PR, it just gave me too much trouble.
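On the mypy errors mentioned above: one common way a Union[str, Callable] annotation can be made to type-check is to narrow with callable() before the string comparisons, so the remaining branches see a plain str. The sketch below is written under that assumption and is not the code that landed (the annotations were removed in 7442310).

```python
from typing import Callable, Union

import numpy as np

CorrFunc = Callable[[np.ndarray, np.ndarray], float]


def get_corr_func(method: Union[str, CorrFunc]) -> CorrFunc:
    # Narrow the union first: after this check, `method` can be treated as a str.
    if callable(method):
        return method
    if method == "kendall":
        from scipy.stats import kendalltau

        return lambda a, b: kendalltau(a, b)[0]
    if method == "spearman":
        from scipy.stats import spearmanr

        return lambda a, b: spearmanr(a, b)[0]
    if method == "pearson":
        return lambda a, b: float(np.corrcoef(a, b)[0, 1])
    raise ValueError(f"Unknown method '{method}'")
```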

if method in ["kendall", "spearman", "pearson"]:
from scipy.stats import kendalltau, spearmanr
elif callable(method):
return method
@@ -1262,7 +1325,7 @@ def _spearman(a, b):


@disallow("M8", "m8")
def nancov(a, b, min_periods=None):
def nancov(a: np.ndarray, b: np.ndarray, min_periods: Optional[int] = None):
if len(a) != len(b):
raise AssertionError("Operands to nancov must have same size")

@@ -1308,7 +1371,7 @@ def _ensure_numeric(x):
# NA-friendly array comparisons


def make_nancomp(op):
def make_nancomp(op) -> Callable:
def f(x, y):
xmask = isna(x)
ymask = isna(y)
@@ -1335,7 +1398,9 @@ def f(x, y):
nanne = make_nancomp(operator.ne)


def _nanpercentile_1d(values, mask, q, na_value, interpolation):
def _nanpercentile_1d(
values: np.ndarray, mask: np.ndarray, q, na_value: Scalar, interpolation: str
) -> Union[Scalar, np.ndarray]:
"""
Wrapper for np.percentile that skips missing values, specialized to
1-dimensional case.
@@ -1366,7 +1431,15 @@ def _nanpercentile_1d(values, mask, q, na_value, interpolation):
return np.percentile(values, q, interpolation=interpolation)


def nanpercentile(values, q, axis, na_value, mask, ndim, interpolation):
def nanpercentile(
values: np.ndarray,
q,
axis: int,
na_value,
mask: np.ndarray,
ndim: int,
interpolation: str,
):
"""
Wrapper for np.percentile that skips missing values.
