From f38ffe1b7f36d356f8313b50d60d819acec2a3f2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 14 Dec 2018 20:03:53 -0800 Subject: [PATCH 01/14] standardize signature for Index reductions, implement nanmean for datetime64 dtypes --- pandas/core/base.py | 36 ++++++++++++++++++++++++--- pandas/core/dtypes/missing.py | 7 ++++-- pandas/core/indexes/datetimelike.py | 8 +++--- pandas/core/indexes/range.py | 6 +++-- pandas/core/nanops.py | 33 +++++++++++++++++------- pandas/tests/series/test_analytics.py | 2 +- pandas/tests/test_nanops.py | 18 ++++++++++++++ 7 files changed, 88 insertions(+), 22 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 928e90977f95b..0b7fb29d792af 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -920,10 +920,16 @@ def _ndarray_values(self): def empty(self): return not self.size - def max(self): + def max(self, axis=None, skipna=True): """ Return the maximum value of the Index. + Parameters + ---------- + axis : {None} + Dummy argument for consistency with Series + skipna : bool, default True + Returns ------- scalar @@ -951,22 +957,36 @@ def max(self): >>> idx.max() ('b', 2) """ + nv.validate_minmax_axis(axis) return nanops.nanmax(self.values) - def argmax(self, axis=None): + def argmax(self, axis=None, skipna=True): """ Return a ndarray of the maximum argument indexer. + Parameters + ---------- + axis : {None} + Dummy argument for consistency with Series + skipna : bool, default True + See Also -------- numpy.ndarray.argmax """ + nv.validate_minmax_axis(axis) return nanops.nanargmax(self.values) - def min(self): + def min(self, axis=None, skipna=True): """ Return the minimum value of the Index. + Parameters + ---------- + axis : {None} + Dummy argument for consistency with Series + skipna : bool, default True + Returns ------- scalar @@ -994,16 +1014,24 @@ def min(self): >>> idx.min() ('a', 1) """ + nv.validate_minmax_axis(axis) return nanops.nanmin(self.values) - def argmin(self, axis=None): + def argmin(self, axis=None, skipna=True): """ Return a ndarray of the minimum argument indexer. 
+ Parameters + ---------- + axis : {None} + Dummy argument for consistency with Series + skipna : bool, default True + See Also -------- numpy.ndarray.argmin """ + nv.validate_minmax_axis(axis) return nanops.nanargmin(self.values) def tolist(self): diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 809dcbd054ea0..0054a7c65b91a 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -14,7 +14,8 @@ is_period_dtype, is_scalar, is_string_dtype, is_string_like_dtype, is_timedelta64_dtype, needs_i8_conversion, pandas_dtype) from .generic import ( - ABCExtensionArray, ABCGeneric, ABCIndexClass, ABCMultiIndex, ABCSeries) + ABCDatetimeArray, ABCExtensionArray, ABCGeneric, ABCIndexClass, + ABCMultiIndex, ABCSeries) from .inference import is_list_like isposinf_scalar = libmissing.isposinf_scalar @@ -108,7 +109,7 @@ def _isna_new(obj): elif isinstance(obj, ABCMultiIndex): raise NotImplementedError("isna is not defined for MultiIndex") elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass, - ABCExtensionArray)): + ABCExtensionArray, ABCDatetimeArray)): return _isna_ndarraylike(obj) elif isinstance(obj, ABCGeneric): return obj._constructor(obj._data.isna(func=isna)) @@ -196,6 +197,8 @@ def _isna_ndarraylike(obj): else: values = obj result = values.isna() + elif isinstance(obj, ABCDatetimeArray): + return obj.isna() elif is_string_dtype(dtype): # Working around NumPy ticket 1542 shape = values.shape diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index dd2537c11a94c..a9b52b6c7f32b 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -258,7 +258,7 @@ def tolist(self): """ return list(self.astype(object)) - def min(self, axis=None, *args, **kwargs): + def min(self, axis=None, skipna=True, *args, **kwargs): """ Return the minimum value of the Index or minimum along an axis. @@ -286,7 +286,7 @@ def min(self, axis=None, *args, **kwargs): except ValueError: return self._na_value - def argmin(self, axis=None, *args, **kwargs): + def argmin(self, axis=None, skipna=True, *args, **kwargs): """ Returns the indices of the minimum values along an axis. @@ -309,7 +309,7 @@ def argmin(self, axis=None, *args, **kwargs): i8[mask] = np.iinfo('int64').max return i8.argmin() - def max(self, axis=None, *args, **kwargs): + def max(self, axis=None, skipna=True, *args, **kwargs): """ Return the maximum value of the Index or maximum along an axis. @@ -337,7 +337,7 @@ def max(self, axis=None, *args, **kwargs): except ValueError: return self._na_value - def argmax(self, axis=None, *args, **kwargs): + def argmax(self, axis=None, skipna=True, *args, **kwargs): """ Returns the indices of the maximum values along an axis. 
diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 0da924de244ed..110c9f4025bd8 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -297,12 +297,14 @@ def _minmax(self, meth): return self._start + self._step * no_steps - def min(self): + def min(self, axis=None, skipna=True): """The minimum value of the RangeIndex""" + nv.validate_minmax_axis(axis) return self._minmax('min') - def max(self): + def max(self, axis=None, skipna=True): """The maximum value of the RangeIndex""" + nv.validate_minmax_axis(axis) return self._minmax('max') def argsort(self, *args, **kwargs): diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 027f458614bd8..58b8d2981384a 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -6,14 +6,14 @@ import numpy as np -from pandas._libs import lib, tslibs +from pandas._libs import iNaT, lib, tslibs import pandas.compat as compat from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask from pandas.core.dtypes.common import ( _get_dtype, is_any_int_dtype, is_bool_dtype, is_complex, is_complex_dtype, - is_datetime64_dtype, is_datetime_or_timedelta_dtype, is_float, - is_float_dtype, is_integer, is_integer_dtype, is_numeric_dtype, + is_datetime64_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype, + is_float, is_float_dtype, is_integer, is_integer_dtype, is_numeric_dtype, is_object_dtype, is_scalar, is_timedelta64_dtype) from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna @@ -203,6 +203,7 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, if necessary copy and mask using the specified fill_value copy = True will force the copy """ + orig_values = values values = com.values_from_object(values) if mask is None: @@ -212,6 +213,10 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, mask = isna(values) dtype = values.dtype + if is_datetime64tz_dtype(orig_values): + dtype = orig_values.dtype + + values = getattr(values, 'asi8', values) dtype_ok = _na_ok_dtype(dtype) # get our fill value (in case we need to provide an alternative @@ -261,19 +266,25 @@ def _na_ok_dtype(dtype): def _view_if_needed(values): if is_datetime_or_timedelta_dtype(values): - return values.view(np.int64) + try: + # TODO: once DatetimeArray has `view`, get rid of this + return values.asi8 + except AttributeError: + return values.view(np.int64) return values def _wrap_results(result, dtype, fill_value=None): """ wrap our results if needed """ - if is_datetime64_dtype(dtype): + # TODO: datetime64tz_dtype + if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): if not isinstance(result, np.ndarray): + tz = getattr(dtype, 'tz', None) assert not isna(fill_value), "Expected non-null fill_value" if result == fill_value: result = np.nan - result = tslibs.Timestamp(result) + result = tslibs.Timestamp(result, tz=tz) else: result = result.view(dtype) elif is_timedelta64_dtype(dtype): @@ -426,7 +437,6 @@ def nansum(values, axis=None, skipna=True, min_count=0, mask=None): return _wrap_results(the_sum, dtype) -@disallow('M8') @bottleneck_switch() def nanmean(values, axis=None, skipna=True, mask=None): """ @@ -457,7 +467,8 @@ def nanmean(values, axis=None, skipna=True, mask=None): values, skipna, 0, mask=mask) dtype_sum = dtype_max dtype_count = np.float64 - if is_integer_dtype(dtype) or is_timedelta64_dtype(dtype): + if (is_integer_dtype(dtype) or is_timedelta64_dtype(dtype) or + is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype)): dtype_sum = 
np.float64 elif is_float_dtype(dtype): dtype_sum = dtype @@ -473,7 +484,11 @@ def nanmean(values, axis=None, skipna=True, mask=None): else: the_mean = the_sum / count if count > 0 else np.nan - return _wrap_results(the_mean, dtype) + fill_value = None + if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): + fill_value = iNaT + + return _wrap_results(the_mean, dtype, fill_value=fill_value) @disallow('M8') diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 81d60aba44b0f..693100a705399 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -517,7 +517,7 @@ def _check_stat_op(self, name, alternate, string_series_, string_series_[5:15] = np.NaN # idxmax, idxmin, min, and max are valid for dates - if name not in ['max', 'min']: + if name not in ['max', 'min', 'mean']: ds = Series(date_range('1/1/2001', periods=10)) pytest.raises(TypeError, f, ds) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index e214d4c1985a9..1e08914811402 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -14,6 +14,7 @@ from pandas import Series, isna from pandas.compat.numpy import _np_version_under1p13 from pandas.core.dtypes.common import is_integer_dtype +from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray use_bn = nanops._USE_BOTTLENECK @@ -998,6 +999,23 @@ def prng(self): return np.random.RandomState(1234) +class TestDatetime64NaNOps(object): + @pytest.mark.parametrize('tz', [None, 'UTC']) + def test_nanmean(self, tz): + dti = pd.date_range('2016-01-01', periods=3, tz=tz) + expected = dti[1] + + for obj in [dti, DatetimeArray(dti), Series(dti)]: + result = nanops.nanmean(obj) + assert result == expected + + dti2 = dti.insert(1, pd.NaT) + + for obj in [dti2, DatetimeArray(dti2), Series(dti2)]: + result = nanops.nanmean(obj) + assert result == expected + + def test_use_bottleneck(): if nanops._BOTTLENECK_INSTALLED: From 04cf1f76211634014c74dbff7057c96fb91bad13 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 15 Dec 2018 10:01:57 -0800 Subject: [PATCH 02/14] suppress warnings --- pandas/core/nanops.py | 9 +++++---- pandas/tests/series/test_analytics.py | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 58b8d2981384a..d2dc285adeed5 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -267,17 +267,16 @@ def _na_ok_dtype(dtype): def _view_if_needed(values): if is_datetime_or_timedelta_dtype(values): try: + return values.view(np.int64) + except AttributeError: # TODO: once DatetimeArray has `view`, get rid of this return values.asi8 - except AttributeError: - return values.view(np.int64) return values def _wrap_results(result, dtype, fill_value=None): """ wrap our results if needed """ - # TODO: datetime64tz_dtype if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): if not isinstance(result, np.ndarray): tz = getattr(dtype, 'tz', None) @@ -477,7 +476,9 @@ def nanmean(values, axis=None, skipna=True, mask=None): the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum)) if axis is not None and getattr(the_sum, 'ndim', False): - the_mean = the_sum / count + with np.errstate(all="ignore"): + # suppress division by zero warnings + the_mean = the_sum / count ct_mask = count == 0 if ct_mask.any(): the_mean[ct_mask] = np.nan diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 693100a705399..a3c61e330c37c 100644 --- 
a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -519,7 +519,8 @@ def _check_stat_op(self, name, alternate, string_series_, # idxmax, idxmin, min, and max are valid for dates if name not in ['max', 'min', 'mean']: ds = Series(date_range('1/1/2001', periods=10)) - pytest.raises(TypeError, f, ds) + with pytest.raises(TypeError): + f(ds) # skipna or no assert notna(f(string_series_)) From 3efab79058c64f8fe9546484cff19ca35821608a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 15 Dec 2018 11:56:01 -0800 Subject: [PATCH 03/14] requested edits --- pandas/core/nanops.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index d2dc285adeed5..0a6d5853ecdaa 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -278,6 +278,9 @@ def _wrap_results(result, dtype, fill_value=None): """ wrap our results if needed """ if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): + if fill_value is None: + # GH#24293 + fill_value = iNaT if not isinstance(result, np.ndarray): tz = getattr(dtype, 'tz', None) assert not isna(fill_value), "Expected non-null fill_value" @@ -485,11 +488,7 @@ def nanmean(values, axis=None, skipna=True, mask=None): else: the_mean = the_sum / count if count > 0 else np.nan - fill_value = None - if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): - fill_value = iNaT - - return _wrap_results(the_mean, dtype, fill_value=fill_value) + return _wrap_results(the_mean, dtype) @disallow('M8') From a65243946a01e63baaafc7805873bf1a8b627907 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 15 Dec 2018 14:47:37 -0800 Subject: [PATCH 04/14] implement view --- pandas/core/arrays/datetimelike.py | 20 ++++++++++++++++++++ pandas/core/indexes/datetimelike.py | 1 + pandas/core/nanops.py | 6 +----- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index fa6941476522d..ffa37738e0a06 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -483,6 +483,26 @@ def _maybe_mask_results(self, result, fill_value=iNaT, convert=None): result[self._isnan] = fill_value return result + # ------------------------------------------------------------------ + # Additional array methods + # These are not part of the EA API, but we implement them because + # pandas currently assumes they're there. + + def view(self, dtype=None): + """ + New view on this array with the same data. + + Parameters + ---------- + dtype : numpy dtype, optional + + Returns + ------- + ndarray + With the specified `dtype`. + """ + return self._data.view(dtype=dtype) + # ------------------------------------------------------------------ # Frequency Properties/Methods diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index a9b52b6c7f32b..bd5d261d53f26 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -41,6 +41,7 @@ class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin): copy = Index.copy unique = Index.unique take = Index.take + view = Index.view # DatetimeLikeArrayMixin assumes subclasses are mutable, so these are # properties there. 
They can be made into cache_readonly for Index diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 0a6d5853ecdaa..9e1d3d7b9fd62 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -266,11 +266,7 @@ def _na_ok_dtype(dtype): def _view_if_needed(values): if is_datetime_or_timedelta_dtype(values): - try: - return values.view(np.int64) - except AttributeError: - # TODO: once DatetimeArray has `view`, get rid of this - return values.asi8 + return values.view(np.int64) return values From 2380af6e0df408ab5a205e1f0525c9423fcd940b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 19 Dec 2018 15:59:47 -0800 Subject: [PATCH 05/14] pass skipna, tests --- pandas/core/base.py | 10 ++--- pandas/core/indexes/datetimelike.py | 26 ++++++++---- pandas/core/series.py | 6 ++- pandas/tests/indexes/test_range.py | 8 ++++ pandas/tests/series/test_analytics.py | 4 +- pandas/tests/series/test_datetime_values.py | 4 ++ pandas/tests/test_base.py | 44 ++++++++++++++++++++- 7 files changed, 86 insertions(+), 16 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 4015b10b1bb91..8d9f20fe76b86 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -995,7 +995,7 @@ def max(self, axis=None, skipna=True): ('b', 2) """ nv.validate_minmax_axis(axis) - return nanops.nanmax(self.values) + return nanops.nanmax(self._values, skipna=skipna) def argmax(self, axis=None, skipna=True): """ @@ -1012,7 +1012,7 @@ def argmax(self, axis=None, skipna=True): numpy.ndarray.argmax """ nv.validate_minmax_axis(axis) - return nanops.nanargmax(self.values) + return nanops.nanargmax(self._values, skipna=skipna) def min(self, axis=None, skipna=True): """ @@ -1052,7 +1052,7 @@ def min(self, axis=None, skipna=True): ('a', 1) """ nv.validate_minmax_axis(axis) - return nanops.nanmin(self.values) + return nanops.nanmin(self._values, skipna=skipna) def argmin(self, axis=None, skipna=True): """ @@ -1069,7 +1069,7 @@ def argmin(self, axis=None, skipna=True): numpy.ndarray.argmin """ nv.validate_minmax_axis(axis) - return nanops.nanargmin(self.values) + return nanops.nanargmin(self._values, skipna=skipna) def tolist(self): """ @@ -1116,7 +1116,7 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, if func is None: raise TypeError("{klass} cannot perform the operation {op}".format( klass=self.__class__.__name__, op=name)) - return func(**kwds) + return func(skipna=skipna, **kwds) def _map_values(self, mapper, na_action=None): """ diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index bd5d261d53f26..eb64072c9873e 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -271,16 +271,21 @@ def min(self, axis=None, skipna=True, *args, **kwargs): nv.validate_min(args, kwargs) nv.validate_minmax_axis(axis) + i8 = self.asi8 try: - i8 = self.asi8 - # quick check if len(i8) and self.is_monotonic: if i8[0] != iNaT: return self._box_func(i8[0]) + if not len(self): + return self._na_value + if self.hasnans: - min_stamp = self[~self._isnan].asi8.min() + if skipna: + min_stamp = self[~self._isnan].asi8.min() + else: + return self._na_value else: min_stamp = i8.min() return self._box_func(min_stamp) @@ -304,7 +309,7 @@ def argmin(self, axis=None, skipna=True, *args, **kwargs): i8 = self.asi8 if self.hasnans: mask = self._isnan - if mask.all(): + if mask.all() or not skipna: return -1 i8 = i8.copy() i8[mask] = np.iinfo('int64').max @@ -322,16 +327,21 @@ def max(self, axis=None, skipna=True, *args, **kwargs): 
nv.validate_max(args, kwargs) nv.validate_minmax_axis(axis) + i8 = self.asi8 try: - i8 = self.asi8 - # quick check if len(i8) and self.is_monotonic: if i8[-1] != iNaT: return self._box_func(i8[-1]) + if not len(self): + return self._na_value + if self.hasnans: - max_stamp = self[~self._isnan].asi8.max() + if skipna: + max_stamp = self[~self._isnan].asi8.max() + else: + return self._na_value else: max_stamp = i8.max() return self._box_func(max_stamp) @@ -355,7 +365,7 @@ def argmax(self, axis=None, skipna=True, *args, **kwargs): i8 = self.asi8 if self.hasnans: mask = self._isnan - if mask.all(): + if mask.all() or not skipna: return -1 i8 = i8.copy() i8[mask] = 0 diff --git a/pandas/core/series.py b/pandas/core/series.py index 773f2d17cf0fc..a737d98ec3b6f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -17,7 +17,8 @@ from pandas.core.dtypes.common import ( _is_unorderable_exception, ensure_platform_int, is_bool, - is_categorical_dtype, is_datetime64tz_dtype, is_datetimelike, is_dict_like, + is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype, + is_datetimelike, is_dict_like, is_extension_array_dtype, is_extension_type, is_hashable, is_integer, is_iterator, is_list_like, is_scalar, is_string_like, is_timedelta64_dtype) from pandas.core.dtypes.generic import ( @@ -3491,6 +3492,9 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, # dispatch to ExtensionArray interface if isinstance(delegate, ExtensionArray): return delegate._reduce(name, skipna=skipna, **kwds) + elif is_datetime64_dtype(delegate): + # use DatetimeIndex implementation to handle skipna correctly + delegate = DatetimeIndex(delegate) # dispatch to numpy arrays elif isinstance(delegate, np.ndarray): diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 90aa7602c2b62..0167a2334457a 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -1002,10 +1002,18 @@ def test_max_min(self, start, stop, step): result = idx.max() assert result == expected + # skipna should be irrelevant since RangeIndex should never have NAs + result2 = idx.max(skipna=False) + assert result2 == expected + expected = idx._int64index.min() result = idx.min() assert result == expected + # skipna should be irrelevant since RangeIndex should never have NAs + result2 = idx.min(skipna=False) + assert result2 == expected + # empty idx = RangeIndex(start, stop, -step) assert isna(idx.max()) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index a3c61e330c37c..d2034cfda451f 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1333,11 +1333,13 @@ def test_ptp(self): check_stacklevel=False): s.ptp(numeric_only=True) - def test_empty_timeseries_redections_return_nat(self): + def test_empty_timeseries_reductions_return_nat(self): # covers #11245 for dtype in ('m8[ns]', 'm8[ns]', 'M8[ns]', 'M8[ns, UTC]'): assert Series([], dtype=dtype).min() is pd.NaT assert Series([], dtype=dtype).max() is pd.NaT + assert Series([], dtype=dtype).min(skipna=False) is pd.NaT + assert Series([], dtype=dtype).max(skipna=False) is pd.NaT def test_repeat(self): s = Series(np.random.randn(3), index=['a', 'b', 'c']) diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 0d617d5a26706..6d3d6b504bffe 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -539,6 +539,8 @@ def 
test_minmax_nat_series(self, nat): # GH 23282 assert nat.min() is pd.NaT assert nat.max() is pd.NaT + assert nat.min(skipna=False) is pd.NaT + assert nat.max(skipna=False) is pd.NaT @pytest.mark.parametrize('nat', [ # GH 23282 @@ -548,6 +550,8 @@ def test_minmax_nat_series(self, nat): def test_minmax_nat_dataframe(self, nat): assert nat.min()[0] is pd.NaT assert nat.max()[0] is pd.NaT + assert nat.min(skipna=False)[0] is pd.NaT + assert nat.max(skipna=False)[0] is pd.NaT def test_setitem_with_string_index(self): # GH 23451 diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 6eada0e89b506..edf9491f722b2 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -360,23 +360,57 @@ def test_nanops(self): # GH 7261 for op in ['max', 'min']: for klass in [Index, Series]: + arg_op = 'arg' + op if klass is Index else 'idx' + op obj = klass([np.nan, 2.0]) assert getattr(obj, op)() == 2.0 + result = getattr(obj, op)(skipna=False) + assert np.isnan(result) + obj = klass([np.nan]) assert pd.isna(getattr(obj, op)()) + assert pd.isna(getattr(obj, op)(skipna=False)) obj = klass([]) assert pd.isna(getattr(obj, op)()) + assert pd.isna(getattr(obj, op)(skipna=False)) obj = klass([pd.NaT, datetime(2011, 11, 1)]) # check DatetimeIndex monotonic path assert getattr(obj, op)() == datetime(2011, 11, 1) + assert getattr(obj, op)(skipna=False) is pd.NaT + + assert getattr(obj, arg_op)() == 1 + result = getattr(obj, arg_op)(skipna=False) + if klass is Series: + assert np.isnan(result) + else: + assert result == -1 obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT]) # check DatetimeIndex non-monotonic path - assert getattr(obj, op)(), datetime(2011, 11, 1) + assert getattr(obj, op)() == datetime(2011, 11, 1) + assert getattr(obj, op)(skipna=False) is pd.NaT + + assert getattr(obj, arg_op)() == 1 + result = getattr(obj, arg_op)(skipna=False) + if klass is Series: + assert np.isnan(result) + else: + assert result == -1 + + for dtype in ["M8[ns]", "datetime64[ns, UTC]"]: + # cases with empty Series/DatetimeIndex + obj = klass([], dtype=dtype) + + assert getattr(obj, op)() is pd.NaT + assert getattr(obj, op)(skipna=False) is pd.NaT + + with pytest.raises(ValueError, match="empty sequence"): + getattr(obj, arg_op)() + with pytest.raises(ValueError, match="empty sequence"): + getattr(obj, arg_op)(skipna=False) # argmin/max obj = Index(np.arange(5, dtype='int64')) @@ -386,19 +420,27 @@ def test_nanops(self): obj = Index([np.nan, 1, np.nan, 2]) assert obj.argmin() == 1 assert obj.argmax() == 3 + assert obj.argmin(skipna=False) == -1 + assert obj.argmax(skipna=False) == -1 obj = Index([np.nan]) assert obj.argmin() == -1 assert obj.argmax() == -1 + assert obj.argmin(skipna=False) == -1 + assert obj.argmax(skipna=False) == -1 obj = Index([pd.NaT, datetime(2011, 11, 1), datetime(2011, 11, 2), pd.NaT]) assert obj.argmin() == 1 assert obj.argmax() == 2 + assert obj.argmin(skipna=False) == -1 + assert obj.argmax(skipna=False) == -1 obj = Index([pd.NaT]) assert obj.argmin() == -1 assert obj.argmax() == -1 + assert obj.argmin(skipna=False) == -1 + assert obj.argmax(skipna=False) == -1 def test_value_counts_unique_nunique(self): for orig in self.objs: From 4157f0b1709dd5c043e62524f4ffb48e9d92db77 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 19 Dec 2018 18:07:14 -0800 Subject: [PATCH 06/14] fixup isort --- pandas/core/series.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index a737d98ec3b6f..7efe3488ca114 100644 
--- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -18,9 +18,9 @@ from pandas.core.dtypes.common import ( _is_unorderable_exception, ensure_platform_int, is_bool, is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype, - is_datetimelike, is_dict_like, - is_extension_array_dtype, is_extension_type, is_hashable, is_integer, - is_iterator, is_list_like, is_scalar, is_string_like, is_timedelta64_dtype) + is_datetimelike, is_dict_like, is_extension_array_dtype, is_extension_type, + is_hashable, is_integer, is_iterator, is_list_like, is_scalar, + is_string_like, is_timedelta64_dtype) from pandas.core.dtypes.generic import ( ABCDataFrame, ABCDatetimeIndex, ABCSeries, ABCSparseArray, ABCSparseSeries) from pandas.core.dtypes.missing import ( From 0baedf3b231254d6376621da1b510e40726eb4ec Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 23 Dec 2018 14:03:34 -0800 Subject: [PATCH 07/14] fixup rebase screwups --- pandas/tests/reductions/test_reductions.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index fa0296bc629d1..0633ccd6d44a2 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -56,23 +56,23 @@ def test_nanops(self): # GH#7261 for opname in ['max', 'min']: for klass in [Index, Series]: - arg_op = 'arg' + op if klass is Index else 'idx' + op + arg_op = 'arg' + opname if klass is Index else 'idx' + opname obj = klass([np.nan, 2.0]) assert getattr(obj, opname)() == 2.0 obj = klass([np.nan]) assert pd.isna(getattr(obj, opname)()) - assert pd.isna(getattr(obj, op)(skipna=False)) + assert pd.isna(getattr(obj, opname)(skipna=False)) obj = klass([]) assert pd.isna(getattr(obj, opname)()) - assert pd.isna(getattr(obj, op)(skipna=False)) + assert pd.isna(getattr(obj, opname)(skipna=False)) obj = klass([pd.NaT, datetime(2011, 11, 1)]) # check DatetimeIndex monotonic path assert getattr(obj, opname)() == datetime(2011, 11, 1) - assert getattr(obj, op)(skipna=False) is pd.NaT + assert getattr(obj, opname)(skipna=False) is pd.NaT assert getattr(obj, arg_op)() == 1 result = getattr(obj, arg_op)(skipna=False) @@ -84,7 +84,7 @@ def test_nanops(self): obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT]) # check DatetimeIndex non-monotonic path assert getattr(obj, opname)(), datetime(2011, 11, 1) - assert getattr(obj, op)(skipna=False) is pd.NaT + assert getattr(obj, opname)(skipna=False) is pd.NaT assert getattr(obj, arg_op)() == 1 result = getattr(obj, arg_op)(skipna=False) @@ -97,8 +97,8 @@ def test_nanops(self): # cases with empty Series/DatetimeIndex obj = klass([], dtype=dtype) - assert getattr(obj, op)() is pd.NaT - assert getattr(obj, op)(skipna=False) is pd.NaT + assert getattr(obj, opname)() is pd.NaT + assert getattr(obj, opname)(skipna=False) is pd.NaT with pytest.raises(ValueError, match="empty sequence"): getattr(obj, arg_op)() @@ -610,8 +610,8 @@ def test_minmax_nat_dataframe(self, nat_df): # GH#23282 assert nat_df.min()[0] is pd.NaT assert nat_df.max()[0] is pd.NaT - assert nat.min(skipna=False)[0] is pd.NaT - assert nat.max(skipna=False)[0] is pd.NaT + assert nat_df.min(skipna=False)[0] is pd.NaT + assert nat_df.max(skipna=False)[0] is pd.NaT def test_min_max(self): rng = pd.date_range('1/1/2000', '12/31/2000') From 6e0e69fa54222556c59fc134b8a3efb264df0499 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 23 Dec 2018 14:33:59 -0800 Subject: [PATCH 08/14] move len-self checks --- 
pandas/core/indexes/datetimelike.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 4881cad4ea07d..66e9550bc2455 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -257,6 +257,9 @@ def min(self, axis=None, skipna=True, *args, **kwargs): nv.validate_min(args, kwargs) nv.validate_minmax_axis(axis) + if not len(self): + return self._na_value + i8 = self.asi8 try: # quick check @@ -264,9 +267,6 @@ def min(self, axis=None, skipna=True, *args, **kwargs): if i8[0] != iNaT: return self._box_func(i8[0]) - if not len(self): - return self._na_value - if self.hasnans: if skipna: min_stamp = self[~self._isnan].asi8.min() @@ -313,6 +313,9 @@ def max(self, axis=None, skipna=True, *args, **kwargs): nv.validate_max(args, kwargs) nv.validate_minmax_axis(axis) + if not len(self): + return self._na_value + i8 = self.asi8 try: # quick check @@ -320,9 +323,6 @@ def max(self, axis=None, skipna=True, *args, **kwargs): if i8[-1] != iNaT: return self._box_func(i8[-1]) - if not len(self): - return self._na_value - if self.hasnans: if skipna: max_stamp = self[~self._isnan].asi8.max() From e2c301be95d0b2b04e56ca657f594aa942016e16 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 23 Dec 2018 15:16:15 -0800 Subject: [PATCH 09/14] fixup more rebase screwups --- pandas/tests/reductions/test_reductions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 0633ccd6d44a2..d27308029fa19 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -599,8 +599,8 @@ def test_minmax_nat_series(self, nat_ser): # GH#23282 assert nat_ser.min() is pd.NaT assert nat_ser.max() is pd.NaT - assert nat.min(skipna=False) is pd.NaT - assert nat.max(skipna=False) is pd.NaT + assert nat_ser.min(skipna=False) is pd.NaT + assert nat_ser.max(skipna=False) is pd.NaT @pytest.mark.parametrize('nat_df', [ pd.DataFrame([pd.NaT, pd.NaT]), From 4b4979f5682bc556e360d8514a68fb8c13171fd1 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 24 Dec 2018 15:21:46 -0800 Subject: [PATCH 10/14] do values viewing in one place --- pandas/core/nanops.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 9e1d3d7b9fd62..3d13ad5f5d1bd 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -203,8 +203,17 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, if necessary copy and mask using the specified fill_value copy = True will force the copy """ - orig_values = values - values = com.values_from_object(values) + + if is_datetime64tz_dtype(values): + # com.values_from_object returns M8[ns] dtype instead of tz-aware, + # so this case must be handled separately from the rest + dtype = values.dtype + values = np.array(values.asi8) + else: + values = com.values_from_object(values) + dtype = values.dtype + + values = _view_if_needed(values) if mask is None: if isfinite: @@ -212,11 +221,6 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, else: mask = isna(values) - dtype = values.dtype - if is_datetime64tz_dtype(orig_values): - dtype = orig_values.dtype - - values = getattr(values, 'asi8', values) dtype_ok = _na_ok_dtype(dtype) # get our fill value (in case we need to provide an alternative @@ -237,8 +241,6 @@ def 
_get_values(values, skipna, fill_value=None, fill_value_typ=None, elif copy: values = values.copy() - values = _view_if_needed(values) - # return a platform independent precision dtype dtype_max = dtype if is_integer_dtype(dtype) or is_bool_dtype(dtype): @@ -265,6 +267,7 @@ def _na_ok_dtype(dtype): def _view_if_needed(values): + values = getattr(values, 'asi8', values) if is_datetime_or_timedelta_dtype(values): return values.view(np.int64) return values From 82b8cdf4dbaa30da6de7c6b9e86d083bb52b5698 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 27 Dec 2018 13:37:30 -0800 Subject: [PATCH 11/14] unpack Series --- pandas/core/nanops.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 3d13ad5f5d1bd..5d94c34105b44 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -208,6 +208,7 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, # com.values_from_object returns M8[ns] dtype instead of tz-aware, # so this case must be handled separately from the rest dtype = values.dtype + values = getattr(values, "_values", values) values = np.array(values.asi8) else: values = com.values_from_object(values) From 07c31026115bc3bbc4dd9a175bc6a9c21af41ce9 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 27 Dec 2018 18:06:48 -0800 Subject: [PATCH 12/14] implement _extract_datetimelike_values_and_dtype --- pandas/core/nanops.py | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 5d94c34105b44..fcbc6c8177ed6 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -204,17 +204,7 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, copy = True will force the copy """ - if is_datetime64tz_dtype(values): - # com.values_from_object returns M8[ns] dtype instead of tz-aware, - # so this case must be handled separately from the rest - dtype = values.dtype - values = getattr(values, "_values", values) - values = np.array(values.asi8) - else: - values = com.values_from_object(values) - dtype = values.dtype - - values = _view_if_needed(values) + values, dtype = _extract_datetimelike_values_and_dtype(values) if mask is None: if isfinite: @@ -267,11 +257,35 @@ def _na_ok_dtype(dtype): (np.integer, np.timedelta64, np.datetime64)) -def _view_if_needed(values): +def _extract_datetimelike_values_and_dtype(values): + """ + Find the appropriate values and dtype to use, with special handling + for datetime64tz-dtype. 
+ + Parameters + ---------- + values : ndarray, ExtensionArray, Index, Series + + Returns + ------- + values : ndarray + dtype : numpy.dtype + """ + if is_datetime64tz_dtype(values): + # com.values_from_object returns M8[ns] dtype instead of tz-aware, + # so this case must be handled separately from the rest + dtype = values.dtype + values = getattr(values, "_values", values) + values = np.array(values.asi8) + else: + values = com.values_from_object(values) + dtype = values.dtype + values = getattr(values, 'asi8', values) if is_datetime_or_timedelta_dtype(values): - return values.view(np.int64) - return values + values = values.view(np.int64) + + return values, dtype def _wrap_results(result, dtype, fill_value=None): From 6c93410aac1efe2a5d4fb1e4ce1dc8536206e3be Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 27 Dec 2018 19:16:24 -0800 Subject: [PATCH 13/14] fix mask --- pandas/core/nanops.py | 46 +++++++++++++------------------------------ 1 file changed, 14 insertions(+), 32 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index fcbc6c8177ed6..f95c133163ddb 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -204,7 +204,14 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, copy = True will force the copy """ - values, dtype = _extract_datetimelike_values_and_dtype(values) + if is_datetime64tz_dtype(values): + # com.values_from_object returns M8[ns] dtype instead of tz-aware, + # so this case must be handled separately from the rest + dtype = values.dtype + values = getattr(values, "_values", values) + else: + values = com.values_from_object(values) + dtype = values.dtype if mask is None: if isfinite: @@ -212,6 +219,12 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, else: mask = isna(values) + if is_datetime_or_timedelta_dtype(values) or is_datetime64tz_dtype(values): + # changing timedelta64/datetime64 to int64 needs to happen after + # finding `mask` above + values = getattr(values, "asi8", values) + values = values.view(np.int64) + dtype_ok = _na_ok_dtype(dtype) # get our fill value (in case we need to provide an alternative @@ -257,37 +270,6 @@ def _na_ok_dtype(dtype): (np.integer, np.timedelta64, np.datetime64)) -def _extract_datetimelike_values_and_dtype(values): - """ - Find the appropriate values and dtype to use, with special handling - for datetime64tz-dtype. 
- - Parameters - ---------- - values : ndarray, ExtensionArray, Index, Series - - Returns - ------- - values : ndarray - dtype : numpy.dtype - """ - if is_datetime64tz_dtype(values): - # com.values_from_object returns M8[ns] dtype instead of tz-aware, - # so this case must be handled separately from the rest - dtype = values.dtype - values = getattr(values, "_values", values) - values = np.array(values.asi8) - else: - values = com.values_from_object(values) - dtype = values.dtype - - values = getattr(values, 'asi8', values) - if is_datetime_or_timedelta_dtype(values): - values = values.view(np.int64) - - return values, dtype - - def _wrap_results(result, dtype, fill_value=None): """ wrap our results if needed """ From 4777b75f04e511dda662b9ebe13e859717d5a798 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 28 Dec 2018 15:36:43 -0800 Subject: [PATCH 14/14] requested docstring edits, remoe duplicated view --- pandas/core/arrays/datetimelike.py | 20 -------------------- pandas/core/base.py | 4 ++-- pandas/core/indexes/datetimelike.py | 2 ++ 3 files changed, 4 insertions(+), 22 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 042cd7a8c8dcb..df2b5977bbe7c 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -696,26 +696,6 @@ def _maybe_mask_results(self, result, fill_value=iNaT, convert=None): result[self._isnan] = fill_value return result - # ------------------------------------------------------------------ - # Additional array methods - # These are not part of the EA API, but we implement them because - # pandas currently assumes they're there. - - def view(self, dtype=None): - """ - New view on this array with the same data. - - Parameters - ---------- - dtype : numpy dtype, optional - - Returns - ------- - ndarray - With the specified `dtype`. - """ - return self._data.view(dtype=dtype) - # ------------------------------------------------------------------ # Frequency Properties/Methods diff --git a/pandas/core/base.py b/pandas/core/base.py index f2dd5b1922bd2..8af4b59c4634b 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -979,8 +979,8 @@ def max(self, axis=None, skipna=True): Parameters ---------- - axis : {None} - Dummy argument for consistency with Series + axis : int, optional + For compatibility with NumPy. Only 0 or None are allowed. skipna : bool, default True Returns diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index bad34d56a203b..135eefada951d 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -274,6 +274,7 @@ def min(self, axis=None, skipna=True, *args, **kwargs): See Also -------- numpy.ndarray.min + Series.min : Return the minimum value in a Series. """ nv.validate_min(args, kwargs) nv.validate_minmax_axis(axis) @@ -330,6 +331,7 @@ def max(self, axis=None, skipna=True, *args, **kwargs): See Also -------- numpy.ndarray.max + Series.max : Return the maximum value in a Series. """ nv.validate_max(args, kwargs) nv.validate_minmax_axis(axis)