Skip to content

BUG: Bug in idxmin/idxmax of datetime64[ns] Series with NaT (GH2982) #2985

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 7, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ pandas 0.11.0

- Bug on in-place putmasking on an ``integer`` series that needs to be converted to ``float`` (GH2746_)
- Bug in argsort of ``datetime64[ns]`` Series with ``NaT`` (GH2967_)
- Bug in idxmin/idxmax of ``datetime64[ns]`` Series with ``NaT`` (GH2982__)

.. _GH622: https://github.com/pydata/pandas/issues/622
.. _GH797: https://github.com/pydata/pandas/issues/797
Expand All @@ -147,6 +148,7 @@ pandas 0.11.0
.. _GH2931: https://github.com/pydata/pandas/issues/2931
.. _GH2973: https://github.com/pydata/pandas/issues/2973
.. _GH2967: https://github.com/pydata/pandas/issues/2967
.. _GH2982: https://github.com/pydata/pandas/issues/2982


pandas 0.10.1
Expand Down
2 changes: 1 addition & 1 deletion doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1186,7 +1186,7 @@ A query is specified using the ``Term`` class under the hood.

Valid terms can be created from ``dict, list, tuple, or
string``. Objects can be embeded as values. Allowed operations are: ``<,
<=, >, >=, =``. ``=`` will be inferred as an implicit set operation
<=, >, >=, =, !=``. ``=`` will be inferred as an implicit set operation
(e.g. if 2 or more values are provided). The following are all valid
terms.

Expand Down
43 changes: 31 additions & 12 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,19 +682,30 @@ def _infer_dtype_from_scalar(val):


def _maybe_promote(dtype, fill_value=np.nan):

# if we passed an array here, determine the fill value by dtype
if isinstance(fill_value,np.ndarray):
if issubclass(fill_value.dtype.type, (np.datetime64,np.timedelta64)):
fill_value = tslib.iNaT
else:
fill_value = np.nan

# returns tuple of (dtype, fill_value)
if issubclass(dtype.type, np.datetime64):
if issubclass(dtype.type, (np.datetime64,np.timedelta64)):
# for now: refuse to upcast datetime64
# (this is because datetime64 will not implicitly upconvert
# to object correctly as of numpy 1.6.1)
if isnull(fill_value):
fill_value = tslib.iNaT
else:
try:
fill_value = lib.Timestamp(fill_value).value
except:
# the proper thing to do here would probably be to upcast to
# object (but numpy 1.6.1 doesn't do this properly)
if issubclass(dtype.type, np.datetime64):
try:
fill_value = lib.Timestamp(fill_value).value
except:
# the proper thing to do here would probably be to upcast to
# object (but numpy 1.6.1 doesn't do this properly)
fill_value = tslib.iNaT
else:
fill_value = tslib.iNaT
elif is_float(fill_value):
if issubclass(dtype.type, np.bool_):
Expand Down Expand Up @@ -722,24 +733,33 @@ def _maybe_promote(dtype, fill_value=np.nan):
return dtype, fill_value


def _maybe_upcast_putmask(result, mask, other):
def _maybe_upcast_putmask(result, mask, other, dtype=None):
""" a safe version of put mask that (potentially upcasts the result
return the result and a changed flag """
try:
np.putmask(result, mask, other)
except:
# our type is wrong here, need to upcast
if (-mask).any():
result, fill_value = _maybe_upcast(result, copy=True)
result, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True)
np.putmask(result, mask, other)
return result, True

return result, False

def _maybe_upcast(values, fill_value=np.nan, copy=False):
def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False):
""" provide explicty type promotion and coercion
if copy == True, then a copy is created even if no upcast is required """
new_dtype, fill_value = _maybe_promote(values.dtype, fill_value)

Parameters
----------
values : the ndarray that we want to maybe upcast
fill_value : what we want to fill with
dtype : if None, then use the dtype of the values, else coerce to this type
copy : if True always make a copy even if no upcast is required """

if dtype is None:
dtype = values.dtype
new_dtype, fill_value = _maybe_promote(dtype, fill_value)
if new_dtype != values.dtype:
values = values.astype(new_dtype)
elif copy:
Expand Down Expand Up @@ -915,7 +935,6 @@ def _possibly_convert_platform(values):

return values


def _possibly_cast_to_timedelta(value, coerce=True):
""" try to cast to timedelta64 w/o coercion """

Expand Down
118 changes: 63 additions & 55 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pandas.lib as lib
import pandas.algos as algos
import pandas.hashtable as _hash
import pandas.tslib as tslib

try:
import bottleneck as bn
Expand Down Expand Up @@ -69,8 +70,61 @@ def _has_infs(result):
else:
return np.isinf(result) or np.isneginf(result)

def _get_fill_value(dtype, fill_value=None, fill_value_typ=None):
""" return the correct fill value for the dtype of the values """
if fill_value is not None:
return fill_value
if _na_ok_dtype(dtype):
if fill_value_typ is None:
return np.nan
else:
if fill_value_typ == '+inf':
return np.inf
else:
return -np.inf
else:
if fill_value_typ is None:
return tslib.iNaT
else:
if fill_value_typ == '+inf':
# need the max int here
return np.iinfo(np.int64).max
else:
return tslib.iNaT

def _get_values(values, skipna, fill_value=None, fill_value_typ=None, isfinite=False, copy=True):
""" utility to get the values view, mask, dtype
if necessary copy and mask using the specified fill_value
copy = True will force the copy """
if isfinite:
mask = _isfinite(values)
else:
mask = isnull(values)

dtype = values.dtype
dtype_ok = _na_ok_dtype(dtype)

# get our fill value (in case we need to provide an alternative dtype for it)
fill_value = _get_fill_value(dtype, fill_value=fill_value, fill_value_typ=fill_value_typ)

if skipna:
if copy:
values = values.copy()
if dtype_ok:
np.putmask(values, mask, fill_value)

# promote if needed
else:
values, changed = com._maybe_upcast_putmask(values, mask, fill_value)

elif copy:
values = values.copy()

values = _view_if_needed(values)
return values, mask, dtype

def _isfinite(values):
if issubclass(values.dtype.type, np.timedelta64):
if issubclass(values.dtype.type, (np.timedelta64,np.datetime64)):
return isnull(values)
return -np.isfinite(values)

Expand Down Expand Up @@ -99,43 +153,21 @@ def _wrap_results(result,dtype):
return result

def nanany(values, axis=None, skipna=True):
mask = isnull(values)

if skipna:
values = values.copy()
np.putmask(values, mask, False)
values, mask, dtype = _get_values(values, skipna, False, copy=skipna)
return values.any(axis)


def nanall(values, axis=None, skipna=True):
mask = isnull(values)

if skipna:
values = values.copy()
np.putmask(values, mask, True)
values, mask, dtype = _get_values(values, skipna, True, copy=skipna)
return values.all(axis)


def _nansum(values, axis=None, skipna=True):
mask = isnull(values)

if skipna and not issubclass(values.dtype.type, np.integer):
values = values.copy()
np.putmask(values, mask, 0)

values, mask, dtype = _get_values(values, skipna, 0)
the_sum = values.sum(axis)
the_sum = _maybe_null_out(the_sum, axis, mask)

return the_sum


def _nanmean(values, axis=None, skipna=True):
mask = isnull(values)

if skipna and not issubclass(values.dtype.type, np.integer):
values = values.copy()
np.putmask(values, mask, 0)

values, mask, dtype = _get_values(values, skipna, 0)
the_sum = _ensure_numeric(values.sum(axis))
count = _get_counts(mask, axis)

Expand Down Expand Up @@ -186,15 +218,7 @@ def _nanvar(values, axis=None, skipna=True, ddof=1):


def _nanmin(values, axis=None, skipna=True):
mask = isnull(values)

dtype = values.dtype

if skipna and _na_ok_dtype(dtype):
values = values.copy()
np.putmask(values, mask, np.inf)

values = _view_if_needed(values)
values, mask, dtype = _get_values(values, skipna, fill_value_typ = '+inf')

# numpy 1.6.1 workaround in Python 3.x
if (values.dtype == np.object_
Expand All @@ -218,15 +242,7 @@ def _nanmin(values, axis=None, skipna=True):


def _nanmax(values, axis=None, skipna=True):
mask = isnull(values)

dtype = values.dtype

if skipna and _na_ok_dtype(dtype):
values = values.copy()
np.putmask(values, mask, -np.inf)

values = _view_if_needed(values)
values, mask, dtype = _get_values(values, skipna, fill_value_typ ='-inf')

# numpy 1.6.1 workaround in Python 3.x
if (values.dtype == np.object_
Expand Down Expand Up @@ -254,11 +270,7 @@ def nanargmax(values, axis=None, skipna=True):
"""
Returns -1 in the NA case
"""
mask = _isfinite(values)
values = _view_if_needed(values)
if not issubclass(values.dtype.type, np.integer):
values = values.copy()
np.putmask(values, mask, -np.inf)
values, mask, dtype = _get_values(values, skipna, fill_value_typ = '-inf', isfinite=True)
result = values.argmax(axis)
result = _maybe_arg_null_out(result, axis, mask, skipna)
return result
Expand All @@ -268,11 +280,7 @@ def nanargmin(values, axis=None, skipna=True):
"""
Returns -1 in the NA case
"""
mask = _isfinite(values)
values = _view_if_needed(values)
if not issubclass(values.dtype.type, np.integer):
values = values.copy()
np.putmask(values, mask, np.inf)
values, mask, dtype = _get_values(values, skipna, fill_value_typ = '+inf', isfinite=True)
result = values.argmin(axis)
result = _maybe_arg_null_out(result, axis, mask, skipna)
return result
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2112,13 +2112,13 @@ def argsort(self, axis=0, kind='quicksort', order=None):
mask = isnull(values)

if mask.any():
result = Series(-1,index=self.index,name=self.name)
result = Series(-1,index=self.index,name=self.name,dtype='int64')
notmask = -mask
result.values[notmask] = np.argsort(self.values[notmask], kind=kind)
return result
else:
return Series(np.argsort(values, kind=kind), index=self.index,
name=self.name)
name=self.name,dtype='int64')

def rank(self, method='average', na_option='keep', ascending=True):
"""
Expand Down
30 changes: 25 additions & 5 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1816,14 +1816,15 @@ def test_timedelta64_functions(self):
result = td.idxmax()
self.assert_(result == 2)

# with NaT (broken)
# GH 2982
# with NaT
td[0] = np.nan

#result = td.idxmin()
#self.assert_(result == 1)
result = td.idxmin()
self.assert_(result == 1)

#result = td.idxmax()
#self.assert_(result == 2)
result = td.idxmax()
self.assert_(result == 2)

# abs
s1 = Series(date_range('20120101',periods=3))
Expand Down Expand Up @@ -2065,6 +2066,16 @@ def test_idxmin(self):
allna = self.series * nan
self.assert_(isnull(allna.idxmin()))

# datetime64[ns]
from pandas import date_range
s = Series(date_range('20130102',periods=6))
result = s.idxmin()
self.assert_(result == 0)

s[0] = np.nan
result = s.idxmin()
self.assert_(result == 1)

def test_idxmax(self):
# test idxmax
# _check_stat_op approach can not be used here because of isnull check.
Expand All @@ -2086,6 +2097,15 @@ def test_idxmax(self):
allna = self.series * nan
self.assert_(isnull(allna.idxmax()))

from pandas import date_range
s = Series(date_range('20130102',periods=6))
result = s.idxmax()
self.assert_(result == 5)

s[5] = np.nan
result = s.idxmax()
self.assert_(result == 4)

def test_operators_corner(self):
series = self.ts

Expand Down