Skip to content

Add test for core.nanops and fix several bugs in nanops #7358

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
57 changes: 38 additions & 19 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ def f(values, axis=None, skipna=True, **kwds):
result.fill(0)
return result

if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype, bn_name):
if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype,
bn_name):
result = bn_func(values, axis=axis, **kwds)

# prefer to treat inf/-inf as NA, but must compute the func
Expand All @@ -94,7 +95,8 @@ def f(values, axis=None, skipna=True, **kwds):

def _bn_ok_dtype(dt, name):
# Bottleneck chokes on datetime64
if dt != np.object_ and not issubclass(dt.type, (np.datetime64, np.timedelta64)):
if dt != np.object_ and not issubclass(dt.type, (np.datetime64,
np.timedelta64)):

# bottleneck does not properly upcast during the sum
# so can overflow
Expand All @@ -105,14 +107,18 @@ def _bn_ok_dtype(dt, name):
return True
return False


def _has_infs(result):
if isinstance(result, np.ndarray):
if result.dtype == 'f8':
return lib.has_infs_f8(result)
return lib.has_infs_f8(result.ravel())
elif result.dtype == 'f4':
return lib.has_infs_f4(result)
return lib.has_infs_f4(result.ravel())
try:
return np.isinf(result).any()
except (TypeError, NotImplementedError) as e:
# if it doesn't support infs, then it can't have infs
return False
return np.isinf(result) or np.isneginf(result)


def _get_fill_value(dtype, fill_value=None, fill_value_typ=None):
Expand Down Expand Up @@ -175,8 +181,9 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None,

# return a platform independent precision dtype
dtype_max = dtype
if dtype.kind == 'i' and not issubclass(
dtype.type, (np.bool, np.datetime64, np.timedelta64)):
if dtype.kind == 'i' and not issubclass(dtype.type, (np.bool,
np.datetime64,
np.timedelta64)):
dtype_max = np.int64
elif dtype.kind in ['b'] or issubclass(dtype.type, np.bool):
dtype_max = np.int64
Expand All @@ -190,7 +197,7 @@ def _isfinite(values):
if issubclass(values.dtype.type, (np.timedelta64, np.datetime64)):
return isnull(values)
elif isinstance(values.dtype, object):
return ~np.isfinite(values.astype('float64'))
return ~np.isfinite(values.astype('complex128'))

return ~np.isfinite(values)

Expand Down Expand Up @@ -247,7 +254,7 @@ def nanall(values, axis=None, skipna=True):
@bottleneck_switch(zero_value=0)
def nansum(values, axis=None, skipna=True):
values, mask, dtype, dtype_max = _get_values(values, skipna, 0)
the_sum = values.sum(axis,dtype=dtype_max)
the_sum = values.sum(axis, dtype=dtype_max)
the_sum = _maybe_null_out(the_sum, axis, mask)

return _wrap_results(the_sum, dtype)
Expand All @@ -260,7 +267,7 @@ def nanmean(values, axis=None, skipna=True):
the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_max))
count = _get_counts(mask, axis)

if axis is not None:
if axis is not None and getattr(the_sum, 'ndim', False):
the_mean = the_sum / count
ct_mask = count == 0
if ct_mask.any():
Expand All @@ -286,6 +293,9 @@ def get_median(x):
if values.dtype != np.float64:
values = values.astype('f8')

if axis is None:
values = values.ravel()

notempty = values.size

# an array from a frame
Expand Down Expand Up @@ -358,7 +368,8 @@ def nansem(values, axis=None, skipna=True, ddof=1):

@bottleneck_switch()
def nanmin(values, axis=None, skipna=True):
values, mask, dtype, dtype_max = _get_values(values, skipna, fill_value_typ='+inf')
values, mask, dtype, dtype_max = _get_values(values, skipna,
fill_value_typ='+inf')

# numpy 1.6.1 workaround in Python 3.x
if (values.dtype == np.object_ and compat.PY3):
Expand All @@ -374,7 +385,7 @@ def nanmin(values, axis=None, skipna=True):
if ((axis is not None and values.shape[axis] == 0)
or values.size == 0):
try:
result = com.ensure_float(values.sum(axis,dtype=dtype_max))
result = com.ensure_float(values.sum(axis, dtype=dtype_max))
result.fill(np.nan)
except:
result = np.nan
Expand All @@ -387,7 +398,8 @@ def nanmin(values, axis=None, skipna=True):

@bottleneck_switch()
def nanmax(values, axis=None, skipna=True):
values, mask, dtype, dtype_max = _get_values(values, skipna, fill_value_typ='-inf')
values, mask, dtype, dtype_max = _get_values(values, skipna,
fill_value_typ='-inf')

# numpy 1.6.1 workaround in Python 3.x
if (values.dtype == np.object_ and compat.PY3):
Expand Down Expand Up @@ -420,7 +432,7 @@ def nanargmax(values, axis=None, skipna=True):
Returns -1 in the NA case
"""
values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='-inf',
isfinite=True)
isfinite=True)
result = values.argmax(axis)
result = _maybe_arg_null_out(result, axis, mask, skipna)
return result
Expand All @@ -431,7 +443,7 @@ def nanargmin(values, axis=None, skipna=True):
Returns -1 in the NA case
"""
values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='+inf',
isfinite=True)
isfinite=True)
result = values.argmin(axis)
result = _maybe_arg_null_out(result, axis, mask, skipna)
return result
Expand Down Expand Up @@ -517,7 +529,7 @@ def nanprod(values, axis=None, skipna=True):

def _maybe_arg_null_out(result, axis, mask, skipna):
# helper function for nanargmin/nanargmax
if axis is None:
if axis is None or not result.ndim:
if skipna:
if mask.all():
result = -1
Expand All @@ -544,10 +556,13 @@ def _get_counts(mask, axis):


def _maybe_null_out(result, axis, mask):
if axis is not None:
if axis is not None and getattr(result, 'ndim', False):
null_mask = (mask.shape[axis] - mask.sum(axis)) == 0
if null_mask.any():
result = result.astype('f8')
if np.iscomplexobj(result):
result = result.astype('c16')
else:
result = result.astype('f8')
result[null_mask] = np.nan
else:
null_mask = mask.size - mask.sum()
Expand Down Expand Up @@ -633,7 +648,11 @@ def nancov(a, b, min_periods=None):
def _ensure_numeric(x):
if isinstance(x, np.ndarray):
if x.dtype == np.object_:
x = x.astype(np.float64)
try:
x = x.astype(np.complex128)
x = x.real if not np.any(x.imag) else x
except TypeError:
x = x.astype(np.float64)
elif not (com.is_float(x) or com.is_integer(x) or com.is_complex(x)):
try:
x = float(x)
Expand Down
49 changes: 0 additions & 49 deletions pandas/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import pandas.core.common as com
import pandas.util.testing as tm
import pandas.core.config as cf
from pandas.core import nanops

_multiprocess_can_split_ = True

Expand Down Expand Up @@ -394,54 +393,6 @@ def test_ensure_int32():
assert(result.dtype == np.int32)


class TestEnsureNumeric(tm.TestCase):
    """Exercise ``nanops._ensure_numeric`` with scalars, ndarrays and strings."""

    def test_numeric_values(self):
        """int, float and complex scalars come back equal to the input."""
        scalar_cases = [
            (1, 'Failed for int'),
            (1.1, 'Failed for float'),
            (1 + 2j, 'Failed for complex'),
        ]
        for scalar, message in scalar_cases:
            self.assertEqual(nanops._ensure_numeric(scalar), scalar, message)

    def test_ndarray(self):
        """Numeric, object and numeric-string arrays match the reference
        array; an array of non-numeric strings raises ValueError."""
        reference = np.array([1, 2, 3])

        # numeric ndarray
        from_numeric = nanops._ensure_numeric(reference)
        self.assertTrue(np.allclose(from_numeric, reference),
                        'Failed for numeric ndarray')

        # object ndarray holding the same numbers
        as_object = reference.astype(object)
        from_object = nanops._ensure_numeric(as_object)
        self.assertTrue(np.allclose(from_object, reference),
                        'Failed for object ndarray')

        # object ndarray of strings that parse as numbers
        numeric_text = np.array(['1', '2', '3'], dtype=object)
        from_text = nanops._ensure_numeric(numeric_text)
        self.assertTrue(np.allclose(from_text, reference),
                        'Failed for convertible string ndarray')

        # object ndarray of strings that do not parse as numbers
        plain_text = np.array(['foo', 'bar', 'baz'], dtype=object)
        self.assertRaises(ValueError, nanops._ensure_numeric, plain_text)

    def test_convertable_values(self):
        """Strings spelling an int, a float or a complex are converted."""
        string_cases = [
            ('1', 1.0, 'Failed for convertible integer string'),
            ('1.1', 1.1, 'Failed for convertible float string'),
            ('1+1j', 1 + 1j, 'Failed for convertible complex string'),
        ]
        for text, expected, message in string_cases:
            converted = nanops._ensure_numeric(text)
            self.assertTrue(np.allclose(converted, expected), message)

    def test_non_convertable_values(self):
        """A non-numeric string, a dict and a list each raise TypeError."""
        for rejected in ('foo', {}, []):
            self.assertRaises(TypeError, nanops._ensure_numeric, rejected)


def test_ensure_platform_int():

# verify that when we create certain types of indices
Expand Down
Loading