Skip to content

catch complex nan in util.is_nan, de-dup+optimize libmissing, tests #24628

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 13 additions & 48 deletions pandas/_libs/missing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -12,33 +12,16 @@ cimport pandas._libs.util as util

from pandas._libs.tslibs.np_datetime cimport (
get_timedelta64_value, get_datetime64_value)
from pandas._libs.tslibs.nattype cimport checknull_with_nat, c_NaT
from pandas._libs.tslibs.nattype cimport (
checknull_with_nat, c_NaT as NaT, is_null_datetimelike)


# Positive and negative infinity typed as float64_t; checknull_old compares
# float/complex scalars against these to treat infinities as NA.
cdef float64_t INF = <float64_t>np.inf
cdef float64_t NEGINF = -INF

# Integer sentinel obtained from util.get_nat(); a datetime64/timedelta64
# scalar whose underlying integer equals it is reported as null below.
cdef int64_t NPY_NAT = util.get_nat()


cdef inline bint _check_all_nulls(object val):
    """
    Report whether a scalar is one of the recognised null values.

    Parameters
    ----------
    val : object

    Returns
    -------
    bint
        True for a float/complex NaN, for None, for the c_NaT singleton and
        for a datetime64/timedelta64 scalar whose integer payload equals
        NPY_NAT; False for anything else.
    """
    if isinstance(val, (float, complex)):
        # NaN is the only float/complex value that compares unequal to itself
        return val != val
    if val is c_NaT or val is None:
        return True
    if util.is_datetime64_object(val):
        return get_datetime64_value(val) == NPY_NAT
    if util.is_timedelta64_object(val):
        return get_timedelta64_value(val) == NPY_NAT
    return False


cpdef bint checknull(object val):
"""
Return boolean describing if the input is NA-like, defined here as any
Expand All @@ -62,18 +45,7 @@ cpdef bint checknull(object val):
The difference between `checknull` and `checknull_old` is that `checknull`
does *not* consider INF or NEGINF to be NA.
"""
if util.is_float_object(val) or util.is_complex_object(val):
return val != val # and val != INF and val != NEGINF
elif util.is_datetime64_object(val):
return get_datetime64_value(val) == NPY_NAT
elif val is c_NaT:
return True
elif util.is_timedelta64_object(val):
return get_timedelta64_value(val) == NPY_NAT
elif util.is_array(val):
return False
else:
return val is None or util.is_nan(val)
return is_null_datetimelike(val, inat_is_null=False)


cpdef bint checknull_old(object val):
Expand Down Expand Up @@ -101,18 +73,11 @@ cpdef bint checknull_old(object val):
The difference between `checknull` and `checknull_old` is that `checknull`
does *not* consider INF or NEGINF to be NA.
"""
if util.is_float_object(val) or util.is_complex_object(val):
return val != val or val == INF or val == NEGINF
elif util.is_datetime64_object(val):
return get_datetime64_value(val) == NPY_NAT
elif val is c_NaT:
if checknull(val):
return True
elif util.is_timedelta64_object(val):
return get_timedelta64_value(val) == NPY_NAT
elif util.is_array(val):
return False
else:
return val is None or util.is_nan(val)
elif util.is_float_object(val) or util.is_complex_object(val):
return val == INF or val == NEGINF
return False


cdef inline bint _check_none_nan_inf_neginf(object val):
Expand All @@ -128,7 +93,7 @@ cdef inline bint _check_none_nan_inf_neginf(object val):
cpdef ndarray[uint8_t] isnaobj(ndarray arr):
"""
Return boolean mask denoting which elements of a 1-D array are na-like,
according to the criteria defined in `_check_all_nulls`:
according to the criteria defined in `checknull`:
- None
- nan
- NaT
Expand All @@ -154,7 +119,7 @@ cpdef ndarray[uint8_t] isnaobj(ndarray arr):
result = np.empty(n, dtype=np.uint8)
for i in range(n):
val = arr[i]
result[i] = _check_all_nulls(val)
result[i] = checknull(val)
return result.view(np.bool_)


Expand Down Expand Up @@ -189,7 +154,7 @@ def isnaobj_old(ndarray arr):
result = np.zeros(n, dtype=np.uint8)
for i in range(n):
val = arr[i]
result[i] = val is c_NaT or _check_none_nan_inf_neginf(val)
result[i] = val is NaT or _check_none_nan_inf_neginf(val)
return result.view(np.bool_)


Expand Down Expand Up @@ -299,7 +264,7 @@ cdef inline bint is_null_datetime64(v):
if checknull_with_nat(v):
return True
elif util.is_datetime64_object(v):
return v.view('int64') == NPY_NAT
return get_datetime64_value(v) == NPY_NAT
return False


Expand All @@ -309,7 +274,7 @@ cdef inline bint is_null_timedelta64(v):
if checknull_with_nat(v):
return True
elif util.is_timedelta64_object(v):
return v.view('int64') == NPY_NAT
return get_timedelta64_value(v) == NPY_NAT
return False


Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/nattype.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ cdef _NaT c_NaT


cdef bint checknull_with_nat(object val)
cpdef bint is_null_datetimelike(object val)
cpdef bint is_null_datetimelike(object val, bint inat_is_null=*)
16 changes: 11 additions & 5 deletions pandas/_libs/tslibs/nattype.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ cimport numpy as cnp
from numpy cimport int64_t
cnp.import_array()

from pandas._libs.tslibs.np_datetime cimport (
get_datetime64_value, get_timedelta64_value)
cimport pandas._libs.tslibs.util as util
from pandas._libs.tslibs.util cimport (
get_nat, is_integer_object, is_float_object, is_datetime64_object,
Expand Down Expand Up @@ -686,26 +688,30 @@ cdef inline bint checknull_with_nat(object val):
return val is None or util.is_nan(val) or val is c_NaT


cpdef bint is_null_datetimelike(object val):
cpdef bint is_null_datetimelike(object val, bint inat_is_null=True):
"""
Determine if we have a null for a timedelta/datetime (or integer versions)

Parameters
----------
val : object
inat_is_null : bool, default True
Whether to treat integer iNaT value as null

Returns
-------
null_datetimelike : bool
"""
if val is None or util.is_nan(val):
if val is None:
return True
elif val is c_NaT:
return True
# NaN is the only float/complex value that compares unequal to itself
elif util.is_float_object(val) or util.is_complex_object(val):
return val != val
elif util.is_timedelta64_object(val):
return val.view('int64') == NPY_NAT
return get_timedelta64_value(val) == NPY_NAT
elif util.is_datetime64_object(val):
return val.view('int64') == NPY_NAT
elif util.is_integer_object(val):
return get_datetime64_value(val) == NPY_NAT
# an integer equal to NPY_NAT counts as null only when inat_is_null is set
elif inat_is_null and util.is_integer_object(val):
return val == NPY_NAT
return False
5 changes: 3 additions & 2 deletions pandas/_libs/tslibs/util.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,8 @@ cdef inline bint is_offset_object(object val):

cdef inline bint is_nan(object val):
"""
Check if val is a Not-A-Number float, including float('NaN') and np.nan.
Check if val is a Not-A-Number float or complex, including
float('NaN') and np.nan.

Parameters
----------
Expand All @@ -225,4 +226,4 @@ cdef inline bint is_nan(object val):
-------
is_nan : bool
"""
return is_float_object(val) and val != val
return (is_float_object(val) or is_complex_object(val)) and val != val
106 changes: 105 additions & 1 deletion pandas/tests/dtypes/test_missing.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# -*- coding: utf-8 -*-

from datetime import datetime
from decimal import Decimal
from warnings import catch_warnings, filterwarnings, simplefilter

import numpy as np
import pytest

from pandas._libs import missing as libmissing
from pandas._libs.tslib import iNaT
from pandas._libs.tslibs import iNaT, is_null_datetimelike
from pandas.compat import u

from pandas.core.dtypes.common import is_scalar
Expand Down Expand Up @@ -392,3 +393,106 @@ def test_empty_like(self):
expected = np.array([True])

self._check_behavior(arr, expected)


# Unit codes used to build unit-specific datetime64/timedelta64 NaT scalars
m8_units = 'as ps ns us ms s m h D W M Y'.split()

# Scalars that the null checks exercised in this module must report as null:
# None, NaT, float/complex NaN in Python and numpy flavours, and numpy NaT
# scalars (unit-less first, then one per unit code).
na_vals = [
    None, NaT,
    float('NaN'), complex('NaN'), np.nan,
    np.float64('NaN'), np.float32('NaN'),
    np.complex64(np.nan), np.complex128(np.nan),
    np.datetime64('NaT'), np.timedelta64('NaT'),
]
na_vals.extend(np.datetime64('NaT', unit) for unit in m8_units)
na_vals.extend(np.timedelta64('NaT', unit) for unit in m8_units)

inf_vals = [
float('inf'),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should np.inf and np.NINF be included too?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good idea, will add

float('-inf'),
complex('inf'),
complex('-inf'),
np.inf,
np.NINF,
]

# Values that match iNaT, which we treat as null in specific cases
int_na_vals = [np.int64(NaT.value), int(NaT.value)]

# Decimal NaN: asserted as not-null by the scalar checks in this module
sometimes_na_vals = [Decimal('NaN')]

# float/complex values that when viewed as int64 match iNaT
never_na_vals = [-0.0, np.float64('-0.0'), -0j, np.complex64(-0j)]


class TestLibMissing(object):
    """
    Scalar-level tests for ``libmissing.checknull``,
    ``libmissing.checknull_old`` and ``is_null_datetimelike``.

    Each test walks the module-level value groups (``na_vals``, ``inf_vals``,
    ``int_na_vals``, ``sometimes_na_vals``, ``never_na_vals``) and asserts
    whether the function under test reports the value as null.
    """

    def test_checknull(self):
        # only the genuine NA scalars are null; infinities are not
        for value in na_vals:
            assert libmissing.checknull(value)

        for value in inf_vals:
            assert not libmissing.checknull(value)

        for value in int_na_vals:
            assert not libmissing.checknull(value)

        for value in sometimes_na_vals:
            assert not libmissing.checknull(value)

        for value in never_na_vals:
            assert not libmissing.checknull(value)

    # Was named ``checknull_old``: without the ``test_`` prefix pytest never
    # collected it, so none of these assertions ever ran.
    def test_checknull_old(self):
        # same as checknull, except that infinities are also treated as NA
        for value in na_vals:
            assert libmissing.checknull_old(value)

        for value in inf_vals:
            assert libmissing.checknull_old(value)

        for value in int_na_vals:
            assert not libmissing.checknull_old(value)

        for value in sometimes_na_vals:
            assert not libmissing.checknull_old(value)

        for value in never_na_vals:
            assert not libmissing.checknull_old(value)

    def test_is_null_datetimelike(self):
        # second positional argument is ``inat_is_null``; it only changes the
        # answer for integers equal to iNaT
        for value in na_vals:
            assert is_null_datetimelike(value)
            assert is_null_datetimelike(value, False)

        for value in inf_vals:
            assert not is_null_datetimelike(value)
            assert not is_null_datetimelike(value, False)

        for value in int_na_vals:
            assert is_null_datetimelike(value)
            assert not is_null_datetimelike(value, False)

        for value in sometimes_na_vals:
            assert not is_null_datetimelike(value)
            assert not is_null_datetimelike(value, False)

        for value in never_na_vals:
            assert not is_null_datetimelike(value)
            # check both settings, matching every other group above
            assert not is_null_datetimelike(value, False)