diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index e922a5d1c3b27..229edbac4992d 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -12,7 +12,9 @@ cimport pandas._libs.util as util from pandas._libs.tslibs.np_datetime cimport ( get_timedelta64_value, get_datetime64_value) -from pandas._libs.tslibs.nattype cimport checknull_with_nat, c_NaT +from pandas._libs.tslibs.nattype cimport ( + checknull_with_nat, c_NaT as NaT, is_null_datetimelike) + cdef float64_t INF = np.inf cdef float64_t NEGINF = -INF @@ -20,25 +22,6 @@ cdef float64_t NEGINF = -INF cdef int64_t NPY_NAT = util.get_nat() -cdef inline bint _check_all_nulls(object val): - """ utility to check if a value is any type of null """ - res: bint - - if isinstance(val, (float, complex)): - res = val != val - elif val is c_NaT: - res = 1 - elif val is None: - res = 1 - elif util.is_datetime64_object(val): - res = get_datetime64_value(val) == NPY_NAT - elif util.is_timedelta64_object(val): - res = get_timedelta64_value(val) == NPY_NAT - else: - res = 0 - return res - - cpdef bint checknull(object val): """ Return boolean describing of the input is NA-like, defined here as any @@ -62,18 +45,7 @@ cpdef bint checknull(object val): The difference between `checknull` and `checknull_old` is that `checknull` does *not* consider INF or NEGINF to be NA. """ - if util.is_float_object(val) or util.is_complex_object(val): - return val != val # and val != INF and val != NEGINF - elif util.is_datetime64_object(val): - return get_datetime64_value(val) == NPY_NAT - elif val is c_NaT: - return True - elif util.is_timedelta64_object(val): - return get_timedelta64_value(val) == NPY_NAT - elif util.is_array(val): - return False - else: - return val is None or util.is_nan(val) + return is_null_datetimelike(val, inat_is_null=False) cpdef bint checknull_old(object val): @@ -101,18 +73,11 @@ cpdef bint checknull_old(object val): The difference between `checknull` and `checknull_old` is that `checknull` does *not* consider INF or NEGINF to be NA. """ - if util.is_float_object(val) or util.is_complex_object(val): - return val != val or val == INF or val == NEGINF - elif util.is_datetime64_object(val): - return get_datetime64_value(val) == NPY_NAT - elif val is c_NaT: + if checknull(val): return True - elif util.is_timedelta64_object(val): - return get_timedelta64_value(val) == NPY_NAT - elif util.is_array(val): - return False - else: - return val is None or util.is_nan(val) + elif util.is_float_object(val) or util.is_complex_object(val): + return val == INF or val == NEGINF + return False cdef inline bint _check_none_nan_inf_neginf(object val): @@ -128,7 +93,7 @@ cdef inline bint _check_none_nan_inf_neginf(object val): cpdef ndarray[uint8_t] isnaobj(ndarray arr): """ Return boolean mask denoting which elements of a 1-D array are na-like, - according to the criteria defined in `_check_all_nulls`: + according to the criteria defined in `checknull`: - None - nan - NaT @@ -154,7 +119,7 @@ cpdef ndarray[uint8_t] isnaobj(ndarray arr): result = np.empty(n, dtype=np.uint8) for i in range(n): val = arr[i] - result[i] = _check_all_nulls(val) + result[i] = checknull(val) return result.view(np.bool_) @@ -189,7 +154,7 @@ def isnaobj_old(ndarray arr): result = np.zeros(n, dtype=np.uint8) for i in range(n): val = arr[i] - result[i] = val is c_NaT or _check_none_nan_inf_neginf(val) + result[i] = val is NaT or _check_none_nan_inf_neginf(val) return result.view(np.bool_) @@ -299,7 +264,7 @@ cdef inline bint is_null_datetime64(v): if checknull_with_nat(v): return True elif util.is_datetime64_object(v): - return v.view('int64') == NPY_NAT + return get_datetime64_value(v) == NPY_NAT return False @@ -309,7 +274,7 @@ cdef inline bint is_null_timedelta64(v): if checknull_with_nat(v): return True elif util.is_timedelta64_object(v): - return v.view('int64') == NPY_NAT + return get_timedelta64_value(v) == NPY_NAT return False diff --git a/pandas/_libs/tslibs/nattype.pxd b/pandas/_libs/tslibs/nattype.pxd index ee8d5ca3d861c..dae5bdc3f93b1 100644 --- a/pandas/_libs/tslibs/nattype.pxd +++ b/pandas/_libs/tslibs/nattype.pxd @@ -17,4 +17,4 @@ cdef _NaT c_NaT cdef bint checknull_with_nat(object val) -cpdef bint is_null_datetimelike(object val) +cpdef bint is_null_datetimelike(object val, bint inat_is_null=*) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index df083f27ad653..a55d15a7c4e85 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -14,6 +14,8 @@ cimport numpy as cnp from numpy cimport int64_t cnp.import_array() +from pandas._libs.tslibs.np_datetime cimport ( + get_datetime64_value, get_timedelta64_value) cimport pandas._libs.tslibs.util as util from pandas._libs.tslibs.util cimport ( get_nat, is_integer_object, is_float_object, is_datetime64_object, @@ -686,26 +688,30 @@ cdef inline bint checknull_with_nat(object val): return val is None or util.is_nan(val) or val is c_NaT -cpdef bint is_null_datetimelike(object val): +cpdef bint is_null_datetimelike(object val, bint inat_is_null=True): """ Determine if we have a null for a timedelta/datetime (or integer versions) Parameters ---------- val : object + inat_is_null : bool, default True + Whether to treat integer iNaT value as null Returns ------- null_datetimelike : bool """ - if val is None or util.is_nan(val): + if val is None: return True elif val is c_NaT: return True + elif util.is_float_object(val) or util.is_complex_object(val): + return val != val elif util.is_timedelta64_object(val): - return val.view('int64') == NPY_NAT + return get_timedelta64_value(val) == NPY_NAT elif util.is_datetime64_object(val): - return val.view('int64') == NPY_NAT - elif util.is_integer_object(val): + return get_datetime64_value(val) == NPY_NAT + elif inat_is_null and util.is_integer_object(val): return val == NPY_NAT return False diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index 0ba61fcc58f46..ef7065a44f18b 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -215,7 +215,8 @@ cdef inline bint is_offset_object(object val): cdef inline bint is_nan(object val): """ - Check if val is a Not-A-Number float, including float('NaN') and np.nan. + Check if val is a Not-A-Number float or complex, including + float('NaN') and np.nan. Parameters ---------- @@ -225,4 +226,4 @@ cdef inline bint is_nan(object val): ------- is_nan : bool """ - return is_float_object(val) and val != val + return (is_float_object(val) or is_complex_object(val)) and val != val diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 965e5e000d026..d913d2ad299ce 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -1,13 +1,14 @@ # -*- coding: utf-8 -*- from datetime import datetime +from decimal import Decimal from warnings import catch_warnings, filterwarnings, simplefilter import numpy as np import pytest from pandas._libs import missing as libmissing -from pandas._libs.tslib import iNaT +from pandas._libs.tslibs import iNaT, is_null_datetimelike from pandas.compat import u from pandas.core.dtypes.common import is_scalar @@ -392,3 +393,106 @@ def test_empty_like(self): expected = np.array([True]) self._check_behavior(arr, expected) + + +m8_units = ['as', 'ps', 'ns', 'us', 'ms', 's', + 'm', 'h', 'D', 'W', 'M', 'Y'] + +na_vals = [ + None, + NaT, + float('NaN'), + complex('NaN'), + np.nan, + np.float64('NaN'), + np.float32('NaN'), + np.complex64(np.nan), + np.complex128(np.nan), + np.datetime64('NaT'), + np.timedelta64('NaT'), +] + [ + np.datetime64('NaT', unit) for unit in m8_units +] + [ + np.timedelta64('NaT', unit) for unit in m8_units +] + +inf_vals = [ + float('inf'), + float('-inf'), + complex('inf'), + complex('-inf'), + np.inf, + np.NINF, +] + +int_na_vals = [ + # Values that match iNaT, which we treat as null in specific cases + np.int64(NaT.value), + int(NaT.value), +] + +sometimes_na_vals = [ + Decimal('NaN'), +] + +never_na_vals = [ + # float/complex values that when viewed as int64 match iNaT + -0.0, + np.float64('-0.0'), + -0j, + np.complex64(-0j), +] + + +class TestLibMissing(object): + def test_checknull(self): + for value in na_vals: + assert libmissing.checknull(value) + + for value in inf_vals: + assert not libmissing.checknull(value) + + for value in int_na_vals: + assert not libmissing.checknull(value) + + for value in sometimes_na_vals: + assert not libmissing.checknull(value) + + for value in never_na_vals: + assert not libmissing.checknull(value) + + def checknull_old(self): + for value in na_vals: + assert libmissing.checknull_old(value) + + for value in inf_vals: + assert libmissing.checknull_old(value) + + for value in int_na_vals: + assert not libmissing.checknull_old(value) + + for value in sometimes_na_vals: + assert not libmissing.checknull_old(value) + + for value in never_na_vals: + assert not libmissing.checknull_old(value) + + def test_is_null_datetimelike(self): + for value in na_vals: + assert is_null_datetimelike(value) + assert is_null_datetimelike(value, False) + + for value in inf_vals: + assert not is_null_datetimelike(value) + assert not is_null_datetimelike(value, False) + + for value in int_na_vals: + assert is_null_datetimelike(value) + assert not is_null_datetimelike(value, False) + + for value in sometimes_na_vals: + assert not is_null_datetimelike(value) + assert not is_null_datetimelike(value, False) + + for value in never_na_vals: + assert not is_null_datetimelike(value)