diff --git a/properties/test_encode_decode.py b/properties/test_encode_decode.py
index 13f63f259cf..4b9aa8928b4 100644
--- a/properties/test_encode_decode.py
+++ b/properties/test_encode_decode.py
@@ -4,8 +4,6 @@
 These ones pass, just as you'd hope!
 
 """
-from __future__ import absolute_import, division, print_function
-
 import hypothesis.extra.numpy as npst
 import hypothesis.strategies as st
 from hypothesis import given, settings
diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
index c4db95cfd4e..ac204df568f 100644
--- a/xarray/core/duck_array_ops.py
+++ b/xarray/core/duck_array_ops.py
@@ -63,19 +63,51 @@ def fail_on_dask_array_input(values, msg=None, func_name=None):
 
 around = _dask_or_eager_func('around')
 isclose = _dask_or_eager_func('isclose')
-notnull = _dask_or_eager_func('notnull', eager_module=pd)
-_isnull = _dask_or_eager_func('isnull', eager_module=pd)
+
+if hasattr(np, 'isnat') and (
+        dask_array is None or hasattr(dask_array_type, '__array_ufunc__')):
+    # np.isnat is available since NumPy 1.13, so __array_ufunc__ is always
+    # supported.
+    isnat = np.isnat
+else:
+    isnat = _dask_or_eager_func('isnull', eager_module=pd)
+isnan = _dask_or_eager_func('isnan')
+zeros_like = _dask_or_eager_func('zeros_like')
+
+
+pandas_isnull = _dask_or_eager_func('isnull', eager_module=pd)
 
 
 def isnull(data):
-    # GH837, GH861
-    # isnull fcn from pandas will throw TypeError when run on numpy structured
-    # array therefore for dims that are np structured arrays we assume all
-    # data is present
-    try:
-        return _isnull(data)
-    except TypeError:
-        return np.zeros(data.shape, dtype=bool)
+    data = asarray(data)
+    scalar_type = data.dtype.type
+    if issubclass(scalar_type, (np.datetime64, np.timedelta64)):
+        # datetime types use NaT for null
+        # note: must check timedelta64 before integers, because currently
+        # timedelta64 inherits from np.integer
+        return isnat(data)
+    elif issubclass(scalar_type, np.inexact):
+        # float types use NaN for null
+        return isnan(data)
+    elif issubclass(
+        scalar_type, (np.bool_, np.integer, np.character, np.void)
+    ):
+        # these types cannot represent missing values
+        return zeros_like(data, dtype=bool)
+    else:
+        # at this point, array should have dtype=object
+        if isinstance(data, (np.ndarray, dask_array_type)):
+            return pandas_isnull(data)
+        else:
+            # Not reachable yet, but intended for use with other duck array
+            # types. For full consistency with pandas, we should accept None as
+            # a null value as well as NaN, but it isn't clear how to do this
+            # with duck typing.
+            return data != data
+
+
+def notnull(data):
+    return ~isnull(data)
 
 
 transpose = _dask_or_eager_func('transpose')
diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py
index b0b7cfbd943..bb22a15e227 100644
--- a/xarray/tests/test_duck_array_ops.py
+++ b/xarray/tests/test_duck_array_ops.py
@@ -178,14 +178,18 @@ def test_wrong_shape(self):
         assert not array_notnull_equiv(a, b)
 
     @pytest.mark.parametrize("val1, val2, val3, null", [
-        (1, 2, 3, None),
+        (np.datetime64('2000'),
+         np.datetime64('2001'),
+         np.datetime64('2002'),
+         np.datetime64('NaT')),
         (1., 2., 3., np.nan),
-        (1., 2., 3., None),
         ('foo', 'bar', 'baz', None),
+        ('foo', 'bar', 'baz', np.nan),
     ])
     def test_types(self, val1, val2, val3, null):
-        arr1 = np.array([val1, null, val3, null])
-        arr2 = np.array([val1, val2, null, null])
+        dtype = object if isinstance(val1, str) else None
+        arr1 = np.array([val1, null, val3, null], dtype=dtype)
+        arr2 = np.array([val1, val2, null, null], dtype=dtype)
         assert array_notnull_equiv(arr1, arr2)
 
 
@@ -432,6 +436,19 @@ def test_argmin_max_error():
         da.argmin(dim='y')
 
 
+@pytest.mark.parametrize('array', [
+    np.array([np.datetime64('2000-01-01'), np.datetime64('NaT')]),
+    np.array([np.timedelta64(1, 'h'), np.timedelta64('NaT')]),
+    np.array([0.0, np.nan]),
+    np.array([1j, np.nan]),
+    np.array(['foo', np.nan], dtype=object),
+])
+def test_isnull(array):
+    expected = np.array([False, True])
+    actual = duck_array_ops.isnull(array)
+    np.testing.assert_equal(expected, actual)
+
+
 @requires_dask
 def test_isnull_with_dask():
     da = construct_dataarray(2, np.float32, contains_nan=True, dask=True)
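For reference, a minimal standalone sketch of the dtype dispatch that the rewritten isnull performs, written against plain NumPy/pandas only and ignoring the dask branch; the name isnull_sketch is illustrative and not part of the patch.

# Sketch (not part of the patch) of the dtype-based dispatch in the new
# duck_array_ops.isnull, using only NumPy and pandas.
import numpy as np
import pandas as pd


def isnull_sketch(data):
    data = np.asarray(data)
    scalar_type = data.dtype.type
    if issubclass(scalar_type, (np.datetime64, np.timedelta64)):
        # datetime-like arrays mark missing values with NaT; np.isnat needs
        # NumPy >= 1.13, hence the hasattr() guard in the patch
        return np.isnat(data)
    elif issubclass(scalar_type, np.inexact):
        # float and complex arrays mark missing values with NaN
        return np.isnan(data)
    elif issubclass(scalar_type, (np.bool_, np.integer, np.character, np.void)):
        # these dtypes cannot represent missing values at all
        return np.zeros(data.shape, dtype=bool)
    else:
        # object arrays fall back to pandas, which also treats None as null
        return pd.isnull(data)


print(isnull_sketch(np.array([np.datetime64('2000-01-01'), np.datetime64('NaT')])))
# -> [False  True]
print(isnull_sketch(np.array(['foo', np.nan], dtype=object)))
# -> [False  True]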